new features

This commit is contained in:
Daniil
2026-02-27 23:33:56 +03:00
parent 937e58859a
commit dc04efe0fb
41 changed files with 2067 additions and 141 deletions
+8
View File
@@ -32,6 +32,14 @@ class TranscriptionRepository:
)
return result.scalar_one_or_none()
async def get_by_artifact_id(self, artifact_id: uuid.UUID) -> Transcription | None:
    """Fetch the active transcription attached to the given artifact.

    Args:
        artifact_id: Identifier of the artifact whose transcription is wanted.

    Returns:
        The matching ``Transcription`` with ``is_active`` set, or ``None``
        when no row matches.
    """
    # Both criteria are ANDed together: same artifact, active rows only.
    query = select(Transcription).where(
        Transcription.artifact_id == artifact_id,
        Transcription.is_active.is_(True),
    )
    rows = await self._session.execute(query)
    # NOTE(review): assuming a SQLAlchemy result object, this raises if more
    # than one active row exists for the artifact - confirm that is intended.
    return rows.scalar_one_or_none()
async def create(self, data: TranscriptionCreate) -> Transcription:
transcription = Transcription(
project_id=data.project_id,
+15
View File
@@ -67,6 +67,21 @@ async def retrieve_transcription_entry(
return TranscriptionRead.model_validate(transcription)
@router.get("/transcriptions/by-artifact/{artifact_id}/", response_model=TranscriptionRead)
async def retrieve_transcription_by_artifact(
    artifact_id: uuid.UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> TranscriptionRead:
    # Looks up the transcription for an artifact through the repository and
    # serialises it; answers 404 "Not found" when the repository returns None.
    #
    # `current_user` is resolved through `get_current_user` but its value is
    # not used here (presumably the dependency only enforces authentication -
    # confirm against `get_current_user`).
    _ = current_user

    found = await TranscriptionRepository(db).get_by_artifact_id(artifact_id)
    if found is not None:
        return TranscriptionRead.model_validate(found)
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
@router.patch("/transcriptions/{transcription_id}/", response_model=TranscriptionRead)
async def patch_transcription_entry(
transcription_id: uuid.UUID,
+35 -8
View File
@@ -240,11 +240,15 @@ def _make_document_from_segments(
return Document(segments=result_segments)
# Progress reporter: called with the completion percentage as a float (0.0-100.0).
ProgressCallback = Callable[[float], None]
def _whisper_transcribe_sync(
*,
local_file_path: str,
model_name: str,
language: str | None,
on_progress: ProgressCallback | None = None,
) -> Document:
import whisper # type: ignore[import-untyped]
@@ -267,14 +271,35 @@ def _whisper_transcribe_sync(
probs = cast(dict[str, float], probs_raw)
language = max(probs, key=lambda k: probs[k])
result = whisper.transcribe(
audio=whisper.load_audio(local_file_path),
model=model,
word_timestamps=True,
temperature=0.2,
language=language,
verbose=False,
)
if on_progress is not None:
from unittest.mock import patch
from tqdm import tqdm as _orig_tqdm
class _ProgressTqdm(_orig_tqdm):
def update(self, n=1):
super().update(n)
if self.total:
on_progress(min(self.n / self.total * 100.0, 100.0))
with patch("whisper.transcribe.tqdm.tqdm", _ProgressTqdm):
result = whisper.transcribe(
audio=whisper.load_audio(local_file_path),
model=model,
word_timestamps=True,
temperature=0.2,
language=language,
verbose=False,
)
on_progress(100.0)
else:
result = whisper.transcribe(
audio=whisper.load_audio(local_file_path),
model=model,
word_timestamps=True,
temperature=0.2,
language=language,
verbose=None,
)
parsed = WhisperResult.model_validate(result)
@@ -296,6 +321,7 @@ async def transcribe_with_whisper(
file_key: str,
model_name: str = "tiny",
language: str | None = None,
on_progress: ProgressCallback | None = None,
) -> Document:
tmp = await storage.download_to_temp(file_key)
try:
@@ -304,6 +330,7 @@ async def transcribe_with_whisper(
local_file_path=tmp.path,
model_name=model_name,
language=language,
on_progress=on_progress,
)
)
finally: