Files
main_backend/cpv3/modules/transcription/schemas.py
T
2026-02-03 02:15:07 +03:00

147 lines
2.8 KiB
Python

from __future__ import annotations
from datetime import datetime
from typing import Literal
from uuid import UUID
from cpv3.common.schemas import Schema
# Engine identifiers accepted by the `engine` fields of the transcription
# schemas in this module. Despite the "Enum" suffix this is a typing.Literal
# alias over two string values, not an enum.Enum subclass.
TranscriptionEngineEnum = Literal["LOCAL_WHISPER", "GOOGLE_SPEECH_CLOUD"]
class TranscriptionRead(Schema):
    """Full representation of a transcription record.

    This is the "read" counterpart of ``TranscriptionCreate`` and
    ``TranscriptionUpdate``. No field declares a default; the ones annotated
    with ``| None`` accept ``None`` as a value.
    """

    # --- identity and relations ---
    id: UUID
    project_id: UUID | None
    source_file_id: UUID
    artifact_id: UUID | None

    # --- transcription payload ---
    engine: TranscriptionEngineEnum
    language: str | None
    # Untyped mapping; presumably shaped like the `Document` schema defined
    # in this module -- TODO confirm against the producer.
    document: dict
    transcribe_options: dict | None

    # --- record state ---
    is_active: bool
    created_at: datetime
    updated_at: datetime
class TranscriptionCreate(Schema):
    """Input schema for creating a transcription.

    Only ``source_file_id`` and ``document`` have no default. ``engine``
    defaults to ``"LOCAL_WHISPER"``; every other field defaults to ``None``.
    """

    project_id: UUID | None = None
    source_file_id: UUID  # no default
    artifact_id: UUID | None = None

    engine: TranscriptionEngineEnum = "LOCAL_WHISPER"
    language: str | None = None

    # Untyped mapping; presumably shaped like the `Document` schema defined
    # in this module -- TODO confirm against callers.
    document: dict  # no default
    transcribe_options: dict | None = None
class TranscriptionUpdate(Schema):
    """Input schema for updating a transcription.

    Both fields are optional and default to ``None``.
    """

    # NOTE(review): presumably ``None`` means "leave unchanged" -- confirm
    # with the code that applies this update.
    document: dict | None = None
    transcribe_options: dict | None = None
# ---------------------------------- Document ----------------------------------
class Tag(Schema):
    """A label identified by its name.

    Used as the element type of the ``semantic_tags`` and ``structure_tags``
    lists on the word, line and segment nodes in this module.
    """

    name: str
class TimeRange(Schema):
    """A ``start``/``end`` pair of floats.

    No ordering constraint between the two values is declared here.
    """

    # NOTE(review): units are not stated in this file -- presumably seconds
    # from the beginning of the media; confirm.
    start: float
    end: float
class WordNode(Schema):
    """A single word of the document tree.

    Carries its text, two tag lists and a time range. It declares no child
    list, unlike ``LineNode`` and ``SegmentNode``.
    """

    text: str

    semantic_tags: list[Tag]
    structure_tags: list[Tag]

    time: TimeRange
class LineNode(Schema):
    """A line of the document tree, made up of ``WordNode`` children.

    Has the same text, tag and time fields as ``WordNode``, plus ``words``.
    """

    text: str

    semantic_tags: list[Tag]
    structure_tags: list[Tag]

    time: TimeRange

    # Child nodes.
    words: list[WordNode]
class SegmentNode(Schema):
    """A segment of the document tree, made up of ``LineNode`` children.

    Has the same text, tag and time fields as ``LineNode``, plus ``lines``.
    """

    text: str

    semantic_tags: list[Tag]
    structure_tags: list[Tag]

    time: TimeRange

    # Child nodes.
    lines: list[LineNode]
class Document(Schema):
    """Root of the document tree: a list of segments.

    The nesting declared in this module is
    ``Document -> SegmentNode -> LineNode -> WordNode``.
    """

    segments: list[SegmentNode]
class WordOptions(Schema):
    """Options bundle with three defaulted settings.

    Defaults: ``highlight_words=False``, ``max_line_width=32``,
    ``max_line_count=2``.
    """

    highlight_words: bool = False

    # NOTE(review): the unit of the width is not stated in this file --
    # presumably characters per line; confirm with the consumer.
    max_line_width: int = 32
    max_line_count: int = 2
# ---------------------------------- Whisper Models ----------------------------------
class WhisperWord(Schema):
    """One word entry inside a ``WhisperSegment``.

    Holds the word text, its ``start``/``end`` floats and a ``probability``.
    """

    word: str

    # NOTE(review): presumably seconds -- units are not stated in this file.
    start: float
    end: float

    probability: float
class WhisperSegment(Schema):
    """One segment entry inside a ``WhisperResult``.

    All fields are declared without defaults.

    NOTE(review): the field names look like the per-segment output of the
    Whisper library -- confirm against the code that builds this schema.
    """

    # --- position ---
    id: int
    seek: int
    start: float
    end: float

    # --- content ---
    text: str
    tokens: list[int]

    # --- decoding statistics ---
    temperature: float
    avg_logprob: float
    compression_ratio: float
    no_speech_prob: float

    # NOTE(review): declared without a default -- confirm every producer
    # supplies word-level entries for each segment.
    words: list[WhisperWord]
class WhisperResult(Schema):
    """Top-level Whisper result: text, its segments and a language string.

    Has the same three fields as ``GoogleSpeechResult``, with
    ``WhisperSegment`` as the segment type.
    """

    text: str
    segments: list[WhisperSegment]
    language: str
class WhisperParams(Schema):
    """Parameters for a Whisper transcription.

    ``file_path`` has no default; ``language`` defaults to ``None`` and
    ``model_name`` to ``"tiny"``.
    """

    file_path: str

    # NOTE(review): presumably ``None`` lets the engine detect the language
    # itself -- confirm with the code that consumes these params.
    language: str | None = None
    model_name: str = "tiny"
# ---------------------------------- Google Speech Models ----------------------------------
class GoogleSpeechWord(Schema):
    """One word entry inside a ``GoogleSpeechSegment``.

    Same fields as ``WhisperWord`` except that no ``probability`` is declared.
    """

    word: str

    # NOTE(review): presumably seconds -- units are not stated in this file.
    start: float
    end: float
class GoogleSpeechSegment(Schema):
    """One segment entry inside a ``GoogleSpeechResult``.

    Holds the segment text, its ``start``/``end`` floats and its words.
    """

    text: str

    start: float
    end: float

    words: list[GoogleSpeechWord]
class GoogleSpeechResult(Schema):
    """Top-level Google Speech result: text, segments and a language string.

    Has the same three fields as ``WhisperResult``, with
    ``GoogleSpeechSegment`` as the segment type.
    """

    text: str
    segments: list[GoogleSpeechSegment]
    language: str
class GoogleSpeechParams(Schema):
    """Parameters for a Google Speech transcription.

    ``file_path`` has no default; ``language_codes`` defaults to ``None``.
    """

    file_path: str

    # NOTE(review): what ``None`` selects (a default language, or automatic
    # detection) is not visible in this file -- confirm with the consumer.
    language_codes: list[str] | None = None