from __future__ import annotations

from datetime import datetime
from typing import Literal
from uuid import UUID

from cpv3.common.schemas import Schema

# Closed set of engine identifiers accepted by the ``engine`` fields below.
TranscriptionEngineEnum = Literal["LOCAL_WHISPER", "GOOGLE_SPEECH_CLOUD"]


class TranscriptionRead(Schema):
    """Read-side view of a transcription.

    No field declares a default; the ``| None`` fields are nullable.
    """

    id: UUID
    project_id: UUID | None
    source_file_id: UUID
    artifact_id: UUID | None
    engine: TranscriptionEngineEnum
    language: str | None
    # Untyped dict; presumably follows the ``Document`` shape declared
    # further down -- TODO confirm against the producers of this field.
    document: dict
    transcribe_options: dict | None
    is_active: bool
    created_at: datetime
    updated_at: datetime


class TranscriptionCreate(Schema):
    """Payload for creating a transcription.

    ``source_file_id`` and ``document`` are the only fields without a
    default; ``engine`` falls back to ``"LOCAL_WHISPER"``.
    """

    project_id: UUID | None = None
    source_file_id: UUID
    artifact_id: UUID | None = None
    engine: TranscriptionEngineEnum = "LOCAL_WHISPER"
    language: str | None = None
    document: dict
    transcribe_options: dict | None = None


class TranscriptionUpdate(Schema):
    """Update payload; both fields are optional and default to ``None``."""

    # NOTE(review): whether ``None`` means "leave unchanged" is decided by
    # the consumer of this schema, not here -- confirm before relying on it.
    document: dict | None = None
    transcribe_options: dict | None = None


# ---------------------------------- Document ----------------------------------


class Tag(Schema):
    """A tag identified only by its name."""

    name: str


class TimeRange(Schema):
    """A ``start``/``end`` pair of floats.

    Units are not stated in this module (presumably seconds -- confirm).
    """

    start: float
    end: float


class WordNode(Schema):
    """Leaf node of the document tree: one word with its tags and timing."""

    text: str
    semantic_tags: list[Tag]
    structure_tags: list[Tag]
    time: TimeRange


class LineNode(Schema):
    """Middle node of the document tree: a line made of ``WordNode`` items."""

    text: str
    semantic_tags: list[Tag]
    structure_tags: list[Tag]
    time: TimeRange
    words: list[WordNode]


class SegmentNode(Schema):
    """Top-level node of the document tree: a segment made of lines."""

    text: str
    semantic_tags: list[Tag]
    structure_tags: list[Tag]
    time: TimeRange
    lines: list[LineNode]


class Document(Schema):
    """Root of the document tree: segments -> lines -> words."""

    segments: list[SegmentNode]


class WordOptions(Schema):
    """Word/line layout options; every field has a default."""

    highlight_words: bool = False
    max_line_width: int = 32
    max_line_count: int = 2


# ------------------------------- Whisper Models -------------------------------
# NOTE(review): the field names below look like the result structure of the
# Whisper ``transcribe`` call -- confirm against the library version in use.


class WhisperWord(Schema):
    """One word entry with timing and a probability value."""

    word: str
    start: float
    end: float
    probability: float


class WhisperSegment(Schema):
    """One segment entry: text, timing, decoding statistics and its words."""

    id: int
    seek: int
    start: float
    end: float
    text: str
    tokens: list[int]
    temperature: float
    avg_logprob: float
    compression_ratio: float
    no_speech_prob: float
    words: list[WhisperWord]


class WhisperResult(Schema):
    """Whole result: full text, its segments and the language string."""

    text: str
    segments: list[WhisperSegment]
    language: str


class WhisperParams(Schema):
    """Whisper input parameters.

    Only ``file_path`` is without a default; ``model_name`` defaults to
    ``"tiny"`` and ``language`` to ``None``.
    """

    file_path: str
    language: str | None = None
    model_name: str = "tiny"


# ---------------------------- Google Speech Models ----------------------------


class GoogleSpeechWord(Schema):
    """One word entry with its start and end values."""

    word: str
    start: float
    end: float


class GoogleSpeechSegment(Schema):
    """One segment entry: text, timing and the words it contains."""

    text: str
    start: float
    end: float
    words: list[GoogleSpeechWord]


class GoogleSpeechResult(Schema):
    """Whole result: full text, its segments and the language string."""

    text: str
    segments: list[GoogleSpeechSegment]
    language: str


class GoogleSpeechParams(Schema):
    """Google Speech input parameters.

    ``file_path`` is without a default; ``language_codes`` defaults to
    ``None``.
    """

    file_path: str
    language_codes: list[str] | None = None