# Patch summary (commentary placed before the "diff --git" header; it is not
# part of either hunk):
#   * Hunk 1 adds "SALUTE_SPEECH" to the TranscriptionEngineEnum Literal.
#   * Hunk 2 appends four Schema subclasses after GoogleSpeechParams:
#     SaluteSpeechWord, SaluteSpeechSegment, SaluteSpeechResult and
#     SaluteSpeechParams.
# NOTE(review): this patch was recovered from a copy collapsed onto a single
#   line. Class-body indentation is reconstructed as four spaces -- confirm it
#   matches the target file before applying.
# NOTE(review): SaluteSpeechSegment.words defaults to a literal []. That is only
#   safe if Schema copies field defaults per instance (presumably a pydantic
#   model -- confirm); otherwise prefer a default factory.
diff --git a/cpv3/modules/transcription/schemas.py b/cpv3/modules/transcription/schemas.py
index d21aef3..4983ea1 100644
--- a/cpv3/modules/transcription/schemas.py
+++ b/cpv3/modules/transcription/schemas.py
@@ -7,7 +7,7 @@ from uuid import UUID
 
 from cpv3.common.schemas import Schema
 
-TranscriptionEngineEnum = Literal["LOCAL_WHISPER", "GOOGLE_SPEECH_CLOUD"]
+TranscriptionEngineEnum = Literal["LOCAL_WHISPER", "GOOGLE_SPEECH_CLOUD", "SALUTE_SPEECH"]
 
 
 class TranscriptionRead(Schema):
@@ -144,3 +144,31 @@ class GoogleSpeechResult(Schema):
 class GoogleSpeechParams(Schema):
     file_path: str
     language_codes: list[str] | None = None
+
+
+# ---------------------------------- SaluteSpeech Models ----------------------------------
+
+
+class SaluteSpeechWord(Schema):
+    word: str
+    start: float
+    end: float
+
+
+class SaluteSpeechSegment(Schema):
+    text: str
+    start: float
+    end: float
+    words: list[SaluteSpeechWord] = []
+
+
+class SaluteSpeechResult(Schema):
+    text: str
+    segments: list[SaluteSpeechSegment]
+    language: str
+
+
+class SaluteSpeechParams(Schema):
+    file_path: str
+    language: str | None = None
+    model: str = "general"