"""Tests for SaluteSpeech result parsing and document building."""

from cpv3.modules.transcription.service import (
    _build_document_from_salute_result,
    _parse_salute_time,
)


class TestParseSaluteTime:
    """Checks for ``_parse_salute_time`` on ``"<seconds>s"`` strings."""

    def test_simple_timestamp(self):
        """A fractional value with a trailing zero parses to 0.48."""
        parsed = _parse_salute_time("0.480s")
        assert parsed == 0.48

    def test_zero(self):
        """The zero timestamp parses to 0.0."""
        parsed = _parse_salute_time("0.000s")
        assert parsed == 0.0

    def test_large_timestamp(self):
        """A value above one hundred seconds keeps its fractional part."""
        parsed = _parse_salute_time("123.456s")
        assert parsed == 123.456

    def test_integer_timestamp(self):
        """A value without a decimal point parses to 5.0."""
        parsed = _parse_salute_time("5s")
        assert parsed == 5.0


class TestBuildDocumentFromSaluteResult:
    """Checks for ``_build_document_from_salute_result``."""

    @staticmethod
    def _word(text, start, end):
        """Return one ``word_alignments`` entry of the fixture payload."""
        return {"word": text, "start": start, "end": end}

    @staticmethod
    def _phrase(text, normalized_text, start, end, word_alignments):
        """Return one ``results`` entry of the fixture payload."""
        return {
            "text": text,
            "normalized_text": normalized_text,
            "start": start,
            "end": end,
            "word_alignments": word_alignments,
        }

    def _make_raw_result(self):
        """Minimal SaluteSpeech API response for testing.

        Builds a fresh single-channel payload with two recognised phrases
        of two aligned words each, so a test may mutate it freely.
        """
        greeting = self._phrase(
            "привет мир",
            "Привет мир.",
            "0.480s",
            "1.200s",
            [
                self._word("привет", "0.480s", "0.840s"),
                self._word("мир", "0.960s", "1.200s"),
            ],
        )
        statement = self._phrase(
            "это тест",
            "Это тест.",
            "1.500s",
            "2.100s",
            [
                self._word("это", "1.500s", "1.700s"),
                self._word("тест", "1.800s", "2.100s"),
            ],
        )
        return [{"results": [greeting, statement], "channel": 0}]

    def _build_default_document(self):
        """Build a document from the default fixture with language ru-RU."""
        return _build_document_from_salute_result(
            self._make_raw_result(),
            language="ru-RU",
        )

    def test_returns_document_with_segments(self):
        """The two-phrase fixture yields two segments."""
        document = self._build_default_document()
        assert len(document.segments) == 2

    def test_segment_text(self):
        """The first line of the first segment carries the raw phrase text."""
        document = self._build_default_document()
        first_line = document.segments[0].lines[0]
        assert first_line.text == "привет мир"

    def test_word_timestamps(self):
        """The first word exposes its text and its start/end times."""
        document = self._build_default_document()
        word = document.segments[0].lines[0].words[0]
        assert word.text == "привет"
        assert word.time.start == 0.48
        assert word.time.end == 0.84

    def test_segment_time_range(self):
        """The first segment's time range matches the first phrase."""
        document = self._build_default_document()
        first_segment = document.segments[0]
        assert first_segment.time.start == 0.48
        assert first_segment.time.end == 1.2

    def test_empty_results(self):
        """A channel with no results yields a document with no segments."""
        payload = [{"results": [], "channel": 0}]
        document = _build_document_from_salute_result(payload, language="ru-RU")
        assert len(document.segments) == 0

    def test_missing_word_alignments(self):
        """A phrase without ``word_alignments`` still yields one segment."""
        phrase = {
            "text": "привет",
            "normalized_text": "Привет.",
            "start": "0.000s",
            "end": "0.500s",
        }
        payload = [{"results": [phrase], "channel": 0}]
        document = _build_document_from_salute_result(payload, language="ru-RU")
        assert len(document.segments) == 1
        assert document.segments[0].time.start == 0.0