# Files
# main_backend/tests/integration/test_salutespeech_parsing.py
# 2026-04-04 00:04:38 +03:00
#
# 100 lines
# 3.5 KiB
# Python

"""Tests for SaluteSpeech result parsing and document building."""
from cpv3.modules.transcription.service import (
_build_document_from_salute_result,
_parse_salute_time,
)
class TestParseSaluteTime:
    """Checks that ``_parse_salute_time`` turns ``"<seconds>s"`` strings into floats."""

    def test_simple_timestamp(self):
        """A fractional timestamp with a trailing zero parses to 0.48."""
        parsed = _parse_salute_time("0.480s")
        assert parsed == 0.48

    def test_zero(self):
        """An all-zero timestamp parses to 0.0."""
        parsed = _parse_salute_time("0.000s")
        assert parsed == 0.0

    def test_large_timestamp(self):
        """A timestamp above one hundred seconds keeps its fractional part."""
        parsed = _parse_salute_time("123.456s")
        assert parsed == 123.456

    def test_integer_timestamp(self):
        """A timestamp without a decimal point parses to 5.0."""
        parsed = _parse_salute_time("5s")
        assert parsed == 5.0
class TestBuildDocumentFromSaluteResult:
    """Checks the document produced by ``_build_document_from_salute_result``."""

    def _make_raw_result(self):
        """Return a minimal SaluteSpeech API response: one channel, two phrases."""
        greeting = {
            "text": "привет мир",
            "normalized_text": "Привет мир.",
            "start": "0.480s",
            "end": "1.200s",
            "word_alignments": [
                {"word": "привет", "start": "0.480s", "end": "0.840s"},
                {"word": "мир", "start": "0.960s", "end": "1.200s"},
            ],
        }
        statement = {
            "text": "это тест",
            "normalized_text": "Это тест.",
            "start": "1.500s",
            "end": "2.100s",
            "word_alignments": [
                {"word": "это", "start": "1.500s", "end": "1.700s"},
                {"word": "тест", "start": "1.800s", "end": "2.100s"},
            ],
        }
        return [{"results": [greeting, statement], "channel": 0}]

    def _build(self, payload):
        """Build a document from *payload* with the ``ru-RU`` language tag."""
        return _build_document_from_salute_result(payload, language="ru-RU")

    def test_returns_document_with_segments(self):
        """The two-phrase fixture yields two segments."""
        document = self._build(self._make_raw_result())
        assert len(document.segments) == 2

    def test_segment_text(self):
        """The first line of the first segment carries the raw phrase text."""
        document = self._build(self._make_raw_result())
        opening_line = document.segments[0].lines[0]
        assert opening_line.text == "привет мир"

    def test_word_timestamps(self):
        """The first word keeps its text and its parsed start/end times."""
        document = self._build(self._make_raw_result())
        opening_word = document.segments[0].lines[0].words[0]
        assert opening_word.text == "привет"
        word_span = opening_word.time
        assert word_span.start == 0.48
        assert word_span.end == 0.84

    def test_segment_time_range(self):
        """The first segment spans from 0.48 to 1.2 seconds."""
        document = self._build(self._make_raw_result())
        segment_span = document.segments[0].time
        assert segment_span.start == 0.48
        assert segment_span.end == 1.2

    def test_empty_results(self):
        """A channel with an empty ``results`` list yields no segments."""
        payload = [{"results": [], "channel": 0}]
        document = self._build(payload)
        assert len(document.segments) == 0

    def test_missing_word_alignments(self):
        """A phrase without ``word_alignments`` still yields one segment starting at 0.0."""
        phrase = {
            "text": "привет",
            "normalized_text": "Привет.",
            "start": "0.000s",
            "end": "0.500s",
        }
        payload = [{"results": [phrase], "channel": 0}]
        document = self._build(payload)
        assert len(document.segments) == 1
        assert document.segments[0].time.start == 0.0