test(backend): add SaluteSpeech parsing and document building tests
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,99 @@
|
|||||||
|
"""Tests for SaluteSpeech result parsing and document building."""
|
||||||
|
|
||||||
|
from cpv3.modules.transcription.service import (
|
||||||
|
_build_document_from_salute_result,
|
||||||
|
_parse_salute_time,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseSaluteTime:
    """Exercise ``_parse_salute_time`` on ``"<number>s"`` timestamp strings."""

    def test_simple_timestamp(self):
        """A fractional value with a trailing zero maps to the same float."""
        parsed = _parse_salute_time("0.480s")
        assert parsed == 0.48

    def test_zero(self):
        """An all-zero timestamp maps to 0.0."""
        parsed = _parse_salute_time("0.000s")
        assert parsed == 0.0

    def test_large_timestamp(self):
        """A value above one hundred seconds keeps its fractional part."""
        parsed = _parse_salute_time("123.456s")
        assert parsed == 123.456

    def test_integer_timestamp(self):
        """A timestamp without a decimal point is still returned as 5.0."""
        parsed = _parse_salute_time("5s")
        assert parsed == 5.0
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildDocumentFromSaluteResult:
    """Exercise ``_build_document_from_salute_result`` on small hand-built payloads."""

    def _make_raw_result(self):
        """Return a minimal SaluteSpeech-style payload: one channel, two results."""
        first_result = {
            "text": "привет мир",
            "normalized_text": "Привет мир.",
            "start": "0.480s",
            "end": "1.200s",
            "word_alignments": [
                {"word": "привет", "start": "0.480s", "end": "0.840s"},
                {"word": "мир", "start": "0.960s", "end": "1.200s"},
            ],
        }
        second_result = {
            "text": "это тест",
            "normalized_text": "Это тест.",
            "start": "1.500s",
            "end": "2.100s",
            "word_alignments": [
                {"word": "это", "start": "1.500s", "end": "1.700s"},
                {"word": "тест", "start": "1.800s", "end": "2.100s"},
            ],
        }
        channel_entry = {
            "results": [first_result, second_result],
            "channel": 0,
        }
        return [channel_entry]

    def test_returns_document_with_segments(self):
        """Two results in the payload yield a document with two segments."""
        payload = self._make_raw_result()
        document = _build_document_from_salute_result(payload, language="ru-RU")
        segment_count = len(document.segments)
        assert segment_count == 2

    def test_segment_text(self):
        """The first line of the first segment carries the raw ``text`` field."""
        payload = self._make_raw_result()
        document = _build_document_from_salute_result(payload, language="ru-RU")
        opening_line = document.segments[0].lines[0]
        assert opening_line.text == "привет мир"

    def test_word_timestamps(self):
        """The first word keeps its text and its parsed start/end times."""
        payload = self._make_raw_result()
        document = _build_document_from_salute_result(payload, language="ru-RU")
        opening_line = document.segments[0].lines[0]
        first_word = opening_line.words[0]
        assert first_word.text == "привет"
        assert first_word.time.start == 0.48
        assert first_word.time.end == 0.84

    def test_segment_time_range(self):
        """The first segment spans the first result's start and end times."""
        payload = self._make_raw_result()
        document = _build_document_from_salute_result(payload, language="ru-RU")
        opening_segment = document.segments[0]
        assert opening_segment.time.start == 0.48
        assert opening_segment.time.end == 1.2

    def test_empty_results(self):
        """A channel with an empty ``results`` list produces no segments."""
        payload = [{"results": [], "channel": 0}]
        document = _build_document_from_salute_result(payload, language="ru-RU")
        segment_count = len(document.segments)
        assert segment_count == 0

    def test_missing_word_alignments(self):
        """A result without ``word_alignments`` still produces one timed segment."""
        result_without_words = {
            "text": "привет",
            "normalized_text": "Привет.",
            "start": "0.000s",
            "end": "0.500s",
        }
        payload = [
            {
                "results": [result_without_words],
                "channel": 0,
            }
        ]
        document = _build_document_from_salute_result(payload, language="ru-RU")
        segment_count = len(document.segments)
        assert segment_count == 1
        assert document.segments[0].time.start == 0.0
|
||||||
Reference in New Issue
Block a user