init: new structure + fix lint errors

2026-02-03 02:15:07 +03:00
commit 67e0f22b4f
89 changed files with 7654 additions and 0 deletions
@@ -0,0 +1,266 @@
+from __future__ import annotations
+
+import asyncio
+from os import path
+from tempfile import NamedTemporaryFile
+from typing import Callable
+
+import anyio
+
+from cpv3.infrastructure.storage.base import StorageService
+from cpv3.infrastructure.storage.types import FileInfo
+from cpv3.modules.media.schemas import MediaProbeSchema
+
+
+async def probe_media(storage: StorageService, *, file_key: str) -> MediaProbeSchema:
+    tmp = await storage.download_to_temp(file_key)
+
+    try:
+        proc = await asyncio.create_subprocess_exec(
+            "ffprobe",
+            "-v",
+            "error",
+            "-show_streams",
+            "-show_format",
+            "-of",
+            "json",
+            tmp.path,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        stdout, stderr = await proc.communicate()
+        if proc.returncode != 0:
+            raise RuntimeError(f"ffprobe failed: {stderr.decode(errors='ignore')}")
+
+        import json
+
+        raw = json.loads(stdout.decode())
+        return MediaProbeSchema.model_validate(raw)
+    finally:
+        tmp.cleanup()
+
+
+def _compute_non_silent_segments(
+    *,
+    local_audio_path: str,
+    min_silence_duration_ms: int,
+    silence_threshold_db: int,
+    padding_ms: int,
+) -> list[tuple[int, int]]:
+    from pydub import AudioSegment, silence  # type: ignore[import-untyped]
+
+    audio: AudioSegment = AudioSegment.from_file(local_audio_path)
+    duration_ms = len(audio)
+
+    raw_segments = silence.detect_nonsilent(
+        audio_segment=audio,
+        min_silence_len=min_silence_duration_ms,
+        silence_thresh=int(audio.dBFS - silence_threshold_db),
+    )
+
+    segments: list[tuple[int, int]] = []
+    for start_ms, end_ms in raw_segments:
+        start = max(0, start_ms - padding_ms)
+        end = min(duration_ms, end_ms + padding_ms)
+        if end > start:
+            segments.append((start, end))
+
+    return segments
+
+
+async def remove_silence(
+    storage: StorageService,
+    *,
+    file_key: str,
+    out_folder: str,
+    min_silence_duration_ms: int = 200,
+    silence_threshold_db: int = 16,
+    padding_ms: int = 100,
+) -> FileInfo:
+    input_tmp = await storage.download_to_temp(file_key)
+
+    try:
+        segments = await anyio.to_thread.run_sync(
+            lambda: _compute_non_silent_segments(
+                local_audio_path=input_tmp.path,
+                min_silence_duration_ms=min_silence_duration_ms,
+                silence_threshold_db=silence_threshold_db,
+                padding_ms=padding_ms,
+            )
+        )
+
+        if not segments:
+            return await storage.get_file_info(file_key)
+
+        with NamedTemporaryFile(
+            suffix=path.splitext(file_key)[1] or ".mp4", delete=False
+        ) as out:
+            out_path = out.name
+
+        try:
+            cmd: list[str] = ["ffmpeg"]
+            for start_ms, end_ms in segments:
+                start_s = start_ms / 1000.0
+                duration_s = (end_ms - start_ms) / 1000.0
+                cmd.extend(
+                    [
+                        "-ss",
+                        f"{start_s:.3f}",
+                        "-t",
+                        f"{duration_s:.3f}",
+                        "-y",
+                        "-i",
+                        input_tmp.path,
+                    ]
+                )
+
+            seg_count = len(segments)
+            parts = [f"[{i}:v:0][{i}:a:0]" for i in range(seg_count)]
+            filter_complex = "".join(parts) + f"concat=n={seg_count}:v=1:a=1[v][a]"
+
+            cmd.extend(
+                [
+                    "-filter_complex",
+                    filter_complex,
+                    "-map",
+                    "[v]",
+                    "-map",
+                    "[a]",
+                    "-c:v",
+                    "libx264",
+                    "-c:a",
+                    "aac",
+                    "-preset",
+                    "medium",
+                    out_path,
+                ]
+            )
+
+            proc = await asyncio.create_subprocess_exec(
+                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+            )
+            _, stderr = await proc.communicate()
+            if proc.returncode != 0:
+                raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
+
+            output_key = path.join(out_folder or "", "silent", path.basename(file_key))
+            with open(out_path, "rb") as out_file:
+                _ = await storage.upload_fileobj(
+                    fileobj=out_file,
+                    file_name=path.basename(output_key),
+                    folder=path.dirname(output_key),
+                    gen_name=False,
+                    content_type="video/mp4",
+                )
+
+            return await storage.get_file_info(output_key)
+        finally:
+            import os
+
+            if os.path.exists(out_path):
+                os.remove(out_path)
+    finally:
+        input_tmp.cleanup()
+
+
+async def convert_to_mp4(
+    storage: StorageService, *, file_key: str, out_folder: str
+) -> FileInfo:
+    input_tmp = await storage.download_to_temp(file_key)
+
+    try:
+        filename_without_ext = path.splitext(path.basename(file_key))[0]
+        mp4_filename = filename_without_ext + ".mp4"
+
+        with NamedTemporaryFile(suffix=".mp4", delete=False) as out:
+            out_path = out.name
+
+        try:
+            cmd = [
+                "ffmpeg",
+                "-y",
+                "-i",
+                input_tmp.path,
+                "-c:v",
+                "libx264",
+                "-c:a",
+                "aac",
+                "-preset",
+                "medium",
+                "-f",
+                "mp4",
+                out_path,
+            ]
+
+            proc = await asyncio.create_subprocess_exec(
+                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+            )
+            _, stderr = await proc.communicate()
+            if proc.returncode != 0:
+                raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
+
+            output_key = path.join(out_folder or "", "converted", mp4_filename)
+            with open(out_path, "rb") as out_file:
+                _ = await storage.upload_fileobj(
+                    fileobj=out_file,
+                    file_name=mp4_filename,
+                    folder=path.dirname(output_key),
+                    gen_name=False,
+                    content_type="video/mp4",
+                )
+
+            return await storage.get_file_info(output_key)
+        finally:
+            import os
+
+            if os.path.exists(out_path):
+                os.remove(out_path)
+    finally:
+        input_tmp.cleanup()
+
+
+async def convert_to_ogg_temp(
+    storage: StorageService, *, file_key: str
+) -> tuple[str, Callable[[], None]]:
+    input_tmp = await storage.download_to_temp(file_key)
+
+    filename_without_ext = path.splitext(path.basename(file_key))[0]
+    with NamedTemporaryFile(suffix=".ogg", delete=False) as out:
+        out_path = out.name
+
+    async def _run() -> None:
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-i",
+            input_tmp.path,
+            "-c:a",
+            "libopus",
+            "-b:a",
+            "24k",
+            "-vn",
+            "-ac",
+            "1",
+            "-ar",
+            "16000",
+            out_path,
+        ]
+
+        proc = await asyncio.create_subprocess_exec(
+            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+        )
+        _, stderr = await proc.communicate()
+        if proc.returncode != 0:
+            raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
+
+    await _run()
+
+    def _cleanup() -> None:
+        import os
+
+        input_tmp.cleanup()
+        if os.path.exists(out_path):
+            os.remove(out_path)
+
+    _ = filename_without_ext
+    return out_path, _cleanup