from __future__ import annotations import asyncio from os import path from tempfile import NamedTemporaryFile from typing import Callable import anyio from cpv3.infrastructure.storage.base import StorageService from cpv3.infrastructure.storage.types import FileInfo from cpv3.modules.media.schemas import MediaProbeSchema async def probe_media(storage: StorageService, *, file_key: str) -> MediaProbeSchema: tmp = await storage.download_to_temp(file_key) try: proc = await asyncio.create_subprocess_exec( "ffprobe", "-v", "error", "-show_streams", "-show_format", "-of", "json", tmp.path, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) stdout, stderr = await proc.communicate() if proc.returncode != 0: raise RuntimeError(f"ffprobe failed: {stderr.decode(errors='ignore')}") import json raw = json.loads(stdout.decode()) return MediaProbeSchema.model_validate(raw) finally: tmp.cleanup() def _compute_non_silent_segments( *, local_audio_path: str, min_silence_duration_ms: int, silence_threshold_db: int, padding_ms: int, ) -> list[tuple[int, int]]: from pydub import AudioSegment, silence # type: ignore[import-untyped] audio: AudioSegment = AudioSegment.from_file(local_audio_path) duration_ms = len(audio) raw_segments = silence.detect_nonsilent( audio_segment=audio, min_silence_len=min_silence_duration_ms, silence_thresh=int(audio.dBFS - silence_threshold_db), ) segments: list[tuple[int, int]] = [] for start_ms, end_ms in raw_segments: start = max(0, start_ms - padding_ms) end = min(duration_ms, end_ms + padding_ms) if end > start: segments.append((start, end)) return segments async def remove_silence( storage: StorageService, *, file_key: str, out_folder: str, min_silence_duration_ms: int = 200, silence_threshold_db: int = 16, padding_ms: int = 100, ) -> FileInfo: input_tmp = await storage.download_to_temp(file_key) try: segments = await anyio.to_thread.run_sync( lambda: _compute_non_silent_segments( local_audio_path=input_tmp.path, min_silence_duration_ms=min_silence_duration_ms, silence_threshold_db=silence_threshold_db, padding_ms=padding_ms, ) ) if not segments: return await storage.get_file_info(file_key) with NamedTemporaryFile( suffix=path.splitext(file_key)[1] or ".mp4", delete=False ) as out: out_path = out.name try: cmd: list[str] = ["ffmpeg"] for start_ms, end_ms in segments: start_s = start_ms / 1000.0 duration_s = (end_ms - start_ms) / 1000.0 cmd.extend( [ "-ss", f"{start_s:.3f}", "-t", f"{duration_s:.3f}", "-y", "-i", input_tmp.path, ] ) seg_count = len(segments) parts = [f"[{i}:v:0][{i}:a:0]" for i in range(seg_count)] filter_complex = "".join(parts) + f"concat=n={seg_count}:v=1:a=1[v][a]" cmd.extend( [ "-filter_complex", filter_complex, "-map", "[v]", "-map", "[a]", "-c:v", "libx264", "-c:a", "aac", "-preset", "medium", out_path, ] ) proc = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) _, stderr = await proc.communicate() if proc.returncode != 0: raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}") output_key = path.join(out_folder or "", "silent", path.basename(file_key)) with open(out_path, "rb") as out_file: _ = await storage.upload_fileobj( fileobj=out_file, file_name=path.basename(output_key), folder=path.dirname(output_key), gen_name=False, content_type="video/mp4", ) return await storage.get_file_info(output_key) finally: import os if os.path.exists(out_path): os.remove(out_path) finally: input_tmp.cleanup() async def convert_to_mp4( storage: StorageService, *, file_key: str, out_folder: str ) -> FileInfo: input_tmp = await storage.download_to_temp(file_key) try: filename_without_ext = path.splitext(path.basename(file_key))[0] mp4_filename = filename_without_ext + ".mp4" with NamedTemporaryFile(suffix=".mp4", delete=False) as out: out_path = out.name try: cmd = [ "ffmpeg", "-y", "-i", input_tmp.path, "-c:v", "libx264", "-c:a", "aac", "-preset", "medium", "-f", "mp4", out_path, ] proc = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) _, stderr = await proc.communicate() if proc.returncode != 0: raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}") output_key = path.join(out_folder or "", "converted", mp4_filename) with open(out_path, "rb") as out_file: _ = await storage.upload_fileobj( fileobj=out_file, file_name=mp4_filename, folder=path.dirname(output_key), gen_name=False, content_type="video/mp4", ) return await storage.get_file_info(output_key) finally: import os if os.path.exists(out_path): os.remove(out_path) finally: input_tmp.cleanup() async def convert_to_ogg_temp( storage: StorageService, *, file_key: str ) -> tuple[str, Callable[[], None]]: input_tmp = await storage.download_to_temp(file_key) filename_without_ext = path.splitext(path.basename(file_key))[0] with NamedTemporaryFile(suffix=".ogg", delete=False) as out: out_path = out.name async def _run() -> None: cmd = [ "ffmpeg", "-y", "-i", input_tmp.path, "-c:a", "libopus", "-b:a", "24k", "-vn", "-ac", "1", "-ar", "16000", out_path, ] proc = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) _, stderr = await proc.communicate() if proc.returncode != 0: raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}") await _run() def _cleanup() -> None: import os input_tmp.cleanup() if os.path.exists(out_path): os.remove(out_path) _ = filename_without_ext return out_path, _cleanup