init: new structure + fix lint errors

This commit is contained in:
Daniil
2026-02-03 02:15:07 +03:00
commit 67e0f22b4f
89 changed files with 7654 additions and 0 deletions
+266
View File
@@ -0,0 +1,266 @@
from __future__ import annotations
import asyncio
from os import path
from tempfile import NamedTemporaryFile
from typing import Callable
import anyio
from cpv3.infrastructure.storage.base import StorageService
from cpv3.infrastructure.storage.types import FileInfo
from cpv3.modules.media.schemas import MediaProbeSchema
async def probe_media(storage: StorageService, *, file_key: str) -> MediaProbeSchema:
    """Run ``ffprobe`` against a stored media file and return the parsed report.

    The object is fetched with ``storage.download_to_temp`` and probed for its
    stream and container (format) details in JSON form, which is then
    validated into a ``MediaProbeSchema``. The temporary download is cleaned
    up whether or not probing succeeds.

    Args:
        storage: Storage backend used to fetch the file.
        file_key: Key of the media object to inspect.

    Returns:
        The ``ffprobe`` JSON output validated as ``MediaProbeSchema``.

    Raises:
        RuntimeError: If ``ffprobe`` exits with a non-zero status; the message
            carries its stderr output.
    """
    import json

    local_copy = await storage.download_to_temp(file_key)
    try:
        ffprobe_args = (
            "ffprobe",
            "-v",
            "error",
            "-show_streams",
            "-show_format",
            "-of",
            "json",
            local_copy.path,
        )
        process = await asyncio.create_subprocess_exec(
            *ffprobe_args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        out_bytes, err_bytes = await process.communicate()
        if process.returncode == 0:
            report = json.loads(out_bytes.decode())
            return MediaProbeSchema.model_validate(report)
        raise RuntimeError(f"ffprobe failed: {err_bytes.decode(errors='ignore')}")
    finally:
        # Always drop the temporary download, even when ffprobe failed.
        local_copy.cleanup()
def _compute_non_silent_segments(
    *,
    local_audio_path: str,
    min_silence_duration_ms: int,
    silence_threshold_db: int,
    padding_ms: int,
) -> list[tuple[int, int]]:
    """Locate the audible parts of a local media file.

    The file is loaded with pydub and scanned with
    ``silence.detect_nonsilent``. The silence threshold is relative: it sits
    ``silence_threshold_db`` below the file's average loudness (``dBFS``).
    Every detected range is widened by ``padding_ms`` on both sides, clamped
    to the file's duration, and ranges that overlap or touch after padding
    are merged so no part of the timeline is returned twice.

    Args:
        local_audio_path: Path of the local file to analyse.
        min_silence_duration_ms: Shortest pause that counts as silence.
        silence_threshold_db: How far below the average loudness a signal
            must be to count as silence.
        padding_ms: Context kept before and after each audible range.

    Returns:
        Ordered, non-overlapping ``(start_ms, end_ms)`` tuples. Empty when the
        file is empty or completely silent.
    """
    import math

    from pydub import AudioSegment, silence  # type: ignore[import-untyped]

    audio: AudioSegment = AudioSegment.from_file(local_audio_path)
    duration_ms = len(audio)

    # pydub reports -inf dBFS for digitally silent (or empty) audio, and
    # int(-inf) raises OverflowError. Such a file has nothing audible anyway.
    if duration_ms == 0 or not math.isfinite(audio.dBFS):
        return []

    raw_segments = silence.detect_nonsilent(
        audio_segment=audio,
        min_silence_len=min_silence_duration_ms,
        silence_thresh=int(audio.dBFS - silence_threshold_db),
    )

    segments: list[tuple[int, int]] = []
    for start_ms, end_ms in raw_segments:
        start = max(0, start_ms - padding_ms)
        end = min(duration_ms, end_ms + padding_ms)
        if end <= start:
            continue
        if segments and start <= segments[-1][1]:
            # Padding made this range run into the previous one; merge them
            # so the shared stretch is not emitted (and later encoded) twice.
            prev_start, prev_end = segments[-1]
            segments[-1] = (prev_start, max(prev_end, end))
        else:
            segments.append((start, end))
    return segments
async def remove_silence(
    storage: StorageService,
    *,
    file_key: str,
    out_folder: str,
    min_silence_duration_ms: int = 200,
    silence_threshold_db: int = 16,
    padding_ms: int = 100,
) -> FileInfo:
    """Cut the silent stretches out of a stored video and upload the result.

    The file is downloaded, its non-silent ranges are computed in a worker
    thread, and ffmpeg is invoked with one trimmed input per range, joined by
    a ``concat`` filter and re-encoded with libx264/aac. The output is
    uploaded under ``<out_folder>/silent/<original basename>``.

    Args:
        storage: Storage backend used for download, upload and file lookup.
        file_key: Key of the source media object.
        out_folder: Folder under which the ``silent`` sub-folder is placed.
        min_silence_duration_ms: Shortest pause treated as silence.
        silence_threshold_db: Silence threshold, forwarded to the detector.
        padding_ms: Context kept around every non-silent range.

    Returns:
        File info of the uploaded result, or of the original file when no
        non-silent range was found.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    import os

    source_tmp = await storage.download_to_temp(file_key)
    try:

        def _detect() -> list[tuple[int, int]]:
            return _compute_non_silent_segments(
                local_audio_path=source_tmp.path,
                min_silence_duration_ms=min_silence_duration_ms,
                silence_threshold_db=silence_threshold_db,
                padding_ms=padding_ms,
            )

        # Audio analysis is blocking work, so keep it off the event loop.
        kept_ranges = await anyio.to_thread.run_sync(_detect)
        if not kept_ranges:
            return await storage.get_file_info(file_key)

        # Only the name is needed here; ffmpeg writes the file itself.
        extension = path.splitext(file_key)[1] or ".mp4"
        with NamedTemporaryFile(suffix=extension, delete=False) as placeholder:
            result_path = placeholder.name

        try:
            # The same source is opened once per kept range, each time
            # trimmed with -ss/-t.
            ffmpeg_args: list[str] = ["ffmpeg"]
            for begin_ms, finish_ms in kept_ranges:
                offset_s = begin_ms / 1000.0
                length_s = (finish_ms - begin_ms) / 1000.0
                ffmpeg_args += [
                    "-ss",
                    f"{offset_s:.3f}",
                    "-t",
                    f"{length_s:.3f}",
                    "-y",
                    "-i",
                    source_tmp.path,
                ]

            input_count = len(kept_ranges)
            stream_labels = "".join(
                f"[{idx}:v:0][{idx}:a:0]" for idx in range(input_count)
            )
            concat_graph = stream_labels + f"concat=n={input_count}:v=1:a=1[v][a]"
            ffmpeg_args += [
                "-filter_complex",
                concat_graph,
                "-map",
                "[v]",
                "-map",
                "[a]",
                "-c:v",
                "libx264",
                "-c:a",
                "aac",
                "-preset",
                "medium",
                result_path,
            ]

            process = await asyncio.create_subprocess_exec(
                *ffmpeg_args,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            _, err_bytes = await process.communicate()
            if process.returncode != 0:
                raise RuntimeError(f"ffmpeg failed: {err_bytes.decode(errors='ignore')}")

            target_key = path.join(out_folder or "", "silent", path.basename(file_key))
            with open(result_path, "rb") as rendered:
                await storage.upload_fileobj(
                    fileobj=rendered,
                    file_name=path.basename(target_key),
                    folder=path.dirname(target_key),
                    gen_name=False,
                    content_type="video/mp4",
                )
            return await storage.get_file_info(target_key)
        finally:
            # The placeholder was created with delete=False; remove it here.
            if os.path.exists(result_path):
                os.remove(result_path)
    finally:
        source_tmp.cleanup()
async def convert_to_mp4(
    storage: StorageService, *, file_key: str, out_folder: str
) -> FileInfo:
    """Transcode a stored media file to MP4 (libx264/aac) and upload it.

    The result is uploaded as ``<out_folder>/converted/<source stem>.mp4``.
    Both the downloaded input and the local output file are removed before
    returning, on success and on failure alike.

    Args:
        storage: Storage backend used for download, upload and file lookup.
        file_key: Key of the source media object.
        out_folder: Folder under which the ``converted`` sub-folder is placed.

    Returns:
        File info of the uploaded MP4.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    import os

    source_tmp = await storage.download_to_temp(file_key)
    try:
        stem, _ext = path.splitext(path.basename(file_key))
        target_name = stem + ".mp4"

        # Only the name is needed here; ffmpeg (-y) overwrites the file.
        with NamedTemporaryFile(suffix=".mp4", delete=False) as placeholder:
            result_path = placeholder.name

        try:
            process = await asyncio.create_subprocess_exec(
                "ffmpeg",
                "-y",
                "-i",
                source_tmp.path,
                "-c:v",
                "libx264",
                "-c:a",
                "aac",
                "-preset",
                "medium",
                "-f",
                "mp4",
                result_path,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            _, err_bytes = await process.communicate()
            if process.returncode != 0:
                raise RuntimeError(f"ffmpeg failed: {err_bytes.decode(errors='ignore')}")

            target_key = path.join(out_folder or "", "converted", target_name)
            with open(result_path, "rb") as rendered:
                await storage.upload_fileobj(
                    fileobj=rendered,
                    file_name=target_name,
                    folder=path.dirname(target_key),
                    gen_name=False,
                    content_type="video/mp4",
                )
            return await storage.get_file_info(target_key)
        finally:
            # The placeholder was created with delete=False; remove it here.
            if os.path.exists(result_path):
                os.remove(result_path)
    finally:
        source_tmp.cleanup()
async def convert_to_ogg_temp(
    storage: StorageService, *, file_key: str
) -> tuple[str, Callable[[], None]]:
    """Transcode a stored media file to a local mono 16 kHz Opus ``.ogg`` file.

    The video stream is dropped (``-vn``) and the audio is encoded with
    libopus at 24 kbit/s. Nothing is uploaded: the caller receives the local
    path together with a callback that removes both the downloaded input and
    the produced ``.ogg`` file.

    Args:
        storage: Storage backend used to download the source file.
        file_key: Key of the source media object.

    Returns:
        ``(ogg_path, cleanup)``. The caller must invoke ``cleanup()`` once it
        is done with the file.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status. Temporary files
            are removed before the error propagates, since the caller never
            receives the cleanup callback in that case.
    """
    import os

    input_tmp = await storage.download_to_temp(file_key)

    try:
        # Only the name is needed here; ffmpeg (-y) overwrites the file.
        with NamedTemporaryFile(suffix=".ogg", delete=False) as out:
            out_path = out.name
    except BaseException:
        input_tmp.cleanup()
        raise

    def _cleanup() -> None:
        try:
            input_tmp.cleanup()
        finally:
            # Remove the output even if cleaning up the input failed.
            if os.path.exists(out_path):
                os.remove(out_path)

    try:
        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            input_tmp.path,
            "-c:a",
            "libopus",
            "-b:a",
            "24k",
            "-vn",
            "-ac",
            "1",
            "-ar",
            "16000",
            out_path,
        ]
        proc = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        _, stderr = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
    except BaseException:
        # BaseException also covers task cancellation. On any failure the
        # caller gets no cleanup callback, so release the temp files here.
        _cleanup()
        raise

    return out_path, _cleanup