init: new structure + fix lint errors
This commit is contained in:
@@ -0,0 +1,266 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from os import path
|
||||
from tempfile import NamedTemporaryFile
|
||||
from typing import Callable
|
||||
|
||||
import anyio
|
||||
|
||||
from cpv3.infrastructure.storage.base import StorageService
|
||||
from cpv3.infrastructure.storage.types import FileInfo
|
||||
from cpv3.modules.media.schemas import MediaProbeSchema
|
||||
|
||||
|
||||
async def probe_media(storage: StorageService, *, file_key: str) -> MediaProbeSchema:
|
||||
tmp = await storage.download_to_temp(file_key)
|
||||
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
"ffprobe",
|
||||
"-v",
|
||||
"error",
|
||||
"-show_streams",
|
||||
"-show_format",
|
||||
"-of",
|
||||
"json",
|
||||
tmp.path,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
stdout, stderr = await proc.communicate()
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError(f"ffprobe failed: {stderr.decode(errors='ignore')}")
|
||||
|
||||
import json
|
||||
|
||||
raw = json.loads(stdout.decode())
|
||||
return MediaProbeSchema.model_validate(raw)
|
||||
finally:
|
||||
tmp.cleanup()
|
||||
|
||||
|
||||
def _compute_non_silent_segments(
|
||||
*,
|
||||
local_audio_path: str,
|
||||
min_silence_duration_ms: int,
|
||||
silence_threshold_db: int,
|
||||
padding_ms: int,
|
||||
) -> list[tuple[int, int]]:
|
||||
from pydub import AudioSegment, silence # type: ignore[import-untyped]
|
||||
|
||||
audio: AudioSegment = AudioSegment.from_file(local_audio_path)
|
||||
duration_ms = len(audio)
|
||||
|
||||
raw_segments = silence.detect_nonsilent(
|
||||
audio_segment=audio,
|
||||
min_silence_len=min_silence_duration_ms,
|
||||
silence_thresh=int(audio.dBFS - silence_threshold_db),
|
||||
)
|
||||
|
||||
segments: list[tuple[int, int]] = []
|
||||
for start_ms, end_ms in raw_segments:
|
||||
start = max(0, start_ms - padding_ms)
|
||||
end = min(duration_ms, end_ms + padding_ms)
|
||||
if end > start:
|
||||
segments.append((start, end))
|
||||
|
||||
return segments
|
||||
|
||||
|
||||
async def remove_silence(
|
||||
storage: StorageService,
|
||||
*,
|
||||
file_key: str,
|
||||
out_folder: str,
|
||||
min_silence_duration_ms: int = 200,
|
||||
silence_threshold_db: int = 16,
|
||||
padding_ms: int = 100,
|
||||
) -> FileInfo:
|
||||
input_tmp = await storage.download_to_temp(file_key)
|
||||
|
||||
try:
|
||||
segments = await anyio.to_thread.run_sync(
|
||||
lambda: _compute_non_silent_segments(
|
||||
local_audio_path=input_tmp.path,
|
||||
min_silence_duration_ms=min_silence_duration_ms,
|
||||
silence_threshold_db=silence_threshold_db,
|
||||
padding_ms=padding_ms,
|
||||
)
|
||||
)
|
||||
|
||||
if not segments:
|
||||
return await storage.get_file_info(file_key)
|
||||
|
||||
with NamedTemporaryFile(
|
||||
suffix=path.splitext(file_key)[1] or ".mp4", delete=False
|
||||
) as out:
|
||||
out_path = out.name
|
||||
|
||||
try:
|
||||
cmd: list[str] = ["ffmpeg"]
|
||||
for start_ms, end_ms in segments:
|
||||
start_s = start_ms / 1000.0
|
||||
duration_s = (end_ms - start_ms) / 1000.0
|
||||
cmd.extend(
|
||||
[
|
||||
"-ss",
|
||||
f"{start_s:.3f}",
|
||||
"-t",
|
||||
f"{duration_s:.3f}",
|
||||
"-y",
|
||||
"-i",
|
||||
input_tmp.path,
|
||||
]
|
||||
)
|
||||
|
||||
seg_count = len(segments)
|
||||
parts = [f"[{i}:v:0][{i}:a:0]" for i in range(seg_count)]
|
||||
filter_complex = "".join(parts) + f"concat=n={seg_count}:v=1:a=1[v][a]"
|
||||
|
||||
cmd.extend(
|
||||
[
|
||||
"-filter_complex",
|
||||
filter_complex,
|
||||
"-map",
|
||||
"[v]",
|
||||
"-map",
|
||||
"[a]",
|
||||
"-c:v",
|
||||
"libx264",
|
||||
"-c:a",
|
||||
"aac",
|
||||
"-preset",
|
||||
"medium",
|
||||
out_path,
|
||||
]
|
||||
)
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
||||
)
|
||||
_, stderr = await proc.communicate()
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
|
||||
|
||||
output_key = path.join(out_folder or "", "silent", path.basename(file_key))
|
||||
with open(out_path, "rb") as out_file:
|
||||
_ = await storage.upload_fileobj(
|
||||
fileobj=out_file,
|
||||
file_name=path.basename(output_key),
|
||||
folder=path.dirname(output_key),
|
||||
gen_name=False,
|
||||
content_type="video/mp4",
|
||||
)
|
||||
|
||||
return await storage.get_file_info(output_key)
|
||||
finally:
|
||||
import os
|
||||
|
||||
if os.path.exists(out_path):
|
||||
os.remove(out_path)
|
||||
finally:
|
||||
input_tmp.cleanup()
|
||||
|
||||
|
||||
async def convert_to_mp4(
|
||||
storage: StorageService, *, file_key: str, out_folder: str
|
||||
) -> FileInfo:
|
||||
input_tmp = await storage.download_to_temp(file_key)
|
||||
|
||||
try:
|
||||
filename_without_ext = path.splitext(path.basename(file_key))[0]
|
||||
mp4_filename = filename_without_ext + ".mp4"
|
||||
|
||||
with NamedTemporaryFile(suffix=".mp4", delete=False) as out:
|
||||
out_path = out.name
|
||||
|
||||
try:
|
||||
cmd = [
|
||||
"ffmpeg",
|
||||
"-y",
|
||||
"-i",
|
||||
input_tmp.path,
|
||||
"-c:v",
|
||||
"libx264",
|
||||
"-c:a",
|
||||
"aac",
|
||||
"-preset",
|
||||
"medium",
|
||||
"-f",
|
||||
"mp4",
|
||||
out_path,
|
||||
]
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
||||
)
|
||||
_, stderr = await proc.communicate()
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
|
||||
|
||||
output_key = path.join(out_folder or "", "converted", mp4_filename)
|
||||
with open(out_path, "rb") as out_file:
|
||||
_ = await storage.upload_fileobj(
|
||||
fileobj=out_file,
|
||||
file_name=mp4_filename,
|
||||
folder=path.dirname(output_key),
|
||||
gen_name=False,
|
||||
content_type="video/mp4",
|
||||
)
|
||||
|
||||
return await storage.get_file_info(output_key)
|
||||
finally:
|
||||
import os
|
||||
|
||||
if os.path.exists(out_path):
|
||||
os.remove(out_path)
|
||||
finally:
|
||||
input_tmp.cleanup()
|
||||
|
||||
|
||||
async def convert_to_ogg_temp(
|
||||
storage: StorageService, *, file_key: str
|
||||
) -> tuple[str, Callable[[], None]]:
|
||||
input_tmp = await storage.download_to_temp(file_key)
|
||||
|
||||
filename_without_ext = path.splitext(path.basename(file_key))[0]
|
||||
with NamedTemporaryFile(suffix=".ogg", delete=False) as out:
|
||||
out_path = out.name
|
||||
|
||||
async def _run() -> None:
|
||||
cmd = [
|
||||
"ffmpeg",
|
||||
"-y",
|
||||
"-i",
|
||||
input_tmp.path,
|
||||
"-c:a",
|
||||
"libopus",
|
||||
"-b:a",
|
||||
"24k",
|
||||
"-vn",
|
||||
"-ac",
|
||||
"1",
|
||||
"-ar",
|
||||
"16000",
|
||||
out_path,
|
||||
]
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
||||
)
|
||||
_, stderr = await proc.communicate()
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
|
||||
|
||||
await _run()
|
||||
|
||||
def _cleanup() -> None:
|
||||
import os
|
||||
|
||||
input_tmp.cleanup()
|
||||
if os.path.exists(out_path):
|
||||
os.remove(out_path)
|
||||
|
||||
_ = filename_without_ext
|
||||
return out_path, _cleanup
|
||||
Reference in New Issue
Block a user