267 lines
7.7 KiB
Python
267 lines
7.7 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from os import path
|
|
from tempfile import NamedTemporaryFile
|
|
from typing import Callable
|
|
|
|
import anyio
|
|
|
|
from cpv3.infrastructure.storage.base import StorageService
|
|
from cpv3.infrastructure.storage.types import FileInfo
|
|
from cpv3.modules.media.schemas import MediaProbeSchema
|
|
|
|
|
|
async def probe_media(storage: StorageService, *, file_key: str) -> MediaProbeSchema:
    """Run ``ffprobe`` on a stored file and return the validated report.

    The object is fetched with ``storage.download_to_temp`` and probed with
    ``ffprobe -v error -show_streams -show_format -of json``. The JSON written
    to stdout is parsed and handed to ``MediaProbeSchema.model_validate``.
    The temporary download is cleaned up whether or not probing succeeds.

    Args:
        storage: Storage backend used to fetch the file.
        file_key: Key of the file to probe.

    Returns:
        The schema instance built from ffprobe's JSON output.

    Raises:
        RuntimeError: If ``ffprobe`` exits with a non-zero status; the message
            carries its stderr output.
    """
    import json

    downloaded = await storage.download_to_temp(file_key)
    try:
        ffprobe_args = (
            "ffprobe",
            "-v",
            "error",
            "-show_streams",
            "-show_format",
            "-of",
            "json",
            downloaded.path,
        )
        process = await asyncio.create_subprocess_exec(
            *ffprobe_args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        out_bytes, err_bytes = await process.communicate()

        if process.returncode == 0:
            report = json.loads(out_bytes.decode())
            return MediaProbeSchema.model_validate(report)

        raise RuntimeError(f"ffprobe failed: {err_bytes.decode(errors='ignore')}")
    finally:
        downloaded.cleanup()
|
|
|
|
|
|
def _compute_non_silent_segments(
    *,
    local_audio_path: str,
    min_silence_duration_ms: int,
    silence_threshold_db: int,
    padding_ms: int,
) -> list[tuple[int, int]]:
    """Return the padded, non-overlapping non-silent ranges of a media file.

    The file is loaded with pydub and scanned with
    ``silence.detect_nonsilent``. The silence threshold is relative: it sits
    ``silence_threshold_db`` dB below the clip's overall ``dBFS`` loudness.
    Every detected range is widened by ``padding_ms`` on both sides and
    clamped to ``[0, duration]``. Ranges that touch or overlap after padding
    are merged so that no part of the timeline is listed twice.

    Args:
        local_audio_path: Path of a local file pydub can decode.
        min_silence_duration_ms: Passed to pydub as ``min_silence_len``.
        silence_threshold_db: How far below the clip's ``dBFS`` the silence
            threshold is placed.
        padding_ms: Milliseconds added before and after every detected range.

    Returns:
        ``(start_ms, end_ms)`` tuples. The list is empty when pydub reports
        the clip as having no measurable loudness or finds no non-silent
        range.
    """
    from pydub import AudioSegment, silence  # type: ignore[import-untyped]

    audio: AudioSegment = AudioSegment.from_file(local_audio_path)
    duration_ms = len(audio)

    # pydub reports dBFS as -inf when the clip's RMS is zero (e.g. an empty or
    # all-zero clip). int(-inf) raises OverflowError, and such a clip has no
    # non-silent range anyway, so answer with "nothing to keep".
    loudness_dbfs = audio.dBFS
    if loudness_dbfs == float("-inf"):
        return []

    raw_segments = silence.detect_nonsilent(
        audio_segment=audio,
        min_silence_len=min_silence_duration_ms,
        silence_thresh=int(loudness_dbfs - silence_threshold_db),
    )

    # NOTE(review): merging relies on detect_nonsilent yielding ranges in
    # ascending order, which is what pydub is expected to do — confirm.
    segments: list[tuple[int, int]] = []
    for start_ms, end_ms in raw_segments:
        start = max(0, start_ms - padding_ms)
        end = min(duration_ms, end_ms + padding_ms)
        if end <= start:
            continue

        if segments and start <= segments[-1][1]:
            # Padding made this range reach into (or touch) the previous one.
            # Extend the previous range instead of emitting an overlapping
            # one, otherwise the shared part would be played twice once the
            # ranges are concatenated.
            prev_start, prev_end = segments[-1]
            segments[-1] = (prev_start, max(prev_end, end))
        else:
            segments.append((start, end))

    return segments
|
|
|
|
|
|
async def remove_silence(
    storage: StorageService,
    *,
    file_key: str,
    out_folder: str,
    min_silence_duration_ms: int = 200,
    silence_threshold_db: int = 16,
    padding_ms: int = 100,
) -> FileInfo:
    """Cut the silent stretches out of a stored video and upload the result.

    The source is fetched with ``storage.download_to_temp`` and analysed in a
    worker thread by ``_compute_non_silent_segments``. The kept ranges are
    then stitched together by a single ``ffmpeg`` run: one trimmed input per
    range, joined with the ``concat`` filter and re-encoded with ``libx264``
    video and ``aac`` audio. The filter graph references each input's first
    video and first audio stream.

    Args:
        storage: Storage backend used to download the source and upload the
            result.
        file_key: Key of the source file. Its extension (``.mp4`` when there
            is none) is used as the suffix of the ffmpeg output file.
        out_folder: Folder under which the ``silent`` sub-folder is placed.
        min_silence_duration_ms: Forwarded to the segment detection.
        silence_threshold_db: Forwarded to the segment detection.
        padding_ms: Forwarded to the segment detection.

    Returns:
        ``FileInfo`` for ``<out_folder>/silent/<basename of file_key>``. When
        no non-silent range is found nothing is encoded and the info of the
        original ``file_key`` is returned instead.

    Raises:
        RuntimeError: If ``ffmpeg`` exits with a non-zero status; the message
            carries its stderr output.
    """
    import mimetypes
    import os

    input_tmp = await storage.download_to_temp(file_key)

    try:
        segments = await anyio.to_thread.run_sync(
            lambda: _compute_non_silent_segments(
                local_audio_path=input_tmp.path,
                min_silence_duration_ms=min_silence_duration_ms,
                silence_threshold_db=silence_threshold_db,
                padding_ms=padding_ms,
            )
        )

        if not segments:
            return await storage.get_file_info(file_key)

        # Only the path is needed: the placeholder is closed right away and
        # ffmpeg (run with -y) overwrites it.
        with NamedTemporaryFile(
            suffix=path.splitext(file_key)[1] or ".mp4", delete=False
        ) as out:
            out_path = out.name

        try:
            # -y is a global option, so it is passed once rather than per input.
            cmd: list[str] = ["ffmpeg", "-y"]
            for start_ms, end_ms in segments:
                start_s = start_ms / 1000.0
                duration_s = (end_ms - start_ms) / 1000.0
                # Each kept range becomes its own trimmed input of the same file.
                cmd.extend(
                    [
                        "-ss",
                        f"{start_s:.3f}",
                        "-t",
                        f"{duration_s:.3f}",
                        "-i",
                        input_tmp.path,
                    ]
                )

            seg_count = len(segments)
            parts = [f"[{i}:v:0][{i}:a:0]" for i in range(seg_count)]
            filter_complex = "".join(parts) + f"concat=n={seg_count}:v=1:a=1[v][a]"

            cmd.extend(
                [
                    "-filter_complex",
                    filter_complex,
                    "-map",
                    "[v]",
                    "-map",
                    "[a]",
                    "-c:v",
                    "libx264",
                    "-c:a",
                    "aac",
                    "-preset",
                    "medium",
                    out_path,
                ]
            )

            proc = await asyncio.create_subprocess_exec(
                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
            )
            _, stderr = await proc.communicate()
            if proc.returncode != 0:
                raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")

            # The output container follows the source extension, so it is not
            # necessarily MP4. Advertise the type that matches the file that
            # was actually written, keeping "video/mp4" as the fallback for
            # extensions the mimetypes table does not know.
            content_type = mimetypes.guess_type(out_path)[0] or "video/mp4"

            output_key = path.join(out_folder or "", "silent", path.basename(file_key))
            with open(out_path, "rb") as out_file:
                _ = await storage.upload_fileobj(
                    fileobj=out_file,
                    file_name=path.basename(output_key),
                    folder=path.dirname(output_key),
                    gen_name=False,
                    content_type=content_type,
                )

            return await storage.get_file_info(output_key)
        finally:
            # Best-effort removal; the file may already be gone.
            try:
                os.remove(out_path)
            except FileNotFoundError:
                pass
    finally:
        input_tmp.cleanup()
|
|
|
|
|
|
async def convert_to_mp4(
    storage: StorageService, *, file_key: str, out_folder: str
) -> FileInfo:
    """Transcode a stored file to MP4 and upload it under ``converted/``.

    The source is fetched with ``storage.download_to_temp`` and re-encoded by
    ``ffmpeg`` (``-c:v libx264 -c:a aac -preset medium -f mp4``) into a local
    temporary file. That file is uploaded as
    ``<out_folder>/converted/<source stem>.mp4`` with content type
    ``video/mp4``. Both temporary files are removed afterwards, whether or
    not the conversion succeeded.

    Args:
        storage: Storage backend used for the download and the upload.
        file_key: Key of the file to convert.
        out_folder: Folder under which the ``converted`` sub-folder is placed.

    Returns:
        ``FileInfo`` of the uploaded MP4, as reported by
        ``storage.get_file_info``.

    Raises:
        RuntimeError: If ``ffmpeg`` exits with a non-zero status; the message
            carries its stderr output.
    """
    import os

    source_tmp = await storage.download_to_temp(file_key)
    try:
        stem = path.splitext(path.basename(file_key))[0]
        mp4_filename = stem + ".mp4"

        # Reserve a path only; the handle is closed on leaving the block and
        # ffmpeg (run with -y) overwrites the empty placeholder.
        with NamedTemporaryFile(suffix=".mp4", delete=False) as placeholder:
            out_path = placeholder.name

        try:
            ffmpeg = await asyncio.create_subprocess_exec(
                "ffmpeg",
                "-y",
                "-i",
                source_tmp.path,
                "-c:v",
                "libx264",
                "-c:a",
                "aac",
                "-preset",
                "medium",
                "-f",
                "mp4",
                out_path,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            _stdout, err_output = await ffmpeg.communicate()
            if ffmpeg.returncode != 0:
                raise RuntimeError(
                    f"ffmpeg failed: {err_output.decode(errors='ignore')}"
                )

            output_key = path.join(out_folder or "", "converted", mp4_filename)
            target_folder = path.dirname(output_key)
            with open(out_path, "rb") as encoded:
                await storage.upload_fileobj(
                    fileobj=encoded,
                    file_name=mp4_filename,
                    folder=target_folder,
                    gen_name=False,
                    content_type="video/mp4",
                )

            return await storage.get_file_info(output_key)
        finally:
            if os.path.exists(out_path):
                os.remove(out_path)
    finally:
        source_tmp.cleanup()
|
|
|
|
|
|
async def convert_to_ogg_temp(
    storage: StorageService, *, file_key: str
) -> tuple[str, Callable[[], None]]:
    """Transcode a stored file to a local Opus ``.ogg`` temp file.

    The source is fetched with ``storage.download_to_temp`` and converted by
    ``ffmpeg`` with video dropped (``-vn``), the ``libopus`` codec at
    ``24k`` bitrate, one channel (``-ac 1``) and a 16000 Hz sample rate
    (``-ar 16000``).

    Args:
        storage: Storage backend used to fetch the file.
        file_key: Key of the file to convert.

    Returns:
        ``(out_path, cleanup)``: the path of the ``.ogg`` file and a callback
        that releases the downloaded source and deletes the ``.ogg`` file.
        The caller owns both files until it invokes ``cleanup``.

    Raises:
        RuntimeError: If ``ffmpeg`` exits with a non-zero status; the message
            carries its stderr output. The temporary files are released
            before the error propagates.
    """
    import os

    input_tmp = await storage.download_to_temp(file_key)
    out_path: str | None = None

    def _cleanup() -> None:
        # Reads out_path at call time, so it also works when the output file
        # was never created.
        input_tmp.cleanup()
        if out_path is not None and os.path.exists(out_path):
            os.remove(out_path)

    try:
        # Only the path is needed: the placeholder is closed right away and
        # ffmpeg (run with -y) overwrites it.
        with NamedTemporaryFile(suffix=".ogg", delete=False) as out:
            out_path = out.name

        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            input_tmp.path,
            "-c:a",
            "libopus",
            "-b:a",
            "24k",
            "-vn",
            "-ac",
            "1",
            "-ar",
            "16000",
            out_path,
        ]

        proc = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        _, stderr = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
    except BaseException:
        # On failure (including task cancellation) the caller never receives
        # the cleanup callback, so the temp files must be released here or
        # they would be leaked.
        _cleanup()
        raise

    return out_path, _cleanup
|