feat(media): add silence detection/cutting and 1fps frame extraction services
This commit is contained in:
@@ -1,15 +1,30 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import glob as glob_mod
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
from os import path
|
||||
from tempfile import NamedTemporaryFile
|
||||
from tempfile import NamedTemporaryFile, mkdtemp
|
||||
from typing import Callable
|
||||
|
||||
import anyio
|
||||
|
||||
from cpv3.infrastructure.storage.base import StorageService
|
||||
from cpv3.infrastructure.storage.types import FileInfo
|
||||
from cpv3.modules.media.schemas import MediaProbeSchema
|
||||
from cpv3.modules.media.schemas import FrameSpriteMetadata, MediaProbeSchema
|
||||
|
||||
# Width in pixels of each extracted preview frame (height follows aspect ratio
# via the ffmpeg scale=WIDTH:-1 filter below).
FRAME_WIDTH_PX = 128
# Frame sampling rate for extraction: one frame per second of video.
FRAME_FPS = 1
# ffmpeg -q:v JPEG quality (valid range is roughly 2-31, lower = better);
# 5 trades file size for acceptable thumbnail quality.
FRAME_JPEG_QUALITY = 5
# Name of the JSON file written next to the frames holding FrameSpriteMetadata.
FRAMES_META_FILENAME = "meta.json"
|
||||
|
||||
|
||||
def get_frames_folder(user_folder: str, file_key: str) -> str:
    """Return the deterministic S3 folder where a file's frames are stored.

    The leaf folder name is the first 16 hex characters of the SHA-256 of
    ``file_key``, so repeated extractions for the same key always land in
    the same place.
    """
    short_hash = hashlib.sha256(file_key.encode()).hexdigest()[:16]
    return path.join(user_folder, "frames", short_hash)
|
||||
|
||||
|
||||
async def probe_media(storage: StorageService, *, file_key: str) -> MediaProbeSchema:
|
||||
@@ -68,6 +83,160 @@ def _compute_non_silent_segments(
|
||||
return segments
|
||||
|
||||
|
||||
async def detect_silence(
    storage: StorageService,
    *,
    file_key: str,
    min_silence_duration_ms: int = 200,
    silence_threshold_db: int = 16,
    padding_ms: int = 100,
) -> dict:
    """Detect silent segments in a media file and return their intervals.

    Downloads the file, measures its total duration with pydub, asks
    ``_compute_non_silent_segments`` for the audible spans, then inverts
    those spans into the silent gaps between them.
    """
    local_file = await storage.download_to_temp(file_key)

    try:
        from pydub import AudioSegment  # type: ignore[import-untyped]

        # Decode in a worker thread so the event loop stays responsive.
        # NOTE(review): this full decode is only used for len() — confirm the
        # helper below could not also report duration to avoid decoding twice.
        track: AudioSegment = await anyio.to_thread.run_sync(
            lambda: AudioSegment.from_file(local_file.path)
        )
        total_ms = len(track)

        audible = await anyio.to_thread.run_sync(
            lambda: _compute_non_silent_segments(
                local_audio_path=local_file.path,
                min_silence_duration_ms=min_silence_duration_ms,
                silence_threshold_db=silence_threshold_db,
                padding_ms=padding_ms,
            )
        )

        # The gaps between consecutive audible spans are the silent segments.
        silences: list[dict[str, int]] = []
        cursor = 0
        for span_start, span_end in audible:
            if span_start > cursor:
                silences.append({"start_ms": cursor, "end_ms": span_start})
            cursor = span_end
        if cursor < total_ms:
            silences.append({"start_ms": cursor, "end_ms": total_ms})

        return {
            "silent_segments": silences,
            "duration_ms": total_ms,
            "file_key": file_key,
        }
    finally:
        local_file.cleanup()
|
||||
|
||||
|
||||
async def apply_silence_cuts(
    storage: StorageService,
    *,
    file_key: str,
    out_folder: str,
    cuts: list[dict],
    output_name: str | None = None,
) -> FileInfo:
    """Apply explicit cut regions to a media file, concatenating the non-cut parts.

    Each cut is a mapping with ``start_ms``/``end_ms``. The keep segments
    between cuts are re-encoded (libx264/aac) and joined with ffmpeg's
    concat filter, then uploaded under ``<out_folder>/silent/<name>``.

    Args:
        storage: Storage backend used for download/upload.
        file_key: Key of the source media file.
        out_folder: Destination folder key for the result.
        cuts: Regions to remove, each ``{"start_ms": int, "end_ms": int}``.
        output_name: Optional output file name; defaults to the source name.

    Raises:
        RuntimeError: if ffmpeg exits non-zero.
    """
    import os  # hoisted out of `finally` so cleanup cannot trip on an import

    input_tmp = await storage.download_to_temp(file_key)

    try:
        from pydub import AudioSegment  # type: ignore[import-untyped]

        audio: AudioSegment = await anyio.to_thread.run_sync(
            lambda: AudioSegment.from_file(input_tmp.path)
        )
        duration_ms = len(audio)

        # Sort cuts and compute the complementary (keep) segments.
        sorted_cuts = sorted(cuts, key=lambda c: c["start_ms"])
        segments: list[tuple[int, int]] = []
        prev_end = 0
        for cut in sorted_cuts:
            cut_start = max(0, cut["start_ms"])
            cut_end = min(duration_ms, cut["end_ms"])
            # Fix: an inverted cut (end_ms < start_ms) previously left
            # prev_end behind the segment just emitted, duplicating content
            # in the next iteration; clamp the cut to be non-negative length.
            cut_end = max(cut_start, cut_end)
            if cut_start > prev_end:
                segments.append((prev_end, cut_start))
            prev_end = max(prev_end, cut_end)
        if prev_end < duration_ms:
            segments.append((prev_end, duration_ms))

        # Everything was cut away: return the original file untouched.
        if not segments:
            return await storage.get_file_info(file_key)

        with NamedTemporaryFile(
            suffix=path.splitext(file_key)[1] or ".mp4", delete=False
        ) as out:
            out_path = out.name

        try:
            # -y is a global ffmpeg option; one occurrence suffices (it was
            # previously repeated per input, which ffmpeg tolerates but is
            # redundant).
            cmd: list[str] = ["ffmpeg", "-y"]
            for start_ms, end_ms in segments:
                start_s = start_ms / 1000.0
                duration_s = (end_ms - start_ms) / 1000.0
                # Input-side seeking: -ss/-t before each -i is fast and
                # accurate enough since the output is re-encoded.
                cmd.extend(
                    [
                        "-ss",
                        f"{start_s:.3f}",
                        "-t",
                        f"{duration_s:.3f}",
                        "-i",
                        input_tmp.path,
                    ]
                )

            seg_count = len(segments)
            parts = [f"[{i}:v:0][{i}:a:0]" for i in range(seg_count)]
            filter_complex = "".join(parts) + f"concat=n={seg_count}:v=1:a=1[v][a]"

            cmd.extend(
                [
                    "-filter_complex",
                    filter_complex,
                    "-map",
                    "[v]",
                    "-map",
                    "[a]",
                    "-c:v",
                    "libx264",
                    "-c:a",
                    "aac",
                    "-preset",
                    "medium",
                    out_path,
                ]
            )

            proc = await asyncio.create_subprocess_exec(
                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
            )
            _, stderr = await proc.communicate()
            if proc.returncode != 0:
                raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")

            base_name = output_name or path.basename(file_key)
            output_key = path.join(out_folder or "", "silent", base_name)
            with open(out_path, "rb") as out_file:
                # NOTE(review): content type is hardcoded even when the input
                # suffix is not .mp4 — confirm callers only send mp4-compatible
                # media, or derive the type from the suffix.
                _ = await storage.upload_fileobj(
                    fileobj=out_file,
                    file_name=path.basename(output_key),
                    folder=path.dirname(output_key),
                    gen_name=False,
                    content_type="video/mp4",
                )

            return await storage.get_file_info(output_key)
        finally:
            if os.path.exists(out_path):
                os.remove(out_path)
    finally:
        input_tmp.cleanup()
|
||||
|
||||
|
||||
async def remove_silence(
|
||||
storage: StorageService,
|
||||
*,
|
||||
@@ -264,3 +433,131 @@ async def convert_to_ogg_temp(
|
||||
|
||||
_ = filename_without_ext
|
||||
return out_path, _cleanup
|
||||
|
||||
|
||||
async def extract_frames(
    storage: StorageService,
    *,
    file_key: str,
    frames_folder: str,
    on_progress: Callable[[int, int], None] | None = None,
) -> FrameSpriteMetadata:
    """Extract video frames at 1fps via ffmpeg and upload to S3.

    Also writes a ``meta.json`` alongside the frames for fast lookup.
    Returns metadata about the extracted frames.

    Args:
        storage: Storage backend used for download/upload.
        file_key: Key of the source video.
        frames_folder: Destination folder key for the frame JPEGs.
        on_progress: Optional callback invoked as (frames_uploaded, total).

    Raises:
        RuntimeError: if ffmpeg exits non-zero or extracts no frames.
    """
    import shutil  # hoisted out of `finally` so cleanup cannot trip on an import

    input_tmp = await storage.download_to_temp(file_key)
    tmp_dir = mkdtemp(prefix="frames_")

    try:
        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            input_tmp.path,
            "-vf",
            f"fps={FRAME_FPS},scale={FRAME_WIDTH_PX}:-1",
            "-q:v",
            str(FRAME_JPEG_QUALITY),
            path.join(tmp_dir, "%06d.jpg"),
        ]

        proc = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        _, stderr = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(f"ffmpeg frame extraction failed: {stderr.decode(errors='ignore')}")

        frame_files = sorted(glob_mod.glob(path.join(tmp_dir, "*.jpg")))
        frame_count = len(frame_files)

        if frame_count == 0:
            raise RuntimeError("No frames extracted from video")

        # Read first frame dimensions via ffprobe (avoids PIL dependency)
        probe_proc = await asyncio.create_subprocess_exec(
            "ffprobe",
            "-v", "error",
            "-select_streams", "v:0",
            "-show_entries", "stream=width,height",
            "-of", "json",
            frame_files[0],
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        probe_stdout, _ = await probe_proc.communicate()
        probe_data = json.loads(probe_stdout.decode())
        # Fix: `.get("streams", [{}])` returned [] (not the default) when the
        # key existed with an empty list, so `[0]` raised IndexError; `or`
        # falls back whenever the list is empty or missing.
        streams = probe_data.get("streams") or [{}]
        stream = streams[0]
        width = stream.get("width", FRAME_WIDTH_PX)
        height = stream.get("height", FRAME_WIDTH_PX)

        # Upload each frame to S3
        for idx, frame_path in enumerate(frame_files):
            frame_name = path.basename(frame_path)
            with open(frame_path, "rb") as f:
                await storage.upload_fileobj(
                    fileobj=f,
                    file_name=frame_name,
                    folder=frames_folder,
                    gen_name=False,
                    content_type="image/jpeg",
                )
            if on_progress is not None:
                on_progress(idx + 1, frame_count)

        metadata = FrameSpriteMetadata(
            frame_count=frame_count,
            interval=1.0 / FRAME_FPS,
            width=width,
            height=height,
            folder_key=frames_folder,
            source_file_key=file_key,
        )

        # Write metadata JSON to S3 for fast lookup by the frames endpoint
        meta_bytes = json.dumps(metadata.model_dump(mode="json")).encode("utf-8")
        await storage.upload_fileobj(
            fileobj=io.BytesIO(meta_bytes),
            file_name=FRAMES_META_FILENAME,
            folder=frames_folder,
            gen_name=False,
            content_type="application/json",
        )

        return metadata
    finally:
        input_tmp.cleanup()
        shutil.rmtree(tmp_dir, ignore_errors=True)
|
||||
|
||||
|
||||
async def read_frames_metadata(
    storage: StorageService, *, frames_folder: str
) -> FrameSpriteMetadata | None:
    """Read frame extraction metadata from S3. Returns None if not found."""
    metadata_key = path.join(frames_folder, FRAMES_META_FILENAME)
    if await storage.exists(metadata_key):
        payload = await storage.read(metadata_key)
        return FrameSpriteMetadata.model_validate(json.loads(payload))
    return None
|
||||
|
||||
|
||||
async def delete_frames(
    storage: StorageService, *, frames_folder: str, frame_count: int
) -> None:
    """Delete all frame files and metadata from S3 for a given folder.

    Deletion is best-effort: failures on individual keys (already gone,
    transient storage errors) are swallowed so the remaining keys are
    still attempted.
    """
    # Frame images first (000001.jpg .. NNNNNN.jpg), then the metadata JSON.
    keys = [path.join(frames_folder, f"{n:06d}.jpg") for n in range(1, frame_count + 1)]
    keys.append(path.join(frames_folder, FRAMES_META_FILENAME))
    for key in keys:
        try:
            await storage.delete(key)
        except Exception:
            pass
|
||||
|
||||
Reference in New Issue
Block a user