new features

This commit is contained in:
Daniil
2026-02-27 23:33:56 +03:00
parent 937e58859a
commit dc04efe0fb
41 changed files with 2067 additions and 141 deletions
+5 -2
View File
@@ -46,8 +46,11 @@ class ArtifactMediaFile(Base, BaseModelMixin):
file_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True), ForeignKey("files.id", ondelete="RESTRICT"), nullable=True, index=True
)
media_file_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), ForeignKey("media_files.id", ondelete="RESTRICT"), index=True
media_file_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("media_files.id", ondelete="RESTRICT"),
nullable=True,
index=True,
)
artifact_type: Mapped[str] = mapped_column(String(32), default="TRANSCRIPTION_JSON")
+48 -5
View File
@@ -1,6 +1,8 @@
from __future__ import annotations
import math
import uuid
from os import path
from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
from sqlalchemy.ext.asyncio import AsyncSession
@@ -8,11 +10,14 @@ from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.auth import get_current_user
from cpv3.infrastructure.deps import get_storage
from cpv3.infrastructure.storage.base import StorageService
from cpv3.infrastructure.storage.utils import get_user_folder
from cpv3.db.session import get_db
from cpv3.modules.media.schemas import (
ArtifactMediaFileCreate,
ArtifactMediaFileRead,
ArtifactMediaFileUpdate,
FrameItem,
FrameRangeResponse,
MediaConverterParams,
MediaFileCreate,
MediaFileRead,
@@ -20,7 +25,13 @@ from cpv3.modules.media.schemas import (
MediaProbeSchema,
MediaSilencerParams,
)
from cpv3.modules.media.service import convert_to_mp4, probe_media, remove_silence
from cpv3.modules.media.service import (
convert_to_mp4,
get_frames_folder,
probe_media,
read_frames_metadata,
remove_silence,
)
from cpv3.modules.media.repository import ArtifactRepository, MediaFileRepository
from cpv3.modules.files.schemas import FileInfoResponse
from cpv3.modules.users.models import User
@@ -46,12 +57,13 @@ async def silence_remove(
current_user: User = Depends(get_current_user),
storage: StorageService = Depends(get_storage),
) -> FileInfoResponse:
_ = current_user
user_folder = get_user_folder(current_user)
resolved_folder = f"{user_folder}/{body.folder}" if body.folder else f"{user_folder}/output_files"
info = await remove_silence(
storage,
file_key=body.file_path,
out_folder=body.folder,
out_folder=resolved_folder,
min_silence_duration_ms=body.min_silence_duration_ms,
silence_threshold_db=body.silence_threshold_db,
padding_ms=body.padding_ms,
@@ -71,9 +83,10 @@ async def convert(
current_user: User = Depends(get_current_user),
storage: StorageService = Depends(get_storage),
) -> FileInfoResponse:
_ = current_user
user_folder = get_user_folder(current_user)
resolved_folder = f"{user_folder}/{body.folder}" if body.folder else f"{user_folder}/output_files"
info = await convert_to_mp4(storage, file_key=body.file_path, out_folder=body.folder)
info = await convert_to_mp4(storage, file_key=body.file_path, out_folder=resolved_folder)
return FileInfoResponse(
file_path=info.file_path,
file_url=info.file_url,
@@ -82,6 +95,36 @@ async def convert(
)
@media_router.get("/frames/", response_model=FrameRangeResponse)
async def get_frames(
    file_key: str = Query(..., description="S3 key of the source video"),
    start: float = Query(0.0, ge=0, description="Start time in seconds"),
    end: float = Query(..., gt=0, description="End time in seconds"),
    current_user: User = Depends(get_current_user),
    storage: StorageService = Depends(get_storage),
) -> FrameRangeResponse:
    """Return presigned URLs for extracted frames within a time range.

    Frames live in a deterministic per-user folder derived from ``file_key``.
    If no metadata exists yet (frames were never extracted for this file),
    an empty response is returned rather than an error so clients can poll
    cheaply.
    """
    user_folder = get_user_folder(current_user)
    frames_folder = get_frames_folder(user_folder, file_key)
    metadata = await read_frames_metadata(storage, frames_folder=frames_folder)
    if metadata is None:
        # Nothing extracted yet for this file.
        return FrameRangeResponse(interval=1.0, frames=[])
    interval = metadata.interval
    # Frame i (1-based file name "%06d.jpg") covers timestamp (i - 1) * interval.
    first_index = max(1, math.floor(start / interval) + 1)
    last_index = min(metadata.frame_count, math.ceil(end / interval) + 1)
    frames: list[FrameItem] = []
    # If start > duration, first_index > last_index and the range is empty.
    for i in range(first_index, last_index + 1):
        # Join with "/" explicitly: S3 object keys are not OS paths, and
        # os.path.join would produce backslashes on Windows.
        key = f"{frames_folder}/{i:06d}.jpg"
        timestamp = (i - 1) * interval
        url = await storage.url(key)
        frames.append(FrameItem(timestamp=timestamp, url=url))
    return FrameRangeResponse(interval=interval, frames=frames)
@mediafiles_router.get("/mediafiles/", response_model=list[MediaFileRead])
async def list_mediafiles(
current_user: User = Depends(get_current_user),
+29 -2
View File
@@ -12,9 +12,11 @@ from cpv3.common.schemas import Schema
# Closed set of artifact kinds that can be attached to a media file.
# Stored as plain strings (see ArtifactMediaFile.artifact_type).
ArtifactTypeEnum = Literal[
    "TRANSCRIPTION_JSON",
    "SILENCE_REMOVED_VIDEO",
    "CONVERTED_VIDEO",
    "THUMBNAIL",
    "AUDIO_PROXY",
    "RENDERED_VIDEO",
    "FRAME_SPRITES",
]
@@ -60,7 +62,7 @@ class ArtifactMediaFileRead(Schema):
id: UUID
project_id: UUID | None
file_id: UUID | None
media_file_id: UUID
media_file_id: UUID | None
artifact_type: ArtifactTypeEnum
@@ -74,7 +76,7 @@ class ArtifactMediaFileRead(Schema):
class ArtifactMediaFileCreate(Schema):
project_id: UUID | None = None
file_id: UUID | None = None
media_file_id: UUID
media_file_id: UUID | None = None
artifact_type: ArtifactTypeEnum
@@ -148,3 +150,28 @@ class MediaSilencerParams(Schema):
class MediaConverterParams(Schema):
    """Request body for the MP4 conversion endpoint."""

    # Storage key of the source media file.
    file_path: str
    # Optional output sub-folder; empty string lets the endpoint pick a default.
    folder: str = ""
class FrameSpriteMetadata(Schema):
    """Metadata stored in ArtifactMediaFile.meta for extracted frames."""

    # Total number of extracted frames (files are named 000001.jpg ... N).
    frame_count: int
    # Seconds between consecutive frames (1 / extraction fps).
    interval: float
    # Pixel dimensions of the extracted frame thumbnails.
    width: int
    height: int
    # Storage folder key holding the frames and meta.json.
    folder_key: str
    # Storage key of the source video the frames were extracted from.
    source_file_key: str
class FrameItem(Schema):
    """Single frame in a range query response."""

    # Position of the frame within the source video, in seconds.
    timestamp: float
    # Presigned URL for fetching the frame image.
    url: str
class FrameRangeResponse(Schema):
    """Response for GET /api/media/frames/ range query."""

    # Seconds between consecutive frames.
    interval: float
    # Frames within the requested time window, in chronological order.
    frames: list[FrameItem]
+299 -2
View File
@@ -1,15 +1,30 @@
from __future__ import annotations
import asyncio
import glob as glob_mod
import hashlib
import io
import json
from os import path
from tempfile import NamedTemporaryFile
from tempfile import NamedTemporaryFile, mkdtemp
from typing import Callable
import anyio
from cpv3.infrastructure.storage.base import StorageService
from cpv3.infrastructure.storage.types import FileInfo
from cpv3.modules.media.schemas import MediaProbeSchema
from cpv3.modules.media.schemas import FrameSpriteMetadata, MediaProbeSchema
FRAME_WIDTH_PX = 128
FRAME_FPS = 1
FRAME_JPEG_QUALITY = 5
FRAMES_META_FILENAME = "meta.json"
def get_frames_folder(user_folder: str, file_key: str) -> str:
    """Build deterministic S3 folder path for frames based on file_key hash.

    The first 16 hex characters of SHA-256(file_key) are used, so the same
    source video always maps to the same folder for a given user.
    """
    key_hash = hashlib.sha256(file_key.encode()).hexdigest()[:16]
    # Join with "/" explicitly: S3 object keys are not OS paths, and
    # os.path.join would produce backslashes on Windows.
    return f"{user_folder}/frames/{key_hash}"
async def probe_media(storage: StorageService, *, file_key: str) -> MediaProbeSchema:
@@ -68,6 +83,160 @@ def _compute_non_silent_segments(
return segments
async def detect_silence(
    storage: StorageService,
    *,
    file_key: str,
    min_silence_duration_ms: int = 200,
    silence_threshold_db: int = 16,
    padding_ms: int = 100,
) -> dict:
    """Detect silent segments in a media file and return their intervals.

    Returns a dict with ``silent_segments`` (list of ``{"start_ms", "end_ms"}``
    mappings), ``duration_ms``, and the echoed ``file_key``.
    """
    tmp = await storage.download_to_temp(file_key)
    try:
        from pydub import AudioSegment  # type: ignore[import-untyped]

        # Decode once to learn the total length in milliseconds.
        loaded: AudioSegment = await anyio.to_thread.run_sync(
            lambda: AudioSegment.from_file(tmp.path)
        )
        total_ms = len(loaded)

        keep_segments = await anyio.to_thread.run_sync(
            lambda: _compute_non_silent_segments(
                local_audio_path=tmp.path,
                min_silence_duration_ms=min_silence_duration_ms,
                silence_threshold_db=silence_threshold_db,
                padding_ms=padding_ms,
            )
        )

        # The gaps between consecutive non-silent segments are the silent ones.
        silent: list[dict[str, int]] = []
        cursor = 0
        for seg_start, seg_end in keep_segments:
            if seg_start > cursor:
                silent.append({"start_ms": cursor, "end_ms": seg_start})
            cursor = seg_end
        if cursor < total_ms:
            silent.append({"start_ms": cursor, "end_ms": total_ms})

        return {
            "silent_segments": silent,
            "duration_ms": total_ms,
            "file_key": file_key,
        }
    finally:
        tmp.cleanup()
async def apply_silence_cuts(
    storage: StorageService,
    *,
    file_key: str,
    out_folder: str,
    cuts: list[dict],
    output_name: str | None = None,
) -> FileInfo:
    """Apply explicit cut regions to a media file, concatenating the non-cut parts.

    Args:
        storage: Storage backend used to download the source and upload the result.
        file_key: Storage key of the source media file.
        out_folder: Destination folder prefix; the result lands under
            ``<out_folder>/silent/``.
        cuts: Regions to remove; each is a mapping with ``start_ms`` and
            ``end_ms`` keys (milliseconds).
        output_name: Optional output file name; defaults to the source basename.

    Returns:
        FileInfo for the uploaded, re-encoded result. If the cuts cover the
        whole file (nothing left to keep), the original file's info is returned.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    input_tmp = await storage.download_to_temp(file_key)
    try:
        from pydub import AudioSegment  # type: ignore[import-untyped]

        # Decoded only to learn the total duration in milliseconds.
        audio: AudioSegment = await anyio.to_thread.run_sync(
            lambda: AudioSegment.from_file(input_tmp.path)
        )
        duration_ms = len(audio)
        # Sort cuts by start, then invert them into the "keep" segments.
        sorted_cuts = sorted(cuts, key=lambda c: c["start_ms"])
        segments: list[tuple[int, int]] = []
        prev_end = 0
        for cut in sorted_cuts:
            # Clamp each cut to [0, duration]; max() on prev_end makes
            # overlapping cuts safe.
            cut_start = max(0, cut["start_ms"])
            cut_end = min(duration_ms, cut["end_ms"])
            if cut_start > prev_end:
                segments.append((prev_end, cut_start))
            prev_end = max(prev_end, cut_end)
        if prev_end < duration_ms:
            segments.append((prev_end, duration_ms))
        if not segments:
            # Everything was cut away: return the untouched source's info.
            return await storage.get_file_info(file_key)
        # Only the temp file *name* is needed; the handle is closed on exit
        # from the `with` so ffmpeg can write to it (delete=False keeps it).
        with NamedTemporaryFile(
            suffix=path.splitext(file_key)[1] or ".mp4", delete=False
        ) as out:
            out_path = out.name
        try:
            # One `-ss`/`-t` input-option group per keep-segment, each reading
            # the same source file; segments are then concatenated.
            cmd: list[str] = ["ffmpeg"]
            for start_ms, end_ms in segments:
                start_s = start_ms / 1000.0
                duration_s = (end_ms - start_ms) / 1000.0
                cmd.extend(
                    [
                        "-ss",
                        f"{start_s:.3f}",
                        "-t",
                        f"{duration_s:.3f}",
                        # NOTE(review): "-y" is a global overwrite flag and is
                        # repeated here once per segment; harmless but only the
                        # first occurrence matters.
                        "-y",
                        "-i",
                        input_tmp.path,
                    ]
                )
            # Build "[0:v:0][0:a:0][1:v:0][1:a:0]...concat=n=K:v=1:a=1[v][a]".
            # Assumes every input has both a video and an audio stream.
            seg_count = len(segments)
            parts = [f"[{i}:v:0][{i}:a:0]" for i in range(seg_count)]
            filter_complex = "".join(parts) + f"concat=n={seg_count}:v=1:a=1[v][a]"
            cmd.extend(
                [
                    "-filter_complex",
                    filter_complex,
                    "-map",
                    "[v]",
                    "-map",
                    "[a]",
                    "-c:v",
                    "libx264",
                    "-c:a",
                    "aac",
                    "-preset",
                    "medium",
                    out_path,
                ]
            )
            proc = await asyncio.create_subprocess_exec(
                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
            )
            _, stderr = await proc.communicate()
            if proc.returncode != 0:
                raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
            base_name = output_name or path.basename(file_key)
            # NOTE(review): path.join would emit backslashes on Windows even
            # though this is a storage key — confirm deployment is POSIX-only.
            output_key = path.join(out_folder or "", "silent", base_name)
            with open(out_path, "rb") as out_file:
                _ = await storage.upload_fileobj(
                    fileobj=out_file,
                    file_name=path.basename(output_key),
                    folder=path.dirname(output_key),
                    gen_name=False,
                    content_type="video/mp4",
                )
            return await storage.get_file_info(output_key)
        finally:
            import os

            # Remove the local re-encoded temp file regardless of outcome.
            if os.path.exists(out_path):
                os.remove(out_path)
    finally:
        input_tmp.cleanup()
async def remove_silence(
storage: StorageService,
*,
@@ -264,3 +433,131 @@ async def convert_to_ogg_temp(
_ = filename_without_ext
return out_path, _cleanup
async def extract_frames(
    storage: StorageService,
    *,
    file_key: str,
    frames_folder: str,
    on_progress: Callable[[int, int], None] | None = None,
) -> FrameSpriteMetadata:
    """Extract video frames at 1fps via ffmpeg and upload to S3.

    Also writes a ``meta.json`` alongside the frames for fast lookup.

    Args:
        storage: Storage backend for the source download and frame uploads.
        file_key: Storage key of the source video.
        frames_folder: Destination folder key for frames and metadata.
        on_progress: Optional callback invoked as ``on_progress(done, total)``
            after each uploaded frame.

    Returns:
        Metadata describing the extracted frames.

    Raises:
        RuntimeError: If ffmpeg/ffprobe fails or no frames were produced.
    """
    import shutil  # function-scoped, matching this file's style

    input_tmp = await storage.download_to_temp(file_key)
    tmp_dir = mkdtemp(prefix="frames_")
    try:
        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            input_tmp.path,
            "-vf",
            f"fps={FRAME_FPS},scale={FRAME_WIDTH_PX}:-1",
            "-q:v",
            str(FRAME_JPEG_QUALITY),
            path.join(tmp_dir, "%06d.jpg"),
        ]
        proc = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        _, stderr = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(f"ffmpeg frame extraction failed: {stderr.decode(errors='ignore')}")
        frame_files = sorted(glob_mod.glob(path.join(tmp_dir, "*.jpg")))
        frame_count = len(frame_files)
        if frame_count == 0:
            raise RuntimeError("No frames extracted from video")
        # Read first frame dimensions via ffprobe (avoids PIL dependency)
        probe_proc = await asyncio.create_subprocess_exec(
            "ffprobe",
            "-v", "error",
            "-select_streams", "v:0",
            "-show_entries", "stream=width,height",
            "-of", "json",
            frame_files[0],
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        probe_stdout, probe_stderr = await probe_proc.communicate()
        if probe_proc.returncode != 0:
            # Previously an ffprobe failure surfaced as a confusing
            # JSONDecodeError on empty stdout; fail with a clear message.
            raise RuntimeError(
                f"ffprobe failed on extracted frame: {probe_stderr.decode(errors='ignore')}"
            )
        probe_data = json.loads(probe_stdout.decode())
        # Guard against a present-but-empty "streams" list: dict.get's default
        # only applies when the key is missing entirely.
        streams = probe_data.get("streams") or [{}]
        stream = streams[0]
        width = stream.get("width", FRAME_WIDTH_PX)
        height = stream.get("height", FRAME_WIDTH_PX)
        # Upload each frame to S3
        for idx, frame_path in enumerate(frame_files):
            frame_name = path.basename(frame_path)
            with open(frame_path, "rb") as f:
                await storage.upload_fileobj(
                    fileobj=f,
                    file_name=frame_name,
                    folder=frames_folder,
                    gen_name=False,
                    content_type="image/jpeg",
                )
            if on_progress is not None:
                on_progress(idx + 1, frame_count)
        metadata = FrameSpriteMetadata(
            frame_count=frame_count,
            interval=1.0 / FRAME_FPS,
            width=width,
            height=height,
            folder_key=frames_folder,
            source_file_key=file_key,
        )
        # Write metadata JSON to S3 for fast lookup by the frames endpoint
        meta_bytes = json.dumps(metadata.model_dump(mode="json")).encode("utf-8")
        await storage.upload_fileobj(
            fileobj=io.BytesIO(meta_bytes),
            file_name=FRAMES_META_FILENAME,
            folder=frames_folder,
            gen_name=False,
            content_type="application/json",
        )
        return metadata
    finally:
        input_tmp.cleanup()
        shutil.rmtree(tmp_dir, ignore_errors=True)
async def read_frames_metadata(
    storage: StorageService, *, frames_folder: str
) -> FrameSpriteMetadata | None:
    """Read frame extraction metadata from S3. Returns None if not found."""
    # Build the key with "/" explicitly: S3 object keys are not OS paths, and
    # os.path.join would emit backslashes on Windows.
    meta_key = f"{frames_folder}/{FRAMES_META_FILENAME}"
    if not await storage.exists(meta_key):
        return None
    raw = await storage.read(meta_key)
    return FrameSpriteMetadata.model_validate(json.loads(raw))
async def delete_frames(
    storage: StorageService, *, frames_folder: str, frame_count: int
) -> None:
    """Delete all frame files and metadata from S3 for a given folder.

    Deletion is best-effort: individual failures are suppressed so one
    missing object does not abort cleanup of the remaining ones.
    """
    from contextlib import suppress

    for i in range(1, frame_count + 1):
        # "/" join on purpose: S3 object keys are not OS paths.
        key = f"{frames_folder}/{i:06d}.jpg"
        with suppress(Exception):
            await storage.delete(key)
    # Delete metadata file
    with suppress(Exception):
        await storage.delete(f"{frames_folder}/{FRAMES_META_FILENAME}")