init: new structure + fix lint errors

This commit is contained in:
Daniil
2026-02-03 02:15:07 +03:00
commit 67e0f22b4f
89 changed files with 7654 additions and 0 deletions
View File
+54
View File
@@ -0,0 +1,54 @@
from __future__ import annotations
import uuid
from sqlalchemy import Boolean, Float, ForeignKey, Integer, JSON, String, Text
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from cpv3.db.base import Base, BaseModelMixin
class MediaFile(Base, BaseModelMixin):
    """ORM mapping for the ``media_files`` table.

    Declares the owner and optional project references, basic technical
    properties of the media, two free-form JSON payloads, a notes field and a
    soft-delete flag. Any further columns are inherited from ``Base`` /
    ``BaseModelMixin``.
    """

    __tablename__ = "media_files"

    # --- references (both ON DELETE RESTRICT, both indexed) ---
    owner_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="RESTRICT"),
        index=True,
    )
    project_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="RESTRICT"),
        nullable=True,
        index=True,
    )

    # --- technical properties; only the duration is mandatory ---
    duration_seconds: Mapped[float] = mapped_column(Float)
    frame_rate: Mapped[float | None] = mapped_column(Float, nullable=True)
    width: Mapped[int | None] = mapped_column(Integer, nullable=True)
    height: Mapped[int | None] = mapped_column(Integer, nullable=True)

    # --- free-form payloads ---
    probe_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)
    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
    meta: Mapped[dict | None] = mapped_column(JSON, nullable=True)

    # Soft-delete marker; new rows default to "not deleted".
    is_deleted: Mapped[bool] = mapped_column(Boolean, default=False)
class ArtifactMediaFile(Base, BaseModelMixin):
    """ORM mapping for the ``artifact_media_files`` table.

    Each row references a mandatory media file, plus an optional project and
    an optional stored file, and is tagged with an artifact type string.
    """

    __tablename__ = "artifact_media_files"

    # --- references (all ON DELETE RESTRICT, all indexed) ---
    project_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="RESTRICT"),
        nullable=True,
        index=True,
    )
    file_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("files.id", ondelete="RESTRICT"),
        nullable=True,
        index=True,
    )
    media_file_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("media_files.id", ondelete="RESTRICT"),
        index=True,
    )

    # Stored as a plain string of at most 32 characters.
    artifact_type: Mapped[str] = mapped_column(
        String(32), default="TRANSCRIPTION_JSON"
    )

    # Soft-delete marker; new rows default to "not deleted".
    is_deleted: Mapped[bool] = mapped_column(Boolean, default=False)
+124
View File
@@ -0,0 +1,124 @@
from __future__ import annotations
import uuid
from sqlalchemy import Select, select
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.media.models import ArtifactMediaFile, MediaFile
from cpv3.modules.media.schemas import (
ArtifactMediaFileCreate,
ArtifactMediaFileUpdate,
MediaFileCreate,
MediaFileUpdate,
)
from cpv3.modules.users.models import User
class MediaFileRepository:
    """Persistence operations for ``MediaFile`` rows on a single async session."""

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def list_all(self, *, requester: User) -> list[MediaFile]:
        """Return non-deleted media files, newest first.

        Staff requesters get every row; other requesters only rows whose
        ``owner_id`` equals their own id.
        """
        criteria = [MediaFile.is_deleted.is_(False)]
        if not requester.is_staff:
            criteria.append(MediaFile.owner_id == requester.id)

        stmt: Select[tuple[MediaFile]] = select(MediaFile).where(*criteria)
        rows = await self._session.execute(
            stmt.order_by(MediaFile.created_at.desc())
        )
        return list(rows.scalars().all())

    async def get_by_id(self, media_file_id: uuid.UUID) -> MediaFile | None:
        """Return the media file with this id, or ``None`` if missing or soft-deleted."""
        lookup = select(MediaFile).where(MediaFile.id == media_file_id)
        found = (await self._session.execute(lookup)).scalar_one_or_none()
        if found is not None and not found.is_deleted:
            return found
        return None

    async def create(self, *, requester: User, data: MediaFileCreate) -> MediaFile:
        """Insert a media file owned by *requester*, commit, and return the refreshed row."""
        copied_fields = (
            "project_id",
            "duration_seconds",
            "frame_rate",
            "width",
            "height",
            "probe_json",
            "notes",
            "meta",
        )
        media_file = MediaFile(
            owner_id=requester.id,
            **{name: getattr(data, name) for name in copied_fields},
        )
        self._session.add(media_file)
        await self._session.commit()
        await self._session.refresh(media_file)
        return media_file

    async def update(self, media_file: MediaFile, data: MediaFileUpdate) -> MediaFile:
        """Apply the explicitly-set, non-``None`` fields of *data*, commit, and refresh.

        NOTE(review): fields explicitly sent as ``None`` are skipped, so this
        method cannot reset a field to NULL — confirm that is intended.
        """
        changes = {
            attr: new_value
            for attr, new_value in data.model_dump(exclude_unset=True).items()
            if new_value is not None
        }
        for attr, new_value in changes.items():
            setattr(media_file, attr, new_value)
        await self._session.commit()
        await self._session.refresh(media_file)
        return media_file

    async def mark_deleted(self, media_file: MediaFile) -> None:
        """Soft-delete *media_file* by setting its flag and committing."""
        media_file.is_deleted = True
        await self._session.commit()
class ArtifactRepository:
    """Persistence operations for ``ArtifactMediaFile`` rows on a single async session."""

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def list_all(self) -> list[ArtifactMediaFile]:
        """Return every non-deleted artifact, newest first (no per-user filtering)."""
        stmt = select(ArtifactMediaFile).where(
            ArtifactMediaFile.is_deleted.is_(False)
        )
        rows = await self._session.execute(
            stmt.order_by(ArtifactMediaFile.created_at.desc())
        )
        return list(rows.scalars().all())

    async def get_by_id(self, artifact_id: uuid.UUID) -> ArtifactMediaFile | None:
        """Return the artifact with this id, or ``None`` if missing or soft-deleted."""
        lookup = select(ArtifactMediaFile).where(ArtifactMediaFile.id == artifact_id)
        found = (await self._session.execute(lookup)).scalar_one_or_none()
        if found is not None and not found.is_deleted:
            return found
        return None

    async def create(self, data: ArtifactMediaFileCreate) -> ArtifactMediaFile:
        """Insert an artifact built from *data*, commit, and return the refreshed row."""
        copied_fields = ("project_id", "file_id", "media_file_id", "artifact_type")
        artifact = ArtifactMediaFile(
            **{name: getattr(data, name) for name in copied_fields}
        )
        self._session.add(artifact)
        await self._session.commit()
        await self._session.refresh(artifact)
        return artifact

    async def update(
        self, artifact: ArtifactMediaFile, data: ArtifactMediaFileUpdate
    ) -> ArtifactMediaFile:
        """Apply the explicitly-set, non-``None`` fields of *data*, commit, and refresh."""
        changes = {
            attr: new_value
            for attr, new_value in data.model_dump(exclude_unset=True).items()
            if new_value is not None
        }
        for attr, new_value in changes.items():
            setattr(artifact, attr, new_value)
        await self._session.commit()
        await self._session.refresh(artifact)
        return artifact

    async def mark_deleted(self, artifact: ArtifactMediaFile) -> None:
        """Soft-delete *artifact* by setting its flag and committing."""
        artifact.is_deleted = True
        await self._session.commit()
+232
View File
@@ -0,0 +1,232 @@
from __future__ import annotations
import uuid
from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.auth import get_current_user
from cpv3.infrastructure.deps import get_storage
from cpv3.infrastructure.storage.base import StorageService
from cpv3.db.session import get_db
from cpv3.modules.media.schemas import (
ArtifactMediaFileCreate,
ArtifactMediaFileRead,
ArtifactMediaFileUpdate,
MediaConverterParams,
MediaFileCreate,
MediaFileRead,
MediaFileUpdate,
MediaProbeSchema,
MediaSilencerParams,
)
from cpv3.modules.media.service import convert_to_mp4, probe_media, remove_silence
from cpv3.modules.media.repository import ArtifactRepository, MediaFileRepository
from cpv3.modules.files.schemas import FileInfoResponse
from cpv3.modules.users.models import User
# Three routers share the "/api/media" prefix and differ only in their tag:
# processing endpoints, media-file CRUD, and artifact CRUD respectively.
media_router = APIRouter(prefix="/api/media", tags=["media"])
mediafiles_router = APIRouter(prefix="/api/media", tags=["mediafiles"])
artifacts_router = APIRouter(prefix="/api/media", tags=["artifacts"])
# GET /api/media/get_meta/ — probe the stored object named by `file_path`
# and return the parsed probe result.
@media_router.get("/get_meta/", response_model=MediaProbeSchema)
async def get_meta(
    file_path: str = Query(...),
    current_user: User = Depends(get_current_user),
    storage: StorageService = Depends(get_storage),
) -> MediaProbeSchema:
    # The user object itself is unused; it is injected only so that the
    # get_current_user dependency runs for this route.
    del current_user
    probe_result = await probe_media(storage, file_key=file_path)
    return probe_result
# POST /api/media/silence_remove — run silence removal on a stored file and
# report where the result lives.
@media_router.post("/silence_remove", response_model=FileInfoResponse)
async def silence_remove(
    body: MediaSilencerParams,
    current_user: User = Depends(get_current_user),
    storage: StorageService = Depends(get_storage),
) -> FileInfoResponse:
    # Injected only so that the get_current_user dependency runs.
    del current_user

    processed = await remove_silence(
        storage,
        file_key=body.file_path,
        out_folder=body.folder,
        min_silence_duration_ms=body.min_silence_duration_ms,
        silence_threshold_db=body.silence_threshold_db,
        padding_ms=body.padding_ms,
    )

    # Re-pack the service result into the API response schema field by field.
    response = FileInfoResponse(
        file_path=processed.file_path,
        file_url=processed.file_url,
        file_size=processed.file_size,
        filename=processed.filename,
    )
    return response
# POST /api/media/convert — convert a stored file to MP4 and report where the
# result lives.
@media_router.post("/convert", response_model=FileInfoResponse)
async def convert(
    body: MediaConverterParams,
    current_user: User = Depends(get_current_user),
    storage: StorageService = Depends(get_storage),
) -> FileInfoResponse:
    # Injected only so that the get_current_user dependency runs.
    del current_user

    converted = await convert_to_mp4(
        storage, file_key=body.file_path, out_folder=body.folder
    )

    # Re-pack the service result into the API response schema field by field.
    response = FileInfoResponse(
        file_path=converted.file_path,
        file_url=converted.file_url,
        file_size=converted.file_size,
        filename=converted.filename,
    )
    return response
# GET /api/media/mediafiles/ — list the media files visible to the caller;
# visibility filtering is delegated to the repository.
@mediafiles_router.get("/mediafiles/", response_model=list[MediaFileRead])
async def list_mediafiles(
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[MediaFileRead]:
    rows = await MediaFileRepository(db).list_all(requester=current_user)
    return list(map(MediaFileRead.model_validate, rows))
# POST /api/media/mediafiles/ — create a media file owned by the caller and
# answer 201 with the stored record.
@mediafiles_router.post(
    "/mediafiles/",
    response_model=MediaFileRead,
    status_code=status.HTTP_201_CREATED,
)
async def create_mediafile(
    body: MediaFileCreate,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> MediaFileRead:
    created = await MediaFileRepository(db).create(requester=current_user, data=body)
    return MediaFileRead.model_validate(created)
# GET /api/media/mediafiles/{id}/ — fetch one media file.
# 404 when the repository finds nothing; 403 unless the caller is staff or
# the owner.
@mediafiles_router.get("/mediafiles/{media_file_id}/", response_model=MediaFileRead)
async def retrieve_mediafile(
    media_file_id: uuid.UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> MediaFileRead:
    media_file = await MediaFileRepository(db).get_by_id(media_file_id)
    if media_file is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    if not (current_user.is_staff or media_file.owner_id == current_user.id):
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")

    return MediaFileRead.model_validate(media_file)
# PATCH /api/media/mediafiles/{id}/ — partially update one media file.
# 404 when the repository finds nothing; 403 unless the caller is staff or
# the owner.
@mediafiles_router.patch("/mediafiles/{media_file_id}/", response_model=MediaFileRead)
async def patch_mediafile(
    media_file_id: uuid.UUID,
    body: MediaFileUpdate,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> MediaFileRead:
    repo = MediaFileRepository(db)

    existing = await repo.get_by_id(media_file_id)
    if existing is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    if not (current_user.is_staff or existing.owner_id == current_user.id):
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")

    updated = await repo.update(existing, body)
    return MediaFileRead.model_validate(updated)
# DELETE /api/media/mediafiles/{id}/ — soft-delete one media file, answer 204.
# 404 when the repository finds nothing; 403 unless the caller is staff or
# the owner.
@mediafiles_router.delete(
    "/mediafiles/{media_file_id}/", status_code=status.HTTP_204_NO_CONTENT
)
async def delete_mediafile(
    media_file_id: uuid.UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Response:
    repo = MediaFileRepository(db)

    target = await repo.get_by_id(media_file_id)
    if target is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    if not (current_user.is_staff or target.owner_id == current_user.id):
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")

    await repo.mark_deleted(target)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
# GET /api/media/artifacts/ — list every non-deleted artifact.
# NOTE(review): unlike the media-file endpoints, no owner/staff filtering is
# applied here — confirm this is intended.
@artifacts_router.get("/artifacts/", response_model=list[ArtifactMediaFileRead])
async def list_artifact_mediafiles(
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[ArtifactMediaFileRead]:
    # Injected only so that the get_current_user dependency runs.
    del current_user
    rows = await ArtifactRepository(db).list_all()
    return list(map(ArtifactMediaFileRead.model_validate, rows))
# POST /api/media/artifacts/ — create an artifact and answer 201 with the
# stored record.
# NOTE(review): the caller's relation to the referenced media file is not
# checked here — confirm this is intended.
@artifacts_router.post(
    "/artifacts/",
    response_model=ArtifactMediaFileRead,
    status_code=status.HTTP_201_CREATED,
)
async def create_artifact_mediafile(
    body: ArtifactMediaFileCreate,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> ArtifactMediaFileRead:
    # Injected only so that the get_current_user dependency runs.
    del current_user
    created = await ArtifactRepository(db).create(body)
    return ArtifactMediaFileRead.model_validate(created)
# GET /api/media/artifacts/{id}/ — fetch one artifact; 404 when the
# repository finds nothing.
# NOTE(review): unlike the media-file endpoints, no owner/staff check is
# performed here — confirm this is intended.
@artifacts_router.get("/artifacts/{artifact_id}/", response_model=ArtifactMediaFileRead)
async def retrieve_artifact_mediafile(
    artifact_id: uuid.UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> ArtifactMediaFileRead:
    # Injected only so that the get_current_user dependency runs.
    del current_user

    found = await ArtifactRepository(db).get_by_id(artifact_id)
    if found is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
    return ArtifactMediaFileRead.model_validate(found)
# PATCH /api/media/artifacts/{id}/ — partially update one artifact; 404 when
# the repository finds nothing.
# NOTE(review): unlike the media-file endpoints, no owner/staff check is
# performed here — confirm this is intended.
@artifacts_router.patch("/artifacts/{artifact_id}/", response_model=ArtifactMediaFileRead)
async def patch_artifact_mediafile(
    artifact_id: uuid.UUID,
    body: ArtifactMediaFileUpdate,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> ArtifactMediaFileRead:
    # Injected only so that the get_current_user dependency runs.
    del current_user

    repo = ArtifactRepository(db)
    existing = await repo.get_by_id(artifact_id)
    if existing is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    updated = await repo.update(existing, body)
    return ArtifactMediaFileRead.model_validate(updated)
# DELETE /api/media/artifacts/{id}/ — soft-delete one artifact, answer 204;
# 404 when the repository finds nothing.
# NOTE(review): unlike the media-file endpoints, no owner/staff check is
# performed here — confirm this is intended.
@artifacts_router.delete(
    "/artifacts/{artifact_id}/", status_code=status.HTTP_204_NO_CONTENT
)
async def delete_artifact_mediafile(
    artifact_id: uuid.UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Response:
    # Injected only so that the get_current_user dependency runs.
    del current_user

    repo = ArtifactRepository(db)
    target = await repo.get_by_id(artifact_id)
    if target is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    await repo.mark_deleted(target)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
+150
View File
@@ -0,0 +1,150 @@
from __future__ import annotations
from datetime import datetime
from typing import Literal
from uuid import UUID
from pydantic import ConfigDict
from cpv3.common.schemas import Schema
# Closed set of artifact kinds accepted by the artifact schemas in this module.
ArtifactTypeEnum = Literal[
    "TRANSCRIPTION_JSON",
    "SILENCE_REMOVED_VIDEO",
    "THUMBNAIL",
    "AUDIO_PROXY",
    "RENDERED_VIDEO",
]
# Response shape for a stored media file.
class MediaFileRead(Schema):
    # identity / references
    id: UUID
    owner_id: UUID
    project_id: UUID | None

    # technical properties
    duration_seconds: float
    frame_rate: float | None
    width: int | None
    height: int | None

    # free-form payloads
    probe_json: dict | None
    notes: str | None
    meta: dict | None

    # status flags and timestamps
    is_deleted: bool
    is_active: bool
    created_at: datetime
    updated_at: datetime
# Request body for creating a media file; only `duration_seconds` is required.
class MediaFileCreate(Schema):
    project_id: UUID | None = None

    duration_seconds: float
    frame_rate: float | None = None
    width: int | None = None
    height: int | None = None

    probe_json: dict | None = None
    notes: str | None = None
    meta: dict | None = None
# Request body for a partial media-file update; every field is optional and
# defaults to None.
class MediaFileUpdate(Schema):
    notes: str | None = None
    meta: dict | None = None
    is_deleted: bool | None = None
# Response shape for a stored artifact.
class ArtifactMediaFileRead(Schema):
    # identity / references
    id: UUID
    project_id: UUID | None
    file_id: UUID | None
    media_file_id: UUID

    # one of the ArtifactTypeEnum literals
    artifact_type: ArtifactTypeEnum

    # status flags and timestamps
    is_deleted: bool
    is_active: bool
    created_at: datetime
    updated_at: datetime
# Request body for creating an artifact; `media_file_id` and `artifact_type`
# are required, the other references are optional.
class ArtifactMediaFileCreate(Schema):
    project_id: UUID | None = None
    file_id: UUID | None = None
    media_file_id: UUID

    artifact_type: ArtifactTypeEnum
# Request body for a partial artifact update; only the soft-delete flag can
# be supplied.
class ArtifactMediaFileUpdate(Schema):
    is_deleted: bool | None = None
# Disposition flags of a stream. Only `default` is declared explicitly;
# extra="allow" keeps any additional keys that are supplied.
class DispositionSchema(Schema):
    model_config = ConfigDict(extra="allow")

    default: int | None = None
# One stream entry of a probe result (presumably mirrors an element of
# ffprobe's "streams" array — confirm against the probe output).
# Every field is optional, and extra="allow" keeps undeclared keys.
class StreamSchema(Schema):
    model_config = ConfigDict(extra="allow")

    # identification / codec
    index: int | None = None
    codec_name: str | None = None
    codec_long_name: str | None = None
    profile: str | None = None
    codec_type: str | None = None
    codec_tag_string: str | None = None
    codec_tag: str | None = None

    # picture size
    width: int | None = None
    height: int | None = None

    id: str | None = None

    # timing; rates, times and counts are declared as strings here
    r_frame_rate: str | None = None
    avg_frame_rate: str | None = None
    time_base: str | None = None
    start_pts: int | None = None
    start_time: str | None = None
    duration_ts: int | None = None
    duration: str | None = None
    bit_rate: str | None = None
    nb_frames: str | None = None

    extradata_size: int | None = None
    disposition: DispositionSchema | None = None
    tags: dict[str, str] | None = None
# Container-level section of a probe result (presumably mirrors ffprobe's
# "format" object — confirm against the probe output).
# Every field is optional, and extra="allow" keeps undeclared keys.
class FormatSchema(Schema):
    model_config = ConfigDict(extra="allow")

    filename: str | None = None
    nb_streams: int | None = None
    format_name: str | None = None
    format_long_name: str | None = None

    # times, sizes and rates are declared as strings here
    start_time: str | None = None
    duration: str | None = None
    size: str | None = None
    bit_rate: str | None = None

    probe_score: int | None = None
    tags: dict[str, str] | None = None
# Top-level probe result: a list of streams plus an optional format section.
# extra="allow" keeps undeclared top-level keys.
class MediaProbeSchema(Schema):
    model_config = ConfigDict(extra="allow")

    streams: list[StreamSchema] = []
    format: FormatSchema | None = None
# Request body for the silence-removal endpoint. Only `file_path` is
# required; the remaining fields carry the defaults shown.
class MediaSilencerParams(Schema):
    file_path: str
    folder: str = ""

    # detection tuning
    min_silence_duration_ms: int = 200
    silence_threshold_db: int = 16
    padding_ms: int = 100
# Request body for the conversion endpoint; `folder` defaults to "".
class MediaConverterParams(Schema):
    file_path: str
    folder: str = ""
+266
View File
@@ -0,0 +1,266 @@
from __future__ import annotations
import asyncio
from os import path
from tempfile import NamedTemporaryFile
from typing import Callable
import anyio
from cpv3.infrastructure.storage.base import StorageService
from cpv3.infrastructure.storage.types import FileInfo
from cpv3.modules.media.schemas import MediaProbeSchema
async def probe_media(storage: StorageService, *, file_key: str) -> MediaProbeSchema:
    """Run ``ffprobe`` on a stored object and return its parsed JSON report.

    The object is first downloaded to a temporary location, which is cleaned
    up whether or not probing succeeds.

    Raises:
        RuntimeError: if ``ffprobe`` exits with a non-zero status; the message
            carries its stderr output.
    """
    import json

    local_copy = await storage.download_to_temp(file_key)
    try:
        ffprobe_args = (
            "ffprobe",
            "-v",
            "error",
            "-show_streams",
            "-show_format",
            "-of",
            "json",
            local_copy.path,
        )
        proc = await asyncio.create_subprocess_exec(
            *ffprobe_args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        out_bytes, err_bytes = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(f"ffprobe failed: {err_bytes.decode(errors='ignore')}")

        report = json.loads(out_bytes.decode())
        return MediaProbeSchema.model_validate(report)
    finally:
        local_copy.cleanup()
def _compute_non_silent_segments(
    *,
    local_audio_path: str,
    min_silence_duration_ms: int,
    silence_threshold_db: int,
    padding_ms: int,
) -> list[tuple[int, int]]:
    """Return the padded, non-overlapping audible ranges of an audio file.

    Args:
        local_audio_path: Path of a local file that pydub can decode.
        min_silence_duration_ms: Passed to pydub as ``min_silence_len``.
        silence_threshold_db: Subtracted from the file's overall dBFS level to
            obtain the silence threshold.
        padding_ms: Milliseconds added before and after every detected range,
            clamped to the bounds of the file.

    Returns:
        ``(start_ms, end_ms)`` tuples in ascending order. Ranges that overlap
        or touch after padding are merged, so no part of the file is covered
        twice. An empty list is returned when the audio has no measurable
        level at all.
    """
    from pydub import AudioSegment, silence  # type: ignore[import-untyped]

    audio: AudioSegment = AudioSegment.from_file(local_audio_path)
    duration_ms = len(audio)

    # pydub reports -inf dBFS for empty or digitally silent audio; int(-inf)
    # would raise OverflowError, and such a file has nothing audible anyway.
    loudness = audio.dBFS
    if loudness == float("-inf"):
        return []

    raw_segments = silence.detect_nonsilent(
        audio_segment=audio,
        min_silence_len=min_silence_duration_ms,
        silence_thresh=int(loudness - silence_threshold_db),
    )

    segments: list[tuple[int, int]] = []
    for start_ms, end_ms in raw_segments:
        start = max(0, start_ms - padding_ms)
        end = min(duration_ms, end_ms + padding_ms)
        if end <= start:
            continue
        if segments and start <= segments[-1][1]:
            # Padding made this range run into the previous one. Merge them:
            # overlapping cuts would duplicate material once concatenated.
            prev_start, prev_end = segments[-1]
            segments[-1] = (prev_start, max(prev_end, end))
        else:
            segments.append((start, end))
    return segments
def _build_silence_cut_command(
    input_path: str, segments: list[tuple[int, int]], out_path: str
) -> list[str]:
    """Build the ffmpeg argv that concatenates *segments* of *input_path* into *out_path*."""
    # "-y" is a global option, so it is emitted once instead of once per input.
    cmd: list[str] = ["ffmpeg", "-y"]

    # The same file is opened once per segment, each time trimmed with -ss/-t.
    for start_ms, end_ms in segments:
        start_s = start_ms / 1000.0
        duration_s = (end_ms - start_ms) / 1000.0
        cmd.extend(["-ss", f"{start_s:.3f}", "-t", f"{duration_s:.3f}", "-i", input_path])

    seg_count = len(segments)
    pads = "".join(f"[{i}:v:0][{i}:a:0]" for i in range(seg_count))
    cmd.extend(
        [
            "-filter_complex",
            f"{pads}concat=n={seg_count}:v=1:a=1[v][a]",
            "-map",
            "[v]",
            "-map",
            "[a]",
            "-c:v",
            "libx264",
            "-c:a",
            "aac",
            "-preset",
            "medium",
            # Force the MP4 muxer. Otherwise ffmpeg picks the container from
            # the output file's extension (copied from the source), which
            # fails for containers that cannot hold H.264/AAC, e.g. WebM.
            "-f",
            "mp4",
            out_path,
        ]
    )
    return cmd


async def remove_silence(
    storage: StorageService,
    *,
    file_key: str,
    out_folder: str,
    min_silence_duration_ms: int = 200,
    silence_threshold_db: int = 16,
    padding_ms: int = 100,
) -> FileInfo:
    """Cut silent passages out of a stored media file and upload the result.

    The source is downloaded to a temporary file, audible segments are
    computed in a worker thread, and ffmpeg re-encodes and concatenates those
    segments (H.264 video, AAC audio, MP4 container). The result is uploaded
    under ``<out_folder>/silent/<original basename>`` with content type
    ``video/mp4``.

    Args:
        storage: Storage backend used for download, upload and file info.
        file_key: Key of the source object.
        out_folder: Folder prefix for the uploaded result; may be empty.
        min_silence_duration_ms: Forwarded to segment detection.
        silence_threshold_db: Forwarded to segment detection.
        padding_ms: Forwarded to segment detection.

    Returns:
        File info of the uploaded result, or of the untouched source when no
        audible segment was detected.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status; the message
            carries its stderr output.
    """
    import os

    input_tmp = await storage.download_to_temp(file_key)
    try:
        segments = await anyio.to_thread.run_sync(
            lambda: _compute_non_silent_segments(
                local_audio_path=input_tmp.path,
                min_silence_duration_ms=min_silence_duration_ms,
                silence_threshold_db=silence_threshold_db,
                padding_ms=padding_ms,
            )
        )
        if not segments:
            # Nothing to keep was detected: hand back the original unchanged.
            return await storage.get_file_info(file_key)

        # Reserve a path only; ffmpeg overwrites the empty placeholder (-y).
        with NamedTemporaryFile(
            suffix=path.splitext(file_key)[1] or ".mp4", delete=False
        ) as out:
            out_path = out.name

        try:
            cmd = _build_silence_cut_command(input_tmp.path, segments, out_path)
            proc = await asyncio.create_subprocess_exec(
                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
            )
            _, stderr = await proc.communicate()
            if proc.returncode != 0:
                raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")

            output_key = path.join(out_folder or "", "silent", path.basename(file_key))
            with open(out_path, "rb") as out_file:
                await storage.upload_fileobj(
                    fileobj=out_file,
                    file_name=path.basename(output_key),
                    folder=path.dirname(output_key),
                    gen_name=False,
                    content_type="video/mp4",
                )
            return await storage.get_file_info(output_key)
        finally:
            if os.path.exists(out_path):
                os.remove(out_path)
    finally:
        input_tmp.cleanup()
async def convert_to_mp4(
    storage: StorageService, *, file_key: str, out_folder: str
) -> FileInfo:
    """Re-encode a stored media file to MP4 (H.264/AAC) and upload the result.

    The result is uploaded under ``<out_folder>/converted/<stem>.mp4`` with
    content type ``video/mp4``. Both temporary files are removed whether or
    not the conversion succeeds.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status; the message
            carries its stderr output.
    """
    import os

    source = await storage.download_to_temp(file_key)
    try:
        stem, _ext = path.splitext(path.basename(file_key))
        mp4_filename = stem + ".mp4"

        # Reserve a path only; ffmpeg overwrites the empty placeholder (-y).
        with NamedTemporaryFile(suffix=".mp4", delete=False) as placeholder:
            out_path = placeholder.name

        try:
            ffmpeg_args = (
                "ffmpeg",
                "-y",
                "-i",
                source.path,
                "-c:v",
                "libx264",
                "-c:a",
                "aac",
                "-preset",
                "medium",
                "-f",
                "mp4",
                out_path,
            )
            proc = await asyncio.create_subprocess_exec(
                *ffmpeg_args,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            _stdout, err_bytes = await proc.communicate()
            if proc.returncode != 0:
                raise RuntimeError(f"ffmpeg failed: {err_bytes.decode(errors='ignore')}")

            output_key = path.join(out_folder or "", "converted", mp4_filename)
            with open(out_path, "rb") as converted:
                await storage.upload_fileobj(
                    fileobj=converted,
                    file_name=mp4_filename,
                    folder=path.dirname(output_key),
                    gen_name=False,
                    content_type="video/mp4",
                )
            return await storage.get_file_info(output_key)
        finally:
            if os.path.exists(out_path):
                os.remove(out_path)
    finally:
        source.cleanup()
async def convert_to_ogg_temp(
    storage: StorageService, *, file_key: str
) -> tuple[str, Callable[[], None]]:
    """Transcode a stored file to a temporary mono 16 kHz Opus ``.ogg`` file.

    Args:
        storage: Storage backend used to download the source.
        file_key: Key of the source object.

    Returns:
        ``(out_path, cleanup)``: the path of the local ``.ogg`` file and a
        callable that removes both the downloaded source and that file. The
        caller must invoke ``cleanup`` once it is done with the file.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status; the message
            carries its stderr output. On any failure (including
            cancellation) the temporary files are cleaned up before the
            exception propagates, so nothing is leaked when no cleanup
            callable can be returned.
    """
    import os

    input_tmp = await storage.download_to_temp(file_key)
    out_path: str | None = None

    def _cleanup() -> None:
        input_tmp.cleanup()
        if out_path is not None and os.path.exists(out_path):
            os.remove(out_path)

    try:
        # Reserve a path only; ffmpeg overwrites the empty placeholder (-y).
        with NamedTemporaryFile(suffix=".ogg", delete=False) as out:
            out_path = out.name

        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            input_tmp.path,
            "-c:a",
            "libopus",
            "-b:a",
            "24k",
            "-vn",
            "-ac",
            "1",
            "-ar",
            "16000",
            out_path,
        ]
        proc = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        _, stderr = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
    except BaseException:
        # The caller never receives the cleanup callable on failure, so the
        # temporary files have to be released here.
        _cleanup()
        raise

    return out_path, _cleanup