init: new structure + fix lint errors
This commit is contained in:
@@ -0,0 +1,54 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import Boolean, Float, ForeignKey, Integer, JSON, String, Text
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from cpv3.db.base import Base, BaseModelMixin
|
||||
|
||||
|
||||
class MediaFile(Base, BaseModelMixin):
    """ORM mapping for the ``media_files`` table.

    Each row references an owning user, optionally a project, and carries
    basic technical properties, two free-form JSON payloads, a text note and
    a soft-delete flag.
    """

    __tablename__ = "media_files"

    # Required reference to users.id; RESTRICT forbids deleting a user that
    # still owns media files.
    owner_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="RESTRICT"),
        index=True,
    )
    # Optional reference to projects.id.
    project_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="RESTRICT"),
        nullable=True,
        index=True,
    )

    # Technical properties; only the duration is mandatory.
    duration_seconds: Mapped[float] = mapped_column(Float)
    frame_rate: Mapped[float | None] = mapped_column(Float, nullable=True)
    width: Mapped[int | None] = mapped_column(Integer, nullable=True)
    height: Mapped[int | None] = mapped_column(Integer, nullable=True)

    # Free-form payloads. probe_json presumably holds raw probe output —
    # TODO confirm against the code that fills it.
    probe_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)
    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
    meta: Mapped[dict | None] = mapped_column(JSON, nullable=True)

    # Soft-delete marker; new rows default to "not deleted".
    is_deleted: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
|
||||
|
||||
class ArtifactMediaFile(Base, BaseModelMixin):
    """ORM mapping for the ``artifact_media_files`` table.

    A row ties an artifact of a given type to a media file and, optionally,
    to a project and to a row of the ``files`` table.
    """

    __tablename__ = "artifact_media_files"

    # Optional reference to projects.id.
    project_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="RESTRICT"),
        nullable=True,
        index=True,
    )
    # Optional reference to files.id.
    file_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("files.id", ondelete="RESTRICT"),
        nullable=True,
        index=True,
    )
    # Required reference to the media file this artifact belongs to.
    media_file_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("media_files.id", ondelete="RESTRICT"),
        index=True,
    )

    # Artifact kind stored as a short string (at most 32 characters).
    artifact_type: Mapped[str] = mapped_column(
        String(32),
        default="TRANSCRIPTION_JSON",
    )
    # Soft-delete marker; new rows default to "not deleted".
    is_deleted: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
@@ -0,0 +1,124 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import Select, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from cpv3.modules.media.models import ArtifactMediaFile, MediaFile
|
||||
from cpv3.modules.media.schemas import (
|
||||
ArtifactMediaFileCreate,
|
||||
ArtifactMediaFileUpdate,
|
||||
MediaFileCreate,
|
||||
MediaFileUpdate,
|
||||
)
|
||||
from cpv3.modules.users.models import User
|
||||
|
||||
|
||||
class MediaFileRepository:
    """Data-access helper that reads and writes ``MediaFile`` rows."""

    def __init__(self, session: AsyncSession) -> None:
        """Store the async session used by every operation of this repository."""
        self._session = session

    async def list_all(self, *, requester: User) -> list[MediaFile]:
        """Return all non-deleted media files, newest first.

        Staff requesters get every row; any other requester only gets rows
        whose ``owner_id`` equals their own id.
        """
        conditions = [MediaFile.is_deleted.is_(False)]
        if not requester.is_staff:
            conditions.append(MediaFile.owner_id == requester.id)

        query: Select[tuple[MediaFile]] = select(MediaFile).where(*conditions)
        rows = await self._session.execute(
            query.order_by(MediaFile.created_at.desc())
        )
        return list(rows.scalars().all())

    async def get_by_id(self, media_file_id: uuid.UUID) -> MediaFile | None:
        """Return the media file with the given id, or ``None``.

        ``None`` is returned both when no row matches and when the matching
        row is flagged as deleted.
        """
        query = select(MediaFile).where(MediaFile.id == media_file_id)
        found = (await self._session.execute(query)).scalar_one_or_none()
        if found is not None and not found.is_deleted:
            return found
        return None

    async def create(self, *, requester: User, data: MediaFileCreate) -> MediaFile:
        """Insert a media file owned by ``requester`` and return the stored row."""
        record = MediaFile(
            owner_id=requester.id,
            project_id=data.project_id,
            duration_seconds=data.duration_seconds,
            frame_rate=data.frame_rate,
            width=data.width,
            height=data.height,
            probe_json=data.probe_json,
            notes=data.notes,
            meta=data.meta,
        )
        self._session.add(record)
        await self._session.commit()
        # Reload so attributes populated during the commit are available.
        await self._session.refresh(record)
        return record

    async def update(self, media_file: MediaFile, data: MediaFileUpdate) -> MediaFile:
        """Apply the explicitly provided, non-``None`` fields of ``data``.

        Fields that were not set on ``data`` and fields set to ``None`` are
        both left untouched on ``media_file``.
        """
        changes = data.model_dump(exclude_unset=True)
        for field_name, new_value in changes.items():
            if new_value is None:
                continue
            setattr(media_file, field_name, new_value)

        await self._session.commit()
        await self._session.refresh(media_file)
        return media_file

    async def mark_deleted(self, media_file: MediaFile) -> None:
        """Soft-delete ``media_file`` by setting its flag and committing."""
        media_file.is_deleted = True
        await self._session.commit()
|
||||
|
||||
|
||||
class ArtifactRepository:
    """Data-access helper that reads and writes ``ArtifactMediaFile`` rows."""

    def __init__(self, session: AsyncSession) -> None:
        """Store the async session used by every operation of this repository."""
        self._session = session

    async def list_all(self) -> list[ArtifactMediaFile]:
        """Return all non-deleted artifacts, newest first."""
        query = select(ArtifactMediaFile).where(
            ArtifactMediaFile.is_deleted.is_(False)
        )
        rows = await self._session.execute(
            query.order_by(ArtifactMediaFile.created_at.desc())
        )
        return list(rows.scalars().all())

    async def get_by_id(self, artifact_id: uuid.UUID) -> ArtifactMediaFile | None:
        """Return the artifact with the given id, or ``None``.

        ``None`` is returned both when no row matches and when the matching
        row is flagged as deleted.
        """
        query = select(ArtifactMediaFile).where(ArtifactMediaFile.id == artifact_id)
        found = (await self._session.execute(query)).scalar_one_or_none()
        if found is not None and not found.is_deleted:
            return found
        return None

    async def create(self, data: ArtifactMediaFileCreate) -> ArtifactMediaFile:
        """Insert an artifact built from ``data`` and return the stored row."""
        record = ArtifactMediaFile(
            project_id=data.project_id,
            file_id=data.file_id,
            media_file_id=data.media_file_id,
            artifact_type=data.artifact_type,
        )
        self._session.add(record)
        await self._session.commit()
        # Reload so attributes populated during the commit are available.
        await self._session.refresh(record)
        return record

    async def update(
        self,
        artifact: ArtifactMediaFile,
        data: ArtifactMediaFileUpdate,
    ) -> ArtifactMediaFile:
        """Apply the explicitly provided, non-``None`` fields of ``data``.

        Fields that were not set on ``data`` and fields set to ``None`` are
        both left untouched on ``artifact``.
        """
        changes = data.model_dump(exclude_unset=True)
        for field_name, new_value in changes.items():
            if new_value is None:
                continue
            setattr(artifact, field_name, new_value)

        await self._session.commit()
        await self._session.refresh(artifact)
        return artifact

    async def mark_deleted(self, artifact: ArtifactMediaFile) -> None:
        """Soft-delete ``artifact`` by setting its flag and committing."""
        artifact.is_deleted = True
        await self._session.commit()
|
||||
@@ -0,0 +1,232 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from cpv3.infrastructure.auth import get_current_user
|
||||
from cpv3.infrastructure.deps import get_storage
|
||||
from cpv3.infrastructure.storage.base import StorageService
|
||||
from cpv3.db.session import get_db
|
||||
from cpv3.modules.media.schemas import (
|
||||
ArtifactMediaFileCreate,
|
||||
ArtifactMediaFileRead,
|
||||
ArtifactMediaFileUpdate,
|
||||
MediaConverterParams,
|
||||
MediaFileCreate,
|
||||
MediaFileRead,
|
||||
MediaFileUpdate,
|
||||
MediaProbeSchema,
|
||||
MediaSilencerParams,
|
||||
)
|
||||
from cpv3.modules.media.service import convert_to_mp4, probe_media, remove_silence
|
||||
from cpv3.modules.media.repository import ArtifactRepository, MediaFileRepository
|
||||
from cpv3.modules.files.schemas import FileInfoResponse
|
||||
from cpv3.modules.users.models import User
|
||||
|
||||
# All three routers are mounted under the same URL prefix; they differ only
# in the tag attached to their routes.
media_router = APIRouter(
    prefix="/api/media",
    tags=["media"],
)
mediafiles_router = APIRouter(
    prefix="/api/media",
    tags=["mediafiles"],
)
artifacts_router = APIRouter(
    prefix="/api/media",
    tags=["artifacts"],
)
|
||||
|
||||
|
||||
@media_router.get("/get_meta/", response_model=MediaProbeSchema)
async def get_meta(
    file_path: str = Query(...),
    current_user: User = Depends(get_current_user),
    storage: StorageService = Depends(get_storage),
) -> MediaProbeSchema:
    # Probe the stored file identified by ``file_path`` and return the result.
    # NOTE(review): file_path is handed to storage unchanged and no per-user
    # ownership check is visible here — confirm access control lives elsewhere.

    # The dependency is resolved for its own sake; the user value is unused.
    _ = current_user

    probe_result = await probe_media(storage, file_key=file_path)
    return probe_result
|
||||
|
||||
|
||||
@media_router.post("/silence_remove", response_model=FileInfoResponse)
async def silence_remove(
    body: MediaSilencerParams,
    current_user: User = Depends(get_current_user),
    storage: StorageService = Depends(get_storage),
) -> FileInfoResponse:
    # Run silence removal on the stored file named in the request body and
    # describe the resulting file.
    # NOTE(review): body.file_path is handed to storage unchanged and no
    # per-user ownership check is visible here — confirm this is intended.

    # The dependency is resolved for its own sake; the user value is unused.
    _ = current_user

    result = await remove_silence(
        storage,
        file_key=body.file_path,
        out_folder=body.folder,
        min_silence_duration_ms=body.min_silence_duration_ms,
        silence_threshold_db=body.silence_threshold_db,
        padding_ms=body.padding_ms,
    )

    # Copy the four response fields from the storage-level result.
    response = FileInfoResponse(
        file_path=result.file_path,
        file_url=result.file_url,
        file_size=result.file_size,
        filename=result.filename,
    )
    return response
|
||||
|
||||
|
||||
@media_router.post("/convert", response_model=FileInfoResponse)
async def convert(
    body: MediaConverterParams,
    current_user: User = Depends(get_current_user),
    storage: StorageService = Depends(get_storage),
) -> FileInfoResponse:
    # Convert the stored file named in the request body to MP4 and describe
    # the resulting file.
    # NOTE(review): body.file_path is handed to storage unchanged and no
    # per-user ownership check is visible here — confirm this is intended.

    # The dependency is resolved for its own sake; the user value is unused.
    _ = current_user

    result = await convert_to_mp4(
        storage,
        file_key=body.file_path,
        out_folder=body.folder,
    )

    # Copy the four response fields from the storage-level result.
    response = FileInfoResponse(
        file_path=result.file_path,
        file_url=result.file_url,
        file_size=result.file_size,
        filename=result.filename,
    )
    return response
|
||||
|
||||
|
||||
@mediafiles_router.get("/mediafiles/", response_model=list[MediaFileRead])
async def list_mediafiles(
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[MediaFileRead]:
    # List the media files visible to the current user; the repository
    # applies the staff/owner filtering.
    rows = await MediaFileRepository(db).list_all(requester=current_user)

    serialized: list[MediaFileRead] = []
    for row in rows:
        serialized.append(MediaFileRead.model_validate(row))
    return serialized
|
||||
|
||||
|
||||
@mediafiles_router.post(
    "/mediafiles/",
    response_model=MediaFileRead,
    status_code=status.HTTP_201_CREATED,
)
async def create_mediafile(
    body: MediaFileCreate,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> MediaFileRead:
    # Create a media file owned by the current user and return it.
    created = await MediaFileRepository(db).create(requester=current_user, data=body)
    return MediaFileRead.model_validate(created)
|
||||
|
||||
|
||||
@mediafiles_router.get("/mediafiles/{media_file_id}/", response_model=MediaFileRead)
async def retrieve_mediafile(
    media_file_id: uuid.UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> MediaFileRead:
    # Return one media file: 404 when it is missing or soft-deleted,
    # 403 when the caller is neither staff nor the owner.
    media_file = await MediaFileRepository(db).get_by_id(media_file_id)
    if media_file is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    if not (current_user.is_staff or media_file.owner_id == current_user.id):
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")

    return MediaFileRead.model_validate(media_file)
|
||||
|
||||
|
||||
@mediafiles_router.patch("/mediafiles/{media_file_id}/", response_model=MediaFileRead)
async def patch_mediafile(
    media_file_id: uuid.UUID,
    body: MediaFileUpdate,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> MediaFileRead:
    # Partially update one media file: 404 when it is missing or
    # soft-deleted, 403 when the caller is neither staff nor the owner.
    repository = MediaFileRepository(db)

    existing = await repository.get_by_id(media_file_id)
    if existing is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    if not (current_user.is_staff or existing.owner_id == current_user.id):
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")

    updated = await repository.update(existing, body)
    return MediaFileRead.model_validate(updated)
|
||||
|
||||
|
||||
@mediafiles_router.delete(
    "/mediafiles/{media_file_id}/",
    status_code=status.HTTP_204_NO_CONTENT,
)
async def delete_mediafile(
    media_file_id: uuid.UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Response:
    # Soft-delete one media file: 404 when it is missing or already
    # soft-deleted, 403 when the caller is neither staff nor the owner.
    repository = MediaFileRepository(db)

    existing = await repository.get_by_id(media_file_id)
    if existing is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    if not (current_user.is_staff or existing.owner_id == current_user.id):
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")

    await repository.mark_deleted(existing)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
|
||||
|
||||
|
||||
@artifacts_router.get("/artifacts/", response_model=list[ArtifactMediaFileRead])
async def list_artifact_mediafiles(
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[ArtifactMediaFileRead]:
    # List every non-deleted artifact.
    # NOTE(review): unlike the mediafile endpoints, no ownership/staff
    # filtering is applied here — confirm this is intended.

    # The dependency is resolved for its own sake; the user value is unused.
    _ = current_user

    rows = await ArtifactRepository(db).list_all()

    serialized: list[ArtifactMediaFileRead] = []
    for row in rows:
        serialized.append(ArtifactMediaFileRead.model_validate(row))
    return serialized
|
||||
|
||||
|
||||
@artifacts_router.post(
    "/artifacts/",
    response_model=ArtifactMediaFileRead,
    status_code=status.HTTP_201_CREATED,
)
async def create_artifact_mediafile(
    body: ArtifactMediaFileCreate,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> ArtifactMediaFileRead:
    # Create an artifact from the request body and return it.
    # NOTE(review): the caller's relation to body.media_file_id is not
    # checked here — confirm this is intended.

    # The dependency is resolved for its own sake; the user value is unused.
    _ = current_user

    created = await ArtifactRepository(db).create(body)
    return ArtifactMediaFileRead.model_validate(created)
|
||||
|
||||
|
||||
@artifacts_router.get("/artifacts/{artifact_id}/", response_model=ArtifactMediaFileRead)
async def retrieve_artifact_mediafile(
    artifact_id: uuid.UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> ArtifactMediaFileRead:
    # Return one artifact, or 404 when it is missing or soft-deleted.
    # NOTE(review): unlike the mediafile endpoints, no ownership/staff check
    # is performed here — confirm this is intended.

    # The dependency is resolved for its own sake; the user value is unused.
    _ = current_user

    found = await ArtifactRepository(db).get_by_id(artifact_id)
    if found is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    return ArtifactMediaFileRead.model_validate(found)
|
||||
|
||||
|
||||
@artifacts_router.patch("/artifacts/{artifact_id}/", response_model=ArtifactMediaFileRead)
async def patch_artifact_mediafile(
    artifact_id: uuid.UUID,
    body: ArtifactMediaFileUpdate,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> ArtifactMediaFileRead:
    # Partially update one artifact, or 404 when it is missing or
    # soft-deleted.
    # NOTE(review): unlike the mediafile endpoints, no ownership/staff check
    # is performed here — confirm this is intended.

    # The dependency is resolved for its own sake; the user value is unused.
    _ = current_user

    repository = ArtifactRepository(db)
    existing = await repository.get_by_id(artifact_id)
    if existing is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    updated = await repository.update(existing, body)
    return ArtifactMediaFileRead.model_validate(updated)
|
||||
|
||||
|
||||
@artifacts_router.delete(
    "/artifacts/{artifact_id}/",
    status_code=status.HTTP_204_NO_CONTENT,
)
async def delete_artifact_mediafile(
    artifact_id: uuid.UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Response:
    # Soft-delete one artifact, or 404 when it is missing or already
    # soft-deleted.
    # NOTE(review): unlike the mediafile endpoints, no ownership/staff check
    # is performed here — confirm this is intended.

    # The dependency is resolved for its own sake; the user value is unused.
    _ = current_user

    repository = ArtifactRepository(db)
    existing = await repository.get_by_id(artifact_id)
    if existing is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")

    await repository.mark_deleted(existing)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
|
||||
@@ -0,0 +1,150 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Literal
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import ConfigDict
|
||||
|
||||
from cpv3.common.schemas import Schema
|
||||
|
||||
|
||||
# Closed set of strings accepted for an artifact's ``artifact_type`` field in
# the artifact schemas of this module.
ArtifactTypeEnum = Literal[
    "TRANSCRIPTION_JSON",
    "SILENCE_REMOVED_VIDEO",
    "THUMBNAIL",
    "AUDIO_PROXY",
    "RENDERED_VIDEO",
]
|
||||
|
||||
|
||||
class MediaFileRead(Schema):
    # Response representation of a media file. No field has a default, so
    # every field must be present on the validated object.

    # Identity and references.
    id: UUID
    owner_id: UUID
    project_id: UUID | None

    # Technical properties.
    duration_seconds: float
    frame_rate: float | None
    width: int | None
    height: int | None

    # Free-form payloads.
    probe_json: dict | None
    notes: str | None
    meta: dict | None

    # State flags.
    is_deleted: bool
    is_active: bool

    # Timestamps.
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class MediaFileCreate(Schema):
    # Request body for creating a media file. Only ``duration_seconds`` is
    # required; every other field defaults to ``None``.
    project_id: UUID | None = None
    duration_seconds: float
    frame_rate: float | None = None
    width: int | None = None
    height: int | None = None
    probe_json: dict | None = None
    notes: str | None = None
    meta: dict | None = None
|
||||
|
||||
|
||||
class MediaFileUpdate(Schema):
    # Request body for a partial media file update; every field is optional
    # and defaults to ``None``.
    notes: str | None = None
    meta: dict | None = None
    is_deleted: bool | None = None
|
||||
|
||||
|
||||
class ArtifactMediaFileRead(Schema):
    # Response representation of an artifact. No field has a default, so
    # every field must be present on the validated object.

    # Identity and references.
    id: UUID
    project_id: UUID | None
    file_id: UUID | None
    media_file_id: UUID

    # Artifact kind, restricted to the ArtifactTypeEnum literals.
    artifact_type: ArtifactTypeEnum

    # State flags.
    is_deleted: bool
    is_active: bool

    # Timestamps.
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class ArtifactMediaFileCreate(Schema):
    # Request body for creating an artifact. ``media_file_id`` and
    # ``artifact_type`` are required; the two other references are optional.
    project_id: UUID | None = None
    file_id: UUID | None = None
    media_file_id: UUID
    artifact_type: ArtifactTypeEnum
|
||||
|
||||
|
||||
class ArtifactMediaFileUpdate(Schema):
    # Request body for a partial artifact update; the soft-delete flag is the
    # only field that can be supplied.
    is_deleted: bool | None = None
|
||||
|
||||
|
||||
class DispositionSchema(Schema):
    # Disposition object nested in StreamSchema. Only ``default`` is
    # declared; other keys are accepted because extra fields are allowed.
    model_config = ConfigDict(extra="allow")

    default: int | None = None
|
||||
|
||||
|
||||
class StreamSchema(Schema):
    # One entry of MediaProbeSchema.streams. Every declared field is optional
    # and undeclared keys are accepted because extra fields are allowed.
    model_config = ConfigDict(extra="allow")

    # Stream position and codec identification.
    index: int | None = None
    codec_name: str | None = None
    codec_long_name: str | None = None
    profile: str | None = None
    codec_type: str | None = None
    codec_tag_string: str | None = None
    codec_tag: str | None = None

    # Picture dimensions.
    width: int | None = None
    height: int | None = None

    id: str | None = None

    # Rates, time base and timing values; most are typed as strings.
    r_frame_rate: str | None = None
    avg_frame_rate: str | None = None
    time_base: str | None = None
    start_pts: int | None = None
    start_time: str | None = None
    duration_ts: int | None = None
    duration: str | None = None
    bit_rate: str | None = None
    nb_frames: str | None = None
    extradata_size: int | None = None

    # Nested disposition object and string-to-string tags.
    disposition: DispositionSchema | None = None
    tags: dict[str, str] | None = None
|
||||
|
||||
|
||||
class FormatSchema(Schema):
    # The ``format`` object of MediaProbeSchema. Every declared field is
    # optional and undeclared keys are accepted because extra fields are
    # allowed.
    model_config = ConfigDict(extra="allow")

    filename: str | None = None
    nb_streams: int | None = None
    format_name: str | None = None
    format_long_name: str | None = None

    # Timing and size values are typed as strings.
    start_time: str | None = None
    duration: str | None = None
    size: str | None = None
    bit_rate: str | None = None

    probe_score: int | None = None
    tags: dict[str, str] | None = None
|
||||
|
||||
|
||||
class MediaProbeSchema(Schema):
    # Top-level probe result: a list of streams plus an optional format
    # object. Undeclared keys are accepted because extra fields are allowed.
    model_config = ConfigDict(extra="allow")

    streams: list[StreamSchema] = []
    format: FormatSchema | None = None
|
||||
|
||||
|
||||
class MediaSilencerParams(Schema):
    # Request body of the silence-removal endpoint. Only ``file_path`` is
    # required.
    file_path: str
    folder: str = ""

    # Detection parameters; the defaults equal those of the service-level
    # remove_silence function.
    min_silence_duration_ms: int = 200
    silence_threshold_db: int = 16
    padding_ms: int = 100
|
||||
|
||||
|
||||
class MediaConverterParams(Schema):
    # Request body of the conversion endpoint: the source file path and an
    # optional output folder that defaults to the empty string.
    file_path: str
    folder: str = ""
|
||||
@@ -0,0 +1,266 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from os import path
|
||||
from tempfile import NamedTemporaryFile
|
||||
from typing import Callable
|
||||
|
||||
import anyio
|
||||
|
||||
from cpv3.infrastructure.storage.base import StorageService
|
||||
from cpv3.infrastructure.storage.types import FileInfo
|
||||
from cpv3.modules.media.schemas import MediaProbeSchema
|
||||
|
||||
|
||||
async def probe_media(storage: StorageService, *, file_key: str) -> MediaProbeSchema:
    """Run ``ffprobe`` on a stored file and return the parsed result.

    The file is first downloaded to a temporary location, which is cleaned up
    whether or not probing succeeds.

    Raises:
        RuntimeError: if ``ffprobe`` exits with a non-zero return code.
    """
    import json

    downloaded = await storage.download_to_temp(file_key)
    try:
        # Ask ffprobe for stream and container information as JSON.
        ffprobe_cmd = [
            "ffprobe",
            "-v",
            "error",
            "-show_streams",
            "-show_format",
            "-of",
            "json",
            downloaded.path,
        ]
        process = await asyncio.create_subprocess_exec(
            *ffprobe_cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        out_bytes, err_bytes = await process.communicate()
        if process.returncode != 0:
            raise RuntimeError(f"ffprobe failed: {err_bytes.decode(errors='ignore')}")

        parsed = json.loads(out_bytes.decode())
        return MediaProbeSchema.model_validate(parsed)
    finally:
        downloaded.cleanup()
|
||||
|
||||
|
||||
def _compute_non_silent_segments(
    *,
    local_audio_path: str,
    min_silence_duration_ms: int,
    silence_threshold_db: int,
    padding_ms: int,
) -> list[tuple[int, int]]:
    """Return the padded, non-overlapping non-silent ranges of an audio file.

    Args:
        local_audio_path: Path of a local file readable by pydub.
        min_silence_duration_ms: Minimum silence length passed to pydub's
            ``detect_nonsilent``.
        silence_threshold_db: Number of dB subtracted from the file's average
            level (``dBFS``) to obtain the silence threshold.
        padding_ms: Milliseconds added before and after every detected range,
            clamped to the bounds of the file.

    Returns:
        ``(start_ms, end_ms)`` tuples sorted by start. The list is empty when
        the file contains no measurable signal.
    """
    import math

    from pydub import AudioSegment, silence  # type: ignore[import-untyped]

    audio: AudioSegment = AudioSegment.from_file(local_audio_path)
    duration_ms = len(audio)

    # pydub reports -inf dBFS for digitally silent (or empty) audio; int() of
    # an infinite float raises OverflowError, so there is nothing to detect.
    average_dbfs = audio.dBFS
    if not math.isfinite(average_dbfs):
        return []

    raw_segments = silence.detect_nonsilent(
        audio_segment=audio,
        min_silence_len=min_silence_duration_ms,
        silence_thresh=int(average_dbfs - silence_threshold_db),
    )

    segments: list[tuple[int, int]] = []
    for start_ms, end_ms in sorted(raw_segments):
        start = max(0, start_ms - padding_ms)
        end = min(duration_ms, end_ms + padding_ms)
        if end <= start:
            continue
        if segments and start <= segments[-1][1]:
            # Padding made this range overlap (or touch) the previous one;
            # merge them so no part of the media is emitted twice.
            previous_start, previous_end = segments[-1]
            segments[-1] = (previous_start, max(previous_end, end))
        else:
            segments.append((start, end))

    return segments
|
||||
|
||||
|
||||
async def remove_silence(
    storage: StorageService,
    *,
    file_key: str,
    out_folder: str,
    min_silence_duration_ms: int = 200,
    silence_threshold_db: int = 16,
    padding_ms: int = 100,
) -> FileInfo:
    """Cut the silent parts out of a stored media file and upload the result.

    The file is downloaded, its non-silent ranges are computed in a worker
    thread, and ffmpeg concatenates those ranges into a new file. That file
    is uploaded under ``<out_folder>/silent/<original basename>``. When no
    non-silent range is found, the info of the original file is returned and
    nothing is uploaded. Temporary files are removed in every case.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero return code.
    """
    import os

    source_tmp = await storage.download_to_temp(file_key)
    try:

        def _detect() -> list[tuple[int, int]]:
            # Executed in a worker thread through anyio.
            return _compute_non_silent_segments(
                local_audio_path=source_tmp.path,
                min_silence_duration_ms=min_silence_duration_ms,
                silence_threshold_db=silence_threshold_db,
                padding_ms=padding_ms,
            )

        keep_ranges = await anyio.to_thread.run_sync(_detect)
        if not keep_ranges:
            return await storage.get_file_info(file_key)

        # Reserve an output path; the handle is closed at once and ffmpeg
        # overwrites the file.
        suffix = path.splitext(file_key)[1] or ".mp4"
        with NamedTemporaryFile(suffix=suffix, delete=False) as handle:
            result_path = handle.name

        try:
            # The source is added as one ffmpeg input per kept range, each
            # trimmed with -ss/-t.
            ffmpeg_cmd: list[str] = ["ffmpeg"]
            for begin_ms, finish_ms in keep_ranges:
                begin_s = begin_ms / 1000.0
                length_s = (finish_ms - begin_ms) / 1000.0
                ffmpeg_cmd += [
                    "-ss",
                    f"{begin_s:.3f}",
                    "-t",
                    f"{length_s:.3f}",
                    "-y",
                    "-i",
                    source_tmp.path,
                ]

            # Concatenate the first video and audio stream of every input.
            total = len(keep_ranges)
            stream_labels = "".join(f"[{idx}:v:0][{idx}:a:0]" for idx in range(total))
            filter_graph = stream_labels + f"concat=n={total}:v=1:a=1[v][a]"

            ffmpeg_cmd += [
                "-filter_complex",
                filter_graph,
                "-map",
                "[v]",
                "-map",
                "[a]",
                "-c:v",
                "libx264",
                "-c:a",
                "aac",
                "-preset",
                "medium",
                result_path,
            ]

            process = await asyncio.create_subprocess_exec(
                *ffmpeg_cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            _, err_bytes = await process.communicate()
            if process.returncode != 0:
                raise RuntimeError(f"ffmpeg failed: {err_bytes.decode(errors='ignore')}")

            target_key = path.join(out_folder or "", "silent", path.basename(file_key))
            with open(result_path, "rb") as result_file:
                _ = await storage.upload_fileobj(
                    fileobj=result_file,
                    file_name=path.basename(target_key),
                    folder=path.dirname(target_key),
                    gen_name=False,
                    content_type="video/mp4",
                )

            return await storage.get_file_info(target_key)
        finally:
            if os.path.exists(result_path):
                os.remove(result_path)
    finally:
        source_tmp.cleanup()
|
||||
|
||||
|
||||
async def convert_to_mp4(
    storage: StorageService,
    *,
    file_key: str,
    out_folder: str,
) -> FileInfo:
    """Re-encode a stored media file to MP4 and upload the result.

    The file is downloaded, converted by ffmpeg (libx264 video, AAC audio),
    and uploaded under ``<out_folder>/converted/<original stem>.mp4``.
    Temporary files are removed in every case.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero return code.
    """
    import os

    source_tmp = await storage.download_to_temp(file_key)
    try:
        stem, _extension = path.splitext(path.basename(file_key))
        target_name = stem + ".mp4"

        # Reserve an output path; the handle is closed at once and ffmpeg
        # overwrites the file (-y).
        with NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
            encoded_path = handle.name

        try:
            process = await asyncio.create_subprocess_exec(
                "ffmpeg",
                "-y",
                "-i",
                source_tmp.path,
                "-c:v",
                "libx264",
                "-c:a",
                "aac",
                "-preset",
                "medium",
                "-f",
                "mp4",
                encoded_path,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            _, err_bytes = await process.communicate()
            if process.returncode != 0:
                raise RuntimeError(f"ffmpeg failed: {err_bytes.decode(errors='ignore')}")

            target_key = path.join(out_folder or "", "converted", target_name)
            with open(encoded_path, "rb") as encoded_file:
                _ = await storage.upload_fileobj(
                    fileobj=encoded_file,
                    file_name=target_name,
                    folder=path.dirname(target_key),
                    gen_name=False,
                    content_type="video/mp4",
                )

            return await storage.get_file_info(target_key)
        finally:
            if os.path.exists(encoded_path):
                os.remove(encoded_path)
    finally:
        source_tmp.cleanup()
|
||||
|
||||
|
||||
async def convert_to_ogg_temp(
    storage: StorageService, *, file_key: str
) -> tuple[str, Callable[[], None]]:
    """Transcode a stored file to a temporary mono 16 kHz Opus ``.ogg`` file.

    Args:
        storage: Storage service used to download the source file.
        file_key: Storage key of the source file.

    Returns:
        A ``(path, cleanup)`` tuple: the path of the temporary ``.ogg`` file
        and a callable that removes it together with the downloaded source.
        The caller must invoke ``cleanup`` once it is done with the file.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero return code. On any
            failure, including cancellation, both temporary files are removed
            before the exception propagates.
    """
    import os

    input_tmp = await storage.download_to_temp(file_key)
    out_path = ""

    def _cleanup() -> None:
        # Reads the current value of out_path from the enclosing scope.
        input_tmp.cleanup()
        if out_path and os.path.exists(out_path):
            os.remove(out_path)

    try:
        # Reserve an output path; the handle is closed at once and ffmpeg
        # overwrites the file (-y).
        with NamedTemporaryFile(suffix=".ogg", delete=False) as out:
            out_path = out.name

        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            input_tmp.path,
            "-c:a",
            "libopus",
            "-b:a",
            "24k",
            "-vn",
            "-ac",
            "1",
            "-ar",
            "16000",
            out_path,
        ]

        proc = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        _, stderr = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
    except BaseException:
        # The caller never receives the cleanup callback on failure, so the
        # temporary files must be removed here to avoid leaking them.
        _cleanup()
        raise

    return out_path, _cleanup
|
||||
Reference in New Issue
Block a user