init: new structure + fix lint errors

commit 67e0f22b4f
Author: Daniil
Date: 2026-02-03 02:15:07 +03:00
89 changed files with 7654 additions and 0 deletions
+2
View File
@@ -0,0 +1,2 @@
.venv
.ruff_cache
+15
View File
@@ -0,0 +1,15 @@
.venv/
__pycache__/
*.py[cod]
.pytest_cache/
.ruff_cache/
.mypy_cache/
# Packaging/build artifacts
*.egg-info/
build/
dist/
# OS / editor
.DS_Store
.vscode/
+13
View File
@@ -0,0 +1,13 @@
{
"type": "service_account",
"project_id": "gen-lang-client-0718067008",
"private_key_id": "506e4e1119348c3d76a4fb1b5eb3d420155da863",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCyATEgxUYGFRah\nGBwVWTg5Yk2CH3em6guLrBseJiXODSQi/HeAf654EYzd1Z28EcqVECxekK05EWRo\nx3UyQcsUPe1O7vc2I91xtBEkOS+IgcfWu3+QHmio178657Arxzf51oCn2tYqZmbt\ntXluX/3EjQnHgbLJlBzUjht4zd6z2f0Pa2B8ulRWcpZ2bcPpsG3iHzZhzcqu5Z5V\nHCMAoAykxIJbw/z0alFWCww5oDzEfXoP6I4+2uvGkLDd/6lCovrs+4PpbV88aBU2\n0Gy5t+/0RkUKhyh73QU4ogmaBV1zD50fVFHNqKga+QfkKMFfCcPmhOyRWMJaUqj9\naEFMAWEDAgMBAAECggEACALYuK5YM+7gCVkB7o4I40BC17/dzTPMWDljMQyrd+YM\ngcyV/kEA06OKkPNLuYqdcO786DafrSlUuQh9BEmEv8D3vK0xkAy2Kydc/CHcu0qH\n+WiP2accs7ieASMrQRUitkpKmQNWzyqzqDAaKncEf0OPtJxoNP1G/OP2ZY3yX2q1\nUXOwMuPKZmfMN+Z+hsQmJ/H+ny1g9vWjEGilaLWfS1/JKXPs9pA1LMC+ZEHwzzFO\nWVyt2VSzS2NNp6GGsj6F12Gv+1HY5gm9lB39gTqOyVekPFcy0YLIYwWVmoGYQO/v\nIBJa3NIN6w3PnpHrfG5mAlfLMNtLH5TagChOjd0lSQKBgQDskzoGeih8to1xIoTB\nrr7cMvPmwZeFn2kD/CovCMhYQisS3Hq2ZAHru1XN74+0xYQlict/Y32c7cqb83WW\npKt1BqOmQhHkk8Ql1UZ1vrpU2VZEXLvx+wDrLSr8JKSazGP2q3yXz5bzWjG1EbI1\nEJ6b5oXO+I8N1owTw/wyxXS2OQKBgQDAntK7j2T84sDEqkCMFJvSr1+DFHkW0ClS\nWPhkP8rb1WDBdCtAiGhrbEeyPxmCNmzvyNACfOJwjwiP/wzuI9QER9qtQ+fkuz7l\ndJWVqjkaIzBw0ZopCDTx1SJfsDUUieze6WhJy6Yaja4jWx4ObIvW6cA665dSnx4x\n2AManltxGwKBgQDHYxJ9kQl8itNM7d3b62564grEipctDHyS2j2nZIyOLj5laCOY\ng1NHHZ2ksmp43huRE8DKPc0JrTSdGaUacPD88bqNjpPMBWpExKrc5AHhL0fQ2COj\nP3y3uVrIJg4BVZBOvpuEy3Ya5gKM7Dw5EyfKl5BhLwZKkG+A3SY6mEbUqQKBgGuY\nTB54+vBHuWYhs+yLln8Zd0DBLuKPZIWbLSTSfX5Osr1dJsvlZm6bDXGDzbvuqutG\nQOsi11oIi7/juFVNriC2AnDGfcJG1dtWRDhV3Y+8GFLswjApZWmwpILO6teOM8JQ\nnvCZ0S8D9UHKo/9qorJp3/cJon2EZFlzUEiOLIYBAoGBALMKDnLPXIcFJJcKmVTA\nQYyZ5zKLZqsfzVZnMRYrrrIEAfV0ZsavJrl4Bkyemz5X8WUblDOCUKWXPUJUMLmb\nrPjWESmL419l8vLBUmSZ92KHjyUPtgjT6D4BMpTqFtEjQ4PusDXduePgHxuxawk0\nTlvnpuziNaVjsXd0Yrww0WVH\n-----END PRIVATE KEY-----\n",
"client_email": "speech-text-user@gen-lang-client-0718067008.iam.gserviceaccount.com",
"client_id": "113891306697025836478",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/speech-text-user%40gen-lang-client-0718067008.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}
+31
View File
@@ -0,0 +1,31 @@
# syntax=docker/dockerfile:1
FROM python:3.11-slim
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PATH="/app/.venv/bin:/root/.local/bin:${PATH}"
WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
ffmpeg \
&& rm -rf /var/lib/apt/lists/*
# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
# Install deps (expects uv.lock)
COPY pyproject.toml uv.lock ./
RUN uv sync --frozen --no-dev
# Copy source
COPY cpv3 ./cpv3
COPY alembic ./alembic
COPY alembic.ini ./
EXPOSE 8000
CMD ["sh", "-c", "uv run alembic upgrade head && uv run uvicorn cpv3.main:app --host 0.0.0.0 --port 8000"]
+36
View File
@@ -0,0 +1,36 @@
[alembic]
script_location = alembic
prepend_sys_path = src
sqlalchemy.url = postgresql+asyncpg://postgres:postgres@localhost:5432/coffee_project_db
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers = console
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
+67
View File
@@ -0,0 +1,67 @@
from __future__ import annotations
import asyncio
from logging.config import fileConfig
from alembic import context
from sqlalchemy import pool
from sqlalchemy.ext.asyncio import async_engine_from_config
from cpv3.infrastructure.settings import get_settings
from cpv3.db.models import Base # noqa: F401
# Alembic Config object
config = context.config
if config.config_file_name is not None:
fileConfig(config.config_file_name)
target_metadata = Base.metadata
def _set_sqlalchemy_url() -> None:
settings = get_settings()
config.set_main_option("sqlalchemy.url", settings.get_database_url())
def run_migrations_offline() -> None:
_set_sqlalchemy_url()
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def do_run_migrations(connection) -> None: # type: ignore[no-untyped-def]
context.configure(connection=connection, target_metadata=target_metadata)
with context.begin_transaction():
context.run_migrations()
async def run_migrations_online() -> None:
_set_sqlalchemy_url()
connectable = async_engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
async with connectable.connect() as connection:
await connection.run_sync(do_run_migrations)
await connectable.dispose()
if context.is_offline_mode():
run_migrations_offline()
else:
asyncio.run(run_migrations_online())
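Usage sketch (not part of this commit): the same migrations can be driven from Python through Alembic's command API, assuming alembic.ini and the alembic/ directory sit in the working directory and the database configured via Settings is reachable.
from alembic import command
from alembic.config import Config
# env.py overrides sqlalchemy.url from Settings, so the URL hard-coded in
# alembic.ini is only a local fallback.
cfg = Config("alembic.ini")
command.upgrade(cfg, "head")      # same effect as `alembic upgrade head`
# command.downgrade(cfg, "base")  # would drop everything created by revision 0001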
+250
View File
@@ -0,0 +1,250 @@
"""initial schema
Revision ID: 0001
Revises:
Create Date: 2026-01-14
"""
from __future__ import annotations
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "0001"
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"users",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("is_active", sa.Boolean(), nullable=False),
sa.Column("username", sa.String(length=150), nullable=False),
sa.Column("email", sa.String(length=254), nullable=False),
sa.Column("password_hash", sa.String(length=255), nullable=False),
sa.Column("first_name", sa.String(length=150), nullable=False),
sa.Column("last_name", sa.String(length=150), nullable=False),
sa.Column("phone_number", sa.String(length=15), nullable=True),
sa.Column("avatar", sa.String(length=2048), nullable=True),
sa.Column("email_verified", sa.Boolean(), nullable=False),
sa.Column("phone_verified", sa.Boolean(), nullable=False),
sa.Column("is_staff", sa.Boolean(), nullable=False),
sa.Column("is_superuser", sa.Boolean(), nullable=False),
sa.Column("date_joined", sa.DateTime(timezone=True), nullable=False),
sa.Column("last_login", sa.DateTime(timezone=True), nullable=True),
sa.UniqueConstraint("username", name="uq_users_username"),
sa.UniqueConstraint("phone_number", name="uq_users_phone_number"),
)
op.create_index("ix_users_username", "users", ["username"], unique=False)
op.create_table(
"projects",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("is_active", sa.Boolean(), nullable=False),
sa.Column("owner_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("name", sa.String(length=255), nullable=False),
sa.Column("description", sa.Text(), nullable=True),
sa.Column("language", sa.String(length=4), nullable=False),
sa.Column("folder", sa.String(length=1024), nullable=True),
sa.Column("status", sa.String(length=16), nullable=False),
sa.ForeignKeyConstraint(["owner_id"], ["users.id"], ondelete="RESTRICT"),
)
op.create_index("ix_projects_owner_id", "projects", ["owner_id"], unique=False)
op.create_table(
"files",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("is_active", sa.Boolean(), nullable=False),
sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("owner_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("original_filename", sa.String(length=255), nullable=False),
sa.Column("path", sa.String(length=1024), nullable=False),
sa.Column("storage_backend", sa.String(length=16), nullable=False),
sa.Column("mime_type", sa.String(length=128), nullable=False),
sa.Column("size_bytes", sa.BigInteger(), nullable=False),
sa.Column("checksum", sa.String(length=64), nullable=True),
sa.Column("file_format", sa.String(length=32), nullable=True),
sa.Column("is_uploaded", sa.Boolean(), nullable=False),
sa.Column("is_deleted", sa.Boolean(), nullable=False),
sa.ForeignKeyConstraint(["project_id"], ["projects.id"], ondelete="RESTRICT"),
sa.ForeignKeyConstraint(["owner_id"], ["users.id"], ondelete="RESTRICT"),
)
op.create_index("ix_files_project_id", "files", ["project_id"], unique=False)
op.create_index("ix_files_owner_id", "files", ["owner_id"], unique=False)
op.create_table(
"media_files",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("is_active", sa.Boolean(), nullable=False),
sa.Column("owner_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("duration_seconds", sa.Float(), nullable=False),
sa.Column("frame_rate", sa.Float(), nullable=True),
sa.Column("width", sa.Integer(), nullable=True),
sa.Column("height", sa.Integer(), nullable=True),
sa.Column("probe_json", sa.JSON(), nullable=True),
sa.Column("notes", sa.Text(), nullable=True),
sa.Column("meta", sa.JSON(), nullable=True),
sa.Column("is_deleted", sa.Boolean(), nullable=False),
sa.ForeignKeyConstraint(["owner_id"], ["users.id"], ondelete="RESTRICT"),
sa.ForeignKeyConstraint(["project_id"], ["projects.id"], ondelete="RESTRICT"),
)
op.create_index("ix_media_files_owner_id", "media_files", ["owner_id"], unique=False)
op.create_index("ix_media_files_project_id", "media_files", ["project_id"], unique=False)
op.create_table(
"artifact_media_files",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("is_active", sa.Boolean(), nullable=False),
sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("file_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("media_file_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("artifact_type", sa.String(length=32), nullable=False),
sa.Column("is_deleted", sa.Boolean(), nullable=False),
sa.ForeignKeyConstraint(["project_id"], ["projects.id"], ondelete="RESTRICT"),
sa.ForeignKeyConstraint(["file_id"], ["files.id"], ondelete="RESTRICT"),
sa.ForeignKeyConstraint(["media_file_id"], ["media_files.id"], ondelete="RESTRICT"),
)
op.create_index(
"ix_artifact_media_files_project_id", "artifact_media_files", ["project_id"], unique=False
)
op.create_index(
"ix_artifact_media_files_file_id", "artifact_media_files", ["file_id"], unique=False
)
op.create_index(
"ix_artifact_media_files_media_file_id",
"artifact_media_files",
["media_file_id"],
unique=False,
)
op.create_table(
"transcriptions",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("is_active", sa.Boolean(), nullable=False),
sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("source_file_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("artifact_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("engine", sa.String(length=32), nullable=False),
sa.Column("language", sa.String(length=3), nullable=True),
sa.Column("document", sa.JSON(), nullable=False),
sa.Column("transcribe_options", sa.JSON(), nullable=True),
sa.ForeignKeyConstraint(["project_id"], ["projects.id"], ondelete="RESTRICT"),
sa.ForeignKeyConstraint(["source_file_id"], ["files.id"], ondelete="RESTRICT"),
sa.ForeignKeyConstraint(["artifact_id"], ["artifact_media_files.id"], ondelete="RESTRICT"),
)
op.create_index("ix_transcriptions_project_id", "transcriptions", ["project_id"], unique=False)
op.create_index(
"ix_transcriptions_source_file_id", "transcriptions", ["source_file_id"], unique=False
)
op.create_index(
"ix_transcriptions_artifact_id", "transcriptions", ["artifact_id"], unique=False
)
op.create_table(
"jobs",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("is_active", sa.Boolean(), nullable=False),
sa.Column("broker_id", sa.String(length=255), nullable=False),
sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("input_data", sa.JSON(), nullable=True),
sa.Column("output_data", sa.JSON(), nullable=True),
sa.Column("status", sa.String(length=16), nullable=False),
sa.Column("job_type", sa.String(length=32), nullable=False),
sa.Column("project_pct", sa.Float(), nullable=True),
sa.Column("error_message", sa.Text(), nullable=True),
sa.Column("current_message", sa.Text(), nullable=True),
sa.Column("started_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("finished_at", sa.DateTime(timezone=True), nullable=True),
sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="RESTRICT"),
sa.ForeignKeyConstraint(["project_id"], ["projects.id"], ondelete="RESTRICT"),
)
op.create_index("ix_jobs_user_id", "jobs", ["user_id"], unique=False)
op.create_index("ix_jobs_project_id", "jobs", ["project_id"], unique=False)
op.create_table(
"job_events",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("is_active", sa.Boolean(), nullable=False),
sa.Column("job_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("event_type", sa.String(length=64), nullable=False),
sa.Column("payload", sa.JSON(), nullable=True),
sa.ForeignKeyConstraint(["job_id"], ["jobs.id"], ondelete="CASCADE"),
)
op.create_index("ix_job_events_job_id", "job_events", ["job_id"], unique=False)
op.create_table(
"webhooks",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("is_active", sa.Boolean(), nullable=False),
sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("event", sa.String(length=255), nullable=True),
sa.Column("url", sa.String(length=1024), nullable=False),
sa.Column("secret", sa.String(length=255), nullable=True),
sa.ForeignKeyConstraint(["project_id"], ["projects.id"], ondelete="RESTRICT"),
sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="RESTRICT"),
)
op.create_index("ix_webhooks_project_id", "webhooks", ["project_id"], unique=False)
op.create_index("ix_webhooks_user_id", "webhooks", ["user_id"], unique=False)
def downgrade() -> None:
op.drop_index("ix_webhooks_user_id", table_name="webhooks")
op.drop_index("ix_webhooks_project_id", table_name="webhooks")
op.drop_table("webhooks")
op.drop_index("ix_job_events_job_id", table_name="job_events")
op.drop_table("job_events")
op.drop_index("ix_jobs_project_id", table_name="jobs")
op.drop_index("ix_jobs_user_id", table_name="jobs")
op.drop_table("jobs")
op.drop_index("ix_transcriptions_artifact_id", table_name="transcriptions")
op.drop_index("ix_transcriptions_source_file_id", table_name="transcriptions")
op.drop_index("ix_transcriptions_project_id", table_name="transcriptions")
op.drop_table("transcriptions")
op.drop_index("ix_artifact_media_files_media_file_id", table_name="artifact_media_files")
op.drop_index("ix_artifact_media_files_file_id", table_name="artifact_media_files")
op.drop_index("ix_artifact_media_files_project_id", table_name="artifact_media_files")
op.drop_table("artifact_media_files")
op.drop_index("ix_media_files_project_id", table_name="media_files")
op.drop_index("ix_media_files_owner_id", table_name="media_files")
op.drop_table("media_files")
op.drop_index("ix_files_owner_id", table_name="files")
op.drop_index("ix_files_project_id", table_name="files")
op.drop_table("files")
op.drop_index("ix_projects_owner_id", table_name="projects")
op.drop_table("projects")
op.drop_index("ix_users_username", table_name="users")
op.drop_table("users")
+3
View File
@@ -0,0 +1,3 @@
__all__ = ["__version__"]
__version__ = "0.1.0"
+3
View File
@@ -0,0 +1,3 @@
"""
API module - versioned API routers.
"""
+7
View File
@@ -0,0 +1,7 @@
"""
API v1 module.
"""
from cpv3.api.v1.router import api_router
__all__ = ["api_router"]
+48
View File
@@ -0,0 +1,48 @@
"""
API v1 router - aggregates all module routers.
"""
from __future__ import annotations
from fastapi import APIRouter
from cpv3.modules.captions.router import router as captions_router
from cpv3.modules.files.router import router as files_router
from cpv3.modules.jobs.router import events_router, jobs_router
from cpv3.modules.media.router import artifacts_router, media_router, mediafiles_router
from cpv3.modules.projects.router import router as projects_router
from cpv3.modules.system.router import router as system_router
from cpv3.modules.transcription.router import router as transcription_router
from cpv3.modules.users.router import auth_router, users_router
from cpv3.modules.webhooks.router import router as webhooks_router
api_router = APIRouter()
# System
api_router.include_router(system_router)
# Auth & Users
api_router.include_router(auth_router)
api_router.include_router(users_router)
# Projects
api_router.include_router(projects_router)
# Files (storage module renamed)
api_router.include_router(files_router)
# Media
api_router.include_router(media_router)
api_router.include_router(mediafiles_router)
api_router.include_router(artifacts_router)
# Transcription & Captions
api_router.include_router(transcription_router)
api_router.include_router(captions_router)
# Jobs
api_router.include_router(jobs_router)
api_router.include_router(events_router)
# Webhooks
api_router.include_router(webhooks_router)
+3
View File
@@ -0,0 +1,3 @@
"""
Common utilities and cross-cutting concerns for modules.
"""
+7
View File
@@ -0,0 +1,7 @@
from pydantic import BaseModel, ConfigDict
class Schema(BaseModel):
"""Base schema class for all Pydantic DTOs."""
model_config = ConfigDict(from_attributes=True)
+25
View File
@@ -0,0 +1,25 @@
from __future__ import annotations
import uuid
from datetime import datetime, timezone
from sqlalchemy import Boolean, DateTime
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
def utcnow() -> datetime:
return datetime.now(timezone.utc)
class Base(DeclarativeBase):
pass
class BaseModelMixin:
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=utcnow, onupdate=utcnow
)
is_active: Mapped[bool] = mapped_column(Boolean, default=True)
+21
View File
@@ -0,0 +1,21 @@
from cpv3.db.base import Base
from cpv3.modules.jobs.models import Job, JobEvent
from cpv3.modules.media.models import ArtifactMediaFile, MediaFile
from cpv3.modules.projects.models import Project
from cpv3.modules.files.models import File
from cpv3.modules.transcription.models import Transcription
from cpv3.modules.users.models import User
from cpv3.modules.webhooks.models import Webhook
__all__ = [
"Base",
"User",
"Project",
"File",
"MediaFile",
"ArtifactMediaFile",
"Transcription",
"Job",
"JobEvent",
"Webhook",
]
+22
View File
@@ -0,0 +1,22 @@
from __future__ import annotations
from collections.abc import AsyncGenerator
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from cpv3.infrastructure.settings import get_settings
_settings = get_settings()
_engine = create_async_engine(
_settings.get_database_url(),
echo=_settings.debug,
pool_pre_ping=True,
)
SessionLocal = async_sessionmaker(bind=_engine, class_=AsyncSession, expire_on_commit=False)
async def get_db() -> AsyncGenerator[AsyncSession, None]:
async with SessionLocal() as session:
yield session
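A hedged sketch, not in the commit: get_db is meant to be injected via FastAPI's Depends, but the SessionLocal factory can also be used directly in a script; the user-counting query below is only illustrative.
import asyncio
from sqlalchemy import func, select
from cpv3.db.session import SessionLocal
from cpv3.modules.users.models import User
async def count_users() -> int:
    # One session per unit of work; expire_on_commit=False keeps attributes
    # readable after the session commits.
    async with SessionLocal() as session:
        result = await session.execute(select(func.count()).select_from(User))
        return int(result.scalar_one())
print(asyncio.run(count_users()))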
+3
View File
@@ -0,0 +1,3 @@
"""
Infrastructure layer - app bootstrapping, configuration, security, and external integrations.
"""
+60
View File
@@ -0,0 +1,60 @@
from __future__ import annotations
import uuid
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jwt import ExpiredSignatureError, InvalidTokenError
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.security import decode_token
from cpv3.db.session import get_db
from cpv3.modules.users.models import User
from cpv3.modules.users.repository import UserRepository
_bearer = HTTPBearer(auto_error=True)
async def get_current_user(
credentials: HTTPAuthorizationCredentials = Depends(_bearer),
db: AsyncSession = Depends(get_db),
) -> User:
token = credentials.credentials
try:
payload = decode_token(token)
except ExpiredSignatureError as e:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail="Token expired"
) from e
except InvalidTokenError as e:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
) from e
if payload.get("type") != "access":
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
)
sub = payload.get("sub")
if not sub:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
)
try:
user_id = uuid.UUID(str(sub))
except ValueError as e:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
) from e
user_repo = UserRepository(db)
user = await user_repo.get_by_id(user_id)
if user is None or not user.is_active:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid credentials"
)
return user
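For illustration only (not committed here): how a router is expected to consume get_current_user; the /api/example prefix and /whoami/ path are hypothetical.
from fastapi import APIRouter, Depends
from cpv3.infrastructure.auth import get_current_user
from cpv3.modules.users.models import User
router = APIRouter(prefix="/api/example", tags=["example"])  # hypothetical router
@router.get("/whoami/")
async def whoami(current_user: User = Depends(get_current_user)) -> dict[str, str]:
    # The dependency has already validated the Bearer token and loaded an
    # active User, so the handler only works with the domain object.
    return {"id": str(current_user.id), "username": current_user.username}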
+43
View File
@@ -0,0 +1,43 @@
"""
Infrastructure-level dependencies for FastAPI dependency injection.
"""
from __future__ import annotations
from functools import lru_cache
from cpv3.infrastructure.settings import get_settings
from cpv3.infrastructure.storage.base import StorageBackend, StorageService
from cpv3.infrastructure.storage.local import LocalConfig, LocalStorageBackend
from cpv3.infrastructure.storage.s3 import S3Config, S3StorageBackend
@lru_cache
def _get_storage_service() -> StorageService:
settings = get_settings()
backend: StorageBackend
if settings.storage_backend.upper() == "LOCAL":
backend = LocalStorageBackend(LocalConfig(root_dir=settings.local_storage_dir))
else:
if not settings.s3_access_key or not settings.s3_secret_key:
raise RuntimeError(
"S3_ACCESS_KEY and S3_SECRET_KEY are required for S3 storage"
)
backend = S3StorageBackend(
S3Config(
access_key=settings.s3_access_key,
secret_key=settings.s3_secret_key,
bucket_name=settings.s3_bucket_name,
endpoint_url_internal=settings.s3_endpoint_url_internal,
endpoint_url_public=settings.s3_endpoint_url_public,
presign_expires_seconds=settings.s3_presign_expires_seconds,
)
)
return StorageService(backend)
async def get_storage() -> StorageService:
return _get_storage_service()
+55
View File
@@ -0,0 +1,55 @@
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from typing import Any, Literal
import jwt
from passlib.context import CryptContext # type: ignore[import-untyped]
from cpv3.infrastructure.settings import get_settings
# Use bcrypt_sha256 to lift the 72-byte password limit while still verifying legacy bcrypt hashes.
pwd_context = CryptContext(schemes=["bcrypt_sha256", "bcrypt"], deprecated="auto")
def hash_password(password: str) -> str:
return pwd_context.hash(password)
def verify_password(password: str, password_hash: str) -> bool:
return pwd_context.verify(password, password_hash)
def utcnow() -> datetime:
return datetime.now(timezone.utc)
def create_token(
*,
subject: str,
token_type: Literal["access", "refresh"],
expires_in: timedelta,
extra: dict[str, Any] | None = None,
) -> str:
settings = get_settings()
now = utcnow()
payload: dict[str, Any] = {
"sub": subject,
"type": token_type,
"iat": int(now.timestamp()),
"exp": int((now + expires_in).timestamp()),
}
if extra:
payload.update(extra)
return jwt.encode(
payload, settings.jwt_secret_key, algorithm=settings.jwt_algorithm
)
def decode_token(token: str) -> dict[str, Any]:
settings = get_settings()
return jwt.decode(
token, settings.jwt_secret_key, algorithms=[settings.jwt_algorithm]
)
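Usage sketch (not part of this commit) of the password and token helpers; the subject UUID is a made-up example and the 60-minute TTL mirrors the jwt_access_ttl_minutes default in settings.py further below.
from datetime import timedelta
from cpv3.infrastructure.security import (
    create_token,
    decode_token,
    hash_password,
    verify_password,
)
# Password round-trip: new hashes use bcrypt_sha256, legacy bcrypt still verifies.
stored = hash_password("s3cret")
assert verify_password("s3cret", stored)
# Access-token round-trip: "sub" carries the user id as a string.
token = create_token(
    subject="123e4567-e89b-12d3-a456-426614174000",  # example UUID
    token_type="access",
    expires_in=timedelta(minutes=60),
)
payload = decode_token(token)
assert payload["type"] == "access"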
+89
View File
@@ -0,0 +1,89 @@
from __future__ import annotations
from functools import lru_cache
from pathlib import Path
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",
extra="ignore",
)
# App
debug: bool = Field(default=True, alias="DEBUG")
cors_allowed_origins: list[str] = Field(
default_factory=lambda: ["http://localhost:3000", "http://localhost:8000"],
alias="CORS_ALLOWED_ORIGINS",
)
# JWT
jwt_secret_key: str = Field(default="dev-secret", alias="JWT_SECRET_KEY")
jwt_algorithm: str = Field(default="HS256", alias="JWT_ALGORITHM")
jwt_access_ttl_minutes: int = Field(default=60, alias="JWT_ACCESS_TTL_MINUTES")
jwt_refresh_ttl_days: int = Field(default=30, alias="JWT_REFRESH_TTL_DAYS")
# DB
postgres_user: str = Field(default="postgres", alias="POSTGRES_USER")
postgres_password: str = Field(default="postgres", alias="POSTGRES_PASSWORD")
postgres_host: str = Field(default="localhost", alias="POSTGRES_HOST")
postgres_port: int = Field(default=5332, alias="POSTGRES_PORT")
postgres_database: str = Field(
default="coffee_project_db", alias="POSTGRES_DATABASE"
)
database_url: str | None = Field(default=None, alias="DATABASE_URL")
# Storage
storage_backend: str = Field(default="S3", alias="STORAGE_BACKEND")
s3_access_key: str | None = Field(default=None, alias="S3_ACCESS_KEY")
s3_secret_key: str | None = Field(default=None, alias="S3_SECRET_KEY")
s3_bucket_name: str = Field(default="coffee-bucket", alias="S3_BUCKET_NAME")
# Internal endpoint is used by the API container to talk to MinIO/S3.
s3_endpoint_url_internal: str | None = Field(
default=None, alias="S3_ENDPOINT_URL_INTERNAL"
)
# Public endpoint is only used to generate browser-accessible URLs.
s3_endpoint_url_public: str | None = Field(
default=None, alias="S3_ENDPOINT_URL_PUBLIC"
)
s3_presign_expires_seconds: int = Field(default=3600, alias="S3_PRESIGN_EXPIRES")
local_storage_dir: Path = Field(
default=Path("./.local_storage"), alias="LOCAL_STORAGE_DIR"
)
# External services
remotion_service_url: str = Field(
default="http://localhost:8001", alias="REMOTION_SERVICE_URL"
)
transcription_models_dir: Path = Field(
default=Path("./.artifacts/Models/transcription"),
alias="TRANSCRIPTION_MODELS_DIR",
)
google_service_key_path: Path = Field(
default=Path("./.s_data/keyapispeech.json"),
alias="GOOGLE_APPLICATION_CREDENTIALS",
)
def get_database_url(self) -> str:
if self.database_url:
return self.database_url
return (
f"postgresql+asyncpg://{self.postgres_user}:{self.postgres_password}"
f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_database}"
)
@lru_cache
def get_settings() -> Settings:
return Settings()
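A short sketch, not in the commit, of the precedence implemented by get_database_url(): an explicit DATABASE_URL wins, otherwise the URL is assembled from the POSTGRES_* parts; the db:5432 URL below is a made-up example.
from cpv3.infrastructure.settings import Settings
# With no DATABASE_URL, the URL is assembled from the individual fields
# (here the field defaults, assuming no overriding environment variables).
s = Settings(_env_file=None)
print(s.get_database_url())
# An explicit DATABASE_URL short-circuits the assembled one.
s = Settings(_env_file=None, DATABASE_URL="postgresql+asyncpg://app:app@db:5432/app")
print(s.get_database_url())  # postgresql+asyncpg://app:app@db:5432/app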
+17
View File
@@ -0,0 +1,17 @@
"""
Storage infrastructure - file storage backends (local, S3).
"""
from cpv3.infrastructure.storage.base import StorageBackend
from cpv3.infrastructure.storage.local import LocalConfig, LocalStorageBackend
from cpv3.infrastructure.storage.s3 import S3Config, S3StorageBackend
from cpv3.infrastructure.storage.types import FileInfo
__all__ = [
"StorageBackend",
"LocalConfig",
"LocalStorageBackend",
"S3Config",
"S3StorageBackend",
"FileInfo",
]
+124
View File
@@ -0,0 +1,124 @@
from __future__ import annotations
from dataclasses import dataclass
from os import path
from tempfile import NamedTemporaryFile
from typing import BinaryIO, Callable, Protocol
from uuid import uuid4
import anyio
import anyio.to_thread
from cpv3.infrastructure.storage.types import FileInfo
@dataclass(frozen=True)
class TempFile:
path: str
cleanup: Callable[[], None]
class StorageBackend(Protocol):
"""Protocol defining the interface for storage backends."""
def upload_fileobj(
self, key: str, fileobj: BinaryIO, *, content_type: str | None
) -> None: ...
def download_fileobj(self, key: str, fileobj: BinaryIO) -> None: ...
def exists(self, key: str) -> bool: ...
def size(self, key: str) -> int: ...
def delete(self, key: str) -> None: ...
def read(self, key: str) -> bytes: ...
def generate_url(self, key: str) -> str: ...
class StorageService:
"""High-level async storage service wrapping a backend."""
def __init__(self, backend: StorageBackend) -> None:
self._backend = backend
def _make_key(self, file_name: str, folder: str, gen_name: bool) -> str:
if gen_name:
_, ext = path.splitext(file_name)
file_name = f"{uuid4().hex}{ext if ext else ''}"
return path.join(folder, file_name) if folder else file_name
async def upload_fileobj(
self,
*,
fileobj: BinaryIO,
file_name: str,
folder: str = "",
gen_name: bool = True,
content_type: str | None = None,
) -> str:
key = self._make_key(file_name, folder, gen_name)
def _upload() -> None:
fileobj.seek(0)
self._backend.upload_fileobj(key, fileobj, content_type=content_type)
await anyio.to_thread.run_sync(_upload)
return key
async def exists(self, key: str) -> bool:
return await anyio.to_thread.run_sync(lambda: self._backend.exists(key))
async def delete(self, key: str) -> None:
await anyio.to_thread.run_sync(lambda: self._backend.delete(key))
async def size(self, key: str) -> int:
return await anyio.to_thread.run_sync(lambda: self._backend.size(key))
async def read(self, key: str) -> bytes:
return await anyio.to_thread.run_sync(lambda: self._backend.read(key))
async def url(self, key: str) -> str:
return await anyio.to_thread.run_sync(lambda: self._backend.generate_url(key))
async def get_file_info(self, key: str) -> FileInfo:
if not await self.exists(key):
raise FileNotFoundError(f"File '{key}' does not exist")
file_url = await self.url(key)
file_size = await self.size(key)
return FileInfo(
file_path=key,
file_url=file_url,
file_size=file_size,
filename=path.basename(key),
)
async def download_to_temp(self, key: str) -> TempFile:
if not await self.exists(key):
raise FileNotFoundError(f"File '{key}' does not exist")
_, ext = path.splitext(key)
suffix = ext if ext else ".bin"
out_path: str
with NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
out_path = tmp.name
def _download() -> None:
with open(out_path, "wb") as out:
self._backend.download_fileobj(key, out)
await anyio.to_thread.run_sync(_download)
def _cleanup() -> None:
import os
if os.path.exists(out_path):
os.remove(out_path)
return TempFile(path=out_path, cleanup=_cleanup)
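Usage sketch (not part of this commit) wiring StorageService to the LocalStorageBackend defined in the next file; the ./tmp_storage directory and demo payload are made up.
import asyncio
import io
from pathlib import Path
from cpv3.infrastructure.storage.base import StorageService
from cpv3.infrastructure.storage.local import LocalConfig, LocalStorageBackend
async def demo() -> None:
    service = StorageService(LocalStorageBackend(LocalConfig(root_dir=Path("./tmp_storage"))))
    # gen_name=True swaps the filename for a uuid4 hex while keeping the extension.
    key = await service.upload_fileobj(
        fileobj=io.BytesIO(b"hello"), file_name="note.txt", folder="demo"
    )
    info = await service.get_file_info(key)
    print(info.file_url, info.file_size)  # /api/files/local/demo/<hex>.txt 5
    tmp = await service.download_to_temp(key)
    try:
        print(Path(tmp.path).read_bytes())  # b'hello'
    finally:
        tmp.cleanup()
asyncio.run(demo())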
+62
View File
@@ -0,0 +1,62 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import BinaryIO
@dataclass(frozen=True)
class LocalConfig:
root_dir: Path
class LocalStorageBackend:
def __init__(self, cfg: LocalConfig) -> None:
self._cfg = cfg
self._cfg.root_dir.mkdir(parents=True, exist_ok=True)
def _full_path(self, key: str) -> Path:
return (self._cfg.root_dir / key).resolve()
def upload_fileobj(
self, key: str, fileobj: BinaryIO, *, content_type: str | None
) -> None:
# content_type is unused for filesystem backend.
_ = content_type
full_path = self._full_path(key)
full_path.parent.mkdir(parents=True, exist_ok=True)
with open(full_path, "wb") as out:
while True:
chunk = fileobj.read(1024 * 1024)
if not chunk:
break
out.write(chunk)
def download_fileobj(self, key: str, fileobj: BinaryIO) -> None:
full_path = self._full_path(key)
with open(full_path, "rb") as src:
while True:
chunk = src.read(1024 * 1024)
if not chunk:
break
fileobj.write(chunk)
def exists(self, key: str) -> bool:
return self._full_path(key).exists()
def size(self, key: str) -> int:
return self._full_path(key).stat().st_size
def delete(self, key: str) -> None:
path = self._full_path(key)
if path.exists():
path.unlink()
def read(self, key: str) -> bytes:
return self._full_path(key).read_bytes()
def generate_url(self, key: str) -> str:
# Served by cpv3 via /api/files/local/{path}
return f"/api/files/local/{key}"
+107
View File
@@ -0,0 +1,107 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import BinaryIO
import boto3 # type: ignore[import-untyped]
import boto3.session # type: ignore[import-untyped]
from botocore.config import Config # type: ignore[import-untyped]
from botocore.exceptions import ClientError # type: ignore[import-untyped]
@dataclass(frozen=True)
class S3Config:
access_key: str
secret_key: str
bucket_name: str
endpoint_url_internal: str | None
endpoint_url_public: str | None
presign_expires_seconds: int = 3600
class S3StorageBackend:
def __init__(self, cfg: S3Config) -> None:
self._cfg = cfg
self._bucket_ready = False
session = boto3.session.Session()
common = {
"aws_access_key_id": cfg.access_key,
"aws_secret_access_key": cfg.secret_key,
"region_name": "us-east-1",
"config": Config(signature_version="s3v4", s3={"addressing_style": "path"}),
}
self._client = session.client(
"s3", endpoint_url=cfg.endpoint_url_internal, **common
)
presign_endpoint = cfg.endpoint_url_public or cfg.endpoint_url_internal
self._presign_client = session.client(
"s3", endpoint_url=presign_endpoint, **common
)
def ensure_bucket(self) -> None:
if self._bucket_ready:
return
try:
self._client.head_bucket(Bucket=self._cfg.bucket_name)
except ClientError as e:
code = str(e.response.get("Error", {}).get("Code", ""))
if code in {"404", "NoSuchBucket"}:
self._client.create_bucket(Bucket=self._cfg.bucket_name)
else:
raise
self._bucket_ready = True
def upload_fileobj(
self, key: str, fileobj: BinaryIO, *, content_type: str | None
) -> None:
self.ensure_bucket()
extra_args = {"ContentType": content_type} if content_type else None
self._client.upload_fileobj(
Fileobj=fileobj,
Bucket=self._cfg.bucket_name,
Key=key,
ExtraArgs=extra_args,
)
def download_fileobj(self, key: str, fileobj: BinaryIO) -> None:
self.ensure_bucket()
self._client.download_fileobj(self._cfg.bucket_name, key, fileobj)
def exists(self, key: str) -> bool:
self.ensure_bucket()
try:
self._client.head_object(Bucket=self._cfg.bucket_name, Key=key)
return True
except ClientError as e:
code = str(e.response.get("Error", {}).get("Code", ""))
if code in {"404", "NoSuchKey"}:
return False
raise
def size(self, key: str) -> int:
self.ensure_bucket()
resp = self._client.head_object(Bucket=self._cfg.bucket_name, Key=key)
return int(resp.get("ContentLength", 0))
def delete(self, key: str) -> None:
self.ensure_bucket()
self._client.delete_object(Bucket=self._cfg.bucket_name, Key=key)
def read(self, key: str) -> bytes:
self.ensure_bucket()
resp = self._client.get_object(Bucket=self._cfg.bucket_name, Key=key)
body = resp["Body"].read()
return body
def generate_url(self, key: str) -> str:
self.ensure_bucket()
return self._presign_client.generate_presigned_url(
ClientMethod="get_object",
Params={"Bucket": self._cfg.bucket_name, "Key": key},
ExpiresIn=self._cfg.presign_expires_seconds,
)
+11
View File
@@ -0,0 +1,11 @@
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class FileInfo:
file_path: str
file_url: str
file_size: int | None = None
filename: str | None = None
+28
View File
@@ -0,0 +1,28 @@
from __future__ import annotations
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from cpv3.infrastructure.settings import get_settings
from cpv3.api.v1.router import api_router
settings = get_settings()
app = FastAPI(
title="Coffee Project Backend API",
version="0.0.0",
openapi_url="/api/schema/",
docs_url="/api/schema/swagger/",
redoc_url=None,
)
app.add_middleware(
CORSMiddleware,
allow_origins=settings.cors_allowed_origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Include the versioned API router
app.include_router(api_router)
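A minimal sketch, not in the commit, for serving the app locally without the Dockerfile's CMD; interactive docs end up at /api/schema/swagger/ per the FastAPI config above.
import uvicorn
if __name__ == "__main__":
    # Equivalent to: uv run uvicorn cpv3.main:app --host 0.0.0.0 --port 8000
    uvicorn.run("cpv3.main:app", host="0.0.0.0", port=8000, reload=True)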
+23
View File
@@ -0,0 +1,23 @@
from __future__ import annotations
from fastapi import APIRouter, Depends
from cpv3.infrastructure.auth import get_current_user
from cpv3.modules.captions.schemas import CaptionsRequest, CaptionsResponse
from cpv3.modules.captions.service import generate_captions
from cpv3.modules.users.models import User
router = APIRouter(prefix="/api/captions", tags=["Captions"])
@router.post("/get_video/", response_model=CaptionsResponse)
async def get_video(
body: CaptionsRequest, current_user: User = Depends(get_current_user)
) -> CaptionsResponse:
_ = current_user
result = await generate_captions(
folder=body.folder,
video_s3_path=body.video_s3_path,
transcription=body.transcription,
)
return CaptionsResponse(result=result)
+14
View File
@@ -0,0 +1,14 @@
from __future__ import annotations
from cpv3.common.schemas import Schema
from cpv3.modules.transcription.schemas import Document
class CaptionsRequest(Schema):
folder: str
video_s3_path: str
transcription: Document
class CaptionsResponse(Schema):
result: str
+31
View File
@@ -0,0 +1,31 @@
from __future__ import annotations
import httpx
from cpv3.infrastructure.settings import get_settings
from cpv3.modules.transcription.schemas import Document
async def generate_captions(
*, video_s3_path: str, folder: str, transcription: Document
) -> str:
"""Generate captions for a video using the Remotion service."""
settings = get_settings()
payload = {
"folder": folder,
"videoSrc": video_s3_path,
"transcription": transcription.model_dump(),
}
async with httpx.AsyncClient(timeout=300) as client:
resp = await client.post(
f"{settings.remotion_service_url}/api/render", json=payload
)
resp.raise_for_status()
data = resp.json()
if not isinstance(data, dict) or "output" not in data:
raise RuntimeError("Unexpected response from remotion service")
return str(data["output"])
+3
View File
@@ -0,0 +1,3 @@
"""
Files module - file management and storage operations.
"""
+38
View File
@@ -0,0 +1,38 @@
from __future__ import annotations
import uuid
from sqlalchemy import BigInteger, Boolean, ForeignKey, String
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from cpv3.db.base import Base, BaseModelMixin
class File(Base, BaseModelMixin):
__tablename__ = "files"
project_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("projects.id", ondelete="RESTRICT"),
nullable=True,
index=True,
)
owner_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("users.id", ondelete="RESTRICT"),
nullable=True,
index=True,
)
original_filename: Mapped[str] = mapped_column(String(255), default="")
path: Mapped[str] = mapped_column(String(1024))
storage_backend: Mapped[str] = mapped_column(String(16), default="S3")
mime_type: Mapped[str] = mapped_column(String(128))
size_bytes: Mapped[int] = mapped_column(BigInteger)
checksum: Mapped[str | None] = mapped_column(String(64), nullable=True)
file_format: Mapped[str | None] = mapped_column(String(32), nullable=True)
is_uploaded: Mapped[bool] = mapped_column(Boolean, default=False)
is_deleted: Mapped[bool] = mapped_column(Boolean, default=False)
+66
View File
@@ -0,0 +1,66 @@
from __future__ import annotations
import uuid
from sqlalchemy import Select, select
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.files.models import File
from cpv3.modules.files.schemas import FileCreate, FileUpdate
from cpv3.modules.users.models import User
class FileRepository:
"""Repository for File database operations."""
def __init__(self, session: AsyncSession) -> None:
self._session = session
async def list_all(self, *, requester: User) -> list[File]:
stmt: Select[tuple[File]] = select(File).where(File.is_deleted.is_(False))
if not requester.is_staff:
stmt = stmt.where(File.owner_id == requester.id)
result = await self._session.execute(stmt.order_by(File.created_at.desc()))
return list(result.scalars().all())
async def get_by_id(self, file_id: uuid.UUID) -> File | None:
result = await self._session.execute(select(File).where(File.id == file_id))
file = result.scalar_one_or_none()
if file is None:
return None
if file.is_deleted:
return None
return file
async def create(self, *, requester: User, data: FileCreate) -> File:
file = File(
owner_id=requester.id,
project_id=data.project_id,
original_filename=data.original_filename,
path=data.path,
storage_backend=data.storage_backend,
mime_type=data.mime_type,
size_bytes=data.size_bytes,
checksum=data.checksum,
file_format=data.file_format,
is_uploaded=data.is_uploaded,
)
self._session.add(file)
await self._session.commit()
await self._session.refresh(file)
return file
async def update(self, file: File, data: FileUpdate) -> File:
for key, value in data.model_dump(exclude_unset=True).items():
if value is not None:
setattr(file, key, value)
await self._session.commit()
await self._session.refresh(file)
return file
async def mark_deleted(self, file: File) -> None:
file.is_deleted = True
await self._session.commit()
+185
View File
@@ -0,0 +1,185 @@
from __future__ import annotations
import uuid
from fastapi import (
APIRouter,
Depends,
File as FastAPIFile,
Form,
HTTPException,
Query,
Response,
UploadFile,
status,
)
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.auth import get_current_user
from cpv3.infrastructure.deps import get_storage
from cpv3.infrastructure.storage.base import StorageService
from cpv3.infrastructure.settings import get_settings
from cpv3.db.session import get_db
from cpv3.modules.files.schemas import (
FileCreate,
FileInfoResponse,
FileRead,
FileUpdate,
)
from cpv3.modules.files.service import FileService
from cpv3.modules.users.models import User
router = APIRouter(prefix="/api/files", tags=["Files"])
MAX_MB_SIZE = 100
@router.post(
"/upload/", response_model=FileInfoResponse, status_code=status.HTTP_201_CREATED
)
async def upload_file(
file: UploadFile = FastAPIFile(...),
folder: str = Form(default=""),
current_user: User = Depends(get_current_user),
storage: StorageService = Depends(get_storage),
) -> FileInfoResponse:
_ = current_user
# Validate max file size (matches old behavior).
file.file.seek(0, 2)
size_bytes = file.file.tell()
file.file.seek(0)
max_size = MAX_MB_SIZE * 1024 * 1024
if size_bytes > max_size:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"File size exceeds the maximum limit of {MAX_MB_SIZE} MB.",
)
key = await storage.upload_fileobj(
fileobj=file.file,
file_name=file.filename or "upload.bin",
folder=folder,
gen_name=True,
content_type=file.content_type,
)
info = await storage.get_file_info(key)
return FileInfoResponse(
file_path=info.file_path,
file_url=info.file_url,
file_size=info.file_size,
filename=file.filename,
)
@router.get("/get_file/", response_model=FileInfoResponse)
async def get_file_info(
file_path: str = Query(...),
current_user: User = Depends(get_current_user),
storage: StorageService = Depends(get_storage),
) -> FileInfoResponse:
_ = current_user
if not await storage.exists(file_path):
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
info = await storage.get_file_info(file_path)
return FileInfoResponse(
file_path=info.file_path,
file_url=info.file_url,
file_size=info.file_size,
filename=info.filename,
)
@router.get("/local/{file_path:path}")
async def get_local_file(
file_path: str,
current_user: User = Depends(get_current_user),
) -> FileResponse:
_ = current_user
settings = get_settings()
full_path = (settings.local_storage_dir / file_path).resolve()
if not full_path.exists():
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
return FileResponse(full_path)
@router.get("/files/", response_model=list[FileRead])
async def list_file_entries(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> list[FileRead]:
service = FileService(db)
files = await service.list_files(requester=current_user)
return [FileRead.model_validate(f) for f in files]
@router.post("/files/", response_model=FileRead, status_code=status.HTTP_201_CREATED)
async def create_file_entry(
body: FileCreate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> FileRead:
service = FileService(db)
file = await service.create_file(requester=current_user, data=body)
return FileRead.model_validate(file)
@router.get("/files/{file_id}/", response_model=FileRead)
async def retrieve_file_entry(
file_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> FileRead:
service = FileService(db)
file = await service.get_file(file_id)
if file is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and file.owner_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
return FileRead.model_validate(file)
@router.patch("/files/{file_id}/", response_model=FileRead)
async def patch_file_entry(
file_id: uuid.UUID,
body: FileUpdate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> FileRead:
service = FileService(db)
file = await service.get_file(file_id)
if file is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and file.owner_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
file = await service.update_file(file, body)
return FileRead.model_validate(file)
@router.delete("/files/{file_id}/", status_code=status.HTTP_204_NO_CONTENT)
async def delete_file_entry(
file_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> Response:
service = FileService(db)
file = await service.get_file(file_id)
if file is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and file.owner_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
await service.delete_file(file)
return Response(status_code=status.HTTP_204_NO_CONTENT)
+65
View File
@@ -0,0 +1,65 @@
from __future__ import annotations
from datetime import datetime
from typing import Literal
from uuid import UUID
from cpv3.common.schemas import Schema
StorageBackendEnum = Literal["LOCAL", "S3"]
class FileRead(Schema):
id: UUID
project_id: UUID | None
owner_id: UUID | None
original_filename: str
path: str
storage_backend: StorageBackendEnum
mime_type: str
size_bytes: int
checksum: str | None
file_format: str | None
is_uploaded: bool
is_deleted: bool
is_active: bool
created_at: datetime
updated_at: datetime
class FileCreate(Schema):
project_id: UUID | None = None
original_filename: str
path: str
storage_backend: StorageBackendEnum = "S3"
mime_type: str
size_bytes: int
checksum: str | None = None
file_format: str | None = None
is_uploaded: bool = False
class FileUpdate(Schema):
original_filename: str | None = None
is_uploaded: bool | None = None
is_deleted: bool | None = None
class FileInfoResponse(Schema):
file_path: str
file_url: str
file_size: int | None = None
filename: str | None = None
class FileParam(Schema):
file_path: str
class FileUploadForm(Schema):
folder: str = ""
+32
View File
@@ -0,0 +1,32 @@
from __future__ import annotations
import uuid
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.files.models import File
from cpv3.modules.files.repository import FileRepository
from cpv3.modules.files.schemas import FileCreate, FileUpdate
from cpv3.modules.users.models import User
class FileService:
"""Service for file business logic and orchestration."""
def __init__(self, session: AsyncSession) -> None:
self._repo = FileRepository(session)
async def list_files(self, *, requester: User) -> list[File]:
return await self._repo.list_all(requester=requester)
async def get_file(self, file_id: uuid.UUID) -> File | None:
return await self._repo.get_by_id(file_id)
async def create_file(self, *, requester: User, data: FileCreate) -> File:
return await self._repo.create(requester=requester, data=data)
async def update_file(self, file: File, data: FileUpdate) -> File:
return await self._repo.update(file, data)
async def delete_file(self, file: File) -> None:
await self._repo.mark_deleted(file)
+49
View File
@@ -0,0 +1,49 @@
from __future__ import annotations
import uuid
from datetime import datetime
from sqlalchemy import DateTime, Float, ForeignKey, JSON, String, Text
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from cpv3.db.base import Base, BaseModelMixin
class Job(Base, BaseModelMixin):
__tablename__ = "jobs"
broker_id: Mapped[str] = mapped_column(String(255))
user_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True), ForeignKey("users.id", ondelete="RESTRICT"), nullable=True, index=True
)
project_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("projects.id", ondelete="RESTRICT"),
nullable=True,
index=True,
)
input_data: Mapped[dict | None] = mapped_column(JSON, nullable=True)
output_data: Mapped[dict | None] = mapped_column(JSON, nullable=True)
status: Mapped[str] = mapped_column(String(16), default="PENDING")
job_type: Mapped[str] = mapped_column(String(32), default="PENDING")
project_pct: Mapped[float | None] = mapped_column(Float, nullable=True)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
current_message: Mapped[str | None] = mapped_column(Text, nullable=True)
started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
class JobEvent(Base, BaseModelMixin):
__tablename__ = "job_events"
job_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), ForeignKey("jobs.id", ondelete="CASCADE"), index=True
)
event_type: Mapped[str] = mapped_column(String(64))
payload: Mapped[dict | None] = mapped_column(JSON, nullable=True)
+109
View File
@@ -0,0 +1,109 @@
from __future__ import annotations
import uuid
from sqlalchemy import Select, select
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.jobs.models import Job, JobEvent
from cpv3.modules.jobs.schemas import (
JobCreate,
JobEventCreate,
JobEventUpdate,
JobUpdate,
)
from cpv3.modules.users.models import User
class JobRepository:
"""Repository for Job database operations."""
def __init__(self, session: AsyncSession) -> None:
self._session = session
async def list_all(self, *, requester: User) -> list[Job]:
stmt: Select[tuple[Job]] = select(Job).where(Job.is_active.is_(True))
if not requester.is_staff:
stmt = stmt.where(Job.user_id == requester.id)
result = await self._session.execute(stmt.order_by(Job.created_at.desc()))
return list(result.scalars().all())
async def get_by_id(self, job_id: uuid.UUID) -> Job | None:
result = await self._session.execute(
select(Job).where(Job.id == job_id).where(Job.is_active.is_(True))
)
return result.scalar_one_or_none()
async def create(self, *, requester: User, data: JobCreate) -> Job:
job = Job(
user_id=requester.id,
broker_id=data.broker_id,
project_id=data.project_id,
input_data=data.input_data,
status=data.status,
job_type=data.job_type,
)
self._session.add(job)
await self._session.commit()
await self._session.refresh(job)
return job
async def update(self, job: Job, data: JobUpdate) -> Job:
for key, value in data.model_dump(exclude_unset=True).items():
if value is not None:
setattr(job, key, value)
await self._session.commit()
await self._session.refresh(job)
return job
async def deactivate(self, job: Job) -> None:
job.is_active = False
await self._session.commit()
class JobEventRepository:
"""Repository for JobEvent database operations."""
def __init__(self, session: AsyncSession) -> None:
self._session = session
async def list_all(self) -> list[JobEvent]:
result = await self._session.execute(
select(JobEvent)
.where(JobEvent.is_active.is_(True))
.order_by(JobEvent.created_at.desc())
)
return list(result.scalars().all())
async def get_by_id(self, event_id: uuid.UUID) -> JobEvent | None:
result = await self._session.execute(
select(JobEvent)
.where(JobEvent.id == event_id)
.where(JobEvent.is_active.is_(True))
)
return result.scalar_one_or_none()
async def create(self, data: JobEventCreate) -> JobEvent:
event = JobEvent(
job_id=data.job_id, event_type=data.event_type, payload=data.payload
)
self._session.add(event)
await self._session.commit()
await self._session.refresh(event)
return event
async def update(self, event: JobEvent, data: JobEventUpdate) -> JobEvent:
for key, value in data.model_dump(exclude_unset=True).items():
if value is not None:
setattr(event, key, value)
await self._session.commit()
await self._session.refresh(event)
return event
async def deactivate(self, event: JobEvent) -> None:
event.is_active = False
await self._session.commit()
+168
View File
@@ -0,0 +1,168 @@
from __future__ import annotations
import uuid
from fastapi import APIRouter, Depends, HTTPException, Response, status
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.auth import get_current_user
from cpv3.db.session import get_db
from cpv3.modules.jobs.schemas import (
JobCreate,
JobEventCreate,
JobEventRead,
JobEventUpdate,
JobRead,
JobUpdate,
)
from cpv3.modules.jobs.service import JobService
from cpv3.modules.users.models import User
jobs_router = APIRouter(prefix="/api/jobs", tags=["jobs"])
events_router = APIRouter(prefix="/api/jobs", tags=["events"])
@jobs_router.get("/jobs/", response_model=list[JobRead])
async def list_jobs_endpoint(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> list[JobRead]:
service = JobService(db)
jobs = await service.list_jobs(requester=current_user)
return [JobRead.model_validate(j) for j in jobs]
@jobs_router.post("/jobs/", response_model=JobRead, status_code=status.HTTP_201_CREATED)
async def create_job_endpoint(
body: JobCreate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> JobRead:
service = JobService(db)
job = await service.create_job(requester=current_user, data=body)
return JobRead.model_validate(job)
@jobs_router.get("/jobs/{job_id}/", response_model=JobRead)
async def retrieve_job_endpoint(
job_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> JobRead:
service = JobService(db)
job = await service.get_job(job_id)
if job is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and job.user_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
return JobRead.model_validate(job)
@jobs_router.patch("/jobs/{job_id}/", response_model=JobRead)
async def patch_job_endpoint(
job_id: uuid.UUID,
body: JobUpdate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> JobRead:
service = JobService(db)
job = await service.get_job(job_id)
if job is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and job.user_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
job = await service.update_job(job, body)
return JobRead.model_validate(job)
@jobs_router.delete("/jobs/{job_id}/", status_code=status.HTTP_204_NO_CONTENT)
async def delete_job_endpoint(
job_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> Response:
service = JobService(db)
job = await service.get_job(job_id)
if job is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and job.user_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
await service.deactivate_job(job)
return Response(status_code=status.HTTP_204_NO_CONTENT)
@events_router.get("/events/", response_model=list[JobEventRead])
async def list_events_endpoint(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> list[JobEventRead]:
_ = current_user
service = JobService(db)
events = await service.list_job_events()
return [JobEventRead.model_validate(e) for e in events]
@events_router.post("/events/", response_model=JobEventRead, status_code=status.HTTP_201_CREATED)
async def create_event_endpoint(
body: JobEventCreate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> JobEventRead:
_ = current_user
service = JobService(db)
event = await service.create_job_event(body)
return JobEventRead.model_validate(event)
@events_router.get("/events/{event_id}/", response_model=JobEventRead)
async def retrieve_event_endpoint(
event_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> JobEventRead:
_ = current_user
service = JobService(db)
event = await service.get_job_event(event_id)
if event is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
return JobEventRead.model_validate(event)
@events_router.patch("/events/{event_id}/", response_model=JobEventRead)
async def patch_event_endpoint(
event_id: uuid.UUID,
body: JobEventUpdate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> JobEventRead:
_ = current_user
service = JobService(db)
event = await service.get_job_event(event_id)
if event is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
event = await service.update_job_event(event, body)
return JobEventRead.model_validate(event)
@events_router.delete("/events/{event_id}/", status_code=status.HTTP_204_NO_CONTENT)
async def delete_event_endpoint(
event_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> Response:
_ = current_user
service = JobService(db)
event = await service.get_job_event(event_id)
if event is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
await service.deactivate_job_event(event)
return Response(status_code=status.HTTP_204_NO_CONTENT)
+74
View File
@@ -0,0 +1,74 @@
from __future__ import annotations
from datetime import datetime
from typing import Literal
from uuid import UUID
from cpv3.common.schemas import Schema
JobStatusEnum = Literal["PENDING", "RUNNING", "FAILED", "CANCELLED", "DONE"]
JobTypeEnum = Literal["PENDING", "RUNNING", "FAILED", "CANCELLED", "DONE"]
class JobRead(Schema):
id: UUID
broker_id: str
user_id: UUID | None
project_id: UUID | None
input_data: dict | None
output_data: dict | None
status: JobStatusEnum
job_type: JobTypeEnum
project_pct: float | None
error_message: str | None
current_message: str | None
started_at: datetime | None
finished_at: datetime | None
is_active: bool
created_at: datetime
updated_at: datetime
class JobCreate(Schema):
broker_id: str
project_id: UUID | None = None
input_data: dict | None = None
status: JobStatusEnum = "PENDING"
job_type: JobTypeEnum = "PENDING"
class JobUpdate(Schema):
output_data: dict | None = None
status: JobStatusEnum | None = None
project_pct: float | None = None
error_message: str | None = None
current_message: str | None = None
started_at: datetime | None = None
finished_at: datetime | None = None
class JobEventRead(Schema):
id: UUID
job_id: UUID
event_type: str
payload: dict | None
is_active: bool
created_at: datetime
updated_at: datetime
class JobEventCreate(Schema):
job_id: UUID
event_type: str
payload: dict | None = None
class JobEventUpdate(Schema):
payload: dict | None = None
+53
View File
@@ -0,0 +1,53 @@
from __future__ import annotations
import uuid
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.jobs.models import Job, JobEvent
from cpv3.modules.jobs.repository import JobEventRepository, JobRepository
from cpv3.modules.jobs.schemas import (
JobCreate,
JobEventCreate,
JobEventUpdate,
JobUpdate,
)
from cpv3.modules.users.models import User
class JobService:
"""Service for job business logic and orchestration."""
def __init__(self, session: AsyncSession) -> None:
self._job_repo = JobRepository(session)
self._event_repo = JobEventRepository(session)
async def list_jobs(self, *, requester: User) -> list[Job]:
return await self._job_repo.list_all(requester=requester)
async def get_job(self, job_id: uuid.UUID) -> Job | None:
return await self._job_repo.get_by_id(job_id)
async def create_job(self, *, requester: User, data: JobCreate) -> Job:
return await self._job_repo.create(requester=requester, data=data)
async def update_job(self, job: Job, data: JobUpdate) -> Job:
return await self._job_repo.update(job, data)
async def deactivate_job(self, job: Job) -> None:
await self._job_repo.deactivate(job)
async def list_job_events(self) -> list[JobEvent]:
return await self._event_repo.list_all()
async def get_job_event(self, event_id: uuid.UUID) -> JobEvent | None:
return await self._event_repo.get_by_id(event_id)
async def create_job_event(self, data: JobEventCreate) -> JobEvent:
return await self._event_repo.create(data)
async def update_job_event(self, event: JobEvent, data: JobEventUpdate) -> JobEvent:
return await self._event_repo.update(event, data)
async def deactivate_job_event(self, event: JobEvent) -> None:
await self._event_repo.deactivate(event)
View File
+54
View File
@@ -0,0 +1,54 @@
from __future__ import annotations
import uuid
from sqlalchemy import Boolean, Float, ForeignKey, Integer, JSON, String, Text
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from cpv3.db.base import Base, BaseModelMixin
class MediaFile(Base, BaseModelMixin):
__tablename__ = "media_files"
owner_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), ForeignKey("users.id", ondelete="RESTRICT"), index=True
)
project_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("projects.id", ondelete="RESTRICT"),
nullable=True,
index=True,
)
duration_seconds: Mapped[float] = mapped_column(Float)
frame_rate: Mapped[float | None] = mapped_column(Float, nullable=True)
width: Mapped[int | None] = mapped_column(Integer, nullable=True)
height: Mapped[int | None] = mapped_column(Integer, nullable=True)
probe_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)
notes: Mapped[str | None] = mapped_column(Text, nullable=True)
meta: Mapped[dict | None] = mapped_column(JSON, nullable=True)
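    # Soft-delete flag: repositories filter on is_deleted instead of removing rows.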
is_deleted: Mapped[bool] = mapped_column(Boolean, default=False)
class ArtifactMediaFile(Base, BaseModelMixin):
__tablename__ = "artifact_media_files"
project_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("projects.id", ondelete="RESTRICT"),
nullable=True,
index=True,
)
file_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True), ForeignKey("files.id", ondelete="RESTRICT"), nullable=True, index=True
)
media_file_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), ForeignKey("media_files.id", ondelete="RESTRICT"), index=True
)
artifact_type: Mapped[str] = mapped_column(String(32), default="TRANSCRIPTION_JSON")
is_deleted: Mapped[bool] = mapped_column(Boolean, default=False)
+124
View File
@@ -0,0 +1,124 @@
from __future__ import annotations
import uuid
from sqlalchemy import Select, select
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.media.models import ArtifactMediaFile, MediaFile
from cpv3.modules.media.schemas import (
ArtifactMediaFileCreate,
ArtifactMediaFileUpdate,
MediaFileCreate,
MediaFileUpdate,
)
from cpv3.modules.users.models import User
class MediaFileRepository:
"""Repository for MediaFile database operations."""
def __init__(self, session: AsyncSession) -> None:
self._session = session
async def list_all(self, *, requester: User) -> list[MediaFile]:
stmt: Select[tuple[MediaFile]] = select(MediaFile).where(
MediaFile.is_deleted.is_(False)
)
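        # Non-staff requesters only see media files they own.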
if not requester.is_staff:
stmt = stmt.where(MediaFile.owner_id == requester.id)
result = await self._session.execute(stmt.order_by(MediaFile.created_at.desc()))
return list(result.scalars().all())
async def get_by_id(self, media_file_id: uuid.UUID) -> MediaFile | None:
result = await self._session.execute(
select(MediaFile).where(MediaFile.id == media_file_id)
)
media_file = result.scalar_one_or_none()
if media_file is None or media_file.is_deleted:
return None
return media_file
async def create(self, *, requester: User, data: MediaFileCreate) -> MediaFile:
media_file = MediaFile(
owner_id=requester.id,
project_id=data.project_id,
duration_seconds=data.duration_seconds,
frame_rate=data.frame_rate,
width=data.width,
height=data.height,
probe_json=data.probe_json,
notes=data.notes,
meta=data.meta,
)
self._session.add(media_file)
await self._session.commit()
await self._session.refresh(media_file)
return media_file
async def update(self, media_file: MediaFile, data: MediaFileUpdate) -> MediaFile:
for key, value in data.model_dump(exclude_unset=True).items():
if value is not None:
setattr(media_file, key, value)
await self._session.commit()
await self._session.refresh(media_file)
return media_file
async def mark_deleted(self, media_file: MediaFile) -> None:
media_file.is_deleted = True
await self._session.commit()
class ArtifactRepository:
"""Repository for ArtifactMediaFile database operations."""
def __init__(self, session: AsyncSession) -> None:
self._session = session
async def list_all(self) -> list[ArtifactMediaFile]:
result = await self._session.execute(
select(ArtifactMediaFile)
.where(ArtifactMediaFile.is_deleted.is_(False))
.order_by(ArtifactMediaFile.created_at.desc())
)
return list(result.scalars().all())
async def get_by_id(self, artifact_id: uuid.UUID) -> ArtifactMediaFile | None:
result = await self._session.execute(
select(ArtifactMediaFile).where(ArtifactMediaFile.id == artifact_id)
)
artifact = result.scalar_one_or_none()
if artifact is None or artifact.is_deleted:
return None
return artifact
async def create(self, data: ArtifactMediaFileCreate) -> ArtifactMediaFile:
artifact = ArtifactMediaFile(
project_id=data.project_id,
file_id=data.file_id,
media_file_id=data.media_file_id,
artifact_type=data.artifact_type,
)
self._session.add(artifact)
await self._session.commit()
await self._session.refresh(artifact)
return artifact
async def update(
self, artifact: ArtifactMediaFile, data: ArtifactMediaFileUpdate
) -> ArtifactMediaFile:
for key, value in data.model_dump(exclude_unset=True).items():
if value is not None:
setattr(artifact, key, value)
await self._session.commit()
await self._session.refresh(artifact)
return artifact
async def mark_deleted(self, artifact: ArtifactMediaFile) -> None:
artifact.is_deleted = True
await self._session.commit()
+232
View File
@@ -0,0 +1,232 @@
from __future__ import annotations
import uuid
from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.auth import get_current_user
from cpv3.infrastructure.deps import get_storage
from cpv3.infrastructure.storage.base import StorageService
from cpv3.db.session import get_db
from cpv3.modules.media.schemas import (
ArtifactMediaFileCreate,
ArtifactMediaFileRead,
ArtifactMediaFileUpdate,
MediaConverterParams,
MediaFileCreate,
MediaFileRead,
MediaFileUpdate,
MediaProbeSchema,
MediaSilencerParams,
)
from cpv3.modules.media.service import convert_to_mp4, probe_media, remove_silence
from cpv3.modules.media.repository import ArtifactRepository, MediaFileRepository
from cpv3.modules.files.schemas import FileInfoResponse
from cpv3.modules.users.models import User
media_router = APIRouter(prefix="/api/media", tags=["media"])
mediafiles_router = APIRouter(prefix="/api/media", tags=["mediafiles"])
artifacts_router = APIRouter(prefix="/api/media", tags=["artifacts"])
@media_router.get("/get_meta/", response_model=MediaProbeSchema)
async def get_meta(
file_path: str = Query(...),
current_user: User = Depends(get_current_user),
storage: StorageService = Depends(get_storage),
) -> MediaProbeSchema:
_ = current_user
return await probe_media(storage, file_key=file_path)
@media_router.post("/silence_remove", response_model=FileInfoResponse)
async def silence_remove(
body: MediaSilencerParams,
current_user: User = Depends(get_current_user),
storage: StorageService = Depends(get_storage),
) -> FileInfoResponse:
_ = current_user
info = await remove_silence(
storage,
file_key=body.file_path,
out_folder=body.folder,
min_silence_duration_ms=body.min_silence_duration_ms,
silence_threshold_db=body.silence_threshold_db,
padding_ms=body.padding_ms,
)
return FileInfoResponse(
file_path=info.file_path,
file_url=info.file_url,
file_size=info.file_size,
filename=info.filename,
)
@media_router.post("/convert", response_model=FileInfoResponse)
async def convert(
body: MediaConverterParams,
current_user: User = Depends(get_current_user),
storage: StorageService = Depends(get_storage),
) -> FileInfoResponse:
_ = current_user
info = await convert_to_mp4(storage, file_key=body.file_path, out_folder=body.folder)
return FileInfoResponse(
file_path=info.file_path,
file_url=info.file_url,
file_size=info.file_size,
filename=info.filename,
)
@mediafiles_router.get("/mediafiles/", response_model=list[MediaFileRead])
async def list_mediafiles(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> list[MediaFileRead]:
repo = MediaFileRepository(db)
items = await repo.list_all(requester=current_user)
return [MediaFileRead.model_validate(m) for m in items]
@mediafiles_router.post(
"/mediafiles/", response_model=MediaFileRead, status_code=status.HTTP_201_CREATED
)
async def create_mediafile(
body: MediaFileCreate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> MediaFileRead:
repo = MediaFileRepository(db)
media_file = await repo.create(requester=current_user, data=body)
return MediaFileRead.model_validate(media_file)
@mediafiles_router.get("/mediafiles/{media_file_id}/", response_model=MediaFileRead)
async def retrieve_mediafile(
media_file_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> MediaFileRead:
repo = MediaFileRepository(db)
media_file = await repo.get_by_id(media_file_id)
if media_file is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and media_file.owner_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
return MediaFileRead.model_validate(media_file)
@mediafiles_router.patch("/mediafiles/{media_file_id}/", response_model=MediaFileRead)
async def patch_mediafile(
media_file_id: uuid.UUID,
body: MediaFileUpdate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> MediaFileRead:
repo = MediaFileRepository(db)
media_file = await repo.get_by_id(media_file_id)
if media_file is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and media_file.owner_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
media_file = await repo.update(media_file, body)
return MediaFileRead.model_validate(media_file)
@mediafiles_router.delete("/mediafiles/{media_file_id}/", status_code=status.HTTP_204_NO_CONTENT)
async def delete_mediafile(
media_file_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> Response:
repo = MediaFileRepository(db)
media_file = await repo.get_by_id(media_file_id)
if media_file is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and media_file.owner_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
await repo.mark_deleted(media_file)
return Response(status_code=status.HTTP_204_NO_CONTENT)
@artifacts_router.get("/artifacts/", response_model=list[ArtifactMediaFileRead])
async def list_artifact_mediafiles(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> list[ArtifactMediaFileRead]:
_ = current_user
repo = ArtifactRepository(db)
items = await repo.list_all()
return [ArtifactMediaFileRead.model_validate(a) for a in items]
@artifacts_router.post(
"/artifacts/", response_model=ArtifactMediaFileRead, status_code=status.HTTP_201_CREATED
)
async def create_artifact_mediafile(
body: ArtifactMediaFileCreate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> ArtifactMediaFileRead:
_ = current_user
repo = ArtifactRepository(db)
artifact = await repo.create(body)
return ArtifactMediaFileRead.model_validate(artifact)
@artifacts_router.get("/artifacts/{artifact_id}/", response_model=ArtifactMediaFileRead)
async def retrieve_artifact_mediafile(
artifact_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> ArtifactMediaFileRead:
_ = current_user
repo = ArtifactRepository(db)
artifact = await repo.get_by_id(artifact_id)
if artifact is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
return ArtifactMediaFileRead.model_validate(artifact)
@artifacts_router.patch("/artifacts/{artifact_id}/", response_model=ArtifactMediaFileRead)
async def patch_artifact_mediafile(
artifact_id: uuid.UUID,
body: ArtifactMediaFileUpdate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> ArtifactMediaFileRead:
_ = current_user
repo = ArtifactRepository(db)
artifact = await repo.get_by_id(artifact_id)
if artifact is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
artifact = await repo.update(artifact, body)
return ArtifactMediaFileRead.model_validate(artifact)
@artifacts_router.delete("/artifacts/{artifact_id}/", status_code=status.HTTP_204_NO_CONTENT)
async def delete_artifact_mediafile(
artifact_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> Response:
_ = current_user
repo = ArtifactRepository(db)
artifact = await repo.get_by_id(artifact_id)
if artifact is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
await repo.mark_deleted(artifact)
return Response(status_code=status.HTTP_204_NO_CONTENT)
+150
View File
@@ -0,0 +1,150 @@
from __future__ import annotations
from datetime import datetime
from typing import Literal
from uuid import UUID
from pydantic import ConfigDict
from cpv3.common.schemas import Schema
ArtifactTypeEnum = Literal[
"TRANSCRIPTION_JSON",
"SILENCE_REMOVED_VIDEO",
"THUMBNAIL",
"AUDIO_PROXY",
"RENDERED_VIDEO",
]
class MediaFileRead(Schema):
id: UUID
owner_id: UUID
project_id: UUID | None
duration_seconds: float
frame_rate: float | None
width: int | None
height: int | None
probe_json: dict | None
notes: str | None
meta: dict | None
is_deleted: bool
is_active: bool
created_at: datetime
updated_at: datetime
class MediaFileCreate(Schema):
project_id: UUID | None = None
duration_seconds: float
frame_rate: float | None = None
width: int | None = None
height: int | None = None
probe_json: dict | None = None
notes: str | None = None
meta: dict | None = None
class MediaFileUpdate(Schema):
notes: str | None = None
meta: dict | None = None
is_deleted: bool | None = None
class ArtifactMediaFileRead(Schema):
id: UUID
project_id: UUID | None
file_id: UUID | None
media_file_id: UUID
artifact_type: ArtifactTypeEnum
is_deleted: bool
is_active: bool
created_at: datetime
updated_at: datetime
class ArtifactMediaFileCreate(Schema):
project_id: UUID | None = None
file_id: UUID | None = None
media_file_id: UUID
artifact_type: ArtifactTypeEnum
class ArtifactMediaFileUpdate(Schema):
is_deleted: bool | None = None
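# Models mirroring ffprobe's JSON output; extra="allow" keeps any keys ffprobe
# emits beyond the fields declared explicitly.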
class DispositionSchema(Schema):
model_config = ConfigDict(extra="allow")
default: int | None = None
class StreamSchema(Schema):
model_config = ConfigDict(extra="allow")
index: int | None = None
codec_name: str | None = None
codec_long_name: str | None = None
profile: str | None = None
codec_type: str | None = None
codec_tag_string: str | None = None
codec_tag: str | None = None
width: int | None = None
height: int | None = None
id: str | None = None
r_frame_rate: str | None = None
avg_frame_rate: str | None = None
time_base: str | None = None
start_pts: int | None = None
start_time: str | None = None
duration_ts: int | None = None
duration: str | None = None
bit_rate: str | None = None
nb_frames: str | None = None
extradata_size: int | None = None
disposition: DispositionSchema | None = None
tags: dict[str, str] | None = None
class FormatSchema(Schema):
model_config = ConfigDict(extra="allow")
filename: str | None = None
nb_streams: int | None = None
format_name: str | None = None
format_long_name: str | None = None
start_time: str | None = None
duration: str | None = None
size: str | None = None
bit_rate: str | None = None
probe_score: int | None = None
tags: dict[str, str] | None = None
class MediaProbeSchema(Schema):
model_config = ConfigDict(extra="allow")
streams: list[StreamSchema] = []
format: FormatSchema | None = None
class MediaSilencerParams(Schema):
file_path: str
folder: str = ""
min_silence_duration_ms: int = 200
silence_threshold_db: int = 16
padding_ms: int = 100
class MediaConverterParams(Schema):
file_path: str
folder: str = ""
+266
View File
@@ -0,0 +1,266 @@
from __future__ import annotations
import asyncio
from os import path
from tempfile import NamedTemporaryFile
from typing import Callable
import anyio
from cpv3.infrastructure.storage.base import StorageService
from cpv3.infrastructure.storage.types import FileInfo
from cpv3.modules.media.schemas import MediaProbeSchema
async def probe_media(storage: StorageService, *, file_key: str) -> MediaProbeSchema:
tmp = await storage.download_to_temp(file_key)
try:
proc = await asyncio.create_subprocess_exec(
"ffprobe",
"-v",
"error",
"-show_streams",
"-show_format",
"-of",
"json",
tmp.path,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
if proc.returncode != 0:
raise RuntimeError(f"ffprobe failed: {stderr.decode(errors='ignore')}")
import json
raw = json.loads(stdout.decode())
return MediaProbeSchema.model_validate(raw)
finally:
tmp.cleanup()
def _compute_non_silent_segments(
*,
local_audio_path: str,
min_silence_duration_ms: int,
silence_threshold_db: int,
padding_ms: int,
) -> list[tuple[int, int]]:
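    """Return (start_ms, end_ms) spans of non-silent audio.

    pydub treats anything quieter than (average dBFS - silence_threshold_db)
    as silence; detected spans are widened by padding_ms on each side and
    clamped to the clip bounds.
    """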
from pydub import AudioSegment, silence # type: ignore[import-untyped]
audio: AudioSegment = AudioSegment.from_file(local_audio_path)
duration_ms = len(audio)
raw_segments = silence.detect_nonsilent(
audio_segment=audio,
min_silence_len=min_silence_duration_ms,
silence_thresh=int(audio.dBFS - silence_threshold_db),
)
segments: list[tuple[int, int]] = []
for start_ms, end_ms in raw_segments:
start = max(0, start_ms - padding_ms)
end = min(duration_ms, end_ms + padding_ms)
if end > start:
segments.append((start, end))
return segments
async def remove_silence(
storage: StorageService,
*,
file_key: str,
out_folder: str,
min_silence_duration_ms: int = 200,
silence_threshold_db: int = 16,
padding_ms: int = 100,
) -> FileInfo:
input_tmp = await storage.download_to_temp(file_key)
try:
segments = await anyio.to_thread.run_sync(
lambda: _compute_non_silent_segments(
local_audio_path=input_tmp.path,
min_silence_duration_ms=min_silence_duration_ms,
silence_threshold_db=silence_threshold_db,
padding_ms=padding_ms,
)
)
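        # No non-silent spans were detected; return the original file's info unchanged.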
if not segments:
return await storage.get_file_info(file_key)
with NamedTemporaryFile(
suffix=path.splitext(file_key)[1] or ".mp4", delete=False
) as out:
out_path = out.name
try:
cmd: list[str] = ["ffmpeg"]
for start_ms, end_ms in segments:
start_s = start_ms / 1000.0
duration_s = (end_ms - start_ms) / 1000.0
cmd.extend(
[
"-ss",
f"{start_s:.3f}",
"-t",
f"{duration_s:.3f}",
"-y",
"-i",
input_tmp.path,
]
)
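            # Each kept span becomes its own trimmed input (-ss/-t before -i);
            # the concat filter below stitches the spans back into one stream.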
seg_count = len(segments)
parts = [f"[{i}:v:0][{i}:a:0]" for i in range(seg_count)]
filter_complex = "".join(parts) + f"concat=n={seg_count}:v=1:a=1[v][a]"
cmd.extend(
[
"-filter_complex",
filter_complex,
"-map",
"[v]",
"-map",
"[a]",
"-c:v",
"libx264",
"-c:a",
"aac",
"-preset",
"medium",
out_path,
]
)
proc = await asyncio.create_subprocess_exec(
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
_, stderr = await proc.communicate()
if proc.returncode != 0:
raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
output_key = path.join(out_folder or "", "silent", path.basename(file_key))
with open(out_path, "rb") as out_file:
_ = await storage.upload_fileobj(
fileobj=out_file,
file_name=path.basename(output_key),
folder=path.dirname(output_key),
gen_name=False,
content_type="video/mp4",
)
return await storage.get_file_info(output_key)
finally:
import os
if os.path.exists(out_path):
os.remove(out_path)
finally:
input_tmp.cleanup()
async def convert_to_mp4(
storage: StorageService, *, file_key: str, out_folder: str
) -> FileInfo:
input_tmp = await storage.download_to_temp(file_key)
try:
filename_without_ext = path.splitext(path.basename(file_key))[0]
mp4_filename = filename_without_ext + ".mp4"
with NamedTemporaryFile(suffix=".mp4", delete=False) as out:
out_path = out.name
try:
cmd = [
"ffmpeg",
"-y",
"-i",
input_tmp.path,
"-c:v",
"libx264",
"-c:a",
"aac",
"-preset",
"medium",
"-f",
"mp4",
out_path,
]
proc = await asyncio.create_subprocess_exec(
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
_, stderr = await proc.communicate()
if proc.returncode != 0:
raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
output_key = path.join(out_folder or "", "converted", mp4_filename)
with open(out_path, "rb") as out_file:
_ = await storage.upload_fileobj(
fileobj=out_file,
file_name=mp4_filename,
folder=path.dirname(output_key),
gen_name=False,
content_type="video/mp4",
)
return await storage.get_file_info(output_key)
finally:
import os
if os.path.exists(out_path):
os.remove(out_path)
finally:
input_tmp.cleanup()
async def convert_to_ogg_temp(
storage: StorageService, *, file_key: str
) -> tuple[str, Callable[[], None]]:
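    """Convert the stored file to 16 kHz mono Opus/OGG in a local temp file.

    Returns the local path and a cleanup callback; the caller is responsible
    for invoking the callback once the file is no longer needed.
    """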
input_tmp = await storage.download_to_temp(file_key)
filename_without_ext = path.splitext(path.basename(file_key))[0]
with NamedTemporaryFile(suffix=".ogg", delete=False) as out:
out_path = out.name
async def _run() -> None:
cmd = [
"ffmpeg",
"-y",
"-i",
input_tmp.path,
"-c:a",
"libopus",
"-b:a",
"24k",
"-vn",
"-ac",
"1",
"-ar",
"16000",
out_path,
]
proc = await asyncio.create_subprocess_exec(
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
_, stderr = await proc.communicate()
if proc.returncode != 0:
raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
await _run()
def _cleanup() -> None:
import os
input_tmp.cleanup()
if os.path.exists(out_path):
os.remove(out_path)
_ = filename_without_ext
return out_path, _cleanup
View File
+25
View File
@@ -0,0 +1,25 @@
from __future__ import annotations
import uuid
from sqlalchemy import ForeignKey, String, Text
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from cpv3.db.base import Base, BaseModelMixin
class Project(Base, BaseModelMixin):
__tablename__ = "projects"
owner_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("users.id", ondelete="RESTRICT"),
index=True,
)
name: Mapped[str] = mapped_column(String(255))
description: Mapped[str | None] = mapped_column(Text, nullable=True)
language: Mapped[str] = mapped_column(String(4), default="auto")
folder: Mapped[str | None] = mapped_column(String(1024), nullable=True)
status: Mapped[str] = mapped_column(String(16), default="DRAFT")
+63
View File
@@ -0,0 +1,63 @@
from __future__ import annotations
import uuid
from sqlalchemy import Select, select
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.projects.models import Project
from cpv3.modules.projects.schemas import ProjectCreate, ProjectUpdate
from cpv3.modules.users.models import User
class ProjectRepository:
"""Repository for Project database operations."""
def __init__(self, session: AsyncSession) -> None:
self._session = session
async def list_all(self, *, requester: User) -> list[Project]:
stmt: Select[tuple[Project]] = select(Project).where(
Project.is_active.is_(True)
)
if not requester.is_staff:
stmt = stmt.where(Project.owner_id == requester.id)
result = await self._session.execute(stmt.order_by(Project.created_at.desc()))
return list(result.scalars().all())
async def get_by_id(self, project_id: uuid.UUID) -> Project | None:
result = await self._session.execute(
select(Project)
.where(Project.id == project_id)
.where(Project.is_active.is_(True))
)
return result.scalar_one_or_none()
async def create(self, *, requester: User, data: ProjectCreate) -> Project:
project = Project(
owner_id=requester.id,
name=data.name,
description=data.description,
language=data.language,
folder=data.folder,
status=data.status,
)
self._session.add(project)
await self._session.commit()
await self._session.refresh(project)
return project
async def update(self, project: Project, data: ProjectUpdate) -> Project:
for key, value in data.model_dump(exclude_unset=True).items():
if value is not None:
setattr(project, key, value)
await self._session.commit()
await self._session.refresh(project)
return project
async def deactivate(self, project: Project) -> None:
project.is_active = False
await self._session.commit()
+89
View File
@@ -0,0 +1,89 @@
from __future__ import annotations
import uuid
from fastapi import APIRouter, Depends, HTTPException, Response, status
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.auth import get_current_user
from cpv3.db.session import get_db
from cpv3.modules.projects.schemas import ProjectCreate, ProjectRead, ProjectUpdate
from cpv3.modules.projects.service import ProjectService
from cpv3.modules.users.models import User
router = APIRouter(prefix="/api/projects", tags=["Projects"])
@router.get("/", response_model=list[ProjectRead])
async def list_all_projects(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> list[ProjectRead]:
service = ProjectService(db)
projects = await service.list_projects(requester=current_user)
return [ProjectRead.model_validate(p) for p in projects]
@router.post("/", response_model=ProjectRead, status_code=status.HTTP_201_CREATED)
async def create_project_endpoint(
body: ProjectCreate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> ProjectRead:
service = ProjectService(db)
project = await service.create_project(requester=current_user, data=body)
return ProjectRead.model_validate(project)
@router.get("/{project_id}/", response_model=ProjectRead)
async def retrieve_project(
project_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> ProjectRead:
service = ProjectService(db)
project = await service.get_project(project_id)
if project is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and project.owner_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
return ProjectRead.model_validate(project)
@router.patch("/{project_id}/", response_model=ProjectRead)
async def patch_project(
project_id: uuid.UUID,
body: ProjectUpdate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> ProjectRead:
service = ProjectService(db)
project = await service.get_project(project_id)
if project is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and project.owner_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
project = await service.update_project(project, body)
return ProjectRead.model_validate(project)
@router.delete("/{project_id}/", status_code=status.HTTP_204_NO_CONTENT)
async def delete_project(
project_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> Response:
service = ProjectService(db)
project = await service.get_project(project_id)
if project is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and project.owner_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
await service.deactivate_project(project)
return Response(status_code=status.HTTP_204_NO_CONTENT)
+40
View File
@@ -0,0 +1,40 @@
from __future__ import annotations
from datetime import datetime
from typing import Literal
from uuid import UUID
from cpv3.common.schemas import Schema
ProjectStatusEnum = Literal["DRAFT", "PROCESSING", "DONE", "FAILED"]
class ProjectRead(Schema):
id: UUID
owner_id: UUID
name: str
description: str | None
language: str
folder: str | None
status: ProjectStatusEnum
is_active: bool
created_at: datetime
updated_at: datetime
class ProjectCreate(Schema):
name: str
description: str | None = None
language: str = "auto"
folder: str | None = None
status: ProjectStatusEnum = "DRAFT"
class ProjectUpdate(Schema):
name: str | None = None
description: str | None = None
language: str | None = None
folder: str | None = None
status: ProjectStatusEnum | None = None
+58
View File
@@ -0,0 +1,58 @@
from __future__ import annotations
import uuid
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.projects.models import Project
from cpv3.modules.projects.repository import ProjectRepository
from cpv3.modules.projects.schemas import ProjectCreate, ProjectUpdate
from cpv3.modules.users.models import User
class ProjectService:
"""Service for project business logic and orchestration."""
def __init__(self, session: AsyncSession) -> None:
self._repo = ProjectRepository(session)
async def list_projects(self, *, requester: User) -> list[Project]:
return await self._repo.list_all(requester=requester)
async def get_project(self, project_id: uuid.UUID) -> Project | None:
return await self._repo.get_by_id(project_id)
async def create_project(self, *, requester: User, data: ProjectCreate) -> Project:
return await self._repo.create(requester=requester, data=data)
async def update_project(self, project: Project, data: ProjectUpdate) -> Project:
return await self._repo.update(project, data)
async def deactivate_project(self, project: Project) -> None:
await self._repo.deactivate(project)
# Legacy function exports for backward compatibility
async def list_projects(session: AsyncSession, *, requester: User) -> list[Project]:
service = ProjectService(session)
return await service.list_projects(requester=requester)
async def get_project(session: AsyncSession, project_id: uuid.UUID) -> Project | None:
service = ProjectService(session)
return await service.get_project(project_id)
async def create_project(session: AsyncSession, *, requester: User, data: ProjectCreate) -> Project:
service = ProjectService(session)
return await service.create_project(requester=requester, data=data)
async def update_project(session: AsyncSession, project: Project, data: ProjectUpdate) -> Project:
service = ProjectService(session)
return await service.update_project(project, data)
async def deactivate_project(session: AsyncSession, project: Project) -> None:
service = ProjectService(session)
await service.deactivate_project(project)
View File
+10
View File
@@ -0,0 +1,10 @@
from __future__ import annotations
from fastapi import APIRouter
router = APIRouter(prefix="/api", tags=["System"])
@router.get("/ping/")
async def ping() -> dict[str, str]:
return {"status": "ok"}
+15
View File
@@ -0,0 +1,15 @@
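# Structure-tag names that DocumentBuilder attaches to words, lines and
# segments to mark their position within a transcription document.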
FIRST_WORD_IN_DOCUMENT = "first-word-in-document"
FIRST_WORD_IN_SEGMENT = "first-word-in-segment"
FIRST_WORD_IN_LINE = "first-word-in-line"
LAST_WORD_IN_DOCUMENT = "last-word-in-document"
LAST_WORD_IN_SEGMENT = "last-word-in-segment"
LAST_WORD_IN_LINE = "last-word-in-line"
FIRST_LINE_IN_DOCUMENT = "first-line-in-document"
FIRST_LINE_IN_SEGMENT = "first-line-in-segment"
LAST_LINE_IN_DOCUMENT = "last-line-in-document"
LAST_LINE_IN_SEGMENT = "last-line-in-segment"
FIRST_SEGMENT_IN_DOCUMENT = "first-segment-in-document"
LAST_SEGMENT_IN_DOCUMENT = "last-segment-in-document"
+35
View File
@@ -0,0 +1,35 @@
from __future__ import annotations
import uuid
from sqlalchemy import JSON, ForeignKey, String
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from cpv3.db.base import Base, BaseModelMixin
class Transcription(Base, BaseModelMixin):
__tablename__ = "transcriptions"
project_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("projects.id", ondelete="RESTRICT"),
nullable=True,
index=True,
)
source_file_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), ForeignKey("files.id", ondelete="RESTRICT"), index=True
)
artifact_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("artifact_media_files.id", ondelete="RESTRICT"),
nullable=True,
index=True,
)
engine: Mapped[str] = mapped_column(String(32), default="LOCAL_WHISPER")
language: Mapped[str | None] = mapped_column(String(3), nullable=True)
document: Mapped[dict] = mapped_column(JSON)
transcribe_options: Mapped[dict | None] = mapped_column(JSON, nullable=True)
+64
View File
@@ -0,0 +1,64 @@
from __future__ import annotations
import uuid
from sqlalchemy import Select, select
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.transcription.models import Transcription
from cpv3.modules.transcription.schemas import TranscriptionCreate, TranscriptionUpdate
class TranscriptionRepository:
"""Repository for Transcription database operations."""
def __init__(self, session: AsyncSession) -> None:
self._session = session
async def list_all(self) -> list[Transcription]:
stmt: Select[tuple[Transcription]] = select(Transcription).where(
Transcription.is_active.is_(True)
)
result = await self._session.execute(
stmt.order_by(Transcription.created_at.desc())
)
return list(result.scalars().all())
async def get_by_id(self, transcription_id: uuid.UUID) -> Transcription | None:
result = await self._session.execute(
select(Transcription)
.where(Transcription.id == transcription_id)
.where(Transcription.is_active.is_(True))
)
return result.scalar_one_or_none()
async def create(self, data: TranscriptionCreate) -> Transcription:
transcription = Transcription(
project_id=data.project_id,
source_file_id=data.source_file_id,
artifact_id=data.artifact_id,
engine=data.engine,
language=data.language,
document=data.document,
transcribe_options=data.transcribe_options,
)
self._session.add(transcription)
await self._session.commit()
await self._session.refresh(transcription)
return transcription
async def update(
self, transcription: Transcription, data: TranscriptionUpdate
) -> Transcription:
for key, value in data.model_dump(exclude_unset=True).items():
if value is not None:
setattr(transcription, key, value)
await self._session.commit()
await self._session.refresh(transcription)
return transcription
async def deactivate(self, transcription: Transcription) -> None:
transcription.is_active = False
await self._session.commit()
+129
View File
@@ -0,0 +1,129 @@
from __future__ import annotations
import uuid
from fastapi import APIRouter, Depends, HTTPException, Response, status
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.auth import get_current_user
from cpv3.infrastructure.deps import get_storage
from cpv3.infrastructure.storage.base import StorageService
from cpv3.db.session import get_db
from cpv3.modules.transcription.schemas import (
Document,
GoogleSpeechParams,
TranscriptionCreate,
TranscriptionRead,
TranscriptionUpdate,
WhisperParams,
)
from cpv3.modules.transcription.service import (
transcribe_with_google_speech,
transcribe_with_whisper,
)
from cpv3.modules.transcription.repository import TranscriptionRepository
from cpv3.modules.users.models import User
router = APIRouter(prefix="/api/transcribe", tags=["Transcription"])
@router.get("/transcriptions/", response_model=list[TranscriptionRead])
async def list_all_transcriptions(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> list[TranscriptionRead]:
_ = current_user
repo = TranscriptionRepository(db)
items = await repo.list_all()
return [TranscriptionRead.model_validate(t) for t in items]
@router.post(
"/transcriptions/", response_model=TranscriptionRead, status_code=status.HTTP_201_CREATED
)
async def create_transcription_entry(
body: TranscriptionCreate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> TranscriptionRead:
_ = current_user
repo = TranscriptionRepository(db)
transcription = await repo.create(body)
return TranscriptionRead.model_validate(transcription)
@router.get("/transcriptions/{transcription_id}/", response_model=TranscriptionRead)
async def retrieve_transcription_entry(
transcription_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> TranscriptionRead:
_ = current_user
repo = TranscriptionRepository(db)
transcription = await repo.get_by_id(transcription_id)
if transcription is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
return TranscriptionRead.model_validate(transcription)
@router.patch("/transcriptions/{transcription_id}/", response_model=TranscriptionRead)
async def patch_transcription_entry(
transcription_id: uuid.UUID,
body: TranscriptionUpdate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> TranscriptionRead:
_ = current_user
repo = TranscriptionRepository(db)
transcription = await repo.get_by_id(transcription_id)
if transcription is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
transcription = await repo.update(transcription, body)
return TranscriptionRead.model_validate(transcription)
@router.delete("/transcriptions/{transcription_id}/", status_code=status.HTTP_204_NO_CONTENT)
async def delete_transcription_entry(
transcription_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> Response:
_ = current_user
repo = TranscriptionRepository(db)
transcription = await repo.get_by_id(transcription_id)
if transcription is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
await repo.deactivate(transcription)
return Response(status_code=status.HTTP_204_NO_CONTENT)
@router.post("/whisper/", response_model=Document)
async def whisper_transcribe(
body: WhisperParams,
current_user: User = Depends(get_current_user),
storage: StorageService = Depends(get_storage),
) -> Document:
_ = current_user
return await transcribe_with_whisper(
storage,
file_key=body.file_path,
model_name=body.model_name,
language=body.language,
)
@router.post("/google-speech/", response_model=Document)
async def google_speech_transcribe(
body: GoogleSpeechParams,
current_user: User = Depends(get_current_user),
storage: StorageService = Depends(get_storage),
) -> Document:
_ = current_user
return await transcribe_with_google_speech(
storage,
file_key=body.file_path,
language_codes=body.language_codes,
)
+146
View File
@@ -0,0 +1,146 @@
from __future__ import annotations
from datetime import datetime
from typing import Literal
from uuid import UUID
from cpv3.common.schemas import Schema
TranscriptionEngineEnum = Literal["LOCAL_WHISPER", "GOOGLE_SPEECH_CLOUD"]
class TranscriptionRead(Schema):
id: UUID
project_id: UUID | None
source_file_id: UUID
artifact_id: UUID | None
engine: TranscriptionEngineEnum
language: str | None
document: dict
transcribe_options: dict | None
is_active: bool
created_at: datetime
updated_at: datetime
class TranscriptionCreate(Schema):
project_id: UUID | None = None
source_file_id: UUID
artifact_id: UUID | None = None
engine: TranscriptionEngineEnum = "LOCAL_WHISPER"
language: str | None = None
document: dict
transcribe_options: dict | None = None
class TranscriptionUpdate(Schema):
document: dict | None = None
transcribe_options: dict | None = None
# ---------------------------------- Document ----------------------------------
class Tag(Schema):
name: str
class TimeRange(Schema):
start: float
end: float
class WordNode(Schema):
text: str
semantic_tags: list[Tag]
structure_tags: list[Tag]
time: TimeRange
class LineNode(Schema):
text: str
semantic_tags: list[Tag]
structure_tags: list[Tag]
time: TimeRange
words: list[WordNode]
class SegmentNode(Schema):
text: str
semantic_tags: list[Tag]
structure_tags: list[Tag]
time: TimeRange
lines: list[LineNode]
class Document(Schema):
segments: list[SegmentNode]
class WordOptions(Schema):
highlight_words: bool = False
max_line_width: int = 32
max_line_count: int = 2
# ---------------------------------- Whisper Models ----------------------------------
class WhisperWord(Schema):
word: str
start: float
end: float
probability: float
class WhisperSegment(Schema):
id: int
seek: int
start: float
end: float
text: str
tokens: list[int]
temperature: float
avg_logprob: float
compression_ratio: float
no_speech_prob: float
words: list[WhisperWord]
class WhisperResult(Schema):
text: str
segments: list[WhisperSegment]
language: str
class WhisperParams(Schema):
file_path: str
language: str | None = None
model_name: str = "tiny"
# ---------------------------------- Google Speech Models ----------------------------------
class GoogleSpeechWord(Schema):
word: str
start: float
end: float
class GoogleSpeechSegment(Schema):
text: str
start: float
end: float
words: list[GoogleSpeechWord]
class GoogleSpeechResult(Schema):
text: str
segments: list[GoogleSpeechSegment]
language: str
class GoogleSpeechParams(Schema):
file_path: str
language_codes: list[str] | None = None
+402
View File
@@ -0,0 +1,402 @@
from __future__ import annotations
import asyncio
from tempfile import NamedTemporaryFile
from typing import Callable, cast
import anyio
from cpv3.infrastructure.settings import get_settings
from cpv3.infrastructure.storage.base import StorageService
from cpv3.modules.transcription.constants import (
FIRST_LINE_IN_DOCUMENT,
FIRST_LINE_IN_SEGMENT,
FIRST_SEGMENT_IN_DOCUMENT,
FIRST_WORD_IN_DOCUMENT,
FIRST_WORD_IN_LINE,
FIRST_WORD_IN_SEGMENT,
LAST_LINE_IN_DOCUMENT,
LAST_LINE_IN_SEGMENT,
LAST_SEGMENT_IN_DOCUMENT,
LAST_WORD_IN_DOCUMENT,
LAST_WORD_IN_LINE,
LAST_WORD_IN_SEGMENT,
)
from cpv3.modules.transcription.schemas import (
Document,
GoogleSpeechResult,
GoogleSpeechSegment,
GoogleSpeechWord,
LineNode,
SegmentNode,
Tag,
TimeRange,
WhisperResult,
WhisperSegment,
WhisperWord,
WordNode,
WordOptions,
)
class DocumentBuilder:
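    """Assembles the nested transcription Document (segments -> lines -> words).

    Words are wrapped into lines greedily by character count, then structure
    tags (first/last word-in-line, line-in-segment, segment-in-document, ...)
    are attached while walking the tree.
    """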
def compute_segment_lines(
self, segment: WhisperSegment | GoogleSpeechSegment, max_chars_per_line: int
) -> list[LineNode]:
words = segment.words or []
lines: list[list[WhisperWord | GoogleSpeechWord]] = []
cur_line: list[WhisperWord | GoogleSpeechWord] = []
cur_len = 0
for w in words:
text = (w.word or "").strip()
if not text:
continue
extra = len(text) + (1 if cur_line else 0)
if cur_line and cur_len + extra > max_chars_per_line:
lines.append(cur_line)
cur_line, cur_len = [w], len(text)
else:
cur_line.append(w)
cur_len += extra
if cur_line:
lines.append(cur_line)
result_lines: list[LineNode] = []
for rline in lines:
time = TimeRange(start=rline[0].start, end=rline[-1].end)
word_nodes = [
WordNode(
text=(rword.word or "").strip(),
time=TimeRange(start=rword.start, end=rword.end),
semantic_tags=[],
structure_tags=[],
)
for rword in rline
]
line_node = LineNode(
text=" ".join((rword.word or "") for rword in rline).strip(),
semantic_tags=[],
structure_tags=[],
time=time,
words=word_nodes,
)
result_lines.append(line_node)
return result_lines
def process_line(
self,
line: LineNode,
is_first_line_in_document: bool,
is_last_line_in_document: bool,
is_first_line_in_segment: bool,
is_last_line_in_segment: bool,
) -> list[WordNode]:
words: list[WordNode] = []
for idx, word in enumerate(line.words):
is_first = idx == 0
is_last = idx == len(line.words) - 1
rules = [
(is_first_line_in_document and is_first, FIRST_WORD_IN_DOCUMENT),
(is_last_line_in_document and is_last, LAST_WORD_IN_DOCUMENT),
(is_first_line_in_segment and is_first, FIRST_WORD_IN_SEGMENT),
(is_last_line_in_segment and is_last, LAST_WORD_IN_SEGMENT),
(is_first, FIRST_WORD_IN_LINE),
(is_last, LAST_WORD_IN_LINE),
]
structure_tags = [
Tag(name=tag_name) for condition, tag_name in rules if condition
]
new_word = word.model_copy(update={"structure_tags": structure_tags})
words.append(new_word)
return words
def process_segment(
self,
segment: SegmentNode,
is_first_segment_in_document: bool,
is_last_segment_in_document: bool,
) -> list[LineNode]:
lines: list[LineNode] = []
for idx, line in enumerate(segment.lines):
is_first = idx == 0
is_last = idx == len(segment.lines) - 1
rules = [
(is_first_segment_in_document and is_first, FIRST_LINE_IN_DOCUMENT),
(is_last_segment_in_document and is_last, LAST_LINE_IN_DOCUMENT),
(is_first, FIRST_LINE_IN_SEGMENT),
(is_last, LAST_LINE_IN_SEGMENT),
]
structure_tags = [
Tag(name=tag_name) for condition, tag_name in rules if condition
]
words = self.process_line(
line,
is_first_line_in_document=is_first_segment_in_document and is_first,
is_last_line_in_document=is_last_segment_in_document and is_last,
is_first_line_in_segment=is_first,
is_last_line_in_segment=is_last,
)
new_line = line.model_copy(
update={"structure_tags": structure_tags, "words": words}
)
lines.append(new_line)
return lines
def process_document(self, document: Document) -> Document:
segments: list[SegmentNode] = []
for idx, segment in enumerate(document.segments):
structure_tags: list[Tag] = []
is_first_segment_in_document = idx == 0
is_last_segment_in_document = idx == len(document.segments) - 1
if is_first_segment_in_document:
structure_tags.append(Tag(name=FIRST_SEGMENT_IN_DOCUMENT))
if is_last_segment_in_document:
structure_tags.append(Tag(name=LAST_SEGMENT_IN_DOCUMENT))
lines = self.process_segment(
segment, is_first_segment_in_document, is_last_segment_in_document
)
new_segment = segment.model_copy(
update={"lines": lines, "structure_tags": structure_tags}
)
segments.append(new_segment)
return Document(segments=segments)
async def _convert_local_to_ogg(input_path: str) -> tuple[str, Callable[[], None]]:
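    """Re-encode a local file to 16 kHz mono Opus/OGG for Google Speech and
    return the temp path plus a cleanup callback."""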
with NamedTemporaryFile(suffix=".ogg", delete=False) as out:
out_path = out.name
proc = await asyncio.create_subprocess_exec(
"ffmpeg",
"-y",
"-i",
input_path,
"-c:a",
"libopus",
"-b:a",
"24k",
"-vn",
"-ac",
"1",
"-ar",
"16000",
out_path,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
_, stderr = await proc.communicate()
if proc.returncode != 0:
raise RuntimeError(f"ffmpeg failed: {stderr.decode(errors='ignore')}")
def _cleanup() -> None:
import os
if os.path.exists(out_path):
os.remove(out_path)
return out_path, _cleanup
def _make_document_from_segments(
builder: DocumentBuilder,
segments: list[WhisperSegment] | list[GoogleSpeechSegment],
*,
max_line_width: int,
) -> Document:
result_segments: list[SegmentNode] = []
for segment in segments:
lines = builder.compute_segment_lines(segment, max_line_width)
time = TimeRange(start=segment.start, end=segment.end)
segment_node = SegmentNode(
text=segment.text.strip(),
semantic_tags=[],
structure_tags=[],
time=time,
lines=lines,
)
result_segments.append(segment_node)
return Document(segments=result_segments)
def _whisper_transcribe_sync(
*,
local_file_path: str,
model_name: str,
language: str | None,
) -> Document:
import whisper # type: ignore[import-untyped]
settings = get_settings()
settings.transcription_models_dir.mkdir(parents=True, exist_ok=True)
builder = DocumentBuilder()
model = whisper.load_model(
model_name, download_root=str(settings.transcription_models_dir)
)
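    # No language supplied: let Whisper guess it from a padded/trimmed leading
    # window of the audio via its mel-spectrogram language detector.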
if language is None:
audio = whisper.load_audio(local_file_path)
audio = whisper.pad_or_trim(audio)
mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(
model.device
)
_, probs_raw = model.detect_language(mel)
probs = cast(dict[str, float], probs_raw)
language = max(probs, key=lambda k: probs[k])
result = whisper.transcribe(
audio=whisper.load_audio(local_file_path),
model=model,
word_timestamps=True,
temperature=0.2,
language=language,
verbose=False,
)
parsed = WhisperResult.model_validate(result)
words_options = WordOptions(
highlight_words=True,
max_line_width=32,
max_line_count=2,
)
document = _make_document_from_segments(
builder, parsed.segments, max_line_width=words_options.max_line_width
)
return builder.process_document(document)
async def transcribe_with_whisper(
storage: StorageService,
*,
file_key: str,
model_name: str = "tiny",
language: str | None = None,
) -> Document:
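    """Download the stored file and run the blocking Whisper pipeline in a
    worker thread, cleaning up the temp file afterwards."""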
tmp = await storage.download_to_temp(file_key)
try:
return await anyio.to_thread.run_sync(
lambda: _whisper_transcribe_sync(
local_file_path=tmp.path,
model_name=model_name,
language=language,
)
)
finally:
tmp.cleanup()
def _google_transcribe_sync(
*, ogg_bytes: bytes, language_codes: list[str]
) -> GoogleSpeechResult:
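    """Blocking Google Cloud Speech call: long-running recognition over
    OGG/Opus bytes with word time offsets, flattened into GoogleSpeechResult."""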
from google.cloud import speech
settings = get_settings()
client: speech.SpeechClient = speech.SpeechClient.from_service_account_file(
str(settings.google_service_key_path)
)
audio = speech.RecognitionAudio(content=ogg_bytes)
config = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.OGG_OPUS,
sample_rate_hertz=16000,
language_code=language_codes[0],
alternative_language_codes=(
language_codes[1:] if len(language_codes) > 1 else []
),
model="latest_long",
enable_word_time_offsets=True,
)
operation = client.long_running_recognize(config=config, audio=audio)
response = operation.result(timeout=600)
segments: list[GoogleSpeechSegment] = []
full_text = ""
for result in response.results:
alternative = result.alternatives[0]
words: list[GoogleSpeechWord] = []
for word_info in alternative.words:
words.append(
GoogleSpeechWord(
word=word_info.word,
start=word_info.start_time.total_seconds(),
end=word_info.end_time.total_seconds(),
)
)
if words:
segment_text = alternative.transcript
full_text += segment_text + " "
segments.append(
GoogleSpeechSegment(
text=segment_text,
start=words[0].start,
end=words[-1].end,
words=words,
)
)
return GoogleSpeechResult(
text=full_text.strip(), segments=segments, language=language_codes[0]
)
async def transcribe_with_google_speech(
storage: StorageService,
*,
file_key: str,
language_codes: list[str] | None = None,
) -> Document:
language_codes = language_codes or ["ru-RU", "en-US"]
builder = DocumentBuilder()
words_options = WordOptions()
input_tmp = await storage.download_to_temp(file_key)
try:
ogg_path, ogg_cleanup = await _convert_local_to_ogg(input_tmp.path)
try:
with open(ogg_path, "rb") as f:
content = f.read()
result = await anyio.to_thread.run_sync(
lambda: _google_transcribe_sync(
ogg_bytes=content, language_codes=language_codes
)
)
document = _make_document_from_segments(
builder, result.segments, max_line_width=words_options.max_line_width
)
return builder.process_document(document)
finally:
ogg_cleanup()
finally:
input_tmp.cleanup()
View File
+36
View File
@@ -0,0 +1,36 @@
from __future__ import annotations
from datetime import datetime, timezone
from sqlalchemy import Boolean, DateTime, String
from sqlalchemy.orm import Mapped, mapped_column
from cpv3.db.base import Base, BaseModelMixin
def utcnow() -> datetime:
return datetime.now(timezone.utc)
class User(Base, BaseModelMixin):
__tablename__ = "users"
username: Mapped[str] = mapped_column(String(150), unique=True, index=True)
email: Mapped[str] = mapped_column(String(254), default="")
password_hash: Mapped[str] = mapped_column(String(255))
first_name: Mapped[str] = mapped_column(String(150), default="")
last_name: Mapped[str] = mapped_column(String(150), default="")
phone_number: Mapped[str | None] = mapped_column(String(15), unique=True, nullable=True)
avatar: Mapped[str | None] = mapped_column(String(2048), nullable=True)
email_verified: Mapped[bool] = mapped_column(Boolean, default=False)
phone_verified: Mapped[bool] = mapped_column(Boolean, default=False)
is_staff: Mapped[bool] = mapped_column(Boolean, default=False)
is_superuser: Mapped[bool] = mapped_column(Boolean, default=False)
date_joined: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
last_login: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+76
View File
@@ -0,0 +1,76 @@
from __future__ import annotations
import uuid
from sqlalchemy import Select, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.users.models import User
from cpv3.modules.users.schemas import UserCreate, UserRegister, UserUpdate
from cpv3.infrastructure.security import hash_password
class UserRepository:
"""Repository for User database operations."""
def __init__(self, session: AsyncSession) -> None:
self._session = session
async def get_by_id(self, user_id: uuid.UUID) -> User | None:
result = await self._session.execute(select(User).where(User.id == user_id))
return result.scalar_one_or_none()
async def get_by_username(self, username: str) -> User | None:
result = await self._session.execute(
select(User).where(User.username == username)
)
return result.scalar_one_or_none()
async def list_all(self, *, requester: User) -> list[User]:
stmt: Select[tuple[User]] = select(User)
if not requester.is_staff:
stmt = stmt.where(User.id == requester.id)
result = await self._session.execute(stmt.order_by(User.created_at.desc()))
return list(result.scalars().all())
async def create(self, *, data: UserCreate | UserRegister) -> User:
user = User(
username=data.username,
email=data.email,
password_hash=hash_password(data.password),
first_name=data.first_name,
last_name=data.last_name,
phone_number=data.phone_number,
avatar=data.avatar,
)
self._session.add(user)
try:
await self._session.commit()
except IntegrityError as e:
await self._session.rollback()
raise ValueError("User already exists or violates constraints") from e
await self._session.refresh(user)
return user
async def update(self, user: User, data: UserUpdate) -> User:
update_data = data.model_dump(exclude_unset=True)
for key, value in update_data.items():
if value is not None:
setattr(user, key, value)
try:
await self._session.commit()
except IntegrityError as e:
await self._session.rollback()
raise ValueError("Update violates constraints") from e
await self._session.refresh(user)
return user
async def deactivate(self, user: User) -> None:
user.is_active = False
await self._session.commit()
+180
View File
@@ -0,0 +1,180 @@
from __future__ import annotations
import uuid
from datetime import timedelta
from fastapi import APIRouter, Depends, HTTPException, Response, status
from jwt import ExpiredSignatureError, InvalidTokenError
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.auth import get_current_user
from cpv3.infrastructure.security import create_token, decode_token
from cpv3.infrastructure.settings import get_settings
from cpv3.db.session import get_db
from cpv3.modules.users.models import User
from cpv3.modules.users.schemas import (
TokenRefresh,
TokenRefreshResponse,
UserCreate,
UserLogin,
UserRead,
UserRegister,
UserRegisterResponse,
UserUpdate,
)
from cpv3.modules.users.service import UserService
users_router = APIRouter(prefix="/api/users", tags=["Users"])
auth_router = APIRouter(prefix="/auth", tags=["auth"])
def _issue_tokens(user: User) -> tuple[str, str]:
settings = get_settings()
access = create_token(
subject=str(user.id),
token_type="access",
expires_in=timedelta(minutes=settings.jwt_access_ttl_minutes),
extra={"is_staff": user.is_staff, "is_superuser": user.is_superuser},
)
refresh = create_token(
subject=str(user.id),
token_type="refresh",
expires_in=timedelta(days=settings.jwt_refresh_ttl_days),
)
return access, refresh
@users_router.get("/", response_model=list[UserRead])
async def list_all_users(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> list[UserRead]:
service = UserService(db)
users = await service.list_users(requester=current_user)
return [UserRead.model_validate(u) for u in users]
@users_router.post("/", response_model=UserRead, status_code=status.HTTP_201_CREATED)
async def create_user_endpoint(
body: UserCreate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> UserRead:
service = UserService(db)
try:
user = await service.create_user(body, requester=current_user)
except ValueError as e:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e
return UserRead.model_validate(user)
@users_router.get("/me/", response_model=UserRead)
async def me(current_user: User = Depends(get_current_user)) -> UserRead:
return UserRead.model_validate(current_user)
@users_router.get("/{user_id}/", response_model=UserRead)
async def retrieve_user(
user_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> UserRead:
service = UserService(db)
user = await service.get_user_by_id(user_id)
if user is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and user.id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
return UserRead.model_validate(user)
@users_router.patch("/{user_id}/", response_model=UserRead)
async def patch_user(
user_id: uuid.UUID,
body: UserUpdate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> UserRead:
service = UserService(db)
user = await service.get_user_by_id(user_id)
if user is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and user.id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
try:
user = await service.update_user(user, body)
except ValueError as e:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e
return UserRead.model_validate(user)
@users_router.delete("/{user_id}/", status_code=status.HTTP_204_NO_CONTENT)
async def delete_user(
user_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> Response:
service = UserService(db)
user = await service.get_user_by_id(user_id)
if user is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and user.id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
await service.deactivate_user(user)
return Response(status_code=status.HTTP_204_NO_CONTENT)
@auth_router.post(
"/register", response_model=UserRegisterResponse, status_code=status.HTTP_201_CREATED
)
async def register(body: UserRegister, db: AsyncSession = Depends(get_db)) -> UserRegisterResponse:
service = UserService(db)
try:
user = await service.register_user(body)
except ValueError as e:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e
access, refresh = _issue_tokens(user)
return UserRegisterResponse(user=UserRead.model_validate(user), access=access, refresh=refresh)
@auth_router.post("/login", response_model=UserRegisterResponse)
async def login(body: UserLogin, db: AsyncSession = Depends(get_db)) -> UserRegisterResponse:
service = UserService(db)
user = await service.authenticate(body.username, body.password)
if user is None:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid credentials")
access, refresh = _issue_tokens(user)
return UserRegisterResponse(user=UserRead.model_validate(user), access=access, refresh=refresh)
@auth_router.post("/refresh", response_model=TokenRefreshResponse)
async def refresh(body: TokenRefresh) -> TokenRefreshResponse:
try:
payload = decode_token(body.refresh)
if payload.get("type") != "refresh":
raise InvalidTokenError("wrong type")
user_id = uuid.UUID(str(payload.get("sub")))
settings = get_settings()
access = create_token(
subject=str(user_id),
token_type="access",
expires_in=timedelta(minutes=settings.jwt_access_ttl_minutes),
)
return TokenRefreshResponse(access=access, refresh=body.refresh)
    except (ExpiredSignatureError, InvalidTokenError, ValueError):
        # Suppress the original exception so clients only see the 401 detail.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid refresh token"
        ) from None
+75
View File
@@ -0,0 +1,75 @@
from __future__ import annotations
from datetime import datetime
from uuid import UUID
from cpv3.common.schemas import Schema
class UserRead(Schema):
id: UUID
username: str
email: str
first_name: str
last_name: str
phone_number: str | None
avatar: str | None
email_verified: bool
phone_verified: bool
is_active: bool
is_staff: bool
is_superuser: bool
date_joined: datetime
created_at: datetime
updated_at: datetime
class UserCreate(Schema):
username: str
email: str
password: str
first_name: str = ""
last_name: str = ""
phone_number: str | None = None
avatar: str | None = None
class UserUpdate(Schema):
first_name: str | None = None
last_name: str | None = None
email: str | None = None
phone_number: str | None = None
avatar: str | None = None
class UserRegister(Schema):
username: str
email: str
password: str
first_name: str = ""
last_name: str = ""
phone_number: str | None = None
avatar: str | None = None
class UserLogin(Schema):
username: str
password: str
class UserRegisterResponse(Schema):
user: UserRead
access: str
refresh: str
class TokenRefresh(Schema):
refresh: str
class TokenRefreshResponse(Schema):
access: str
refresh: str
+92
View File
@@ -0,0 +1,92 @@
from __future__ import annotations
import uuid
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.security import verify_password
from cpv3.modules.users.models import User
from cpv3.modules.users.repository import UserRepository
from cpv3.modules.users.schemas import UserCreate, UserRegister, UserUpdate
class UserService:
"""Service for user business logic and orchestration."""
def __init__(self, session: AsyncSession) -> None:
self._repo = UserRepository(session)
async def get_user_by_id(self, user_id: uuid.UUID) -> User | None:
return await self._repo.get_by_id(user_id)
async def get_user_by_username(self, username: str) -> User | None:
return await self._repo.get_by_username(username)
async def list_users(self, *, requester: User) -> list[User]:
return await self._repo.list_all(requester=requester)
async def create_user(self, data: UserCreate, *, requester: User | None) -> User:
# Keep Django behavior: any authenticated user can create via this endpoint.
if requester is None:
raise ValueError("Authentication required")
return await self._repo.create(data=data)
async def register_user(self, data: UserRegister) -> User:
return await self._repo.create(data=data)
async def update_user(self, user: User, data: UserUpdate) -> User:
return await self._repo.update(user, data)
async def deactivate_user(self, user: User) -> None:
await self._repo.deactivate(user)
async def authenticate(self, username: str, password: str) -> User | None:
user = await self._repo.get_by_username(username)
if user is None:
return None
if not user.is_active:
return None
if not verify_password(password, user.password_hash):
return None
return user
# Legacy function exports for backward compatibility
async def get_user_by_id(session: AsyncSession, user_id: uuid.UUID) -> User | None:
service = UserService(session)
return await service.get_user_by_id(user_id)
async def get_user_by_username(session: AsyncSession, username: str) -> User | None:
service = UserService(session)
return await service.get_user_by_username(username)
async def list_users(session: AsyncSession, *, requester: User) -> list[User]:
service = UserService(session)
return await service.list_users(requester=requester)
async def create_user(session: AsyncSession, data: UserCreate, *, requester: User | None) -> User:
service = UserService(session)
return await service.create_user(data, requester=requester)
async def register_user(session: AsyncSession, data: UserRegister) -> User:
service = UserService(session)
return await service.register_user(data)
async def update_user(session: AsyncSession, user: User, data: UserUpdate) -> User:
service = UserService(session)
return await service.update_user(user, data)
async def deactivate_user(session: AsyncSession, user: User) -> None:
service = UserService(session)
await service.deactivate_user(user)
async def authenticate(session: AsyncSession, username: str, password: str) -> User | None:
service = UserService(session)
return await service.authenticate(username, password)
View File
+27
View File
@@ -0,0 +1,27 @@
from __future__ import annotations
import uuid
from sqlalchemy import ForeignKey, String
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from cpv3.db.base import Base, BaseModelMixin
class Webhook(Base, BaseModelMixin):
__tablename__ = "webhooks"
project_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("projects.id", ondelete="RESTRICT"),
nullable=True,
index=True,
)
user_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True), ForeignKey("users.id", ondelete="RESTRICT"), nullable=True, index=True
)
event: Mapped[str | None] = mapped_column(String(255), nullable=True)
url: Mapped[str] = mapped_column(String(1024))
secret: Mapped[str | None] = mapped_column(String(255), nullable=True)
+63
View File
@@ -0,0 +1,63 @@
from __future__ import annotations
import uuid
from sqlalchemy import Select, select
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.users.models import User
from cpv3.modules.webhooks.models import Webhook
from cpv3.modules.webhooks.schemas import WebhookCreate, WebhookUpdate
class WebhookRepository:
"""Repository for Webhook database operations."""
def __init__(self, session: AsyncSession) -> None:
self._session = session
async def list_all(self, *, requester: User) -> list[Webhook]:
stmt: Select[tuple[Webhook]] = select(Webhook).where(
Webhook.is_active.is_(True)
)
if not requester.is_staff:
stmt = stmt.where(Webhook.user_id == requester.id)
result = await self._session.execute(stmt.order_by(Webhook.created_at.desc()))
return list(result.scalars().all())
async def get_by_id(self, webhook_id: uuid.UUID) -> Webhook | None:
result = await self._session.execute(
select(Webhook)
.where(Webhook.id == webhook_id)
.where(Webhook.is_active.is_(True))
)
return result.scalar_one_or_none()
async def create(self, *, requester: User, data: WebhookCreate) -> Webhook:
webhook = Webhook(
user_id=requester.id,
project_id=data.project_id,
event=data.event,
url=data.url,
secret=data.secret,
is_active=data.is_active,
)
self._session.add(webhook)
await self._session.commit()
await self._session.refresh(webhook)
return webhook
async def update(self, webhook: Webhook, data: WebhookUpdate) -> Webhook:
for key, value in data.model_dump(exclude_unset=True).items():
if value is not None:
setattr(webhook, key, value)
await self._session.commit()
await self._session.refresh(webhook)
return webhook
async def deactivate(self, webhook: Webhook) -> None:
webhook.is_active = False
await self._session.commit()
+89
View File
@@ -0,0 +1,89 @@
from __future__ import annotations
import uuid
from fastapi import APIRouter, Depends, HTTPException, Response, status
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.infrastructure.auth import get_current_user
from cpv3.db.session import get_db
from cpv3.modules.users.models import User
from cpv3.modules.webhooks.schemas import WebhookCreate, WebhookRead, WebhookUpdate
from cpv3.modules.webhooks.service import WebhookService
router = APIRouter(prefix="/api/webhooks", tags=["Webhooks"])
@router.get("/", response_model=list[WebhookRead])
async def list_all_webhooks(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> list[WebhookRead]:
service = WebhookService(db)
items = await service.list_webhooks(requester=current_user)
return [WebhookRead.model_validate(w) for w in items]
@router.post("/", response_model=WebhookRead, status_code=status.HTTP_201_CREATED)
async def create_webhook_endpoint(
body: WebhookCreate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> WebhookRead:
service = WebhookService(db)
webhook = await service.create_webhook(requester=current_user, data=body)
return WebhookRead.model_validate(webhook)
@router.get("/{webhook_id}/", response_model=WebhookRead)
async def retrieve_webhook_endpoint(
webhook_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> WebhookRead:
service = WebhookService(db)
webhook = await service.get_webhook(webhook_id)
if webhook is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and webhook.user_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
return WebhookRead.model_validate(webhook)
@router.patch("/{webhook_id}/", response_model=WebhookRead)
async def patch_webhook_endpoint(
webhook_id: uuid.UUID,
body: WebhookUpdate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> WebhookRead:
service = WebhookService(db)
webhook = await service.get_webhook(webhook_id)
if webhook is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and webhook.user_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
webhook = await service.update_webhook(webhook, body)
return WebhookRead.model_validate(webhook)
@router.delete("/{webhook_id}/", status_code=status.HTTP_204_NO_CONTENT)
async def delete_webhook_endpoint(
webhook_id: uuid.UUID,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
) -> Response:
service = WebhookService(db)
webhook = await service.get_webhook(webhook_id)
if webhook is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Not found")
if not current_user.is_staff and webhook.user_id != current_user.id:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
await service.deactivate_webhook(webhook)
return Response(status_code=status.HTTP_204_NO_CONTENT)
+35
View File
@@ -0,0 +1,35 @@
from __future__ import annotations
from datetime import datetime
from uuid import UUID
from cpv3.common.schemas import Schema
class WebhookRead(Schema):
id: UUID
project_id: UUID | None
user_id: UUID | None
event: str | None
url: str
secret: str | None
is_active: bool
created_at: datetime
updated_at: datetime
class WebhookCreate(Schema):
project_id: UUID | None = None
event: str | None = None
url: str
secret: str | None = None
is_active: bool = True
class WebhookUpdate(Schema):
event: str | None = None
url: str | None = None
secret: str | None = None
is_active: bool | None = None
+32
View File
@@ -0,0 +1,32 @@
from __future__ import annotations
import uuid
from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.modules.webhooks.models import Webhook
from cpv3.modules.webhooks.repository import WebhookRepository
from cpv3.modules.webhooks.schemas import WebhookCreate, WebhookUpdate
from cpv3.modules.users.models import User
class WebhookService:
"""Service for webhook business logic and orchestration."""
def __init__(self, session: AsyncSession) -> None:
self._repo = WebhookRepository(session)
async def list_webhooks(self, *, requester: User) -> list[Webhook]:
return await self._repo.list_all(requester=requester)
async def get_webhook(self, webhook_id: uuid.UUID) -> Webhook | None:
return await self._repo.get_by_id(webhook_id)
async def create_webhook(self, *, requester: User, data: WebhookCreate) -> Webhook:
return await self._repo.create(requester=requester, data=data)
async def update_webhook(self, webhook: Webhook, data: WebhookUpdate) -> Webhook:
return await self._repo.update(webhook, data)
async def deactivate_webhook(self, webhook: Webhook) -> None:
await self._repo.deactivate(webhook)
+70
View File
@@ -0,0 +1,70 @@
services:
db:
container_name: cpv3_postgres
image: postgres:16
environment:
POSTGRES_USER: ${POSTGRES_USER:-postgres}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
POSTGRES_DB: ${POSTGRES_DATABASE:-coffee_project_db}
ports:
- "5332:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres} -d ${POSTGRES_DB:-coffee_project_db}"]
interval: 5s
timeout: 3s
retries: 20
volumes:
- cpv3_db:/var/lib/postgresql/data
minio:
container_name: cpv3_minio
image: minio/minio
ports:
- "9000:9000"
- "9001:9001"
environment:
MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
command: server /data --console-address ":9001"
volumes:
- cpv3_minio:/data
api:
container_name: cpv3_api
build: .
depends_on:
db:
condition: service_healthy
environment:
DEBUG: ${DEBUG:-1}
JWT_SECRET_KEY: ${JWT_SECRET_KEY:-dev-secret}
POSTGRES_USER: ${POSTGRES_USER:-postgres}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
POSTGRES_HOST: db
POSTGRES_PORT: 5432
POSTGRES_DATABASE: ${POSTGRES_DATABASE:-coffee_project_db}
STORAGE_BACKEND: ${STORAGE_BACKEND:-S3}
S3_ACCESS_KEY: ${MINIO_ROOT_USER:-minioadmin}
S3_SECRET_KEY: ${MINIO_ROOT_PASSWORD:-minioadmin}
S3_BUCKET_NAME: ${S3_BUCKET_NAME:-coffee-bucket}
S3_ENDPOINT_URL_INTERNAL: http://minio:9000
# Used only for generated browser links (presigned URLs)
S3_ENDPOINT_URL_PUBLIC: http://localhost:9000
REMOTION_SERVICE_URL: ${REMOTION_SERVICE_URL:-http://localhost:8001}
ports:
- "8000:8000"
    command: >
      sh -c "uv run alembic upgrade head &&
      uv run uvicorn cpv3.main:app --host 0.0.0.0 --port 8000 --reload --reload-dir /app/cpv3"
    volumes:
      - ./cpv3:/app/cpv3
- ./alembic:/app/alembic
- ./alembic.ini:/app/alembic.ini
volumes:
cpv3_db:
cpv3_minio:
+211
View File
@@ -0,0 +1,211 @@
# API Reference
## Authentication
All endpoints (except `/auth/*` and `/api/ping/`) require a Bearer token in the Authorization header.
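For illustration, a minimal client sketch of the token flow (the base URL and credentials are placeholders; `httpx` is already a project dependency):
```python
# Minimal sketch, assuming a local dev server and an existing account;
# BASE_URL and the credentials are placeholders, not part of the API.
import httpx

BASE_URL = "http://localhost:8000"

with httpx.Client(base_url=BASE_URL) as client:
    # POST /auth/login returns {"user": {...}, "access": "...", "refresh": "..."}
    tokens = client.post(
        "/auth/login",
        json={"username": "alice", "password": "s3cret"},
    ).json()

    # Protected endpoints expect "Authorization: Bearer <access token>".
    me = client.get(
        "/api/users/me/",
        headers={"Authorization": f"Bearer {tokens['access']}"},
    )
    print(me.json())
```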
### Register
```
POST /auth/register
```
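Example request body (field names mirror the `UserRegister` schema elsewhere in this commit; only `username`, `email`, and `password` are required, and the values below are placeholders):
```python
# Example /auth/register body; username, email and password are required,
# the remaining fields fall back to the schema defaults ("" or None).
payload = {
    "username": "alice",
    "email": "alice@example.com",
    "password": "s3cret",
    "first_name": "Alice",  # optional
    "phone_number": None,   # optional
}
# A 201 response contains the created user plus an access/refresh token pair.
```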
### Login
```
POST /auth/login
```
### Refresh Token
```
POST /auth/refresh
```
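A short sketch of the refresh flow, assuming a local dev server; the refresh token is returned unchanged alongside the new access token:
```python
# Sketch of the refresh flow; the URL is a placeholder for a local dev server.
import httpx

new_tokens = httpx.post(
    "http://localhost:8000/auth/refresh",
    json={"refresh": "<refresh token from login or register>"},
).json()
# -> {"access": "<new access token>", "refresh": "<the same refresh token>"}
```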
## Users
### List Users
```
GET /api/users/
```
### Get Current User
```
GET /api/users/me/
```
### Get User by ID
```
GET /api/users/{user_id}/
```
### Create User
```
POST /api/users/
```
### Update User
```
PATCH /api/users/{user_id}/
```
### Delete User
```
DELETE /api/users/{user_id}/
```
## Projects
### List Projects
```
GET /api/projects/
```
### Create Project
```
POST /api/projects/
```
### Get Project
```
GET /api/projects/{project_id}/
```
### Update Project
```
PATCH /api/projects/{project_id}/
```
### Delete Project
```
DELETE /api/projects/{project_id}/
```
## Files
### Upload File
```
POST /api/files/upload/
```
### Get File Info
```
GET /api/files/get_file/?file_path={path}
```
### Local File Access
```
GET /api/files/local/{file_path}
```
### List File Entries
```
GET /api/files/files/
```
### Create File Entry
```
POST /api/files/files/
```
### Get File Entry
```
GET /api/files/files/{file_id}/
```
### Update File Entry
```
PATCH /api/files/files/{file_id}/
```
### Delete File Entry
```
DELETE /api/files/files/{file_id}/
```
## Media
### Get Media Metadata
```
GET /api/media/get_meta/?file_path={path}
```
### Remove Silence
```
POST /api/media/silence_remove
```
### Convert to MP4
```
POST /api/media/convert
```
## Transcription
### Whisper Transcribe
```
POST /api/transcribe/whisper/
```
### Google Speech Transcribe
```
POST /api/transcribe/google-speech/
```
## Captions
### Get Video with Captions
```
POST /api/captions/get_video/
```
## Jobs
### List Jobs
```
GET /api/jobs/jobs/
```
### Create Job
```
POST /api/jobs/jobs/
```
### Get Job
```
GET /api/jobs/jobs/{job_id}/
```
### Update Job
```
PATCH /api/jobs/jobs/{job_id}/
```
### Delete Job
```
DELETE /api/jobs/jobs/{job_id}/
```
## Webhooks
### List Webhooks
```
GET /api/webhooks/
```
### Create Webhook
```
POST /api/webhooks/
```
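Example request body (fields mirror the `WebhookCreate` schema in this commit; only `url` is required, and the event name below is just one of the values listed in `schema.dbml`):
```python
# Example /api/webhooks/ body; only "url" is required, the other fields
# fall back to the WebhookCreate defaults. The event value is one of the
# names listed in schema.dbml; the model stores it as a plain string.
payload = {
    "url": "https://example.com/hooks/coffee",
    "event": "TRANSCRIPTION_COMPLETED",  # optional
    "project_id": None,                  # optional UUID
    "secret": "shared-secret",           # optional
    "is_active": True,
}
```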
### Get Webhook
```
GET /api/webhooks/{webhook_id}/
```
### Update Webhook
```
PATCH /api/webhooks/{webhook_id}/
```
### Delete Webhook
```
DELETE /api/webhooks/{webhook_id}/
```
+53
View File
@@ -0,0 +1,53 @@
# Documentation
This folder contains documentation and references for the Coffee Project Backend API.
## Project Structure
```
cpv3/
├── api/ # Versioned API routers
│ └── v1/
│ └── router.py # Aggregates all module routers
├── common/ # Cross-cutting concerns and utilities
│ └── schemas.py # Base Pydantic schema
├── db/ # Database configuration
│ ├── base.py # SQLAlchemy base classes
│ ├── models.py # All ORM models (optional central import)
│ └── session.py # Database session management
├── infrastructure/ # App bootstrapping, config, security, external integrations
│ ├── settings.py # Application settings (Pydantic)
│ ├── security.py # Password hashing, JWT tokens
│ ├── auth.py # Authentication dependencies
│ ├── deps.py # Infrastructure-level dependencies
│ └── storage/ # Storage backends
│ ├── base.py # StorageService and protocol
│ ├── local.py # Local filesystem backend
│ ├── s3.py # S3/MinIO backend
│ └── types.py # Storage types
├── modules/ # Feature modules
│ ├── captions/
│ ├── files/ # File management (renamed from storage)
│ ├── jobs/
│ ├── media/
│ ├── projects/
│ ├── system/
│ ├── transcription/
│ ├── users/
│ └── webhooks/
└── main.py # FastAPI application entry point
```
## Module Structure
Each module follows this structure:
- `router.py` - HTTP concerns only (request/response, status codes, dependencies)
- `schemas.py` - Pydantic DTOs only
- `service.py` - Business logic + orchestration (calls repositories + other services)
- `repository.py` - All DB queries (SQLAlchemy session usage)
- `models.py` - ORM models only
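As a rough illustration of how the layers call each other, here is a condensed excerpt in the spirit of the `webhooks` module from this commit (not a new module):
```python
# Condensed excerpt in the spirit of the webhooks module: the router handles
# HTTP concerns only and delegates to the service, which owns the business
# logic and calls the repository for all SQLAlchemy queries.
from fastapi import APIRouter, Depends
from sqlalchemy.ext.asyncio import AsyncSession

from cpv3.db.session import get_db
from cpv3.infrastructure.auth import get_current_user
from cpv3.modules.users.models import User
from cpv3.modules.webhooks.schemas import WebhookRead
from cpv3.modules.webhooks.service import WebhookService

router = APIRouter(prefix="/api/webhooks", tags=["Webhooks"])


@router.get("/", response_model=list[WebhookRead])
async def list_all_webhooks(
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> list[WebhookRead]:
    service = WebhookService(db)  # service wraps WebhookRepository(session)
    items = await service.list_webhooks(requester=current_user)
    return [WebhookRead.model_validate(w) for w in items]
```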
## API Versioning
The API uses URL-based versioning. All resource routes are mounted under the `/api/` prefix, while the authentication endpoints are exposed under `/auth/`.
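The aggregator itself is not shown in this excerpt; a minimal sketch of what `api/v1/router.py` plausibly looks like (the `api_router` name is an assumption):
```python
# Hypothetical sketch of api/v1/router.py: collect the per-module routers
# (each already carries its own /api/... or /auth prefix) into one router
# that main.py can include. The api_router name is an assumption.
from fastapi import APIRouter

from cpv3.modules.users.router import auth_router, users_router
from cpv3.modules.webhooks.router import router as webhooks_router

api_router = APIRouter()
api_router.include_router(users_router)
api_router.include_router(auth_router)
api_router.include_router(webhooks_router)
```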
+96
View File
@@ -0,0 +1,96 @@
.
├── alembic
│   ├── env.py
│   └── versions
│   └── 0001_initial.py
├── alembic.ini
├── cpv3
│   ├── core
│   │   ├── auth.py
│   │   ├── __init__.py
│   │   ├── schemas.py
│   │   ├── security.py
│   │   ├── settings.py
│   │   └── storage
│   │   ├── deps.py
│   │   ├── __init__.py
│   │   ├── local_backend.py
│   │   ├── s3_backend.py
│   │   ├── service.py
│   │   └── types.py
│   ├── db
│   │   ├── base.py
│   │   ├── __init__.py
│   │   ├── models.py
│   │   └── session.py
│   ├── __init__.py
│   ├── main.py
│   └── modules
│   ├── captions
│   │   ├── __init__.py
│   │   ├── router.py
│   │   ├── schemas.py
│   │   └── service.py
│   ├── __init__.py
│   ├── jobs
│   │   ├── __init__.py
│   │   ├── models.py
│   │   ├── router.py
│   │   ├── schemas.py
│   │   └── service_db.py
│   ├── media
│   │   ├── __init__.py
│   │   ├── models.py
│   │   ├── router.py
│   │   ├── schemas.py
│   │   ├── service_db.py
│   │   └── service.py
│   ├── projects
│   │   ├── __init__.py
│   │   ├── models.py
│   │   ├── router.py
│   │   ├── schemas.py
│   │   └── service.py
│   ├── storage
│   │   ├── __init__.py
│   │   ├── models.py
│   │   ├── router.py
│   │   ├── schemas.py
│   │   └── service.py
│   ├── system
│   │   ├── __init__.py
│   │   └── router.py
│   ├── transcription
│   │   ├── constants.py
│   │   ├── __init__.py
│   │   ├── models.py
│   │   ├── router.py
│   │   ├── schemas.py
│   │   ├── service_db.py
│   │   └── service.py
│   ├── users
│   │   ├── __init__.py
│   │   ├── models.py
│   │   ├── router.py
│   │   ├── schemas.py
│   │   └── service.py
│   └── webhooks
│   ├── __init__.py
│   ├── models.py
│   ├── router.py
│   ├── schemas.py
│   └── service_db.py
├── cpv3.egg-info
│   ├── dependency_links.txt
│   ├── PKG-INFO
│   ├── requires.txt
│   ├── SOURCES.txt
│   └── top_level.txt
├── docker-compose.yml
├── Dockerfile
├── project_file_schema.txt
├── pyproject.toml
├── schema.dbml
└── uv.lock
18 directories, 76 files
+341
View File
@@ -0,0 +1,341 @@
// Enums (expanded + used for Webhooks/JobEvents)
enum ProjectStatusEnum {
DRAFT
IN_PROGRESS
REVIEW
COMPLETED
ARCHIVED
CANCELED
}
enum StorageBackendEnum {
LOCAL
S3
}
enum MediaFileTypeEnum {
VIDEO
AUDIO
IMAGE
}
enum SourceTypeEnum {
UPLOAD
IMPORT
RECORDING
GENERATED
}
enum ArtifactTypeEnum {
VIDEO
AUDIO
THUMBNAIL
PREVIEW
WAVEFORM
TRANSCRIPT_JSON
METADATA
}
enum TranscribeEngineEnum {
LOCAL_WHISPER
GOOGLE_CLOUD
}
enum JobStatusEnum {
QUEUED
STARTED
IN_PROGRESS
SUCCEEDED
FAILED
CANCELED
RETRYING
}
enum WebhookEventEnum {
PROJECT_CREATED
PROJECT_UPDATED
FILE_UPLOADED
ARTIFACT_CREATED
TRANSCRIPTION_COMPLETED
JOB_STARTED
JOB_PROGRESS
JOB_SUCCEEDED
JOB_FAILED
}
enum JobEventTypeEnum {
LOG
STATUS_CHANGED
PROGRESS
WARNING
ERROR
}
// Tables
Table Users {
id uuid [pk]
created_at datetime
updated_at datetime
is_active boolean
deleted_at datetime
username varchar(150) [unique]
email varchar [unique]
password_hash varchar(255)
first_name varchar(150)
last_name varchar(150)
phone_number varchar(16)
avatar varchar(2048)
email_verified boolean
phone_verified boolean
// Replace is_staff and is_superuser with role system later
is_staff boolean
is_superuser boolean
date_joined datetime
last_login datetime
Indexes {
(email) [name: "idx_users_email"]
(username) [name: "idx_users_username"]
(is_active) [name: "idx_users_is_active"]
(deleted_at) [name: "idx_users_deleted_at"]
}
}
Table Projects {
id uuid [pk]
created_at datetime
updated_at datetime
is_active boolean
deleted_at datetime
owner_id uuid
name varchar(150)
description varchar(4096)
language varchar(3)
folder varchar
status ProjectStatusEnum
Indexes {
(owner_id) [name: "idx_projects_owner_id"]
(status) [name: "idx_projects_status"]
(is_active) [name: "idx_projects_is_active"]
(deleted_at) [name: "idx_projects_deleted_at"]
}
}
Table Files {
id uuid [pk]
created_at datetime
updated_at datetime
is_active boolean
deleted_at datetime
project_id uuid
owner_id uuid
original_file_name varchar
path varchar
storage_backend StorageBackendEnum
mime_type varchar
size_bytes bigint
checksum varchar(256)
file_format varchar(10)
is_uploaded boolean
Indexes {
(project_id) [name: "idx_files_project_id"]
(owner_id) [name: "idx_files_owner_id"]
(checksum) [name: "idx_files_checksum"]
(is_uploaded) [name: "idx_files_is_uploaded"]
(is_active) [name: "idx_files_is_active"]
(deleted_at) [name: "idx_files_deleted_at"]
}
}
// Normalized: MediaFiles extends Files (no duplicate project_id/owner_id)
Table MediaFiles {
id uuid [pk]
created_at datetime
updated_at datetime
is_active boolean
deleted_at datetime
file_id uuid [unique]
duration_sec float
frame_rate float
width integer
height integer
probe_json jsonb
notes varchar(4096)
meta jsonb
file_type MediaFileTypeEnum
source_type SourceTypeEnum
Indexes {
(file_id) [name: "idx_mediafiles_file_id"]
(file_type) [name: "idx_mediafiles_file_type"]
(source_type) [name: "idx_mediafiles_source_type"]
(is_active) [name: "idx_mediafiles_is_active"]
(deleted_at) [name: "idx_mediafiles_deleted_at"]
}
}
// Normalized: Artifact points to output file; lineage via source_media_file_id
Table Artifacts {
id uuid [pk]
created_at datetime
updated_at datetime
is_active boolean
deleted_at datetime
output_file_id uuid [unique]
source_media_file_id uuid
artifact_type ArtifactTypeEnum
Indexes {
(output_file_id) [name: "idx_artifacts_output_file_id"]
(source_media_file_id) [name: "idx_artifacts_source_media_file_id"]
(artifact_type) [name: "idx_artifacts_artifact_type"]
(is_active) [name: "idx_artifacts_is_active"]
(deleted_at) [name: "idx_artifacts_deleted_at"]
}
}
// Normalized: no duplicate project_id; derive via input_file_id / transcript_artifact_id
Table Transcriptions {
id uuid [pk]
created_at datetime
updated_at datetime
is_active boolean
deleted_at datetime
input_file_id uuid
transcript_artifact_id uuid
engine TranscribeEngineEnum
language varchar(3)
document jsonb
transcribe_options jsonb
Indexes {
(input_file_id) [name: "idx_transcriptions_input_file_id"]
(transcript_artifact_id) [unique, name: "ux_transcriptions_transcript_artifact_id"]
(engine) [name: "idx_transcriptions_engine"]
(is_active) [name: "idx_transcriptions_is_active"]
(deleted_at) [name: "idx_transcriptions_deleted_at"]
}
}
Table Jobs {
id uuid [pk]
created_at datetime
updated_at datetime
is_active boolean
broker_id varchar(255) [unique]
user_id uuid
project_id uuid
input_data jsonb
output_data jsonb
status JobStatusEnum
progress_pct integer
error_message varchar(4096)
current_message varchar(4096)
started_at datetime
finished_at datetime
Indexes {
(broker_id) [name: "idx_jobs_broker_id"]
(user_id) [name: "idx_jobs_user_id"]
(project_id) [name: "idx_jobs_project_id"]
(status) [name: "idx_jobs_status"]
(is_active) [name: "idx_jobs_is_active"]
}
}
Table JobEvents {
id uuid [pk]
created_at datetime
updated_at datetime
is_active boolean
job_id uuid
event_type JobEventTypeEnum
payload jsonb
Indexes {
(job_id) [name: "idx_jobevents_job_id"]
(event_type) [name: "idx_jobevents_event_type"]
(created_at) [name: "idx_jobevents_created_at"]
(is_active) [name: "idx_jobevents_is_active"]
}
}
Table Webhooks {
id uuid [pk]
created_at datetime
updated_at datetime
is_active boolean
deleted_at datetime
project_id uuid
user_id uuid
event WebhookEventEnum
url varchar(512)
secret varchar(256)
Indexes {
(project_id) [name: "idx_webhooks_project_id"]
(user_id) [name: "idx_webhooks_user_id"]
(event) [name: "idx_webhooks_event"]
(project_id, event, url) [unique, name: "ux_webhooks_project_event_url"]
(is_active) [name: "idx_webhooks_is_active"]
(deleted_at) [name: "idx_webhooks_deleted_at"]
}
}
// Refs (fixed + aligned to actual FK columns)
Ref: Projects.owner_id > Users.id
Ref: Files.project_id > Projects.id
Ref: Files.owner_id > Users.id
Ref: MediaFiles.file_id > Files.id
Ref: Artifacts.output_file_id > Files.id
Ref: Artifacts.source_media_file_id > MediaFiles.id
Ref: Transcriptions.input_file_id > Files.id
Ref: Transcriptions.transcript_artifact_id > Artifacts.id
Ref: Jobs.project_id > Projects.id
Ref: Jobs.user_id > Users.id
Ref: JobEvents.job_id > Jobs.id
Ref: Webhooks.project_id > Projects.id
Ref: Webhooks.user_id > Users.id
+44
View File
@@ -0,0 +1,44 @@
[project]
name = "cpv3"
version = "0.1.0"
description = "Coffee Project Backend v3 (FastAPI)"
requires-python = ">=3.11"
dependencies = [
"fastapi>=0.115.0",
"uvicorn[standard]>=0.30.0",
"sqlalchemy>=2.0.30",
"asyncpg>=0.29.0",
"alembic>=1.13.2",
"pydantic>=2.7.0",
"pydantic-settings>=2.3.0",
"python-multipart>=0.0.9",
"boto3>=1.42.1",
"httpx>=0.27.0",
"PyJWT>=2.8.0",
"passlib[bcrypt]>=1.7.4",
# bcrypt 4.x dropped the __about__ metadata passlib 1.7.x expects; pin to 3.x.
"bcrypt>=3.2.2,<4.0.0",
"python-dotenv>=1.0.1",
"pydub>=0.25.1",
"google-cloud-speech>=2.34.0",
"openai-whisper>=20250625",
]
[dependency-groups]
dev = [
"mypy>=1.19.1",
"ruff>=0.6.0",
]
[tool.ruff]
line-length = 100
[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["."]
include = ["cpv3*"]
exclude = ["alembic*"]
namespaces = false
+3
View File
@@ -0,0 +1,3 @@
"""
Tests package for Coffee Project Backend.
"""
+44
View File
@@ -0,0 +1,44 @@
"""
Shared test fixtures and configuration.
"""
from __future__ import annotations
import pytest # type: ignore[import-not-found]
from fastapi.testclient import TestClient
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from cpv3.db.base import Base
from cpv3.main import app
# Use in-memory SQLite for tests (or configure a test database)
TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:"
@pytest.fixture
def test_client():
"""Create a test client for the FastAPI app."""
with TestClient(app) as client:
yield client
@pytest.fixture
async def test_db_session():
"""Create a test database session."""
engine = create_async_engine(TEST_DATABASE_URL, echo=False)
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
async_session = async_sessionmaker(
bind=engine, class_=AsyncSession, expire_on_commit=False
)
async with async_session() as session:
yield session
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.drop_all)
await engine.dispose()
+3
View File
@@ -0,0 +1,3 @@
"""
Integration tests.
"""
+3
View File
@@ -0,0 +1,3 @@
"""
Unit tests for modules.
"""
Generated
+2117
View File
File diff suppressed because it is too large