chore: first commit

This commit is contained in:
Daniil
2026-02-17 23:33:08 +03:00
parent a25bf623ea
commit 937e58859a
3 changed files with 411 additions and 254 deletions
+8 -14
View File
@@ -7,7 +7,7 @@ from __future__ import annotations
import uuid import uuid
from typing import cast from typing import cast
from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from cpv3.db.session import get_db from cpv3.db.session import get_db
@@ -22,6 +22,7 @@ from cpv3.modules.tasks.schemas import (
TaskStatusResponse, TaskStatusResponse,
TaskSubmitResponse, TaskSubmitResponse,
TaskTypeEnum, TaskTypeEnum,
TaskWebhookEvent,
TranscriptionGenerateRequest, TranscriptionGenerateRequest,
) )
from cpv3.modules.tasks.service import TaskService from cpv3.modules.tasks.service import TaskService
@@ -146,23 +147,16 @@ async def get_task_status(
@router.post("/webhook/{job_id}/", include_in_schema=False) @router.post("/webhook/{job_id}/", include_in_schema=False)
async def task_webhook_callback( async def task_webhook_callback(
job_id: uuid.UUID, job_id: uuid.UUID,
request: Request, body: TaskWebhookEvent,
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
) -> dict[str, str]: ) -> dict[str, str]:
"""Internal webhook endpoint for task status updates.""" """Internal webhook endpoint for task status updates."""
service = TaskService(db)
try: try:
await request.json() await service.record_webhook_event(job_id=job_id, event=body)
except Exception: except ValueError as exc:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON payload" status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)
) ) from exc
job_service = JobService(db)
job = await job_service.get_job(job_id)
if job is None:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND, detail="Job not found"
)
return {"status": "received", "job_id": str(job_id)} return {"status": "received", "job_id": str(job_id)}
+34 -10
View File
@@ -8,20 +8,14 @@ from datetime import datetime
from typing import Literal from typing import Literal
from uuid import UUID from uuid import UUID
from pydantic import Field from pydantic import Field, model_validator
from cpv3.common.schemas import Schema from cpv3.common.schemas import Schema
from cpv3.modules.jobs.schemas import JobStatusEnum, JobTypeEnum
TaskTypeEnum = Literal[ TaskTypeEnum = JobTypeEnum
"MEDIA_PROBE", TaskStatusEnum = JobStatusEnum
"SILENCE_REMOVE",
"MEDIA_CONVERT",
"TRANSCRIPTION_GENERATE",
"CAPTIONS_GENERATE",
]
TaskStatusEnum = Literal["PENDING", "RUNNING", "FAILED", "CANCELLED", "DONE"]
# --- Request schemas --- # --- Request schemas ---
@@ -104,3 +98,33 @@ class TaskStatusResponse(Schema):
output_data: dict | None = None output_data: dict | None = None
started_at: datetime | None = None started_at: datetime | None = None
finished_at: datetime | None = None finished_at: datetime | None = None
class TaskWebhookEvent(Schema):
    """Webhook event payload for task updates.

    Every field is optional so a worker can report a partial update
    (e.g. progress only), but at least one field must be present —
    an all-``None`` event carries no information and is rejected.
    """

    status: TaskStatusEnum | None = None
    progress_pct: float | None = None
    current_message: str | None = None
    error_message: str | None = None
    output_data: dict | None = None
    started_at: datetime | None = None
    finished_at: datetime | None = None

    @model_validator(mode="after")
    def validate_has_update(self) -> "TaskWebhookEvent":
        # Reject an event in which every update field was omitted.
        update_fields = (
            self.status,
            self.progress_pct,
            self.current_message,
            self.error_message,
            self.output_data,
            self.started_at,
            self.finished_at,
        )
        if all(field is None for field in update_fields):
            raise ValueError("Webhook event must include at least one update field.")
        return self
+359 -220
View File
@@ -12,22 +12,27 @@ from typing import Any
import dramatiq # type: ignore[import-untyped] import dramatiq # type: ignore[import-untyped]
from dramatiq.brokers.redis import RedisBroker # type: ignore[import-untyped] from dramatiq.brokers.redis import RedisBroker # type: ignore[import-untyped]
from pydantic import BaseModel import httpx
from sqlalchemy import create_engine, select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session, sessionmaker
from cpv3.infrastructure.deps import _get_storage_service from cpv3.infrastructure.deps import _get_storage_service
from cpv3.infrastructure.settings import get_settings from cpv3.infrastructure.settings import get_settings
from cpv3.modules.jobs.models import Job, JobEvent from cpv3.modules.jobs.models import Job
from cpv3.modules.jobs.repository import JobRepository from cpv3.modules.jobs.repository import JobEventRepository, JobRepository
from cpv3.modules.jobs.schemas import JobCreate, JobTypeEnum from cpv3.modules.jobs.schemas import (
JobCreate,
JobEventCreate,
JobStatusEnum,
JobTypeEnum,
JobUpdate,
)
from cpv3.modules.tasks.schemas import ( from cpv3.modules.tasks.schemas import (
CaptionsGenerateRequest, CaptionsGenerateRequest,
MediaConvertRequest, MediaConvertRequest,
MediaProbeRequest, MediaProbeRequest,
SilenceRemoveRequest, SilenceRemoveRequest,
TaskSubmitResponse, TaskSubmitResponse,
TaskWebhookEvent,
TranscriptionGenerateRequest, TranscriptionGenerateRequest,
) )
from cpv3.modules.transcription.repository import TranscriptionRepository from cpv3.modules.transcription.repository import TranscriptionRepository
@@ -37,6 +42,40 @@ from cpv3.modules.webhooks.schemas import WebhookCreate
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# --- Job lifecycle constants -------------------------------------------------
# Typed aliases for the JobStatusEnum / JobTypeEnum literal values, so workers
# reference named constants instead of bare strings.
JOB_STATUS_PENDING: JobStatusEnum = "PENDING"
JOB_STATUS_RUNNING: JobStatusEnum = "RUNNING"
JOB_STATUS_DONE: JobStatusEnum = "DONE"
JOB_STATUS_FAILED: JobStatusEnum = "FAILED"

JOB_TYPE_MEDIA_PROBE: JobTypeEnum = "MEDIA_PROBE"
JOB_TYPE_SILENCE_REMOVE: JobTypeEnum = "SILENCE_REMOVE"
JOB_TYPE_MEDIA_CONVERT: JobTypeEnum = "MEDIA_CONVERT"
JOB_TYPE_TRANSCRIPTION_GENERATE: JobTypeEnum = "TRANSCRIPTION_GENERATE"
JOB_TYPE_CAPTIONS_GENERATE: JobTypeEnum = "CAPTIONS_GENERATE"

# --- Job event types ---------------------------------------------------------
# Status events are recorded as "status_<STATUS>"; the rest are plain names.
EVENT_TYPE_STATUS_PREFIX = "status_"
EVENT_TYPE_PROGRESS = "progress"
EVENT_TYPE_LOG = "log"
EVENT_TYPE_OUTPUT = "output"
EVENT_TYPE_ERROR = "error"

# --- Internal webhook endpoint -----------------------------------------------
# Path template matched by the tasks router's webhook route; timeout in seconds
# for each worker -> API webhook POST.
TASK_WEBHOOK_PATH = "/api/tasks/webhook/{job_id}/"
WEBHOOK_TIMEOUT_SECONDS = 10

# --- Human-readable progress messages ----------------------------------------
MESSAGE_STARTING = "Starting"
MESSAGE_COMPLETED = "Completed"
MESSAGE_PROBING_MEDIA = "Probing media"
MESSAGE_PROCESSING = "Processing"
MESSAGE_CONVERTING = "Converting"
MESSAGE_RENDERING_CAPTIONS = "Rendering captions"

# --- Progress percentages ----------------------------------------------------
# Mid-task progress values are rough per-task-type estimates, not measured.
PROGRESS_COMPLETE = 100.0
PROGRESS_MEDIA_PROBE = 50.0
PROGRESS_SILENCE_REMOVE = 30.0
PROGRESS_MEDIA_CONVERT = 30.0
PROGRESS_TRANSCRIPTION = 20.0
PROGRESS_CAPTIONS = 30.0
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Dramatiq broker setup # Dramatiq broker setup
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -47,62 +86,53 @@ dramatiq.set_broker(_redis_broker)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Sync DB helpers for Dramatiq workers # Webhook helpers for Dramatiq workers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _get_sync_session() -> Session: def _utc_now() -> datetime:
"""Create sync DB session for worker tasks.""" """Return current UTC time."""
return datetime.now(timezone.utc)
def _build_webhook_url(job_id: uuid.UUID) -> str:
"""Build the internal webhook URL for task updates."""
settings = get_settings() settings = get_settings()
sync_url = settings.get_database_url().replace( base_url = settings.webhook_base_url.rstrip("/")
"postgresql+asyncpg://", "postgresql://" return f"{base_url}{TASK_WEBHOOK_PATH.format(job_id=job_id)}"
def _build_webhook_event_name(job_type: JobTypeEnum) -> str:
"""Build webhook event name for a job type."""
return f"task.{job_type.lower()}"
def _send_webhook_event(webhook_url: str, event: TaskWebhookEvent) -> None:
"""Send a task webhook event to the API."""
payload = event.model_dump(mode="json", exclude_none=True)
try:
response = httpx.post(
webhook_url, json=payload, timeout=WEBHOOK_TIMEOUT_SECONDS
) )
engine = create_engine(sync_url, pool_pre_ping=True) response.raise_for_status()
return sessionmaker(bind=engine, expire_on_commit=False)() except Exception:
logger.exception("Failed to send task webhook event to %s", webhook_url)
raise
def _update_job( def _derive_event_type(event: TaskWebhookEvent) -> str:
job_id: uuid.UUID, """Derive a job event type from a webhook event payload."""
*, if event.status is not None:
status: str | None = None, return f"{EVENT_TYPE_STATUS_PREFIX}{event.status}"
current_message: str | None = None, if event.error_message is not None:
progress_pct: float | None = None, return EVENT_TYPE_ERROR
error_message: str | None = None, if event.progress_pct is not None:
output_data: dict | None = None, return EVENT_TYPE_PROGRESS
started_at: datetime | None = None, if event.current_message is not None:
finished_at: datetime | None = None, return EVENT_TYPE_LOG
) -> Job | None: if event.output_data is not None:
"""Update job in database (sync, for workers).""" return EVENT_TYPE_OUTPUT
with _get_sync_session() as session: return EVENT_TYPE_LOG
job = session.execute(select(Job).where(Job.id == job_id)).scalar_one_or_none()
if job is None:
return None
if status is not None:
job.status = status
if current_message is not None:
job.current_message = current_message
if progress_pct is not None:
job.project_pct = progress_pct
if error_message is not None:
job.error_message = error_message
if output_data is not None:
job.output_data = output_data
if started_at is not None:
job.started_at = started_at
if finished_at is not None:
job.finished_at = finished_at
# Create event
event = JobEvent(
job_id=job_id,
event_type=f"status_{status}" if status else "progress",
payload={"status": status or job.status, "message": current_message},
)
session.add(event)
session.commit()
session.refresh(job)
return job
def _run_async(coro: Any) -> Any: def _run_async(coro: Any) -> Any:
@@ -121,37 +151,49 @@ def _run_async(coro: Any) -> Any:
@dramatiq.actor(max_retries=3, min_backoff=1000) @dramatiq.actor(max_retries=3, min_backoff=1000)
def media_probe_actor(job_id: str, file_key: str) -> None: def media_probe_actor(job_id: str, webhook_url: str, file_key: str) -> None:
"""Probe media file to extract metadata.""" """Probe media file to extract metadata."""
from cpv3.modules.media.service import probe_media from cpv3.modules.media.service import probe_media
_job_id = uuid.UUID(job_id) job_uuid = uuid.UUID(job_id)
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="RUNNING", TaskWebhookEvent(
current_message="Starting", status=JOB_STATUS_RUNNING,
started_at=datetime.now(timezone.utc), current_message=MESSAGE_STARTING,
started_at=_utc_now(),
),
) )
try: try:
storage = _get_storage_service() storage = _get_storage_service()
_update_job(_job_id, current_message="Probing media", progress_pct=50.0) _send_webhook_event(
result = _run_async(probe_media(storage, file_key=file_key)) webhook_url,
_update_job( TaskWebhookEvent(
_job_id, current_message=MESSAGE_PROBING_MEDIA,
status="DONE", progress_pct=PROGRESS_MEDIA_PROBE,
current_message="Completed", ),
progress_pct=100.0,
output_data=result.model_dump(mode="json"),
finished_at=datetime.now(timezone.utc),
) )
except Exception as e: result = _run_async(probe_media(storage, file_key=file_key))
logger.exception("media_probe_actor failed: %s", _job_id) _send_webhook_event(
_update_job( webhook_url,
_job_id, TaskWebhookEvent(
status="FAILED", status=JOB_STATUS_DONE,
error_message=str(e), current_message=MESSAGE_COMPLETED,
finished_at=datetime.now(timezone.utc), progress_pct=PROGRESS_COMPLETE,
output_data=result.model_dump(mode="json"),
finished_at=_utc_now(),
),
)
except Exception as exc:
logger.exception("media_probe_actor failed: %s", job_uuid)
_send_webhook_event(
webhook_url,
TaskWebhookEvent(
status=JOB_STATUS_FAILED,
error_message=str(exc),
finished_at=_utc_now(),
),
) )
raise raise
@@ -159,6 +201,7 @@ def media_probe_actor(job_id: str, file_key: str) -> None:
@dramatiq.actor(max_retries=3, min_backoff=1000) @dramatiq.actor(max_retries=3, min_backoff=1000)
def silence_remove_actor( def silence_remove_actor(
job_id: str, job_id: str,
webhook_url: str,
file_key: str, file_key: str,
out_folder: str, out_folder: str,
min_silence_duration_ms: int, min_silence_duration_ms: int,
@@ -168,17 +211,25 @@ def silence_remove_actor(
"""Remove silence from media file.""" """Remove silence from media file."""
from cpv3.modules.media.service import remove_silence from cpv3.modules.media.service import remove_silence
_job_id = uuid.UUID(job_id) job_uuid = uuid.UUID(job_id)
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="RUNNING", TaskWebhookEvent(
current_message="Starting", status=JOB_STATUS_RUNNING,
started_at=datetime.now(timezone.utc), current_message=MESSAGE_STARTING,
started_at=_utc_now(),
),
) )
try: try:
storage = _get_storage_service() storage = _get_storage_service()
_update_job(_job_id, current_message="Processing", progress_pct=30.0) _send_webhook_event(
webhook_url,
TaskWebhookEvent(
current_message=MESSAGE_PROCESSING,
progress_pct=PROGRESS_SILENCE_REMOVE,
),
)
result = _run_async( result = _run_async(
remove_silence( remove_silence(
storage, storage,
@@ -189,42 +240,52 @@ def silence_remove_actor(
padding_ms=padding_ms, padding_ms=padding_ms,
) )
) )
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="DONE", TaskWebhookEvent(
current_message="Completed", status=JOB_STATUS_DONE,
progress_pct=100.0, current_message=MESSAGE_COMPLETED,
progress_pct=PROGRESS_COMPLETE,
output_data={ output_data={
"file_path": result.file_path, "file_path": result.file_path,
"file_url": result.file_url, "file_url": result.file_url,
"file_size": result.file_size, "file_size": result.file_size,
}, },
finished_at=datetime.now(timezone.utc), finished_at=_utc_now(),
),
) )
except Exception as e: except Exception as exc:
logger.exception("silence_remove_actor failed: %s", _job_id) logger.exception("silence_remove_actor failed: %s", job_uuid)
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="FAILED", TaskWebhookEvent(
error_message=str(e), status=JOB_STATUS_FAILED,
finished_at=datetime.now(timezone.utc), error_message=str(exc),
finished_at=_utc_now(),
),
) )
raise raise
@dramatiq.actor(max_retries=3, min_backoff=1000) @dramatiq.actor(max_retries=3, min_backoff=1000)
def media_convert_actor( def media_convert_actor(
job_id: str, file_key: str, out_folder: str, output_format: str job_id: str,
webhook_url: str,
file_key: str,
out_folder: str,
output_format: str,
) -> None: ) -> None:
"""Convert media file to specified format.""" """Convert media file to specified format."""
from cpv3.modules.media.service import convert_to_mp4 from cpv3.modules.media.service import convert_to_mp4
_job_id = uuid.UUID(job_id) job_uuid = uuid.UUID(job_id)
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="RUNNING", TaskWebhookEvent(
current_message="Starting", status=JOB_STATUS_RUNNING,
started_at=datetime.now(timezone.utc), current_message=MESSAGE_STARTING,
started_at=_utc_now(),
),
) )
try: try:
@@ -232,36 +293,51 @@ def media_convert_actor(
raise ValueError(f"Unsupported format: {output_format}") raise ValueError(f"Unsupported format: {output_format}")
storage = _get_storage_service() storage = _get_storage_service()
_update_job(_job_id, current_message="Converting", progress_pct=30.0) _send_webhook_event(
webhook_url,
TaskWebhookEvent(
current_message=MESSAGE_CONVERTING,
progress_pct=PROGRESS_MEDIA_CONVERT,
),
)
result = _run_async( result = _run_async(
convert_to_mp4(storage, file_key=file_key, out_folder=out_folder) convert_to_mp4(storage, file_key=file_key, out_folder=out_folder)
) )
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="DONE", TaskWebhookEvent(
current_message="Completed", status=JOB_STATUS_DONE,
progress_pct=100.0, current_message=MESSAGE_COMPLETED,
progress_pct=PROGRESS_COMPLETE,
output_data={ output_data={
"file_path": result.file_path, "file_path": result.file_path,
"file_url": result.file_url, "file_url": result.file_url,
"file_size": result.file_size, "file_size": result.file_size,
}, },
finished_at=datetime.now(timezone.utc), finished_at=_utc_now(),
),
) )
except Exception as e: except Exception as exc:
logger.exception("media_convert_actor failed: %s", _job_id) logger.exception("media_convert_actor failed: %s", job_uuid)
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="FAILED", TaskWebhookEvent(
error_message=str(e), status=JOB_STATUS_FAILED,
finished_at=datetime.now(timezone.utc), error_message=str(exc),
finished_at=_utc_now(),
),
) )
raise raise
@dramatiq.actor(max_retries=2, min_backoff=2000) @dramatiq.actor(max_retries=2, min_backoff=2000)
def transcription_generate_actor( def transcription_generate_actor(
job_id: str, file_key: str, engine: str, language: str | None, model: str job_id: str,
webhook_url: str,
file_key: str,
engine: str,
language: str | None,
model: str,
) -> None: ) -> None:
"""Generate transcription from audio/video file.""" """Generate transcription from audio/video file."""
from cpv3.modules.transcription.service import ( from cpv3.modules.transcription.service import (
@@ -269,18 +345,24 @@ def transcription_generate_actor(
transcribe_with_whisper, transcribe_with_whisper,
) )
_job_id = uuid.UUID(job_id) job_uuid = uuid.UUID(job_id)
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="RUNNING", TaskWebhookEvent(
current_message="Starting", status=JOB_STATUS_RUNNING,
started_at=datetime.now(timezone.utc), current_message=MESSAGE_STARTING,
started_at=_utc_now(),
),
) )
try: try:
storage = _get_storage_service() storage = _get_storage_service()
_update_job( _send_webhook_event(
_job_id, current_message=f"Transcribing ({engine})", progress_pct=20.0 webhook_url,
TaskWebhookEvent(
current_message=f"Transcribing ({engine})",
progress_pct=PROGRESS_TRANSCRIPTION,
),
) )
if engine == "whisper": if engine == "whisper":
@@ -299,64 +381,84 @@ def transcription_generate_actor(
else: else:
raise ValueError(f"Unknown engine: {engine}") raise ValueError(f"Unknown engine: {engine}")
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="DONE", TaskWebhookEvent(
current_message="Completed", status=JOB_STATUS_DONE,
progress_pct=100.0, current_message=MESSAGE_COMPLETED,
progress_pct=PROGRESS_COMPLETE,
output_data={"document": document.model_dump(mode="json")}, output_data={"document": document.model_dump(mode="json")},
finished_at=datetime.now(timezone.utc), finished_at=_utc_now(),
),
) )
except Exception as e: except Exception as exc:
logger.exception("transcription_generate_actor failed: %s", _job_id) logger.exception("transcription_generate_actor failed: %s", job_uuid)
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="FAILED", TaskWebhookEvent(
error_message=str(e), status=JOB_STATUS_FAILED,
finished_at=datetime.now(timezone.utc), error_message=str(exc),
finished_at=_utc_now(),
),
) )
raise raise
@dramatiq.actor(max_retries=2, min_backoff=2000) @dramatiq.actor(max_retries=2, min_backoff=2000)
def captions_generate_actor( def captions_generate_actor(
job_id: str, video_s3_path: str, folder: str, transcription_json: dict job_id: str,
webhook_url: str,
video_s3_path: str,
folder: str,
transcription_json: dict,
) -> None: ) -> None:
"""Generate captions on video.""" """Generate captions on video."""
from cpv3.modules.captions.service import generate_captions from cpv3.modules.captions.service import generate_captions
from cpv3.modules.transcription.schemas import Document from cpv3.modules.transcription.schemas import Document
_job_id = uuid.UUID(job_id) job_uuid = uuid.UUID(job_id)
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="RUNNING", TaskWebhookEvent(
current_message="Starting", status=JOB_STATUS_RUNNING,
started_at=datetime.now(timezone.utc), current_message=MESSAGE_STARTING,
started_at=_utc_now(),
),
) )
try: try:
_update_job(_job_id, current_message="Rendering captions", progress_pct=30.0) _send_webhook_event(
webhook_url,
TaskWebhookEvent(
current_message=MESSAGE_RENDERING_CAPTIONS,
progress_pct=PROGRESS_CAPTIONS,
),
)
document = Document.model_validate(transcription_json) document = Document.model_validate(transcription_json)
output_path = _run_async( output_path = _run_async(
generate_captions( generate_captions(
video_s3_path=video_s3_path, folder=folder, transcription=document video_s3_path=video_s3_path, folder=folder, transcription=document
) )
) )
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="DONE", TaskWebhookEvent(
current_message="Completed", status=JOB_STATUS_DONE,
progress_pct=100.0, current_message=MESSAGE_COMPLETED,
progress_pct=PROGRESS_COMPLETE,
output_data={"output_path": output_path}, output_data={"output_path": output_path},
finished_at=datetime.now(timezone.utc), finished_at=_utc_now(),
),
) )
except Exception as e: except Exception as exc:
logger.exception("captions_generate_actor failed: %s", _job_id) logger.exception("captions_generate_actor failed: %s", job_uuid)
_update_job( _send_webhook_event(
_job_id, webhook_url,
status="FAILED", TaskWebhookEvent(
error_message=str(e), status=JOB_STATUS_FAILED,
finished_at=datetime.now(timezone.utc), error_message=str(exc),
finished_at=_utc_now(),
),
) )
raise raise
@@ -367,14 +469,15 @@ def captions_generate_actor(
class TaskService: class TaskService:
"""Service for submitting background tasks.""" """Service for submitting background tasks and recording webhook updates."""
def __init__(self, session: AsyncSession) -> None: def __init__(self, session: AsyncSession) -> None:
self._session = session self._session = session
self._job_repo = JobRepository(session) self._job_repo = JobRepository(session)
self._event_repo = JobEventRepository(session)
self._webhook_repo = WebhookRepository(session) self._webhook_repo = WebhookRepository(session)
async def _create_job( async def _create_job_and_webhook(
self, self,
*, *,
requester: User, requester: User,
@@ -383,7 +486,6 @@ class TaskService:
input_data: dict, input_data: dict,
) -> tuple[Job, str]: ) -> tuple[Job, str]:
"""Create job and webhook, return job and webhook URL.""" """Create job and webhook, return job and webhook URL."""
settings = get_settings()
broker_id = uuid.uuid4().hex broker_id = uuid.uuid4().hex
job = await self._job_repo.create( job = await self._job_repo.create(
@@ -396,92 +498,132 @@ class TaskService:
), ),
) )
webhook_url = f"{settings.webhook_base_url}/api/tasks/webhook/{job.id}/" webhook_url = _build_webhook_url(job.id)
await self._webhook_repo.create( await self._webhook_repo.create(
requester=requester, requester=requester,
data=WebhookCreate( data=WebhookCreate(
project_id=project_id, event=f"task.{job_type.lower()}", url=webhook_url project_id=project_id,
event=_build_webhook_event_name(job_type),
url=webhook_url,
), ),
) )
return job, webhook_url return job, webhook_url
async def _submit_task(
    self,
    *,
    requester: User,
    job_type: JobTypeEnum,
    project_id: uuid.UUID | None,
    input_data: dict,
    actor: Any,
    actor_kwargs: dict[str, Any],
) -> TaskSubmitResponse:
    """Create a job with its webhook, enqueue the worker actor, and respond.

    Shared implementation behind all ``submit_*`` methods.

    Args:
        requester: User on whose behalf the job and webhook are created.
        job_type: Job type literal recorded on the new job.
        project_id: Optional project the job/webhook belong to.
        input_data: JSON-serializable request payload stored on the job.
        actor: Dramatiq actor to enqueue (sent via ``actor.send``).
        actor_kwargs: Extra keyword arguments forwarded to the actor,
            alongside ``job_id`` and ``webhook_url``.

    Returns:
        TaskSubmitResponse with the new job id, its webhook URL, and
        the initial PENDING status.
    """
    job, webhook_url = await self._create_job_and_webhook(
        requester=requester,
        job_type=job_type,
        project_id=project_id,
        input_data=input_data,
    )
    # job_id is stringified because Dramatiq message payloads must be
    # JSON-serializable; workers parse it back into a UUID.
    actor.send(job_id=str(job.id), webhook_url=webhook_url, **actor_kwargs)
    return TaskSubmitResponse(
        job_id=job.id,
        webhook_url=webhook_url,
        status=JOB_STATUS_PENDING,
    )
async def record_webhook_event(
    self, *, job_id: uuid.UUID, event: TaskWebhookEvent
) -> Job:
    """Apply a webhook event to the job and store a job event record.

    Args:
        job_id: Identifier of the job the event refers to.
        event: Partial update payload reported by a worker.

    Returns:
        The updated Job row.

    Raises:
        ValueError: If no job with ``job_id`` exists (the router maps
            this to HTTP 404).
    """
    job = await self._job_repo.get_by_id(job_id)
    if job is None:
        raise ValueError(f"Job {job_id} not found")
    job_update = JobUpdate(
        status=event.status,
        # NOTE(review): JobUpdate's field is named 'project_pct' but it
        # receives the progress percentage — looks like a typo for
        # 'progress_pct' in the jobs schema/model; confirm before renaming.
        project_pct=event.progress_pct,
        current_message=event.current_message,
        error_message=event.error_message,
        output_data=event.output_data,
        started_at=event.started_at,
        finished_at=event.finished_at,
    )
    job = await self._job_repo.update(job, job_update)
    # Persist an audit-trail event; event_type is derived from whichever
    # update field is most significant (status > error > progress > ...).
    event_type = _derive_event_type(event)
    payload = event.model_dump(mode="json", exclude_none=True)
    await self._event_repo.create(
        JobEventCreate(job_id=job.id, event_type=event_type, payload=payload)
    )
    return job
async def submit_media_probe( async def submit_media_probe(
self, *, requester: User, request: MediaProbeRequest self, *, requester: User, request: MediaProbeRequest
) -> TaskSubmitResponse: ) -> TaskSubmitResponse:
"""Submit media probe task.""" """Submit media probe task."""
job, webhook_url = await self._create_job( return await self._submit_task(
requester=requester, requester=requester,
job_type="MEDIA_PROBE", job_type=JOB_TYPE_MEDIA_PROBE,
project_id=request.project_id, project_id=request.project_id,
input_data=request.model_dump(mode="json"), input_data=request.model_dump(mode="json"),
) actor=media_probe_actor,
media_probe_actor.send(job_id=str(job.id), file_key=request.file_key) actor_kwargs={"file_key": request.file_key},
return TaskSubmitResponse(
job_id=job.id, webhook_url=webhook_url, status="PENDING"
) )
async def submit_silence_remove( async def submit_silence_remove(
self, *, requester: User, request: SilenceRemoveRequest self, *, requester: User, request: SilenceRemoveRequest
) -> TaskSubmitResponse: ) -> TaskSubmitResponse:
"""Submit silence removal task.""" """Submit silence removal task."""
job, webhook_url = await self._create_job( return await self._submit_task(
requester=requester, requester=requester,
job_type="SILENCE_REMOVE", job_type=JOB_TYPE_SILENCE_REMOVE,
project_id=request.project_id, project_id=request.project_id,
input_data=request.model_dump(mode="json"), input_data=request.model_dump(mode="json"),
) actor=silence_remove_actor,
silence_remove_actor.send( actor_kwargs={
job_id=str(job.id), "file_key": request.file_key,
file_key=request.file_key, "out_folder": request.out_folder,
out_folder=request.out_folder, "min_silence_duration_ms": request.min_silence_duration_ms,
min_silence_duration_ms=request.min_silence_duration_ms, "silence_threshold_db": request.silence_threshold_db,
silence_threshold_db=request.silence_threshold_db, "padding_ms": request.padding_ms,
padding_ms=request.padding_ms, },
)
return TaskSubmitResponse(
job_id=job.id, webhook_url=webhook_url, status="PENDING"
) )
async def submit_media_convert( async def submit_media_convert(
self, *, requester: User, request: MediaConvertRequest self, *, requester: User, request: MediaConvertRequest
) -> TaskSubmitResponse: ) -> TaskSubmitResponse:
"""Submit media conversion task.""" """Submit media conversion task."""
job, webhook_url = await self._create_job( return await self._submit_task(
requester=requester, requester=requester,
job_type="MEDIA_CONVERT", job_type=JOB_TYPE_MEDIA_CONVERT,
project_id=request.project_id, project_id=request.project_id,
input_data=request.model_dump(mode="json"), input_data=request.model_dump(mode="json"),
) actor=media_convert_actor,
media_convert_actor.send( actor_kwargs={
job_id=str(job.id), "file_key": request.file_key,
file_key=request.file_key, "out_folder": request.out_folder,
out_folder=request.out_folder, "output_format": request.output_format,
output_format=request.output_format, },
)
return TaskSubmitResponse(
job_id=job.id, webhook_url=webhook_url, status="PENDING"
) )
async def submit_transcription_generate( async def submit_transcription_generate(
self, *, requester: User, request: TranscriptionGenerateRequest self, *, requester: User, request: TranscriptionGenerateRequest
) -> TaskSubmitResponse: ) -> TaskSubmitResponse:
"""Submit transcription generation task.""" """Submit transcription generation task."""
job, webhook_url = await self._create_job( return await self._submit_task(
requester=requester, requester=requester,
job_type="TRANSCRIPTION_GENERATE", job_type=JOB_TYPE_TRANSCRIPTION_GENERATE,
project_id=request.project_id, project_id=request.project_id,
input_data=request.model_dump(mode="json"), input_data=request.model_dump(mode="json"),
) actor=transcription_generate_actor,
transcription_generate_actor.send( actor_kwargs={
job_id=str(job.id), "file_key": request.file_key,
file_key=request.file_key, "engine": request.engine,
engine=request.engine, "language": request.language,
language=request.language, "model": request.model,
model=request.model, },
)
return TaskSubmitResponse(
job_id=job.id, webhook_url=webhook_url, status="PENDING"
) )
async def submit_captions_generate( async def submit_captions_generate(
@@ -493,18 +635,15 @@ class TaskService:
if transcription is None: if transcription is None:
raise ValueError(f"Transcription {request.transcription_id} not found") raise ValueError(f"Transcription {request.transcription_id} not found")
job, webhook_url = await self._create_job( return await self._submit_task(
requester=requester, requester=requester,
job_type="CAPTIONS_GENERATE", job_type=JOB_TYPE_CAPTIONS_GENERATE,
project_id=request.project_id, project_id=request.project_id,
input_data=request.model_dump(mode="json"), input_data=request.model_dump(mode="json"),
) actor=captions_generate_actor,
captions_generate_actor.send( actor_kwargs={
job_id=str(job.id), "video_s3_path": request.video_s3_path,
video_s3_path=request.video_s3_path, "folder": request.folder,
folder=request.folder, "transcription_json": transcription.document,
transcription_json=transcription.document, },
)
return TaskSubmitResponse(
job_id=job.id, webhook_url=webhook_url, status="PENDING"
) )