chore: first commit
This commit is contained in:
@@ -7,7 +7,7 @@ from __future__ import annotations
|
|||||||
import uuid
|
import uuid
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, Request, status
|
from fastapi import APIRouter, Depends, HTTPException, status
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from cpv3.db.session import get_db
|
from cpv3.db.session import get_db
|
||||||
@@ -22,6 +22,7 @@ from cpv3.modules.tasks.schemas import (
|
|||||||
TaskStatusResponse,
|
TaskStatusResponse,
|
||||||
TaskSubmitResponse,
|
TaskSubmitResponse,
|
||||||
TaskTypeEnum,
|
TaskTypeEnum,
|
||||||
|
TaskWebhookEvent,
|
||||||
TranscriptionGenerateRequest,
|
TranscriptionGenerateRequest,
|
||||||
)
|
)
|
||||||
from cpv3.modules.tasks.service import TaskService
|
from cpv3.modules.tasks.service import TaskService
|
||||||
@@ -146,23 +147,16 @@ async def get_task_status(
|
|||||||
@router.post("/webhook/{job_id}/", include_in_schema=False)
|
@router.post("/webhook/{job_id}/", include_in_schema=False)
|
||||||
async def task_webhook_callback(
|
async def task_webhook_callback(
|
||||||
job_id: uuid.UUID,
|
job_id: uuid.UUID,
|
||||||
request: Request,
|
body: TaskWebhookEvent,
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
) -> dict[str, str]:
|
) -> dict[str, str]:
|
||||||
"""Internal webhook endpoint for task status updates."""
|
"""Internal webhook endpoint for task status updates."""
|
||||||
|
service = TaskService(db)
|
||||||
try:
|
try:
|
||||||
await request.json()
|
await service.record_webhook_event(job_id=job_id, event=body)
|
||||||
except Exception:
|
except ValueError as exc:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON payload"
|
status_code=status.HTTP_404_NOT_FOUND, detail=str(exc)
|
||||||
)
|
) from exc
|
||||||
|
|
||||||
job_service = JobService(db)
|
|
||||||
job = await job_service.get_job(job_id)
|
|
||||||
|
|
||||||
if job is None:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_404_NOT_FOUND, detail="Job not found"
|
|
||||||
)
|
|
||||||
|
|
||||||
return {"status": "received", "job_id": str(job_id)}
|
return {"status": "received", "job_id": str(job_id)}
|
||||||
|
|||||||
@@ -8,20 +8,14 @@ from datetime import datetime
|
|||||||
from typing import Literal
|
from typing import Literal
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
from pydantic import Field
|
from pydantic import Field, model_validator
|
||||||
|
|
||||||
from cpv3.common.schemas import Schema
|
from cpv3.common.schemas import Schema
|
||||||
|
from cpv3.modules.jobs.schemas import JobStatusEnum, JobTypeEnum
|
||||||
|
|
||||||
|
|
||||||
TaskTypeEnum = Literal[
|
TaskTypeEnum = JobTypeEnum
|
||||||
"MEDIA_PROBE",
|
TaskStatusEnum = JobStatusEnum
|
||||||
"SILENCE_REMOVE",
|
|
||||||
"MEDIA_CONVERT",
|
|
||||||
"TRANSCRIPTION_GENERATE",
|
|
||||||
"CAPTIONS_GENERATE",
|
|
||||||
]
|
|
||||||
|
|
||||||
TaskStatusEnum = Literal["PENDING", "RUNNING", "FAILED", "CANCELLED", "DONE"]
|
|
||||||
|
|
||||||
|
|
||||||
# --- Request schemas ---
|
# --- Request schemas ---
|
||||||
@@ -104,3 +98,33 @@ class TaskStatusResponse(Schema):
|
|||||||
output_data: dict | None = None
|
output_data: dict | None = None
|
||||||
started_at: datetime | None = None
|
started_at: datetime | None = None
|
||||||
finished_at: datetime | None = None
|
finished_at: datetime | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class TaskWebhookEvent(Schema):
    """Partial job update posted to the internal task webhook.

    Every field is optional, but an event carrying no field at all is
    rejected by ``validate_has_update``.
    """

    # Each field defaults to None, meaning "not reported in this event".
    status: TaskStatusEnum | None = None
    progress_pct: float | None = None
    current_message: str | None = None
    error_message: str | None = None
    output_data: dict | None = None
    started_at: datetime | None = None
    finished_at: datetime | None = None

    @model_validator(mode="after")
    def validate_has_update(self) -> "TaskWebhookEvent":
        """Reject events in which every update field is None.

        Raises:
            ValueError: If no field carries a value.
        """
        reported = (
            self.status,
            self.progress_pct,
            self.current_message,
            self.error_message,
            self.output_data,
            self.started_at,
            self.finished_at,
        )
        if all(value is None for value in reported):
            raise ValueError("Webhook event must include at least one update field.")
        return self
|
||||||
|
|||||||
+369
-230
@@ -12,22 +12,27 @@ from typing import Any
|
|||||||
|
|
||||||
import dramatiq # type: ignore[import-untyped]
|
import dramatiq # type: ignore[import-untyped]
|
||||||
from dramatiq.brokers.redis import RedisBroker # type: ignore[import-untyped]
|
from dramatiq.brokers.redis import RedisBroker # type: ignore[import-untyped]
|
||||||
from pydantic import BaseModel
|
import httpx
|
||||||
from sqlalchemy import create_engine, select
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.orm import Session, sessionmaker
|
|
||||||
|
|
||||||
from cpv3.infrastructure.deps import _get_storage_service
|
from cpv3.infrastructure.deps import _get_storage_service
|
||||||
from cpv3.infrastructure.settings import get_settings
|
from cpv3.infrastructure.settings import get_settings
|
||||||
from cpv3.modules.jobs.models import Job, JobEvent
|
from cpv3.modules.jobs.models import Job
|
||||||
from cpv3.modules.jobs.repository import JobRepository
|
from cpv3.modules.jobs.repository import JobEventRepository, JobRepository
|
||||||
from cpv3.modules.jobs.schemas import JobCreate, JobTypeEnum
|
from cpv3.modules.jobs.schemas import (
|
||||||
|
JobCreate,
|
||||||
|
JobEventCreate,
|
||||||
|
JobStatusEnum,
|
||||||
|
JobTypeEnum,
|
||||||
|
JobUpdate,
|
||||||
|
)
|
||||||
from cpv3.modules.tasks.schemas import (
|
from cpv3.modules.tasks.schemas import (
|
||||||
CaptionsGenerateRequest,
|
CaptionsGenerateRequest,
|
||||||
MediaConvertRequest,
|
MediaConvertRequest,
|
||||||
MediaProbeRequest,
|
MediaProbeRequest,
|
||||||
SilenceRemoveRequest,
|
SilenceRemoveRequest,
|
||||||
TaskSubmitResponse,
|
TaskSubmitResponse,
|
||||||
|
TaskWebhookEvent,
|
||||||
TranscriptionGenerateRequest,
|
TranscriptionGenerateRequest,
|
||||||
)
|
)
|
||||||
from cpv3.modules.transcription.repository import TranscriptionRepository
|
from cpv3.modules.transcription.repository import TranscriptionRepository
|
||||||
@@ -37,6 +42,40 @@ from cpv3.modules.webhooks.schemas import WebhookCreate
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Job status values, typed as JobStatusEnum so call sites avoid bare strings.
JOB_STATUS_PENDING: JobStatusEnum = "PENDING"
JOB_STATUS_RUNNING: JobStatusEnum = "RUNNING"
JOB_STATUS_DONE: JobStatusEnum = "DONE"
JOB_STATUS_FAILED: JobStatusEnum = "FAILED"

# Job type values, typed as JobTypeEnum.
JOB_TYPE_MEDIA_PROBE: JobTypeEnum = "MEDIA_PROBE"
JOB_TYPE_SILENCE_REMOVE: JobTypeEnum = "SILENCE_REMOVE"
JOB_TYPE_MEDIA_CONVERT: JobTypeEnum = "MEDIA_CONVERT"
JOB_TYPE_TRANSCRIPTION_GENERATE: JobTypeEnum = "TRANSCRIPTION_GENERATE"
JOB_TYPE_CAPTIONS_GENERATE: JobTypeEnum = "CAPTIONS_GENERATE"

# Job event type labels; the status prefix is completed with the status value.
EVENT_TYPE_STATUS_PREFIX = "status_"
EVENT_TYPE_PROGRESS = "progress"
EVENT_TYPE_LOG = "log"
EVENT_TYPE_OUTPUT = "output"
EVENT_TYPE_ERROR = "error"

# Path template of the internal webhook endpoint and the HTTP timeout
# (seconds) applied when posting to it.
TASK_WEBHOOK_PATH = "/api/tasks/webhook/{job_id}/"
WEBHOOK_TIMEOUT_SECONDS = 10

# Human-readable progress messages sent with webhook events.
MESSAGE_STARTING = "Starting"
MESSAGE_COMPLETED = "Completed"
MESSAGE_PROBING_MEDIA = "Probing media"
MESSAGE_PROCESSING = "Processing"
MESSAGE_CONVERTING = "Converting"
MESSAGE_RENDERING_CAPTIONS = "Rendering captions"

# Progress percentages reported at the main step of each task type, plus
# the value sent on completion.
PROGRESS_COMPLETE = 100.0
PROGRESS_MEDIA_PROBE = 50.0
PROGRESS_SILENCE_REMOVE = 30.0
PROGRESS_MEDIA_CONVERT = 30.0
PROGRESS_TRANSCRIPTION = 20.0
PROGRESS_CAPTIONS = 30.0
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Dramatiq broker setup
|
# Dramatiq broker setup
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -47,62 +86,53 @@ dramatiq.set_broker(_redis_broker)
|
|||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Sync DB helpers for Dramatiq workers
|
# Webhook helpers for Dramatiq workers
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
def _get_sync_session() -> Session:
|
def _utc_now() -> datetime:
|
||||||
"""Create sync DB session for worker tasks."""
|
"""Return current UTC time."""
|
||||||
|
return datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_webhook_url(job_id: uuid.UUID) -> str:
|
||||||
|
"""Build the internal webhook URL for task updates."""
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
sync_url = settings.get_database_url().replace(
|
base_url = settings.webhook_base_url.rstrip("/")
|
||||||
"postgresql+asyncpg://", "postgresql://"
|
return f"{base_url}{TASK_WEBHOOK_PATH.format(job_id=job_id)}"
|
||||||
)
|
|
||||||
engine = create_engine(sync_url, pool_pre_ping=True)
|
|
||||||
return sessionmaker(bind=engine, expire_on_commit=False)()
|
|
||||||
|
|
||||||
|
|
||||||
def _update_job(
|
def _build_webhook_event_name(job_type: JobTypeEnum) -> str:
|
||||||
job_id: uuid.UUID,
|
"""Build webhook event name for a job type."""
|
||||||
*,
|
return f"task.{job_type.lower()}"
|
||||||
status: str | None = None,
|
|
||||||
current_message: str | None = None,
|
|
||||||
progress_pct: float | None = None,
|
|
||||||
error_message: str | None = None,
|
|
||||||
output_data: dict | None = None,
|
|
||||||
started_at: datetime | None = None,
|
|
||||||
finished_at: datetime | None = None,
|
|
||||||
) -> Job | None:
|
|
||||||
"""Update job in database (sync, for workers)."""
|
|
||||||
with _get_sync_session() as session:
|
|
||||||
job = session.execute(select(Job).where(Job.id == job_id)).scalar_one_or_none()
|
|
||||||
if job is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if status is not None:
|
|
||||||
job.status = status
|
|
||||||
if current_message is not None:
|
|
||||||
job.current_message = current_message
|
|
||||||
if progress_pct is not None:
|
|
||||||
job.project_pct = progress_pct
|
|
||||||
if error_message is not None:
|
|
||||||
job.error_message = error_message
|
|
||||||
if output_data is not None:
|
|
||||||
job.output_data = output_data
|
|
||||||
if started_at is not None:
|
|
||||||
job.started_at = started_at
|
|
||||||
if finished_at is not None:
|
|
||||||
job.finished_at = finished_at
|
|
||||||
|
|
||||||
# Create event
|
def _send_webhook_event(webhook_url: str, event: TaskWebhookEvent) -> None:
|
||||||
event = JobEvent(
|
"""Send a task webhook event to the API."""
|
||||||
job_id=job_id,
|
payload = event.model_dump(mode="json", exclude_none=True)
|
||||||
event_type=f"status_{status}" if status else "progress",
|
try:
|
||||||
payload={"status": status or job.status, "message": current_message},
|
response = httpx.post(
|
||||||
|
webhook_url, json=payload, timeout=WEBHOOK_TIMEOUT_SECONDS
|
||||||
)
|
)
|
||||||
session.add(event)
|
response.raise_for_status()
|
||||||
session.commit()
|
except Exception:
|
||||||
session.refresh(job)
|
logger.exception("Failed to send task webhook event to %s", webhook_url)
|
||||||
return job
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def _derive_event_type(event: TaskWebhookEvent) -> str:
    """Pick the job event type label that best describes a webhook event.

    A status change takes precedence and yields the status prefix followed
    by the status value. Otherwise the first populated field, checked in the
    order error, progress, message, output, decides the label. Events that
    carry none of those fall back to the log type.
    """
    if event.status is not None:
        return f"{EVENT_TYPE_STATUS_PREFIX}{event.status}"

    # Ordered by precedence: the first populated field wins.
    precedence = (
        (event.error_message, EVENT_TYPE_ERROR),
        (event.progress_pct, EVENT_TYPE_PROGRESS),
        (event.current_message, EVENT_TYPE_LOG),
        (event.output_data, EVENT_TYPE_OUTPUT),
    )
    return next(
        (label for value, label in precedence if value is not None),
        EVENT_TYPE_LOG,
    )
|
||||||
|
|
||||||
|
|
||||||
def _run_async(coro: Any) -> Any:
|
def _run_async(coro: Any) -> Any:
|
||||||
@@ -121,37 +151,49 @@ def _run_async(coro: Any) -> Any:
|
|||||||
|
|
||||||
|
|
||||||
@dramatiq.actor(max_retries=3, min_backoff=1000)
|
@dramatiq.actor(max_retries=3, min_backoff=1000)
|
||||||
def media_probe_actor(job_id: str, file_key: str) -> None:
|
def media_probe_actor(job_id: str, webhook_url: str, file_key: str) -> None:
|
||||||
"""Probe media file to extract metadata."""
|
"""Probe media file to extract metadata."""
|
||||||
from cpv3.modules.media.service import probe_media
|
from cpv3.modules.media.service import probe_media
|
||||||
|
|
||||||
_job_id = uuid.UUID(job_id)
|
job_uuid = uuid.UUID(job_id)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="RUNNING",
|
TaskWebhookEvent(
|
||||||
current_message="Starting",
|
status=JOB_STATUS_RUNNING,
|
||||||
started_at=datetime.now(timezone.utc),
|
current_message=MESSAGE_STARTING,
|
||||||
|
started_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
storage = _get_storage_service()
|
storage = _get_storage_service()
|
||||||
_update_job(_job_id, current_message="Probing media", progress_pct=50.0)
|
_send_webhook_event(
|
||||||
result = _run_async(probe_media(storage, file_key=file_key))
|
webhook_url,
|
||||||
_update_job(
|
TaskWebhookEvent(
|
||||||
_job_id,
|
current_message=MESSAGE_PROBING_MEDIA,
|
||||||
status="DONE",
|
progress_pct=PROGRESS_MEDIA_PROBE,
|
||||||
current_message="Completed",
|
),
|
||||||
progress_pct=100.0,
|
|
||||||
output_data=result.model_dump(mode="json"),
|
|
||||||
finished_at=datetime.now(timezone.utc),
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
result = _run_async(probe_media(storage, file_key=file_key))
|
||||||
logger.exception("media_probe_actor failed: %s", _job_id)
|
_send_webhook_event(
|
||||||
_update_job(
|
webhook_url,
|
||||||
_job_id,
|
TaskWebhookEvent(
|
||||||
status="FAILED",
|
status=JOB_STATUS_DONE,
|
||||||
error_message=str(e),
|
current_message=MESSAGE_COMPLETED,
|
||||||
finished_at=datetime.now(timezone.utc),
|
progress_pct=PROGRESS_COMPLETE,
|
||||||
|
output_data=result.model_dump(mode="json"),
|
||||||
|
finished_at=_utc_now(),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.exception("media_probe_actor failed: %s", job_uuid)
|
||||||
|
_send_webhook_event(
|
||||||
|
webhook_url,
|
||||||
|
TaskWebhookEvent(
|
||||||
|
status=JOB_STATUS_FAILED,
|
||||||
|
error_message=str(exc),
|
||||||
|
finished_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
@@ -159,6 +201,7 @@ def media_probe_actor(job_id: str, file_key: str) -> None:
|
|||||||
@dramatiq.actor(max_retries=3, min_backoff=1000)
|
@dramatiq.actor(max_retries=3, min_backoff=1000)
|
||||||
def silence_remove_actor(
|
def silence_remove_actor(
|
||||||
job_id: str,
|
job_id: str,
|
||||||
|
webhook_url: str,
|
||||||
file_key: str,
|
file_key: str,
|
||||||
out_folder: str,
|
out_folder: str,
|
||||||
min_silence_duration_ms: int,
|
min_silence_duration_ms: int,
|
||||||
@@ -168,17 +211,25 @@ def silence_remove_actor(
|
|||||||
"""Remove silence from media file."""
|
"""Remove silence from media file."""
|
||||||
from cpv3.modules.media.service import remove_silence
|
from cpv3.modules.media.service import remove_silence
|
||||||
|
|
||||||
_job_id = uuid.UUID(job_id)
|
job_uuid = uuid.UUID(job_id)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="RUNNING",
|
TaskWebhookEvent(
|
||||||
current_message="Starting",
|
status=JOB_STATUS_RUNNING,
|
||||||
started_at=datetime.now(timezone.utc),
|
current_message=MESSAGE_STARTING,
|
||||||
|
started_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
storage = _get_storage_service()
|
storage = _get_storage_service()
|
||||||
_update_job(_job_id, current_message="Processing", progress_pct=30.0)
|
_send_webhook_event(
|
||||||
|
webhook_url,
|
||||||
|
TaskWebhookEvent(
|
||||||
|
current_message=MESSAGE_PROCESSING,
|
||||||
|
progress_pct=PROGRESS_SILENCE_REMOVE,
|
||||||
|
),
|
||||||
|
)
|
||||||
result = _run_async(
|
result = _run_async(
|
||||||
remove_silence(
|
remove_silence(
|
||||||
storage,
|
storage,
|
||||||
@@ -189,42 +240,52 @@ def silence_remove_actor(
|
|||||||
padding_ms=padding_ms,
|
padding_ms=padding_ms,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="DONE",
|
TaskWebhookEvent(
|
||||||
current_message="Completed",
|
status=JOB_STATUS_DONE,
|
||||||
progress_pct=100.0,
|
current_message=MESSAGE_COMPLETED,
|
||||||
output_data={
|
progress_pct=PROGRESS_COMPLETE,
|
||||||
"file_path": result.file_path,
|
output_data={
|
||||||
"file_url": result.file_url,
|
"file_path": result.file_path,
|
||||||
"file_size": result.file_size,
|
"file_url": result.file_url,
|
||||||
},
|
"file_size": result.file_size,
|
||||||
finished_at=datetime.now(timezone.utc),
|
},
|
||||||
|
finished_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as exc:
|
||||||
logger.exception("silence_remove_actor failed: %s", _job_id)
|
logger.exception("silence_remove_actor failed: %s", job_uuid)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="FAILED",
|
TaskWebhookEvent(
|
||||||
error_message=str(e),
|
status=JOB_STATUS_FAILED,
|
||||||
finished_at=datetime.now(timezone.utc),
|
error_message=str(exc),
|
||||||
|
finished_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
@dramatiq.actor(max_retries=3, min_backoff=1000)
|
@dramatiq.actor(max_retries=3, min_backoff=1000)
|
||||||
def media_convert_actor(
|
def media_convert_actor(
|
||||||
job_id: str, file_key: str, out_folder: str, output_format: str
|
job_id: str,
|
||||||
|
webhook_url: str,
|
||||||
|
file_key: str,
|
||||||
|
out_folder: str,
|
||||||
|
output_format: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Convert media file to specified format."""
|
"""Convert media file to specified format."""
|
||||||
from cpv3.modules.media.service import convert_to_mp4
|
from cpv3.modules.media.service import convert_to_mp4
|
||||||
|
|
||||||
_job_id = uuid.UUID(job_id)
|
job_uuid = uuid.UUID(job_id)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="RUNNING",
|
TaskWebhookEvent(
|
||||||
current_message="Starting",
|
status=JOB_STATUS_RUNNING,
|
||||||
started_at=datetime.now(timezone.utc),
|
current_message=MESSAGE_STARTING,
|
||||||
|
started_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -232,36 +293,51 @@ def media_convert_actor(
|
|||||||
raise ValueError(f"Unsupported format: {output_format}")
|
raise ValueError(f"Unsupported format: {output_format}")
|
||||||
|
|
||||||
storage = _get_storage_service()
|
storage = _get_storage_service()
|
||||||
_update_job(_job_id, current_message="Converting", progress_pct=30.0)
|
_send_webhook_event(
|
||||||
|
webhook_url,
|
||||||
|
TaskWebhookEvent(
|
||||||
|
current_message=MESSAGE_CONVERTING,
|
||||||
|
progress_pct=PROGRESS_MEDIA_CONVERT,
|
||||||
|
),
|
||||||
|
)
|
||||||
result = _run_async(
|
result = _run_async(
|
||||||
convert_to_mp4(storage, file_key=file_key, out_folder=out_folder)
|
convert_to_mp4(storage, file_key=file_key, out_folder=out_folder)
|
||||||
)
|
)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="DONE",
|
TaskWebhookEvent(
|
||||||
current_message="Completed",
|
status=JOB_STATUS_DONE,
|
||||||
progress_pct=100.0,
|
current_message=MESSAGE_COMPLETED,
|
||||||
output_data={
|
progress_pct=PROGRESS_COMPLETE,
|
||||||
"file_path": result.file_path,
|
output_data={
|
||||||
"file_url": result.file_url,
|
"file_path": result.file_path,
|
||||||
"file_size": result.file_size,
|
"file_url": result.file_url,
|
||||||
},
|
"file_size": result.file_size,
|
||||||
finished_at=datetime.now(timezone.utc),
|
},
|
||||||
|
finished_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as exc:
|
||||||
logger.exception("media_convert_actor failed: %s", _job_id)
|
logger.exception("media_convert_actor failed: %s", job_uuid)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="FAILED",
|
TaskWebhookEvent(
|
||||||
error_message=str(e),
|
status=JOB_STATUS_FAILED,
|
||||||
finished_at=datetime.now(timezone.utc),
|
error_message=str(exc),
|
||||||
|
finished_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
@dramatiq.actor(max_retries=2, min_backoff=2000)
|
@dramatiq.actor(max_retries=2, min_backoff=2000)
|
||||||
def transcription_generate_actor(
|
def transcription_generate_actor(
|
||||||
job_id: str, file_key: str, engine: str, language: str | None, model: str
|
job_id: str,
|
||||||
|
webhook_url: str,
|
||||||
|
file_key: str,
|
||||||
|
engine: str,
|
||||||
|
language: str | None,
|
||||||
|
model: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Generate transcription from audio/video file."""
|
"""Generate transcription from audio/video file."""
|
||||||
from cpv3.modules.transcription.service import (
|
from cpv3.modules.transcription.service import (
|
||||||
@@ -269,18 +345,24 @@ def transcription_generate_actor(
|
|||||||
transcribe_with_whisper,
|
transcribe_with_whisper,
|
||||||
)
|
)
|
||||||
|
|
||||||
_job_id = uuid.UUID(job_id)
|
job_uuid = uuid.UUID(job_id)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="RUNNING",
|
TaskWebhookEvent(
|
||||||
current_message="Starting",
|
status=JOB_STATUS_RUNNING,
|
||||||
started_at=datetime.now(timezone.utc),
|
current_message=MESSAGE_STARTING,
|
||||||
|
started_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
storage = _get_storage_service()
|
storage = _get_storage_service()
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id, current_message=f"Transcribing ({engine})", progress_pct=20.0
|
webhook_url,
|
||||||
|
TaskWebhookEvent(
|
||||||
|
current_message=f"Transcribing ({engine})",
|
||||||
|
progress_pct=PROGRESS_TRANSCRIPTION,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
if engine == "whisper":
|
if engine == "whisper":
|
||||||
@@ -299,64 +381,84 @@ def transcription_generate_actor(
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown engine: {engine}")
|
raise ValueError(f"Unknown engine: {engine}")
|
||||||
|
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="DONE",
|
TaskWebhookEvent(
|
||||||
current_message="Completed",
|
status=JOB_STATUS_DONE,
|
||||||
progress_pct=100.0,
|
current_message=MESSAGE_COMPLETED,
|
||||||
output_data={"document": document.model_dump(mode="json")},
|
progress_pct=PROGRESS_COMPLETE,
|
||||||
finished_at=datetime.now(timezone.utc),
|
output_data={"document": document.model_dump(mode="json")},
|
||||||
|
finished_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as exc:
|
||||||
logger.exception("transcription_generate_actor failed: %s", _job_id)
|
logger.exception("transcription_generate_actor failed: %s", job_uuid)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="FAILED",
|
TaskWebhookEvent(
|
||||||
error_message=str(e),
|
status=JOB_STATUS_FAILED,
|
||||||
finished_at=datetime.now(timezone.utc),
|
error_message=str(exc),
|
||||||
|
finished_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
@dramatiq.actor(max_retries=2, min_backoff=2000)
|
@dramatiq.actor(max_retries=2, min_backoff=2000)
|
||||||
def captions_generate_actor(
|
def captions_generate_actor(
|
||||||
job_id: str, video_s3_path: str, folder: str, transcription_json: dict
|
job_id: str,
|
||||||
|
webhook_url: str,
|
||||||
|
video_s3_path: str,
|
||||||
|
folder: str,
|
||||||
|
transcription_json: dict,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Generate captions on video."""
|
"""Generate captions on video."""
|
||||||
from cpv3.modules.captions.service import generate_captions
|
from cpv3.modules.captions.service import generate_captions
|
||||||
from cpv3.modules.transcription.schemas import Document
|
from cpv3.modules.transcription.schemas import Document
|
||||||
|
|
||||||
_job_id = uuid.UUID(job_id)
|
job_uuid = uuid.UUID(job_id)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="RUNNING",
|
TaskWebhookEvent(
|
||||||
current_message="Starting",
|
status=JOB_STATUS_RUNNING,
|
||||||
started_at=datetime.now(timezone.utc),
|
current_message=MESSAGE_STARTING,
|
||||||
|
started_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_update_job(_job_id, current_message="Rendering captions", progress_pct=30.0)
|
_send_webhook_event(
|
||||||
|
webhook_url,
|
||||||
|
TaskWebhookEvent(
|
||||||
|
current_message=MESSAGE_RENDERING_CAPTIONS,
|
||||||
|
progress_pct=PROGRESS_CAPTIONS,
|
||||||
|
),
|
||||||
|
)
|
||||||
document = Document.model_validate(transcription_json)
|
document = Document.model_validate(transcription_json)
|
||||||
output_path = _run_async(
|
output_path = _run_async(
|
||||||
generate_captions(
|
generate_captions(
|
||||||
video_s3_path=video_s3_path, folder=folder, transcription=document
|
video_s3_path=video_s3_path, folder=folder, transcription=document
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="DONE",
|
TaskWebhookEvent(
|
||||||
current_message="Completed",
|
status=JOB_STATUS_DONE,
|
||||||
progress_pct=100.0,
|
current_message=MESSAGE_COMPLETED,
|
||||||
output_data={"output_path": output_path},
|
progress_pct=PROGRESS_COMPLETE,
|
||||||
finished_at=datetime.now(timezone.utc),
|
output_data={"output_path": output_path},
|
||||||
|
finished_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as exc:
|
||||||
logger.exception("captions_generate_actor failed: %s", _job_id)
|
logger.exception("captions_generate_actor failed: %s", job_uuid)
|
||||||
_update_job(
|
_send_webhook_event(
|
||||||
_job_id,
|
webhook_url,
|
||||||
status="FAILED",
|
TaskWebhookEvent(
|
||||||
error_message=str(e),
|
status=JOB_STATUS_FAILED,
|
||||||
finished_at=datetime.now(timezone.utc),
|
error_message=str(exc),
|
||||||
|
finished_at=_utc_now(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
@@ -367,14 +469,15 @@ def captions_generate_actor(
|
|||||||
|
|
||||||
|
|
||||||
class TaskService:
|
class TaskService:
|
||||||
"""Service for submitting background tasks."""
|
"""Service for submitting background tasks and recording webhook updates."""
|
||||||
|
|
||||||
def __init__(self, session: AsyncSession) -> None:
|
def __init__(self, session: AsyncSession) -> None:
|
||||||
self._session = session
|
self._session = session
|
||||||
self._job_repo = JobRepository(session)
|
self._job_repo = JobRepository(session)
|
||||||
|
self._event_repo = JobEventRepository(session)
|
||||||
self._webhook_repo = WebhookRepository(session)
|
self._webhook_repo = WebhookRepository(session)
|
||||||
|
|
||||||
async def _create_job(
|
async def _create_job_and_webhook(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
requester: User,
|
requester: User,
|
||||||
@@ -383,7 +486,6 @@ class TaskService:
|
|||||||
input_data: dict,
|
input_data: dict,
|
||||||
) -> tuple[Job, str]:
|
) -> tuple[Job, str]:
|
||||||
"""Create job and webhook, return job and webhook URL."""
|
"""Create job and webhook, return job and webhook URL."""
|
||||||
settings = get_settings()
|
|
||||||
broker_id = uuid.uuid4().hex
|
broker_id = uuid.uuid4().hex
|
||||||
|
|
||||||
job = await self._job_repo.create(
|
job = await self._job_repo.create(
|
||||||
@@ -396,92 +498,132 @@ class TaskService:
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
webhook_url = f"{settings.webhook_base_url}/api/tasks/webhook/{job.id}/"
|
webhook_url = _build_webhook_url(job.id)
|
||||||
await self._webhook_repo.create(
|
await self._webhook_repo.create(
|
||||||
requester=requester,
|
requester=requester,
|
||||||
data=WebhookCreate(
|
data=WebhookCreate(
|
||||||
project_id=project_id, event=f"task.{job_type.lower()}", url=webhook_url
|
project_id=project_id,
|
||||||
|
event=_build_webhook_event_name(job_type),
|
||||||
|
url=webhook_url,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
return job, webhook_url
|
return job, webhook_url
|
||||||
|
|
||||||
|
async def _submit_task(
    self,
    *,
    requester: User,
    job_type: JobTypeEnum,
    project_id: uuid.UUID | None,
    input_data: dict,
    actor: Any,
    actor_kwargs: dict[str, Any],
) -> TaskSubmitResponse:
    """Create the job and its webhook, enqueue the actor, and describe the submission.

    Args:
        requester: User on whose behalf the job and webhook are created.
        job_type: Type recorded on the job.
        project_id: Optional project the job belongs to.
        input_data: Request payload stored with the job.
        actor: Object exposing ``send(**kwargs)``; it is called once.
        actor_kwargs: Task-specific keyword arguments forwarded to ``actor.send``
            alongside ``job_id`` and ``webhook_url``.

    Returns:
        A response carrying the job id, the webhook URL and the pending status.
    """
    created = await self._create_job_and_webhook(
        requester=requester,
        job_type=job_type,
        project_id=project_id,
        input_data=input_data,
    )
    job, webhook_url = created

    # The job id is passed to the actor as a string, together with the
    # webhook URL.
    actor.send(job_id=str(job.id), webhook_url=webhook_url, **actor_kwargs)

    response = TaskSubmitResponse(
        job_id=job.id,
        webhook_url=webhook_url,
        status=JOB_STATUS_PENDING,
    )
    return response
|
||||||
|
|
||||||
|
async def record_webhook_event(
    self, *, job_id: uuid.UUID, event: TaskWebhookEvent
) -> Job:
    """Apply a webhook event to the job and store a job event record.

    Args:
        job_id: Identifier of the job the event refers to.
        event: Partial update reported for that job.

    Returns:
        The job as returned by the repository after the update.

    Raises:
        ValueError: If no job with ``job_id`` exists.
    """
    job = await self._job_repo.get_by_id(job_id)
    if job is None:
        raise ValueError(f"Job {job_id} not found")

    # Map webhook field names onto JobUpdate field names (progress_pct is
    # stored as project_pct on the job side).
    reported = {
        "status": event.status,
        "project_pct": event.progress_pct,
        "current_message": event.current_message,
        "error_message": event.error_message,
        "output_data": event.output_data,
        "started_at": event.started_at,
        "finished_at": event.finished_at,
    }
    # Forward only the fields that were actually reported. Passing explicit
    # None values would mark every field as "set" on the model, so a partial
    # event (e.g. progress only) could overwrite existing job columns with
    # None if the repository applies set fields.
    # NOTE(review): assumes every JobUpdate field is optional — confirm.
    job_update = JobUpdate(
        **{name: value for name, value in reported.items() if value is not None}
    )
    job = await self._job_repo.update(job, job_update)

    # Persist the raw (non-None) payload as a job event for the audit trail.
    event_type = _derive_event_type(event)
    payload = event.model_dump(mode="json", exclude_none=True)
    await self._event_repo.create(
        JobEventCreate(job_id=job.id, event_type=event_type, payload=payload)
    )
    return job
|
||||||
|
|
||||||
async def submit_media_probe(
    self, *, requester: User, request: MediaProbeRequest
) -> TaskSubmitResponse:
    """Submit a media probe task for the file named in ``request``.

    Delegates to ``_submit_task`` with the ``JOB_TYPE_MEDIA_PROBE`` job type
    and ``media_probe_actor``; the request's JSON-mode dump is passed as the
    job input data.
    """
    probe_kwargs = {"file_key": request.file_key}
    job_input = request.model_dump(mode="json")

    response = await self._submit_task(
        requester=requester,
        job_type=JOB_TYPE_MEDIA_PROBE,
        project_id=request.project_id,
        input_data=job_input,
        actor=media_probe_actor,
        actor_kwargs=probe_kwargs,
    )
    return response
|
||||||
|
|
||||||
async def submit_silence_remove(
    self, *, requester: User, request: SilenceRemoveRequest
) -> TaskSubmitResponse:
    """Submit a silence removal task built from ``request``.

    Delegates to ``_submit_task`` with the ``JOB_TYPE_SILENCE_REMOVE`` job
    type and ``silence_remove_actor``. The actor receives the file key,
    output folder and the silence-detection settings copied from the
    request; the request's JSON-mode dump is passed as the job input data.
    """
    silence_kwargs = {
        "file_key": request.file_key,
        "out_folder": request.out_folder,
        "min_silence_duration_ms": request.min_silence_duration_ms,
        "silence_threshold_db": request.silence_threshold_db,
        "padding_ms": request.padding_ms,
    }
    job_input = request.model_dump(mode="json")

    response = await self._submit_task(
        requester=requester,
        job_type=JOB_TYPE_SILENCE_REMOVE,
        project_id=request.project_id,
        input_data=job_input,
        actor=silence_remove_actor,
        actor_kwargs=silence_kwargs,
    )
    return response
|
||||||
|
|
||||||
async def submit_media_convert(
    self, *, requester: User, request: MediaConvertRequest
) -> TaskSubmitResponse:
    """Submit a media conversion task built from ``request``.

    Delegates to ``_submit_task`` with the ``JOB_TYPE_MEDIA_CONVERT`` job
    type and ``media_convert_actor``. The actor receives the file key,
    output folder and output format copied from the request; the request's
    JSON-mode dump is passed as the job input data.
    """
    convert_kwargs = {
        "file_key": request.file_key,
        "out_folder": request.out_folder,
        "output_format": request.output_format,
    }
    job_input = request.model_dump(mode="json")

    response = await self._submit_task(
        requester=requester,
        job_type=JOB_TYPE_MEDIA_CONVERT,
        project_id=request.project_id,
        input_data=job_input,
        actor=media_convert_actor,
        actor_kwargs=convert_kwargs,
    )
    return response
|
||||||
|
|
||||||
async def submit_transcription_generate(
    self, *, requester: User, request: TranscriptionGenerateRequest
) -> TaskSubmitResponse:
    """Submit a transcription generation task built from ``request``.

    Delegates to ``_submit_task`` with the
    ``JOB_TYPE_TRANSCRIPTION_GENERATE`` job type and
    ``transcription_generate_actor``. The actor receives the file key,
    engine, language and model copied from the request; the request's
    JSON-mode dump is passed as the job input data.
    """
    transcription_kwargs = {
        "file_key": request.file_key,
        "engine": request.engine,
        "language": request.language,
        "model": request.model,
    }
    job_input = request.model_dump(mode="json")

    response = await self._submit_task(
        requester=requester,
        job_type=JOB_TYPE_TRANSCRIPTION_GENERATE,
        project_id=request.project_id,
        input_data=job_input,
        actor=transcription_generate_actor,
        actor_kwargs=transcription_kwargs,
    )
    return response
|
||||||
|
|
||||||
async def submit_captions_generate(
|
async def submit_captions_generate(
|
||||||
@@ -493,18 +635,15 @@ class TaskService:
|
|||||||
if transcription is None:
|
if transcription is None:
|
||||||
raise ValueError(f"Transcription {request.transcription_id} not found")
|
raise ValueError(f"Transcription {request.transcription_id} not found")
|
||||||
|
|
||||||
job, webhook_url = await self._create_job(
|
return await self._submit_task(
|
||||||
requester=requester,
|
requester=requester,
|
||||||
job_type="CAPTIONS_GENERATE",
|
job_type=JOB_TYPE_CAPTIONS_GENERATE,
|
||||||
project_id=request.project_id,
|
project_id=request.project_id,
|
||||||
input_data=request.model_dump(mode="json"),
|
input_data=request.model_dump(mode="json"),
|
||||||
)
|
actor=captions_generate_actor,
|
||||||
captions_generate_actor.send(
|
actor_kwargs={
|
||||||
job_id=str(job.id),
|
"video_s3_path": request.video_s3_path,
|
||||||
video_s3_path=request.video_s3_path,
|
"folder": request.folder,
|
||||||
folder=request.folder,
|
"transcription_json": transcription.document,
|
||||||
transcription_json=transcription.document,
|
},
|
||||||
)
|
|
||||||
return TaskSubmitResponse(
|
|
||||||
job_id=job.id, webhook_url=webhook_url, status="PENDING"
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user