feature: create multitasking
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
"""
|
||||
Background tasks module for long-running operations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -0,0 +1,168 @@
|
||||
"""
|
||||
API endpoints for background task submission.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from typing import cast
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from cpv3.db.session import get_db
|
||||
from cpv3.infrastructure.auth import get_current_user
|
||||
from cpv3.modules.jobs.service import JobService
|
||||
from cpv3.modules.tasks.schemas import (
|
||||
CaptionsGenerateRequest,
|
||||
MediaConvertRequest,
|
||||
MediaProbeRequest,
|
||||
SilenceRemoveRequest,
|
||||
TaskStatusEnum,
|
||||
TaskStatusResponse,
|
||||
TaskSubmitResponse,
|
||||
TaskTypeEnum,
|
||||
TranscriptionGenerateRequest,
|
||||
)
|
||||
from cpv3.modules.tasks.service import TaskService
|
||||
from cpv3.modules.users.models import User
|
||||
|
||||
router = APIRouter(prefix="/api/tasks", tags=["tasks"])
|
||||
|
||||
|
||||
@router.post(
|
||||
"/media-probe/",
|
||||
response_model=TaskSubmitResponse,
|
||||
status_code=status.HTTP_202_ACCEPTED,
|
||||
)
|
||||
async def submit_media_probe(
|
||||
body: MediaProbeRequest,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> TaskSubmitResponse:
|
||||
"""Submit a background task to probe media file metadata."""
|
||||
service = TaskService(db)
|
||||
return await service.submit_media_probe(requester=current_user, request=body)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/silence-remove/",
|
||||
response_model=TaskSubmitResponse,
|
||||
status_code=status.HTTP_202_ACCEPTED,
|
||||
)
|
||||
async def submit_silence_remove(
|
||||
body: SilenceRemoveRequest,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> TaskSubmitResponse:
|
||||
"""Submit a background task to remove silence from media file."""
|
||||
service = TaskService(db)
|
||||
return await service.submit_silence_remove(requester=current_user, request=body)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/media-convert/",
|
||||
response_model=TaskSubmitResponse,
|
||||
status_code=status.HTTP_202_ACCEPTED,
|
||||
)
|
||||
async def submit_media_convert(
|
||||
body: MediaConvertRequest,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> TaskSubmitResponse:
|
||||
"""Submit a background task to convert media file format."""
|
||||
service = TaskService(db)
|
||||
return await service.submit_media_convert(requester=current_user, request=body)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/transcription-generate/",
|
||||
response_model=TaskSubmitResponse,
|
||||
status_code=status.HTTP_202_ACCEPTED,
|
||||
)
|
||||
async def submit_transcription_generate(
|
||||
body: TranscriptionGenerateRequest,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> TaskSubmitResponse:
|
||||
"""Submit a background task to generate transcription from audio/video."""
|
||||
service = TaskService(db)
|
||||
return await service.submit_transcription_generate(
|
||||
requester=current_user, request=body
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/captions-generate/",
|
||||
response_model=TaskSubmitResponse,
|
||||
status_code=status.HTTP_202_ACCEPTED,
|
||||
)
|
||||
async def submit_captions_generate(
|
||||
body: CaptionsGenerateRequest,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> TaskSubmitResponse:
|
||||
"""Submit a background task to generate captions on video."""
|
||||
service = TaskService(db)
|
||||
try:
|
||||
return await service.submit_captions_generate(
|
||||
requester=current_user, request=body
|
||||
)
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/status/{job_id}/", response_model=TaskStatusResponse)
|
||||
async def get_task_status(
|
||||
job_id: uuid.UUID,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> TaskStatusResponse:
|
||||
"""Get the status of a background task."""
|
||||
job_service = JobService(db)
|
||||
job = await job_service.get_job(job_id)
|
||||
|
||||
if job is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND, detail="Job not found"
|
||||
)
|
||||
|
||||
if not current_user.is_staff and job.user_id != current_user.id:
|
||||
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden")
|
||||
|
||||
return TaskStatusResponse(
|
||||
job_id=job.id,
|
||||
status=cast(TaskStatusEnum, job.status),
|
||||
job_type=cast(TaskTypeEnum, job.job_type),
|
||||
progress_pct=job.project_pct,
|
||||
current_message=job.current_message,
|
||||
error_message=job.error_message,
|
||||
output_data=job.output_data,
|
||||
started_at=job.started_at,
|
||||
finished_at=job.finished_at,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/webhook/{job_id}/", include_in_schema=False)
|
||||
async def task_webhook_callback(
|
||||
job_id: uuid.UUID,
|
||||
request: Request,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> dict[str, str]:
|
||||
"""Internal webhook endpoint for task status updates."""
|
||||
try:
|
||||
await request.json()
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON payload"
|
||||
)
|
||||
|
||||
job_service = JobService(db)
|
||||
job = await job_service.get_job(job_id)
|
||||
|
||||
if job is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND, detail="Job not found"
|
||||
)
|
||||
|
||||
return {"status": "received", "job_id": str(job_id)}
|
||||
@@ -0,0 +1,106 @@
|
||||
"""
|
||||
Task request and response schemas.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Literal
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from cpv3.common.schemas import Schema
|
||||
|
||||
|
||||
TaskTypeEnum = Literal[
|
||||
"MEDIA_PROBE",
|
||||
"SILENCE_REMOVE",
|
||||
"MEDIA_CONVERT",
|
||||
"TRANSCRIPTION_GENERATE",
|
||||
"CAPTIONS_GENERATE",
|
||||
]
|
||||
|
||||
TaskStatusEnum = Literal["PENDING", "RUNNING", "FAILED", "CANCELLED", "DONE"]
|
||||
|
||||
|
||||
# --- Request schemas ---
|
||||
|
||||
|
||||
class MediaProbeRequest(Schema):
|
||||
"""Request to probe media file metadata."""
|
||||
|
||||
file_key: str = Field(..., description="Storage key of the file to probe")
|
||||
project_id: UUID | None = Field(default=None, description="Associated project ID")
|
||||
|
||||
|
||||
class SilenceRemoveRequest(Schema):
|
||||
"""Request to remove silence from media file."""
|
||||
|
||||
file_key: str = Field(..., description="Storage key of the input file")
|
||||
out_folder: str = Field(..., description="Output folder for processed file")
|
||||
project_id: UUID | None = Field(default=None, description="Associated project ID")
|
||||
min_silence_duration_ms: int = Field(
|
||||
default=200, description="Minimum silence duration in milliseconds"
|
||||
)
|
||||
silence_threshold_db: int = Field(
|
||||
default=16, description="Silence threshold in decibels"
|
||||
)
|
||||
padding_ms: int = Field(
|
||||
default=100, description="Padding around non-silent segments in milliseconds"
|
||||
)
|
||||
|
||||
|
||||
class MediaConvertRequest(Schema):
|
||||
"""Request to convert media file to different format."""
|
||||
|
||||
file_key: str = Field(..., description="Storage key of the input file")
|
||||
out_folder: str = Field(..., description="Output folder for converted file")
|
||||
output_format: str = Field(default="mp4", description="Target output format")
|
||||
project_id: UUID | None = Field(default=None, description="Associated project ID")
|
||||
|
||||
|
||||
class TranscriptionGenerateRequest(Schema):
|
||||
"""Request to generate transcription from audio/video file."""
|
||||
|
||||
file_key: str = Field(..., description="Storage key of the input file")
|
||||
project_id: UUID | None = Field(default=None, description="Associated project ID")
|
||||
engine: Literal["whisper", "google"] = Field(
|
||||
default="whisper", description="Transcription engine to use"
|
||||
)
|
||||
language: str | None = Field(default=None, description="Language code (e.g., 'en')")
|
||||
model: str = Field(default="base", description="Model size for whisper")
|
||||
|
||||
|
||||
class CaptionsGenerateRequest(Schema):
|
||||
"""Request to generate captions/subtitles video."""
|
||||
|
||||
video_s3_path: str = Field(..., description="S3 path to the video file")
|
||||
folder: str = Field(..., description="Output folder for rendered video")
|
||||
transcription_id: UUID = Field(..., description="ID of the transcription to use")
|
||||
project_id: UUID | None = Field(default=None, description="Associated project ID")
|
||||
|
||||
|
||||
# --- Response schemas ---
|
||||
|
||||
|
||||
class TaskSubmitResponse(Schema):
|
||||
"""Response after submitting a background task."""
|
||||
|
||||
job_id: UUID = Field(..., description="Job ID for tracking")
|
||||
webhook_url: str = Field(..., description="Webhook URL for status updates")
|
||||
status: TaskStatusEnum = Field(default="PENDING", description="Initial task status")
|
||||
|
||||
|
||||
class TaskStatusResponse(Schema):
|
||||
"""Response for task status query."""
|
||||
|
||||
job_id: UUID
|
||||
status: TaskStatusEnum
|
||||
job_type: TaskTypeEnum
|
||||
progress_pct: float | None = None
|
||||
current_message: str | None = None
|
||||
error_message: str | None = None
|
||||
output_data: dict | None = None
|
||||
started_at: datetime | None = None
|
||||
finished_at: datetime | None = None
|
||||
@@ -0,0 +1,510 @@
|
||||
"""
|
||||
Task service for submitting and managing background tasks.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import dramatiq # type: ignore[import-untyped]
|
||||
from dramatiq.brokers.redis import RedisBroker # type: ignore[import-untyped]
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import create_engine, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from cpv3.infrastructure.deps import _get_storage_service
|
||||
from cpv3.infrastructure.settings import get_settings
|
||||
from cpv3.modules.jobs.models import Job, JobEvent
|
||||
from cpv3.modules.jobs.repository import JobRepository
|
||||
from cpv3.modules.jobs.schemas import JobCreate, JobTypeEnum
|
||||
from cpv3.modules.tasks.schemas import (
|
||||
CaptionsGenerateRequest,
|
||||
MediaConvertRequest,
|
||||
MediaProbeRequest,
|
||||
SilenceRemoveRequest,
|
||||
TaskSubmitResponse,
|
||||
TranscriptionGenerateRequest,
|
||||
)
|
||||
from cpv3.modules.transcription.repository import TranscriptionRepository
|
||||
from cpv3.modules.users.models import User
|
||||
from cpv3.modules.webhooks.repository import WebhookRepository
|
||||
from cpv3.modules.webhooks.schemas import WebhookCreate
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dramatiq broker setup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_settings = get_settings()
|
||||
_redis_broker = RedisBroker(url=_settings.redis_url)
|
||||
dramatiq.set_broker(_redis_broker)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sync DB helpers for Dramatiq workers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _get_sync_session() -> Session:
|
||||
"""Create sync DB session for worker tasks."""
|
||||
settings = get_settings()
|
||||
sync_url = settings.get_database_url().replace(
|
||||
"postgresql+asyncpg://", "postgresql://"
|
||||
)
|
||||
engine = create_engine(sync_url, pool_pre_ping=True)
|
||||
return sessionmaker(bind=engine, expire_on_commit=False)()
|
||||
|
||||
|
||||
def _update_job(
|
||||
job_id: uuid.UUID,
|
||||
*,
|
||||
status: str | None = None,
|
||||
current_message: str | None = None,
|
||||
progress_pct: float | None = None,
|
||||
error_message: str | None = None,
|
||||
output_data: dict | None = None,
|
||||
started_at: datetime | None = None,
|
||||
finished_at: datetime | None = None,
|
||||
) -> Job | None:
|
||||
"""Update job in database (sync, for workers)."""
|
||||
with _get_sync_session() as session:
|
||||
job = session.execute(select(Job).where(Job.id == job_id)).scalar_one_or_none()
|
||||
if job is None:
|
||||
return None
|
||||
|
||||
if status is not None:
|
||||
job.status = status
|
||||
if current_message is not None:
|
||||
job.current_message = current_message
|
||||
if progress_pct is not None:
|
||||
job.project_pct = progress_pct
|
||||
if error_message is not None:
|
||||
job.error_message = error_message
|
||||
if output_data is not None:
|
||||
job.output_data = output_data
|
||||
if started_at is not None:
|
||||
job.started_at = started_at
|
||||
if finished_at is not None:
|
||||
job.finished_at = finished_at
|
||||
|
||||
# Create event
|
||||
event = JobEvent(
|
||||
job_id=job_id,
|
||||
event_type=f"status_{status}" if status else "progress",
|
||||
payload={"status": status or job.status, "message": current_message},
|
||||
)
|
||||
session.add(event)
|
||||
session.commit()
|
||||
session.refresh(job)
|
||||
return job
|
||||
|
||||
|
||||
def _run_async(coro: Any) -> Any:
|
||||
"""Run async function in new event loop (for sync Dramatiq actors)."""
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
return loop.run_until_complete(coro)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dramatiq actors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dramatiq.actor(max_retries=3, min_backoff=1000)
|
||||
def media_probe_actor(job_id: str, file_key: str) -> None:
|
||||
"""Probe media file to extract metadata."""
|
||||
from cpv3.modules.media.service import probe_media
|
||||
|
||||
_job_id = uuid.UUID(job_id)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="RUNNING",
|
||||
current_message="Starting",
|
||||
started_at=datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
try:
|
||||
storage = _get_storage_service()
|
||||
_update_job(_job_id, current_message="Probing media", progress_pct=50.0)
|
||||
result = _run_async(probe_media(storage, file_key=file_key))
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="DONE",
|
||||
current_message="Completed",
|
||||
progress_pct=100.0,
|
||||
output_data=result.model_dump(mode="json"),
|
||||
finished_at=datetime.now(timezone.utc),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("media_probe_actor failed: %s", _job_id)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="FAILED",
|
||||
error_message=str(e),
|
||||
finished_at=datetime.now(timezone.utc),
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
@dramatiq.actor(max_retries=3, min_backoff=1000)
|
||||
def silence_remove_actor(
|
||||
job_id: str,
|
||||
file_key: str,
|
||||
out_folder: str,
|
||||
min_silence_duration_ms: int,
|
||||
silence_threshold_db: int,
|
||||
padding_ms: int,
|
||||
) -> None:
|
||||
"""Remove silence from media file."""
|
||||
from cpv3.modules.media.service import remove_silence
|
||||
|
||||
_job_id = uuid.UUID(job_id)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="RUNNING",
|
||||
current_message="Starting",
|
||||
started_at=datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
try:
|
||||
storage = _get_storage_service()
|
||||
_update_job(_job_id, current_message="Processing", progress_pct=30.0)
|
||||
result = _run_async(
|
||||
remove_silence(
|
||||
storage,
|
||||
file_key=file_key,
|
||||
out_folder=out_folder,
|
||||
min_silence_duration_ms=min_silence_duration_ms,
|
||||
silence_threshold_db=silence_threshold_db,
|
||||
padding_ms=padding_ms,
|
||||
)
|
||||
)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="DONE",
|
||||
current_message="Completed",
|
||||
progress_pct=100.0,
|
||||
output_data={
|
||||
"file_path": result.file_path,
|
||||
"file_url": result.file_url,
|
||||
"file_size": result.file_size,
|
||||
},
|
||||
finished_at=datetime.now(timezone.utc),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("silence_remove_actor failed: %s", _job_id)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="FAILED",
|
||||
error_message=str(e),
|
||||
finished_at=datetime.now(timezone.utc),
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
@dramatiq.actor(max_retries=3, min_backoff=1000)
|
||||
def media_convert_actor(
|
||||
job_id: str, file_key: str, out_folder: str, output_format: str
|
||||
) -> None:
|
||||
"""Convert media file to specified format."""
|
||||
from cpv3.modules.media.service import convert_to_mp4
|
||||
|
||||
_job_id = uuid.UUID(job_id)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="RUNNING",
|
||||
current_message="Starting",
|
||||
started_at=datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
try:
|
||||
if output_format.lower() != "mp4":
|
||||
raise ValueError(f"Unsupported format: {output_format}")
|
||||
|
||||
storage = _get_storage_service()
|
||||
_update_job(_job_id, current_message="Converting", progress_pct=30.0)
|
||||
result = _run_async(
|
||||
convert_to_mp4(storage, file_key=file_key, out_folder=out_folder)
|
||||
)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="DONE",
|
||||
current_message="Completed",
|
||||
progress_pct=100.0,
|
||||
output_data={
|
||||
"file_path": result.file_path,
|
||||
"file_url": result.file_url,
|
||||
"file_size": result.file_size,
|
||||
},
|
||||
finished_at=datetime.now(timezone.utc),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("media_convert_actor failed: %s", _job_id)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="FAILED",
|
||||
error_message=str(e),
|
||||
finished_at=datetime.now(timezone.utc),
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
@dramatiq.actor(max_retries=2, min_backoff=2000)
|
||||
def transcription_generate_actor(
|
||||
job_id: str, file_key: str, engine: str, language: str | None, model: str
|
||||
) -> None:
|
||||
"""Generate transcription from audio/video file."""
|
||||
from cpv3.modules.transcription.service import (
|
||||
transcribe_with_google_speech,
|
||||
transcribe_with_whisper,
|
||||
)
|
||||
|
||||
_job_id = uuid.UUID(job_id)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="RUNNING",
|
||||
current_message="Starting",
|
||||
started_at=datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
try:
|
||||
storage = _get_storage_service()
|
||||
_update_job(
|
||||
_job_id, current_message=f"Transcribing ({engine})", progress_pct=20.0
|
||||
)
|
||||
|
||||
if engine == "whisper":
|
||||
document = _run_async(
|
||||
transcribe_with_whisper(
|
||||
storage, file_key=file_key, model_name=model, language=language
|
||||
)
|
||||
)
|
||||
elif engine == "google":
|
||||
language_codes = [language] if language else None
|
||||
document = _run_async(
|
||||
transcribe_with_google_speech(
|
||||
storage, file_key=file_key, language_codes=language_codes
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown engine: {engine}")
|
||||
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="DONE",
|
||||
current_message="Completed",
|
||||
progress_pct=100.0,
|
||||
output_data={"document": document.model_dump(mode="json")},
|
||||
finished_at=datetime.now(timezone.utc),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("transcription_generate_actor failed: %s", _job_id)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="FAILED",
|
||||
error_message=str(e),
|
||||
finished_at=datetime.now(timezone.utc),
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
@dramatiq.actor(max_retries=2, min_backoff=2000)
|
||||
def captions_generate_actor(
|
||||
job_id: str, video_s3_path: str, folder: str, transcription_json: dict
|
||||
) -> None:
|
||||
"""Generate captions on video."""
|
||||
from cpv3.modules.captions.service import generate_captions
|
||||
from cpv3.modules.transcription.schemas import Document
|
||||
|
||||
_job_id = uuid.UUID(job_id)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="RUNNING",
|
||||
current_message="Starting",
|
||||
started_at=datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
try:
|
||||
_update_job(_job_id, current_message="Rendering captions", progress_pct=30.0)
|
||||
document = Document.model_validate(transcription_json)
|
||||
output_path = _run_async(
|
||||
generate_captions(
|
||||
video_s3_path=video_s3_path, folder=folder, transcription=document
|
||||
)
|
||||
)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="DONE",
|
||||
current_message="Completed",
|
||||
progress_pct=100.0,
|
||||
output_data={"output_path": output_path},
|
||||
finished_at=datetime.now(timezone.utc),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("captions_generate_actor failed: %s", _job_id)
|
||||
_update_job(
|
||||
_job_id,
|
||||
status="FAILED",
|
||||
error_message=str(e),
|
||||
finished_at=datetime.now(timezone.utc),
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Task Service
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TaskService:
|
||||
"""Service for submitting background tasks."""
|
||||
|
||||
def __init__(self, session: AsyncSession) -> None:
|
||||
self._session = session
|
||||
self._job_repo = JobRepository(session)
|
||||
self._webhook_repo = WebhookRepository(session)
|
||||
|
||||
async def _create_job(
|
||||
self,
|
||||
*,
|
||||
requester: User,
|
||||
job_type: JobTypeEnum,
|
||||
project_id: uuid.UUID | None,
|
||||
input_data: dict,
|
||||
) -> tuple[Job, str]:
|
||||
"""Create job and webhook, return job and webhook URL."""
|
||||
settings = get_settings()
|
||||
broker_id = uuid.uuid4().hex
|
||||
|
||||
job = await self._job_repo.create(
|
||||
requester=requester,
|
||||
data=JobCreate(
|
||||
broker_id=broker_id,
|
||||
project_id=project_id,
|
||||
input_data=input_data,
|
||||
job_type=job_type,
|
||||
),
|
||||
)
|
||||
|
||||
webhook_url = f"{settings.webhook_base_url}/api/tasks/webhook/{job.id}/"
|
||||
await self._webhook_repo.create(
|
||||
requester=requester,
|
||||
data=WebhookCreate(
|
||||
project_id=project_id, event=f"task.{job_type.lower()}", url=webhook_url
|
||||
),
|
||||
)
|
||||
|
||||
return job, webhook_url
|
||||
|
||||
async def submit_media_probe(
|
||||
self, *, requester: User, request: MediaProbeRequest
|
||||
) -> TaskSubmitResponse:
|
||||
"""Submit media probe task."""
|
||||
job, webhook_url = await self._create_job(
|
||||
requester=requester,
|
||||
job_type="MEDIA_PROBE",
|
||||
project_id=request.project_id,
|
||||
input_data=request.model_dump(mode="json"),
|
||||
)
|
||||
media_probe_actor.send(job_id=str(job.id), file_key=request.file_key)
|
||||
return TaskSubmitResponse(
|
||||
job_id=job.id, webhook_url=webhook_url, status="PENDING"
|
||||
)
|
||||
|
||||
async def submit_silence_remove(
|
||||
self, *, requester: User, request: SilenceRemoveRequest
|
||||
) -> TaskSubmitResponse:
|
||||
"""Submit silence removal task."""
|
||||
job, webhook_url = await self._create_job(
|
||||
requester=requester,
|
||||
job_type="SILENCE_REMOVE",
|
||||
project_id=request.project_id,
|
||||
input_data=request.model_dump(mode="json"),
|
||||
)
|
||||
silence_remove_actor.send(
|
||||
job_id=str(job.id),
|
||||
file_key=request.file_key,
|
||||
out_folder=request.out_folder,
|
||||
min_silence_duration_ms=request.min_silence_duration_ms,
|
||||
silence_threshold_db=request.silence_threshold_db,
|
||||
padding_ms=request.padding_ms,
|
||||
)
|
||||
return TaskSubmitResponse(
|
||||
job_id=job.id, webhook_url=webhook_url, status="PENDING"
|
||||
)
|
||||
|
||||
async def submit_media_convert(
|
||||
self, *, requester: User, request: MediaConvertRequest
|
||||
) -> TaskSubmitResponse:
|
||||
"""Submit media conversion task."""
|
||||
job, webhook_url = await self._create_job(
|
||||
requester=requester,
|
||||
job_type="MEDIA_CONVERT",
|
||||
project_id=request.project_id,
|
||||
input_data=request.model_dump(mode="json"),
|
||||
)
|
||||
media_convert_actor.send(
|
||||
job_id=str(job.id),
|
||||
file_key=request.file_key,
|
||||
out_folder=request.out_folder,
|
||||
output_format=request.output_format,
|
||||
)
|
||||
return TaskSubmitResponse(
|
||||
job_id=job.id, webhook_url=webhook_url, status="PENDING"
|
||||
)
|
||||
|
||||
async def submit_transcription_generate(
|
||||
self, *, requester: User, request: TranscriptionGenerateRequest
|
||||
) -> TaskSubmitResponse:
|
||||
"""Submit transcription generation task."""
|
||||
job, webhook_url = await self._create_job(
|
||||
requester=requester,
|
||||
job_type="TRANSCRIPTION_GENERATE",
|
||||
project_id=request.project_id,
|
||||
input_data=request.model_dump(mode="json"),
|
||||
)
|
||||
transcription_generate_actor.send(
|
||||
job_id=str(job.id),
|
||||
file_key=request.file_key,
|
||||
engine=request.engine,
|
||||
language=request.language,
|
||||
model=request.model,
|
||||
)
|
||||
return TaskSubmitResponse(
|
||||
job_id=job.id, webhook_url=webhook_url, status="PENDING"
|
||||
)
|
||||
|
||||
async def submit_captions_generate(
|
||||
self, *, requester: User, request: CaptionsGenerateRequest
|
||||
) -> TaskSubmitResponse:
|
||||
"""Submit captions generation task."""
|
||||
transcription_repo = TranscriptionRepository(self._session)
|
||||
transcription = await transcription_repo.get_by_id(request.transcription_id)
|
||||
if transcription is None:
|
||||
raise ValueError(f"Transcription {request.transcription_id} not found")
|
||||
|
||||
job, webhook_url = await self._create_job(
|
||||
requester=requester,
|
||||
job_type="CAPTIONS_GENERATE",
|
||||
project_id=request.project_id,
|
||||
input_data=request.model_dump(mode="json"),
|
||||
)
|
||||
captions_generate_actor.send(
|
||||
job_id=str(job.id),
|
||||
video_s3_path=request.video_s3_path,
|
||||
folder=request.folder,
|
||||
transcription_json=transcription.document,
|
||||
)
|
||||
return TaskSubmitResponse(
|
||||
job_id=job.id, webhook_url=webhook_url, status="PENDING"
|
||||
)
|
||||
Reference in New Issue
Block a user