chore: checkpoint before reorg — artifact savers now return the updated Job, and project workspace state is synced after job webhooks and cancellations

This commit is contained in:
Daniil
2026-04-27 23:19:04 +03:00
parent 259d3da89f
commit b9030a863e
19 changed files with 2753 additions and 146 deletions
+53 -25
View File
@@ -36,6 +36,7 @@ from cpv3.modules.jobs.schemas import (
)
from cpv3.modules.media.repository import ArtifactRepository
from cpv3.modules.media.schemas import ArtifactMediaFileCreate
from cpv3.modules.project_workspaces.service import ProjectWorkspaceService
from cpv3.modules.tasks.schemas import (
CaptionsGenerateRequest,
FrameExtractRequest,
@@ -140,6 +141,7 @@ DRAMATIQ_BROKER_REF_SEPARATOR = ":"
# Subclasses RuntimeError so that generic `except RuntimeError` / `except Exception`
# handlers in worker code still catch it while allowing targeted handling by name.
class JobCancelledError(RuntimeError):
    """Raised when a job was cancelled before completion."""
# ---------------------------------------------------------------------------
# Dramatiq broker setup
# ---------------------------------------------------------------------------
@@ -808,10 +810,12 @@ def transcription_generate_actor(
)
)
elif engine == "salutespeech":
audio_stream = next(
(s for s in probe.streams if s.codec_type == "audio"), None
audio_stream = next((s for s in probe.streams if s.codec_type == "audio"), None)
sr = (
int(audio_stream.sample_rate)
if audio_stream and audio_stream.sample_rate
else 16000
)
sr = int(audio_stream.sample_rate) if audio_stream and audio_stream.sample_rate else 16000
document = _run_async(
transcribe_with_salute_speech(
storage,
@@ -1125,6 +1129,9 @@ class TaskService:
self._event_repo = JobEventRepository(session)
self._webhook_repo = WebhookRepository(session)
def _get_project_workspace_service(self) -> "ProjectWorkspaceService":
    """Return a ProjectWorkspaceService bound to this service's DB session.

    Built on demand (per call) rather than stored on the instance;
    NOTE(review): presumably cheap to construct — confirm before caching.
    """
    return ProjectWorkspaceService(self._session)
async def _update_job_broker_reference(self, job: Job, broker_reference: str) -> Job:
"""Persist the transport-specific broker reference after enqueueing."""
job.broker_id = broker_reference
@@ -1310,28 +1317,33 @@ class TaskService:
# Save artifacts BEFORE sending notifications so data exists when frontend refetches
if job.job_type == JOB_TYPE_TRANSCRIPTION_GENERATE and event.status == JOB_STATUS_DONE:
try:
await self._save_transcription_artifacts(job)
job = await self._save_transcription_artifacts(job)
except Exception:
logger.exception("Failed to save transcription artifacts for job %s", job_id)
if job.job_type == JOB_TYPE_MEDIA_CONVERT and event.status == JOB_STATUS_DONE:
try:
await self._save_convert_artifacts(job)
job = await self._save_convert_artifacts(job)
except Exception:
logger.exception("Failed to save convert artifacts for job %s", job_id)
if job.job_type == JOB_TYPE_SILENCE_APPLY and event.status == JOB_STATUS_DONE:
try:
await self._save_silence_apply_artifacts(job)
job = await self._save_silence_apply_artifacts(job)
except Exception:
logger.exception("Failed to save silence apply artifacts for job %s", job_id)
if job.job_type == JOB_TYPE_CAPTIONS_GENERATE and event.status == JOB_STATUS_DONE:
try:
await self._save_captions_artifacts(job)
job = await self._save_captions_artifacts(job)
except Exception:
logger.exception("Failed to save captions artifacts for job %s", job_id)
try:
await self._sync_project_workspace_after_webhook(job)
except Exception:
logger.exception("Failed to project workspace state for job %s", job_id)
# Push real-time notification via WebSocket (after artifacts are persisted)
if job.user_id is not None:
try:
@@ -1344,6 +1356,11 @@ class TaskService:
return job
async def _sync_project_workspace_after_webhook(self, job: Job) -> None:
    """Propagate the job's latest state to its project workspace, if any.

    A job with no ``project_id`` has no associated workspace, so the call
    is skipped entirely in that case.
    """
    if job.project_id is not None:
        workspace_service = self._get_project_workspace_service()
        await workspace_service.handle_job_update(job=job)
async def cancel_job(self, job: Job) -> Job:
"""Cancel a job, clean queued transport state and ignore late webhooks."""
if job.status in (JOB_STATUS_DONE, JOB_STATUS_FAILED, JOB_STATUS_CANCELLED):
@@ -1386,9 +1403,14 @@ class TaskService:
except Exception:
logger.exception("Failed to create cancellation notification for job %s", job.id)
try:
await self._sync_project_workspace_after_webhook(job)
except Exception:
logger.exception("Failed to project workspace state for cancelled job %s", job.id)
return job
async def _save_transcription_artifacts(self, job: Job) -> None:
async def _save_transcription_artifacts(self, job: Job) -> Job:
"""Create Transcription, ArtifactMediaFile and File records."""
input_data = job.input_data or {}
output_data = job.output_data or {}
@@ -1407,7 +1429,7 @@ class TaskService:
user = await user_repo.get_by_id(job.user_id) # type: ignore[arg-type]
if user is None:
logger.warning("User %s not found, skipping artifact save", job.user_id)
return
return job
# Find or create source File record
file_repo = FileRepository(self._session)
@@ -1472,7 +1494,7 @@ class TaskService:
# Create Transcription record
transcription_repo = TranscriptionRepository(self._session)
engine_db = ENGINE_MAP.get(engine_raw, "LOCAL_WHISPER")
await transcription_repo.create(
transcription = await transcription_repo.create(
data=TranscriptionCreate(
project_id=project_id,
source_file_id=source_file.id,
@@ -1483,9 +1505,16 @@ class TaskService:
),
)
logger.info("Saved transcription artifacts for job %s", job.id)
updated_output = dict(output_data)
updated_output["artifact_id"] = str(artifact.id)
updated_output["transcription_id"] = str(transcription.id)
updated_output["source_file_id"] = str(source_file.id)
job = await self._job_repo.update(job, JobUpdate(output_data=updated_output))
async def _save_convert_artifacts(self, job: Job) -> None:
logger.info("Saved transcription artifacts for job %s", job.id)
return job
async def _save_convert_artifacts(self, job: Job) -> Job:
"""Create File and ArtifactMediaFile records for converted MP4."""
input_data = job.input_data or {}
output_data = job.output_data or {}
@@ -1503,7 +1532,7 @@ class TaskService:
user = await user_repo.get_by_id(job.user_id) # type: ignore[arg-type]
if user is None:
logger.warning("User %s not found, skipping convert artifact save", job.user_id)
return
return job
# Derive output filename from source file
file_repo = FileRepository(self._session)
@@ -1542,11 +1571,12 @@ class TaskService:
updated_output = dict(output_data)
updated_output["file_id"] = str(converted_file.id)
await self._job_repo.update(job, JobUpdate(output_data=updated_output))
job = await self._job_repo.update(job, JobUpdate(output_data=updated_output))
logger.info("Saved convert artifacts for job %s", job.id)
return job
async def _save_silence_apply_artifacts(self, job: Job) -> None:
async def _save_silence_apply_artifacts(self, job: Job) -> Job:
"""Create File and ArtifactMediaFile records for silence-applied video."""
input_data = job.input_data or {}
output_data = job.output_data or {}
@@ -1562,10 +1592,8 @@ class TaskService:
user_repo = UserRepository(self._session)
user = await user_repo.get_by_id(job.user_id) # type: ignore[arg-type]
if user is None:
logger.warning(
"User %s not found, skipping silence apply artifact save", job.user_id
)
return
logger.warning("User %s not found, skipping silence apply artifact save", job.user_id)
return job
file_repo = FileRepository(self._session)
source_file = await file_repo.get_by_path(file_key)
@@ -1601,15 +1629,16 @@ class TaskService:
updated_output = dict(output_data)
updated_output["file_id"] = str(processed_file.id)
await self._job_repo.update(job, JobUpdate(output_data=updated_output))
job = await self._job_repo.update(job, JobUpdate(output_data=updated_output))
logger.info(
"Saved silence apply artifacts for job %s (file_id=%s)",
job.id,
processed_file.id,
)
return job
async def _save_captions_artifacts(self, job: Job) -> None:
async def _save_captions_artifacts(self, job: Job) -> Job:
"""Create File and ArtifactMediaFile records for captioned video."""
input_data = job.input_data or {}
output_data = job.output_data or {}
@@ -1626,7 +1655,7 @@ class TaskService:
user = await user_repo.get_by_id(job.user_id) # type: ignore[arg-type]
if user is None:
logger.warning("User %s not found, skipping captions artifact save", job.user_id)
return
return job
# Get file size from S3
storage = _get_storage_service()
@@ -1670,11 +1699,10 @@ class TaskService:
# Update job output_data with file_id so frontend can reference it
updated_output = dict(output_data)
updated_output["file_id"] = str(captioned_file.id)
job = await self._job_repo.update(
job, JobUpdate(output_data=updated_output)
)
job = await self._job_repo.update(job, JobUpdate(output_data=updated_output))
logger.info("Saved captions artifacts for job %s (file_id=%s)", job.id, captioned_file.id)
return job
async def submit_media_probe(
self, *, requester: User, request: MediaProbeRequest