Files
main_backend/cpv3/modules/project_workspaces/schemas.py
T
2026-04-27 23:19:04 +03:00

472 lines
15 KiB
Python

from __future__ import annotations
from enum import StrEnum
from typing import Annotated, Literal, get_args
from uuid import UUID
from pydantic import AliasChoices, Field, model_validator
from cpv3.common.schemas import Schema
from cpv3.modules.jobs.schemas import JobTypeEnum
# Current version of the persisted workspace-state schema; stored in
# ProjectWorkspaceState.version so stale payloads can be detected/migrated.
WORKFLOW_VERSION = 1
# Set of accepted job-type values for an active job.
# NOTE(review): assumes JobTypeEnum is a typing.Literal — get_args() on an
# Enum class would return an empty tuple; confirm in cpv3.modules.jobs.schemas.
VALID_JOB_TYPES = set(get_args(JobTypeEnum))
# Frontend wizard screen identifiers, one per step of the workflow UI.
WorkspaceScreenEnum = Literal[
    "upload",
    "verify",
    "silence-settings",
    "processing",
    "fragments",
    "silence-apply-processing",
    "transcription-settings",
    "transcription-processing",
    "subtitle-revision",
    "caption-settings",
    "caption-processing",
    "caption-result",
]
class WorkflowPhase(StrEnum):
    """High-level stage of the project workflow, listed in order of progression."""

    INGEST = "INGEST"
    VERIFY = "VERIFY"
    SILENCE = "SILENCE"
    TRANSCRIPTION = "TRANSCRIPTION"
    CAPTIONS = "CAPTIONS"
    DONE = "DONE"
class SilenceWorkflowStatus(StrEnum):
    """Lifecycle of the silence-removal stage (configure -> detect -> review -> apply)."""

    IDLE = "IDLE"
    CONFIGURED = "CONFIGURED"  # settings chosen, detection not yet started
    DETECTING = "DETECTING"
    REVIEWING = "REVIEWING"
    APPLYING = "APPLYING"
    COMPLETED = "COMPLETED"
    SKIPPED = "SKIPPED"  # user chose not to apply any cuts


class TranscriptionWorkflowStatus(StrEnum):
    """Lifecycle of the transcription stage."""

    IDLE = "IDLE"
    PROCESSING = "PROCESSING"
    REVIEWING = "REVIEWING"
    COMPLETED = "COMPLETED"


class CaptionsWorkflowStatus(StrEnum):
    """Lifecycle of the caption-rendering stage."""

    IDLE = "IDLE"
    CONFIGURED = "CONFIGURED"
    PROCESSING = "PROCESSING"
    COMPLETED = "COMPLETED"
class ActiveJobState(Schema):
    """The single background job currently running for this workspace, if any."""

    job_id: UUID
    job_type: JobTypeEnum


class WorkspaceViewState(Schema):
    """UI-side file bookkeeping: files in play and the currently selected one."""

    used_file_ids: list[UUID] = Field(default_factory=list)
    selected_file_id: UUID | None = None


class SilenceSettingsState(Schema):
    """Silence-detection parameters.

    Defaults intentionally match the fallbacks used by
    SetSilenceSettingsAction's before-validator.
    """

    min_silence_duration_ms: int = 200
    silence_threshold_db: int = 16  # presumably dB below reference level — TODO confirm sign convention
    padding_ms: int = 100


class CutRegionState(Schema):
    """A time span (in milliseconds) marked for cutting."""

    # NOTE(review): inclusivity of end_ms is not established here; confirm
    # against the silence-apply job's contract.
    start_ms: int
    end_ms: int
class SilenceState(Schema):
    """Persistent state of the silence-removal stage.

    Two fields accept legacy payload keys via AliasChoices on input but
    always serialize under the new names.
    """

    status: SilenceWorkflowStatus = SilenceWorkflowStatus.IDLE
    settings: SilenceSettingsState = Field(default_factory=SilenceSettingsState)
    detect_job_id: UUID | None = None
    detected_segments: list[CutRegionState] = Field(default_factory=list)
    # Accepts legacy key "cut_regions" on input.
    reviewed_cuts: list[CutRegionState] = Field(
        default_factory=list,
        validation_alias=AliasChoices("reviewed_cuts", "cut_regions"),
        serialization_alias="reviewed_cuts",
    )
    duration_ms: int | None = None
    # Accepts legacy key "output_file_id" on input.
    applied_output_file_id: UUID | None = Field(
        default=None,
        validation_alias=AliasChoices("applied_output_file_id", "output_file_id"),
        serialization_alias="applied_output_file_id",
    )
class TranscriptionRequestState(Schema):
    """User-selected transcription parameters."""

    engine: Literal["whisper", "google", "salutespeech"] = "whisper"
    language: str | None = None  # presumably None means auto-detect — TODO confirm with the transcription service
    model: str = "base"  # engine-specific model name ("base" is a whisper tier)


class TranscriptionState(Schema):
    """Persistent state of the transcription stage."""

    status: TranscriptionWorkflowStatus = TranscriptionWorkflowStatus.IDLE
    request: TranscriptionRequestState = Field(default_factory=TranscriptionRequestState)
    job_id: UUID | None = None
    artifact_id: UUID | None = None
    transcription_id: UUID | None = None
    reviewed: bool = False  # set once the user confirms the subtitle revision
class CaptionsState(Schema):
    """Persistent state of the caption-rendering stage."""

    status: CaptionsWorkflowStatus = CaptionsWorkflowStatus.IDLE
    preset_id: UUID | None = None
    style_config: dict | None = None
    # Accepts legacy key "job_id" on input; serializes as "render_job_id".
    render_job_id: UUID | None = Field(
        default=None,
        validation_alias=AliasChoices("render_job_id", "job_id"),
        serialization_alias="render_job_id",
    )
    output_file_id: UUID | None = None
class ProjectWorkspaceState(Schema):
    """The persisted workspace state: overall phase plus one sub-state per stage."""

    version: int = WORKFLOW_VERSION  # schema version, for future migrations
    phase: WorkflowPhase = WorkflowPhase.INGEST
    active_job: ActiveJobState | None = None
    source_file_id: UUID | None = None
    workspace_view: WorkspaceViewState = Field(default_factory=WorkspaceViewState)
    silence: SilenceState = Field(default_factory=SilenceState)
    transcription: TranscriptionState = Field(default_factory=TranscriptionState)
    captions: CaptionsState = Field(default_factory=CaptionsState)
class ProjectWorkspaceRead(Schema):
    """API read model for a project's workspace.

    Mirrors ProjectWorkspaceState plus identity/concurrency fields and
    ``current_screen``, which is not part of the stored state — presumably
    derived by the service layer; verify at the call site.
    """

    project_id: UUID
    revision: int  # optimistic-concurrency counter; actions must echo it back
    version: int
    phase: WorkflowPhase
    current_screen: WorkspaceScreenEnum
    active_job: ActiveJobState | None
    source_file_id: UUID | None
    workspace_view: WorkspaceViewState
    silence: SilenceState
    transcription: TranscriptionState
    captions: CaptionsState
class WorkflowActionBase(Schema):
    """Common fields for all workflow actions.

    ``revision`` carries the client's last-seen workspace revision —
    presumably used for optimistic-concurrency checks; confirm in the service.
    """

    type: str
    revision: int


class SetSourceFileAction(WorkflowActionBase):
    """Choose the primary source media file."""

    type: Literal["SET_SOURCE_FILE"]
    # Accepts legacy key "source_file_id" on input.
    file_id: UUID = Field(
        validation_alias=AliasChoices("file_id", "source_file_id"),
        serialization_alias="file_id",
    )


class ResetSourceFileAction(WorkflowActionBase):
    """Clear the chosen source file."""

    type: Literal["RESET_SOURCE_FILE"]


class StartMediaConvertAction(WorkflowActionBase):
    """Kick off conversion of the source media file."""

    type: Literal["START_MEDIA_CONVERT"]
    output_format: str = "mp4"
    out_folder: str = "output_files"


class ConfirmVerifyAction(WorkflowActionBase):
    """Confirm that the uploaded/converted media is correct."""

    type: Literal["CONFIRM_VERIFY"]
class SetSilenceSettingsAction(WorkflowActionBase):
    """Store silence-detection settings.

    Accepts either a nested ``settings`` object or flat top-level keys
    (legacy payload shape); the before-validator wraps flat keys into
    ``settings``.
    """

    type: Literal["SET_SILENCE_SETTINGS"]
    settings: SilenceSettingsState = Field(default_factory=SilenceSettingsState)

    @model_validator(mode="before")
    @classmethod
    def normalize_settings(cls, data: object) -> object:
        """Wrap legacy flat keys into a ``settings`` dict; no-op otherwise."""
        # Leave non-dict payloads and payloads that already nest "settings"
        # untouched for regular pydantic validation.
        if not isinstance(data, dict) or "settings" in data:
            return data
        # Fallback values intentionally mirror SilenceSettingsState's defaults.
        return {
            **data,
            "settings": {
                "min_silence_duration_ms": data.get("min_silence_duration_ms", 200),
                "silence_threshold_db": data.get("silence_threshold_db", 16),
                "padding_ms": data.get("padding_ms", 100),
            },
        }
class StartSilenceDetectAction(WorkflowActionBase):
    """Launch the silence-detection job."""

    type: Literal["START_SILENCE_DETECT"]


class SetSilenceCutsAction(WorkflowActionBase):
    """Persist the user-reviewed list of cut regions."""

    type: Literal["SET_SILENCE_CUTS"]
    # Accepts legacy keys "reviewed_cuts" / "cut_regions" on input.
    cuts: list[CutRegionState] = Field(
        validation_alias=AliasChoices("cuts", "reviewed_cuts", "cut_regions"),
    )


class SkipSilenceApplyAction(WorkflowActionBase):
    """Skip applying silence cuts and proceed to the next stage."""

    type: Literal["SKIP_SILENCE_APPLY"]


class StartSilenceApplyAction(WorkflowActionBase):
    """Launch the job that applies cuts to the media."""

    type: Literal["START_SILENCE_APPLY"]
    # presumably None means "use the previously stored reviewed cuts" — TODO confirm in the service
    cuts: list[CutRegionState] | None = None
    out_folder: str = "output_files"
    output_name: str | None = None


class ReopenSilenceReviewAction(WorkflowActionBase):
    """Return to the silence-review (fragments) screen."""

    type: Literal["REOPEN_SILENCE_REVIEW"]
class StartTranscriptionAction(WorkflowActionBase):
    """Start a transcription job.

    Accepts either flat fields (``engine`` / ``language`` / ``model``) or a
    nested ``request`` object. After validation both representations are
    kept in sync; a provided ``request`` takes precedence over the flat
    fields.
    """

    type: Literal["START_TRANSCRIPTION"]
    engine: Literal["whisper", "google", "salutespeech"] = "whisper"
    language: str | None = None
    model: str = "base"
    request: TranscriptionRequestState | None = None

    @model_validator(mode="after")
    def normalize_request(self) -> "StartTranscriptionAction":
        """Synchronize the flat fields with the nested ``request``.

        Bug fix: the original returned immediately after building
        ``request`` from the flat fields, which made the mirror branch
        (flat fields <- request) unreachable dead code.
        """
        if self.request is None:
            # No nested request supplied: build one from the flat fields.
            self.request = TranscriptionRequestState(
                engine=self.engine,
                language=self.language,
                model=self.model,
            )
        else:
            # Nested request wins: mirror it back onto the flat fields so
            # consumers can read either representation.
            self.engine = self.request.engine
            self.language = self.request.language
            self.model = self.request.model
        return self
class ReopenTranscriptionConfigAction(WorkflowActionBase):
    """Return to the transcription-settings screen."""

    type: Literal["REOPEN_TRANSCRIPTION_CONFIG"]


class MarkTranscriptionReviewedAction(WorkflowActionBase):
    """Mark the transcription as reviewed (subtitle revision done)."""

    type: Literal["MARK_TRANSCRIPTION_REVIEWED"]


class SelectCaptionPresetAction(WorkflowActionBase):
    """Choose a caption preset and/or an explicit style configuration."""

    type: Literal["SELECT_CAPTION_PRESET"]
    preset_id: UUID | None = None
    style_config: dict | None = None


class StartCaptionRenderAction(WorkflowActionBase):
    """Launch the caption-render job."""

    type: Literal["START_CAPTION_RENDER"]
    folder: str = "output_files"


class ReopenCaptionConfigAction(WorkflowActionBase):
    """Return to the caption-settings screen."""

    type: Literal["REOPEN_CAPTION_CONFIG"]
class SetWorkspaceViewAction(WorkflowActionBase):
    """Replace the workspace view (file list and selection).

    Accepts either a nested ``workspace_view`` object or flat top-level
    keys (legacy payload shape); the before-validator wraps flat keys.
    """

    type: Literal["SET_WORKSPACE_VIEW"]
    workspace_view: WorkspaceViewState

    @model_validator(mode="before")
    @classmethod
    def normalize_workspace_view(cls, data: object) -> object:
        """Wrap legacy flat keys into a ``workspace_view`` dict; no-op otherwise."""
        # Leave non-dict payloads and payloads that already nest
        # "workspace_view" untouched for regular pydantic validation.
        if not isinstance(data, dict) or "workspace_view" in data:
            return data
        return {
            **data,
            "workspace_view": {
                "used_file_ids": data.get("used_file_ids", []),
                "selected_file_id": data.get("selected_file_id"),
            },
        }
# Discriminated union of every workflow action; pydantic dispatches on the
# "type" literal field of each member.
WorkflowActionRequest = Annotated[
    (
        SetSourceFileAction
        | ResetSourceFileAction
        | StartMediaConvertAction
        | ConfirmVerifyAction
        | SetSilenceSettingsAction
        | StartSilenceDetectAction
        | SetSilenceCutsAction
        | SkipSilenceApplyAction
        | StartSilenceApplyAction
        | ReopenSilenceReviewAction
        | StartTranscriptionAction
        | ReopenTranscriptionConfigAction
        | MarkTranscriptionReviewedAction
        | SelectCaptionPresetAction
        | StartCaptionRenderAction
        | ReopenCaptionConfigAction
        | SetWorkspaceViewAction
    ),
    Field(discriminator="type"),
]
def build_default_workspace_state() -> ProjectWorkspaceState:
    """Return a fresh workspace state with every stage at its default."""
    default_state = ProjectWorkspaceState()
    return default_state
def build_workspace_state_from_legacy(
    legacy_workspace_state: dict | None,
) -> ProjectWorkspaceState:
    """Best-effort migration of a legacy ``wizard`` workspace dict to the new model.

    Missing or malformed pieces of the legacy payload are silently ignored
    and the corresponding defaults are kept.

    NOTE: statement order below is significant — later sections deliberately
    override phase/status decisions made by earlier ones:
    silence-settings presence < legacy step map < active-job inference
    < artifact-based inference.
    """
    state = build_default_workspace_state()
    if not isinstance(legacy_workspace_state, dict):
        return state
    wizard = legacy_workspace_state.get("wizard")
    if not isinstance(wizard, dict):
        wizard = {}
    # --- source file and used-file list --------------------------------
    source_file_id = _parse_uuid(wizard.get("primary_file_id"))
    if source_file_id is not None:
        state.source_file_id = source_file_id
    used_file_ids: list[UUID] = []
    used_files = legacy_workspace_state.get("used_files")
    if isinstance(used_files, list):
        for item in used_files:
            if not isinstance(item, dict):
                continue
            file_id = _parse_uuid(item.get("id"))
            # Deduplicate while preserving legacy ordering.
            if file_id is not None and file_id not in used_file_ids:
                used_file_ids.append(file_id)
    # The source file always leads the list.
    if source_file_id is not None and source_file_id not in used_file_ids:
        used_file_ids.insert(0, source_file_id)
    state.workspace_view.used_file_ids = used_file_ids
    if source_file_id is not None and source_file_id in used_file_ids:
        state.workspace_view.selected_file_id = source_file_id
    # --- active job -----------------------------------------------------
    active_job_id = _parse_uuid(wizard.get("active_job_id"))
    active_job_type = wizard.get("active_job_type")
    # Only adopt the job if its type is one we still recognise.
    if active_job_id is not None and active_job_type in VALID_JOB_TYPES:
        state.active_job = ActiveJobState(
            job_id=active_job_id,
            job_type=active_job_type,
        )
    # --- per-stage artifacts from the legacy wizard ---------------------
    silence_job_id = _parse_uuid(wizard.get("silence_job_id"))
    if silence_job_id is not None:
        state.silence.detect_job_id = silence_job_id
    transcription_artifact_id = _parse_uuid(wizard.get("transcription_artifact_id"))
    if transcription_artifact_id is not None:
        state.transcription.artifact_id = transcription_artifact_id
    caption_preset_id = _parse_uuid(wizard.get("caption_preset_id"))
    if caption_preset_id is not None:
        state.captions.preset_id = caption_preset_id
    caption_style_config = wizard.get("caption_style_config")
    if isinstance(caption_style_config, dict):
        state.captions.style_config = caption_style_config
    captioned_video_file_id = _parse_uuid(wizard.get("captioned_video_file_id"))
    if captioned_video_file_id is not None:
        state.captions.output_file_id = captioned_video_file_id
    silence_settings = wizard.get("silence_settings")
    if isinstance(silence_settings, dict):
        # NOTE(review): model_validate may raise on bad legacy values —
        # confirm upstream guarantees, or this migration can fail mid-way.
        state.silence.settings = SilenceSettingsState.model_validate(silence_settings)
        state.silence.status = SilenceWorkflowStatus.CONFIGURED
    # --- phase/status derived from the legacy wizard step ----------------
    current_step = wizard.get("current_step")
    step_phase_map = {
        "upload": WorkflowPhase.INGEST,
        "verify": WorkflowPhase.VERIFY,
        "silence-settings": WorkflowPhase.SILENCE,
        "processing": WorkflowPhase.SILENCE,
        "fragments": WorkflowPhase.SILENCE,
        "silence-apply-processing": WorkflowPhase.SILENCE,
        "transcription-settings": WorkflowPhase.TRANSCRIPTION,
        "transcription-processing": WorkflowPhase.TRANSCRIPTION,
        "subtitle-revision": WorkflowPhase.TRANSCRIPTION,
        "caption-settings": WorkflowPhase.CAPTIONS,
        "caption-processing": WorkflowPhase.CAPTIONS,
        "caption-result": WorkflowPhase.DONE,
    }
    if current_step in step_phase_map:
        state.phase = step_phase_map[current_step]
        # Refine the per-stage status for steps that imply one.
        if current_step == "processing":
            state.silence.status = SilenceWorkflowStatus.DETECTING
        elif current_step == "fragments":
            state.silence.status = SilenceWorkflowStatus.REVIEWING
        elif current_step == "silence-apply-processing":
            state.silence.status = SilenceWorkflowStatus.APPLYING
        elif current_step == "transcription-processing":
            state.transcription.status = TranscriptionWorkflowStatus.PROCESSING
        elif current_step == "subtitle-revision":
            state.transcription.status = TranscriptionWorkflowStatus.REVIEWING
        elif current_step == "caption-settings":
            state.captions.status = CaptionsWorkflowStatus.CONFIGURED
        elif current_step == "caption-processing":
            state.captions.status = CaptionsWorkflowStatus.PROCESSING
        elif current_step == "caption-result":
            state.captions.status = CaptionsWorkflowStatus.COMPLETED
    # --- overrides from a still-running job ------------------------------
    # A running job is the strongest signal for the current phase; it
    # overrides whatever the legacy step said.
    if state.active_job is not None:
        if state.active_job.job_type == "MEDIA_CONVERT":
            state.phase = WorkflowPhase.VERIFY
        elif state.active_job.job_type == "SILENCE_DETECT":
            state.phase = WorkflowPhase.SILENCE
            state.silence.status = SilenceWorkflowStatus.DETECTING
            state.silence.detect_job_id = state.active_job.job_id
        elif state.active_job.job_type == "SILENCE_APPLY":
            state.phase = WorkflowPhase.SILENCE
            state.silence.status = SilenceWorkflowStatus.APPLYING
        elif state.active_job.job_type == "TRANSCRIPTION_GENERATE":
            state.phase = WorkflowPhase.TRANSCRIPTION
            state.transcription.status = TranscriptionWorkflowStatus.PROCESSING
            state.transcription.job_id = state.active_job.job_id
        elif state.active_job.job_type == "CAPTIONS_GENERATE":
            state.phase = WorkflowPhase.CAPTIONS
            state.captions.status = CaptionsWorkflowStatus.PROCESSING
            state.captions.render_job_id = state.active_job.job_id
    # --- final overrides from produced artifacts -------------------------
    # Concrete outputs trump everything: pick the furthest stage a produced
    # artifact proves was reached. Status checks guard against downgrading
    # a stage already marked in-flight above.
    if captioned_video_file_id is not None:
        state.phase = WorkflowPhase.DONE
        state.captions.status = CaptionsWorkflowStatus.COMPLETED
    elif transcription_artifact_id is not None and (
        state.transcription.status == TranscriptionWorkflowStatus.IDLE
    ):
        state.phase = WorkflowPhase.TRANSCRIPTION
        state.transcription.status = TranscriptionWorkflowStatus.REVIEWING
    elif silence_job_id is not None and state.silence.status == SilenceWorkflowStatus.IDLE:
        state.phase = WorkflowPhase.SILENCE
        state.silence.status = SilenceWorkflowStatus.REVIEWING
    elif source_file_id is not None and state.phase == WorkflowPhase.INGEST:
        state.phase = WorkflowPhase.VERIFY
    return state
def _parse_uuid(value: object) -> UUID | None:
if value is None:
return None
try:
return UUID(str(value))
except (TypeError, ValueError):
return None
# Backward-compatible aliases used by existing tests and frontend hand-written
# types. Do not remove without migrating those consumers first.
TaskWorkflowActiveJob = ActiveJobState
SilenceSettingsPayload = SilenceSettingsState
WorkflowSilenceState = SilenceState
WorkflowTranscriptionRequest = TranscriptionRequestState