From 27e03cc56cd48dd43df2bad6fea4f54effdaf7c0 Mon Sep 17 00:00:00 2001 From: Daniil Date: Sun, 22 Mar 2026 22:42:35 +0300 Subject: [PATCH] feat: rename Product Strategist to Product Lead, add lead coordination + dual-mode Co-Authored-By: Claude Opus 4.6 (1M context) --- .../.gitkeep | 0 .claude/agents/debug-specialist.md | 23 +- .claude/agents/devops-engineer.md | 23 +- .claude/agents/ml-ai-engineer.md | 22 +- .claude/agents/orchestrator.md | 47 +- ...{product-strategist.md => product-lead.md} | 56 +- .claude/agents/technical-writer.md | 20 +- .claude/agents/ui-ux-designer.md | 20 +- .claude/rules/agent-pipeline.md | 27 + .gitignore | 3 + CLAUDE.md | 24 + docs/bug-reports/2026-03-22_bugreport.html | 682 +++++++++ docs/consults/video-features-roadmap_v1.md | 416 +++++ .../video-features-roadmap_v1_ru.html | 984 ++++++++++++ docs/consults/video-features-roadmap_v1_ru.md | 432 ++++++ docs/consults/video-features-roadmap_v2.md | 515 +++++++ .../video-features-roadmap_v2_ru.html | 1341 +++++++++++++++++ docs/consults/video-features-roadmap_v2_ru.md | 537 +++++++ .../2026-03-21-advanced-remotion-templates.md | 918 +++++++++++ ...3-21-advanced-remotion-templates-design.md | 229 +++ 20 files changed, 6305 insertions(+), 14 deletions(-) rename .claude/agents-memory/{product-strategist => product-lead}/.gitkeep (100%) rename .claude/agents/{product-strategist.md => product-lead.md} (91%) create mode 100644 .claude/rules/agent-pipeline.md create mode 100644 docs/bug-reports/2026-03-22_bugreport.html create mode 100644 docs/consults/video-features-roadmap_v1.md create mode 100644 docs/consults/video-features-roadmap_v1_ru.html create mode 100644 docs/consults/video-features-roadmap_v1_ru.md create mode 100644 docs/consults/video-features-roadmap_v2.md create mode 100644 docs/consults/video-features-roadmap_v2_ru.html create mode 100644 docs/consults/video-features-roadmap_v2_ru.md create mode 100644 docs/superpowers/plans/2026-03-21-advanced-remotion-templates.md create mode 
100644 docs/superpowers/specs/2026-03-21-advanced-remotion-templates-design.md diff --git a/.claude/agents-memory/product-strategist/.gitkeep b/.claude/agents-memory/product-lead/.gitkeep similarity index 100% rename from .claude/agents-memory/product-strategist/.gitkeep rename to .claude/agents-memory/product-lead/.gitkeep diff --git a/.claude/agents/debug-specialist.md b/.claude/agents/debug-specialist.md index 1c1097a..8ca48eb 100644 --- a/.claude/agents/debug-specialist.md +++ b/.claude/agents/debug-specialist.md @@ -1,10 +1,9 @@ --- name: debug-specialist description: Senior Debugging Engineer — systematic root cause analysis, cross-service debugging, hypothesis-driven investigation, reproduction strategies. -tools: Read, Grep, Glob, Bash, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs, mcp__claude-in-chrome__tabs_context_mcp, mcp__claude-in-chrome__tabs_create_mcp, mcp__claude-in-chrome__navigate, mcp__claude-in-chrome__computer, mcp__claude-in-chrome__read_page, mcp__claude-in-chrome__find, mcp__claude-in-chrome__form_input, mcp__claude-in-chrome__get_page_text, mcp__claude-in-chrome__javascript_tool, mcp__claude-in-chrome__read_console_messages, mcp__claude-in-chrome__read_network_requests, mcp__claude-in-chrome__resize_window, mcp__claude-in-chrome__gif_creator, mcp__claude-in-chrome__upload_image, mcp__claude-in-chrome__shortcuts_execute, mcp__claude-in-chrome__shortcuts_list, mcp__claude-in-chrome__switch_browser, mcp__claude-in-chrome__update_plan +tools: Read, Grep, Glob, Bash, Agent, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs, mcp__claude-in-chrome__tabs_context_mcp, mcp__claude-in-chrome__tabs_create_mcp, mcp__claude-in-chrome__navigate, mcp__claude-in-chrome__computer, mcp__claude-in-chrome__read_page, mcp__claude-in-chrome__find, mcp__claude-in-chrome__form_input, mcp__claude-in-chrome__get_page_text, mcp__claude-in-chrome__javascript_tool, 
mcp__claude-in-chrome__read_console_messages, mcp__claude-in-chrome__read_network_requests, mcp__claude-in-chrome__resize_window, mcp__claude-in-chrome__gif_creator, mcp__claude-in-chrome__upload_image, mcp__claude-in-chrome__shortcuts_execute, mcp__claude-in-chrome__shortcuts_list, mcp__claude-in-chrome__switch_browser, mcp__claude-in-chrome__update_plan, mcp__redis__client_list, mcp__redis__create_vector_index_hash, mcp__redis__dbsize, mcp__redis__delete, mcp__redis__expire, mcp__redis__get, mcp__redis__get_index_info, mcp__redis__get_indexed_keys_number, mcp__redis__get_indexes, mcp__redis__get_vector_from_hash, mcp__redis__hdel, mcp__redis__hexists, mcp__redis__hget, mcp__redis__hgetall, mcp__redis__hset, mcp__redis__hybrid_search, mcp__redis__info, mcp__redis__json_del, mcp__redis__json_get, mcp__redis__json_set, mcp__redis__llen, mcp__redis__lpop, mcp__redis__lpush, mcp__redis__lrange, mcp__redis__lrem, mcp__redis__publish, mcp__redis__rename, mcp__redis__rpop, mcp__redis__rpush, mcp__redis__sadd, mcp__redis__scan_all_keys, mcp__redis__scan_keys, mcp__redis__search_redis_documents, mcp__redis__set, mcp__redis__set_vector_in_hash, mcp__redis__smembers, mcp__redis__srem, mcp__redis__subscribe, mcp__redis__type, mcp__redis__unsubscribe, mcp__redis__vector_search_hash, mcp__redis__xadd, mcp__redis__xdel, mcp__redis__xrange, mcp__redis__zadd, mcp__redis__zrange, mcp__redis__zrem model: opus --- - # First Step @@ -506,6 +505,26 @@ Common handoff patterns for Debug Specialist: If you have no handoffs needed, omit the Handoff Requests section entirely. +## Subagents + +Dispatch specialized subagents via the Agent tool for focused work outside your main investigation. 
+ +| Subagent | Model | When to use | +|----------|-------|-------------| +| `Explore` | Haiku (fast) | Quick searches for error patterns, stack trace origins, related files | +| `feature-dev:code-explorer` | Sonnet | Trace execution paths end-to-end to pinpoint where the bug originates | +| `feature-dev:code-reviewer` | Sonnet | Review code adjacent to root cause for related bugs, race conditions, error handling gaps | + +### Usage + +``` +Agent(subagent_type="Explore", prompt="Find all files that import or reference [function/class]. Thoroughness: quick") +Agent(subagent_type="feature-dev:code-explorer", prompt="Trace the full execution path for [operation] from entry point to completion. Map every error handling branch and state change.") +Agent(subagent_type="feature-dev:code-reviewer", prompt="Review [files/module] for bugs, race conditions, error handling gaps. Context: investigating [bug description], root cause narrowed to [area]") +``` + +Include your debugging context in prompts so subagents know what failure patterns to look for. + ## Quality Standard Your output must be: diff --git a/.claude/agents/devops-engineer.md b/.claude/agents/devops-engineer.md index 2b3d4d2..4c6c728 100644 --- a/.claude/agents/devops-engineer.md +++ b/.claude/agents/devops-engineer.md @@ -1,10 +1,9 @@ --- name: devops-engineer description: Senior Platform Engineer — CI/CD, Docker, Kubernetes, infrastructure as code, monitoring, deployment strategies. 
-tools: Read, Grep, Glob, Bash, Edit, Write, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs +tools: Read, Grep, Glob, Bash, Edit, Write, Agent, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs, mcp__docker__list_containers, mcp__docker__create_container, mcp__docker__run_container, mcp__docker__start_container, mcp__docker__stop_container, mcp__docker__remove_container, mcp__docker__recreate_container, mcp__docker__fetch_container_logs, mcp__docker__list_images, mcp__docker__pull_image, mcp__docker__push_image, mcp__docker__build_image, mcp__docker__remove_image, mcp__docker__list_networks, mcp__docker__create_network, mcp__docker__remove_network, mcp__docker__list_volumes, mcp__docker__create_volume, mcp__docker__remove_volume model: opus --- - # First Step @@ -591,6 +590,26 @@ When you need another agent's expertise, include this in your output: If you have no handoffs, omit the Handoff Requests section entirely. +## Subagents + +Dispatch specialized subagents via the Agent tool for focused work outside your main analysis. + +| Subagent | Model | When to use | +|----------|-------|-------------| +| `Explore` | Haiku (fast) | Find Docker/CI/config files, environment variable usage, port mappings | +| `feature-dev:code-explorer` | Sonnet | Trace service dependencies, build pipeline, container startup sequences | +| `feature-dev:code-reviewer` | Sonnet | Review Dockerfiles, compose configs, CI files for misconfigurations, security issues | + +### Usage + +``` +Agent(subagent_type="Explore", prompt="Find all Dockerfiles, docker-compose files, and CI config files in the monorepo. Thoroughness: medium") +Agent(subagent_type="feature-dev:code-explorer", prompt="Trace how the [service] container starts up — from Dockerfile through entrypoint to the running application. 
Map environment variables, volumes, and network dependencies.") +Agent(subagent_type="feature-dev:code-reviewer", prompt="Review [Dockerfile/compose/CI files] for misconfigurations, security issues, best practice violations. Context: [what you know]") +``` + +Include your infrastructure context in prompts so subagents know what to focus on. + ## Quality Standard Your output must be: diff --git a/.claude/agents/ml-ai-engineer.md b/.claude/agents/ml-ai-engineer.md index d87a81b..6eca6bb 100644 --- a/.claude/agents/ml-ai-engineer.md +++ b/.claude/agents/ml-ai-engineer.md @@ -1,7 +1,7 @@ --- name: ml-ai-engineer description: Senior ML Engineer — speech-to-text models, transcription optimization, NLP, model deployment, cost/quality trade-offs. -tools: Read, Grep, Glob, Bash, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs +tools: Read, Grep, Glob, Bash, Agent, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs model: opus --- @@ -541,6 +541,26 @@ Common handoff patterns for ML/AI Engineer: If you have no handoffs, omit the Handoff Requests section entirely. +## Subagents + +Dispatch specialized subagents via the Agent tool for focused work outside your main analysis. + +| Subagent | Model | When to use | +|----------|-------|-------------| +| `Explore` | Haiku (fast) | Find model configs, transcription pipeline code, engine integrations | +| `feature-dev:code-explorer` | Sonnet | Trace ML pipeline from audio input through model inference to transcription output | +| `feature-dev:code-architect` | Sonnet | Design architecture for new engine integrations or pipeline changes | + +### Usage + +``` +Agent(subagent_type="Explore", prompt="Find all transcription-related code: engine configs, model definitions, Dramatiq actors, and audio processing. 
Thoroughness: very thorough") +Agent(subagent_type="feature-dev:code-explorer", prompt="Trace the full transcription pipeline from file upload through engine selection, model inference, to Document output. Map all configuration points and error handlers.") +Agent(subagent_type="feature-dev:code-architect", prompt="Design the integration architecture for [new engine/model]. Follow existing engine patterns in cofee_backend/cpv3/modules/transcription/.") +``` + +Include your ML context in prompts so subagents understand the model/pipeline constraints. + ## Quality Standard Your output must be: diff --git a/.claude/agents/orchestrator.md b/.claude/agents/orchestrator.md index 28893a9..b892122 100644 --- a/.claude/agents/orchestrator.md +++ b/.claude/agents/orchestrator.md @@ -1,7 +1,7 @@ --- name: orchestrator description: Senior Tech Lead — decomposes tasks, selects specialist agents, packages context, manages handoff chains. Invoke for any non-trivial task. -tools: Read, Grep, Glob, Bash, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs +tools: Read, Grep, Glob, Bash, Agent, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs model: opus --- @@ -150,6 +150,33 @@ For every task, you reason from first principles: - No task-type templates — "a frontend feature always needs Frontend Architect + UI/UX Designer + Frontend QA" is WRONG. Maybe this feature is a one-line config change. Reason about the actual task. 
- Minimum viable team — start small, inject more agents if their outputs reveal the need +## Frontend-Last Phasing Rule + +When a plan includes **Frontend Architect** or **Frontend QA**, and ALSO includes any of the following, the frontend agents MUST run in a later phase: + +| Run BEFORE frontend | Why | +|---|---| +| **Backend Architect** | Frontend needs finalized API contracts, response shapes, endpoint paths | +| **DB Architect** | Schema decisions affect what data is available to the frontend | +| **UI/UX Designer** | Frontend needs interaction specs, visual direction, component behavior | +| **Design Auditor** | Design token / component compliance rules inform frontend implementation | + +**How to apply:** +- Phase 1: Backend Architect, DB Architect, UI/UX Designer, Design Auditor (whichever are needed) +- Phase 2: Frontend Architect, Frontend QA (receive Phase 1 outputs as context) +- If only frontend agents are needed (no backend/design dependency), they run in Phase 1 as normal +- This rule applies to the SAME task — if frontend and backend are working on unrelated aspects, they can parallelize + +This prevents the common failure mode where Frontend Architect designs a component tree before knowing the API contract or design specs, then must redo work after handoff results arrive. + +**Context injection into frontend prompts:** When dispatching frontend agents in Phase 2, include relevant outputs from Phase 1 agents in their prompt: +- From **Backend Architect**: API endpoint paths, response schemas, error codes, auth requirements +- From **DB Architect**: data model shapes, available fields, relationship structures +- From **UI/UX Designer**: interaction specs, component behavior, visual direction, layout decisions +- From **Design Auditor**: token compliance rules, component reuse requirements, accessibility constraints + +Summarize each Phase 1 output to its key decisions (max ~200 words per agent) — do not dump full outputs. 
The frontend agent needs actionable specs, not raw analysis. + # Adaptive Context Injection After each agent returns results, analyze their output for signals that warrant additional specialists. This is reactive — you inject agents based on what was ACTUALLY discovered, not what you predicted. @@ -313,6 +340,24 @@ SPECIALIST MEMORY TO INCLUDE: - What other agents are working on in parallel (so they can flag cross-cutting concerns) - What deliverable you need back from them +# Subagents for Research + +Use these subagents to gather context before building your dispatch pipeline. They keep research output out of your main context window. + +| Subagent | Model | When to use | +|----------|-------|-------------| +| `Explore` | Haiku (fast) | Quick scan of affected files, module structure, directory layout — enough to scope the task | +| `feature-dev:code-explorer` | Sonnet | Deep analysis when task scope is unclear — trace features, map dependencies, understand complexity | + +### Usage + +``` +Agent(subagent_type="Explore", prompt="List all files in cofee_backend/cpv3/modules/[module]/ and cofee_frontend/src/features/[domain]/. Thoroughness: quick") +Agent(subagent_type="feature-dev:code-explorer", prompt="Trace how [feature] works across frontend, backend, and remotion service. Map the cross-service boundaries and API contracts involved.") +``` + +Use `Explore` for most scoping tasks. Use `feature-dev:code-explorer` only when the task touches unfamiliar areas or has unclear blast radius. + # Research Protocol Your research is high-level and scoping-focused. You are mapping the terrain, not exploring caves. 
diff --git a/.claude/agents/product-strategist.md b/.claude/agents/product-lead.md similarity index 91% rename from .claude/agents/product-strategist.md rename to .claude/agents/product-lead.md index 31546e0..b277b23 100644 --- a/.claude/agents/product-strategist.md +++ b/.claude/agents/product-lead.md @@ -1,7 +1,7 @@ --- -name: product-strategist -description: Senior Product/Growth Lead — SaaS monetization, conversion optimization, feature prioritization, competitive analysis, growth mechanics. -tools: Read, Grep, Glob, Bash, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs, mcp__claude-in-chrome__tabs_context_mcp, mcp__claude-in-chrome__tabs_create_mcp, mcp__claude-in-chrome__navigate, mcp__claude-in-chrome__computer, mcp__claude-in-chrome__read_page, mcp__claude-in-chrome__find, mcp__claude-in-chrome__form_input, mcp__claude-in-chrome__get_page_text, mcp__claude-in-chrome__javascript_tool, mcp__claude-in-chrome__read_console_messages, mcp__claude-in-chrome__read_network_requests, mcp__claude-in-chrome__resize_window, mcp__claude-in-chrome__gif_creator, mcp__claude-in-chrome__upload_image, mcp__claude-in-chrome__shortcuts_execute, mcp__claude-in-chrome__shortcuts_list, mcp__claude-in-chrome__switch_browser, mcp__claude-in-chrome__update_plan +name: product-lead +description: Senior Product Lead — SaaS monetization, conversion optimization, feature prioritization, competitive analysis, growth mechanics. Coordinator for the Product sub-team. 
+tools: Read, Grep, Glob, Bash, Agent, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs, mcp__claude-in-chrome__tabs_context_mcp, mcp__claude-in-chrome__tabs_create_mcp, mcp__claude-in-chrome__navigate, mcp__claude-in-chrome__computer, mcp__claude-in-chrome__read_page, mcp__claude-in-chrome__find, mcp__claude-in-chrome__form_input, mcp__claude-in-chrome__get_page_text, mcp__claude-in-chrome__javascript_tool, mcp__claude-in-chrome__read_console_messages, mcp__claude-in-chrome__read_network_requests, mcp__claude-in-chrome__resize_window, mcp__claude-in-chrome__gif_creator, mcp__claude-in-chrome__upload_image, mcp__claude-in-chrome__shortcuts_execute, mcp__claude-in-chrome__shortcuts_list, mcp__claude-in-chrome__switch_browser, mcp__claude-in-chrome__update_plan model: opus --- @@ -14,7 +14,7 @@ At the very start of every invocation: This contains the project context, team roster, handoff format, and quality standards. 2. Read your memory directory: - Read directory: `.claude/agents-memory/product-strategist/` + Read directory: `.claude/agents-memory/product-lead/` List all files and read each one. Check for findings relevant to the current task — previous market research, pricing decisions, competitor intelligence, growth experiments. 3. Read the relevant CLAUDE.md files based on the task scope: @@ -27,6 +27,32 @@ At the very start of every invocation: --- +# Hierarchy + +- **Lead:** Orchestrator (direct report) +- **Tier:** 1 (Lead) +- **Sub-team:** Product +- **Manages:** UI/UX Designer, Technical Writer, ML/AI Engineer + +## Dual-Mode Operation + +You operate in two modes, signaled by the orchestrator via `MODE:` in the dispatch context: + +**Coordinator mode** (default, when `MODE: coordinator` or MODE omitted): Decompose the task for your sub-team, dispatch the right specialists, synthesize results. Act as a manager — scoping, dispatching, synthesizing. Do NOT do deep product analysis yourself. 
+ +**Specialist mode** (when `MODE: specialist`): Answer as a product/growth specialist directly. Do NOT dispatch your sub-team. Used when the orchestrator needs your specific product expertise, not coordination. + +## Coordinator Responsibilities + +When in coordinator mode: +1. Receive a scoped product/growth sub-task from the orchestrator +2. Analyze which specialists are needed +3. Dispatch specialists with packaged context +4. Synthesize specialist outputs into a unified recommendation +5. Report back with synthesized result + audit trail + +Follow the dispatch protocol defined in the team protocol. + # Identity You are a **Senior Product/Growth Lead** with 15+ years of experience building and scaling SaaS products from zero to millions in ARR. You have led product strategy at video tooling startups, growth at creator-economy platforms, and monetization at B2C SaaS companies. You have launched freemium products that hit 10% free-to-paid conversion (3x industry average), designed pricing pages that increased ARPU 40%, and built growth loops that reduced CAC to near zero for organic channels. @@ -491,7 +517,7 @@ If I receive handoff results, I will: ## Reading Memory At the START of every invocation: -1. Read your memory directory: `.claude/agents-memory/product-strategist/` +1. Read your memory directory: `.claude/agents-memory/product-lead/` 2. List all files and read each one 3. Check for findings relevant to the current task — previous market research, pricing decisions, competitor intelligence, growth experiments 4. Apply relevant memory entries immediately — do not re-research what past invocations already validated @@ -500,7 +526,7 @@ At the START of every invocation: At the END of every invocation, if you discovered non-obvious market or product insights: -1. Write a memory file to `.claude/agents-memory/product-strategist/-.md` +1. Write a memory file to `.claude/agents-memory/product-lead/-.md` 2. 
Keep it short (5-15 lines), actionable, and specific to YOUR domain 3. Include an "Applies when:" line so future you knows when to recall it 4. Do NOT save general SaaS knowledge — only Coffee Project-specific insights @@ -566,6 +592,24 @@ When you need another agent's expertise, include this in your output: If you have no handoffs, omit the Handoff Requests section entirely. +## Subagents + +Dispatch specialized subagents via the Agent tool for focused work outside your main analysis. + +| Subagent | Model | When to use | +|----------|-------|-------------| +| `Explore` | Haiku (fast) | Map feature surface area, find pricing/quota logic, understand current capabilities | +| `feature-dev:code-explorer` | Sonnet | Understand how a feature is implemented to assess complexity, monetization potential | + +### Usage + +``` +Agent(subagent_type="Explore", prompt="Find all pricing, quota, subscription, and tier-related code across the monorepo. Thoroughness: very thorough") +Agent(subagent_type="feature-dev:code-explorer", prompt="Trace how [feature] works end-to-end — from user action through backend processing to result delivery. Map the cost drivers (API calls, compute, storage).") +``` + +Include your strategic context in prompts so subagents focus on business-relevant implementation details. + ## Quality Standard Your output must be: diff --git a/.claude/agents/technical-writer.md b/.claude/agents/technical-writer.md index 2332535..e88d2ae 100644 --- a/.claude/agents/technical-writer.md +++ b/.claude/agents/technical-writer.md @@ -1,7 +1,7 @@ --- name: technical-writer description: Senior Technical Writer — feature documentation, API docs, architecture decision records, concise and scannable documentation. 
-tools: Read, Grep, Glob, Bash, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs +tools: Read, Grep, Glob, Bash, Agent, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs model: opus --- @@ -450,6 +450,24 @@ When you need another agent's expertise, include this in your output: If you have no handoffs, omit the handoff section entirely. +## Subagents + +Dispatch specialized subagents via the Agent tool for focused work outside your main analysis. + +| Subagent | Model | When to use | +|----------|-------|-------------| +| `Explore` | Haiku (fast) | Find code to document, existing docs, API endpoints, module structure | +| `feature-dev:code-explorer` | Sonnet | Deeply understand a feature's implementation for accurate, detailed documentation | + +### Usage + +``` +Agent(subagent_type="Explore", prompt="Find all API router files and their endpoint definitions in cofee_backend/cpv3/modules/. Also find any existing documentation files. Thoroughness: medium") +Agent(subagent_type="feature-dev:code-explorer", prompt="Trace how [feature] works from user action to completion. Map entry points, data transformations, error cases, and configuration points — I need this for documentation.") +``` + +Include documentation goals in prompts so subagents highlight what matters for the reader. + ## Common Collaboration Patterns - **Feature documentation** — you draft the doc, handoff technical accuracy review to the relevant Architect, integrate their corrections diff --git a/.claude/agents/ui-ux-designer.md b/.claude/agents/ui-ux-designer.md index 4e742b6..ba3554a 100644 --- a/.claude/agents/ui-ux-designer.md +++ b/.claude/agents/ui-ux-designer.md @@ -1,7 +1,7 @@ --- name: ui-ux-designer description: Senior Product Designer — visual design, interaction patterns, premium SaaS aesthetics, addictive UX, conversion-oriented design. 
-tools: Read, Grep, Glob, Bash, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs, mcp__claude-in-chrome__tabs_context_mcp, mcp__claude-in-chrome__tabs_create_mcp, mcp__claude-in-chrome__navigate, mcp__claude-in-chrome__computer, mcp__claude-in-chrome__read_page, mcp__claude-in-chrome__find, mcp__claude-in-chrome__form_input, mcp__claude-in-chrome__get_page_text, mcp__claude-in-chrome__javascript_tool, mcp__claude-in-chrome__read_console_messages, mcp__claude-in-chrome__read_network_requests, mcp__claude-in-chrome__resize_window, mcp__claude-in-chrome__gif_creator, mcp__claude-in-chrome__upload_image, mcp__claude-in-chrome__shortcuts_execute, mcp__claude-in-chrome__shortcuts_list, mcp__claude-in-chrome__switch_browser, mcp__claude-in-chrome__update_plan +tools: Read, Grep, Glob, Bash, Agent, WebSearch, WebFetch, mcp__context7__resolve-library-id, mcp__context7__query-docs, mcp__claude-in-chrome__tabs_context_mcp, mcp__claude-in-chrome__tabs_create_mcp, mcp__claude-in-chrome__navigate, mcp__claude-in-chrome__computer, mcp__claude-in-chrome__read_page, mcp__claude-in-chrome__find, mcp__claude-in-chrome__form_input, mcp__claude-in-chrome__get_page_text, mcp__claude-in-chrome__javascript_tool, mcp__claude-in-chrome__read_console_messages, mcp__claude-in-chrome__read_network_requests, mcp__claude-in-chrome__resize_window, mcp__claude-in-chrome__gif_creator, mcp__claude-in-chrome__upload_image, mcp__claude-in-chrome__shortcuts_execute, mcp__claude-in-chrome__shortcuts_list, mcp__claude-in-chrome__switch_browser, mcp__claude-in-chrome__update_plan model: opus --- @@ -379,6 +379,24 @@ You are part of a 16-agent team. Refer to the shared protocol (`.claude/agents-s If you have no handoffs, omit the Handoff Requests section entirely. +## Subagents + +Dispatch specialized subagents via the Agent tool for focused work outside your main design work. 
+ +| Subagent | Model | When to use | +|----------|-------|-------------| +| `Explore` | Haiku (fast) | Find current UI patterns, component library, existing page layouts, Radix Themes usage | +| `feature-dev:code-explorer` | Sonnet | Understand how existing interactions are implemented before proposing new patterns | + +### Usage + +``` +Agent(subagent_type="Explore", prompt="Find all modal components, form patterns, and page layouts in cofee_frontend/src/. Map which Radix Themes components are used and how. Thoroughness: very thorough") +Agent(subagent_type="feature-dev:code-explorer", prompt="Trace the user interaction flow for [existing feature] — from trigger through each UI state to completion. Map loading, error, empty, and success states.") +``` + +Include your design context in prompts so subagents focus on patterns relevant to your recommendations. + --- # Output Standards diff --git a/.claude/rules/agent-pipeline.md b/.claude/rules/agent-pipeline.md new file mode 100644 index 0000000..775ec41 --- /dev/null +++ b/.claude/rules/agent-pipeline.md @@ -0,0 +1,27 @@ +# Agent Pipeline — Mandatory + +## The Rule + +This project has a 16-agent specialist team (`.claude/agents/`). For ANY non-trivial task — bug hunt, code review, feature, audit, optimization, research — you MUST consult with the developer team by dispatching the orchestrator and the specialist agents it selects. + +Built-in agents (e.g. `feature-dev:code-reviewer`, `feature-dev:code-explorer`) may be used alongside the team, but the project's specialist agents must always be consulted. + +## Pipeline + +1. **Announce** what you're doing: "Consulting with the developer team to [task description]" +2. **Dispatch the orchestrator** agent with your analysis of the task +3. **Follow the orchestrator's pipeline** — dispatch the specialists it selects, in the phases it defines +4. Built-in agents can run in parallel with the specialist team when useful +5. 
**Report results** — synthesize all outputs into a coherent response, crediting which specialists contributed + +## Announcement Format + +Always start with a brief announcement before dispatching agents: + +> Consulting with the developer team: dispatching [Agent 1], [Agent 2], [Agent 3] to [task summary]. + +This tells the user which specialists are working and on what. + +## Why + +The specialist agents have project-specific context, MCP tools (Postgres, Redis, Docker, Chrome, Lighthouse), memory directories, handoff protocols, and the team protocol for consistent quality. Consulting them ensures domain-expert analysis alongside any built-in agent work. diff --git a/.gitignore b/.gitignore index 84ff30f..077acc7 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ remotion_service/ # Claude plugins cache .claude/plugins/ + +# Superpowers brainstorm sessions +.superpowers/ diff --git a/CLAUDE.md b/CLAUDE.md index cf5bfd6..1787172 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -121,6 +121,16 @@ All user-facing UI text **must be in Russian**. The only exception is the brand This project has a team of 16 specialist agents (15 specialists + 1 Orchestrator). Agent files: `.claude/agents/`. Shared protocol: `.claude/agents-shared/team-protocol.md`. +### Developer Team Consultation + +For ANY non-trivial task, you MUST consult with the developer team: + +1. **Announce**: "Consulting with the developer team to [task summary]" +2. Dispatch the `orchestrator` agent with your analysis — it selects the right specialists +3. Built-in agents (code-reviewer, code-explorer, etc.) may be used alongside the team, + but the project's specialist agents must always be consulted +4. 
**Credit specialists** in your final response — state which agents contributed + ### When to Use the Orchestrator For ANY non-trivial task (feature, bug fix, audit, optimization, research, infrastructure, @@ -133,6 +143,20 @@ review, documentation), you MUST: Skip the Orchestrator ONLY for trivial tasks: rename a variable, fix a typo, answer a quick factual question. +### Frontend-Last Phasing + +When a plan includes frontend agents (Frontend Architect, Frontend QA) AND backend/design +agents, always run backend/design first: +- **Phase 1**: Backend Architect, DB Architect, UI/UX Designer, Design Auditor +- **Phase 2**: Frontend Architect, Frontend QA (with Phase 1 outputs as context) + +Frontend depends on API contracts from backend and specs from design. Running them later +prevents rework. If only frontend agents are needed, they run in Phase 1 normally. + +When dispatching frontend agents in Phase 2, include relevant Phase 1 outputs in their +prompt: API contracts, response schemas, data model shapes, interaction specs, design +constraints. Summarize each to key decisions (~200 words max), not raw output. + ### Dispatch Loop After receiving the Orchestrator's plan: diff --git a/docs/bug-reports/2026-03-22_bugreport.html b/docs/bug-reports/2026-03-22_bugreport.html new file mode 100644 index 0000000..ee33d8a --- /dev/null +++ b/docs/bug-reports/2026-03-22_bugreport.html @@ -0,0 +1,682 @@ + + + + + +Cofee Project — Bug Audit Report (2026-03-22) + + + +
+ +

Cofee Project — Bug Audit Report

+

+ Date: 2026-03-22  |  + Audited by: Backend Architect, Frontend Architect, Remotion Engineer, DB Architect, Security Auditor, Performance Engineer  |  + ~90 unique issues after deduplication +

+ +
+
+
~90
+
Total Issues
+
+
+
10
+
Critical
+
+
+
23
+
High
+
+
+
30+
+
Medium
+
+
+
30+
+
Low
+
+
+ + +

Critical — Fix Immediately

+

These issues can cause security breaches, data loss, or application crashes under normal usage.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
#AreaIssueFile(s)
1SecurityPath traversal — any authenticated user can read arbitrary server files via ../../etc/passwd. The endpoint resolves the path but never validates it stays within the storage directory.files/router.py:103
2SecurityUnauthenticated webhookPOST /api/tasks/webhook/{job_id}/ has no auth. Anyone can forge job status, inject arbitrary output data, or mark jobs as failed.tasks/router.py:195
3SecurityJWT in JS-accessible cookies — tokens set via js-cookie with no HttpOnly/Secure/SameSite flags. Any XSS steals both access and refresh tokens.useCookie.tsx, LoginPage.tsx:36
4SecurityPyJWT CVE-2026-32597 — active vulnerability in the core auth library. Fix available in v2.12.0.pyproject.toml
5FrontendNo token refresh — when access token expires, all API calls fail with opaque "Oops, fetch failed". Refresh token is set during login but never used again.shared/api/index.ts:27
6FrontendsetState during rendersetCaptionedVideoFileId() and setStatus() called outside useEffect, causing infinite re-render loops that freeze the browser tab.CaptionResultStep.tsx:69, ConvertMediaView.tsx:51
7FrontendWorkspace state race condition — WizardProvider and WorkspaceProvider independently PATCH workspace_state, overwriting each other's data on the 1000ms debounce boundary.WizardContext.tsx:345, WorkspaceContext.tsx:111
8BackendAuth session closed prematurely — get_current_user closes its DB session in finally, leaving the returned User object detached. Any later access that needs a database load (a lazy-loaded relationship or an expired attribute) raises DetachedInstanceError.infrastructure/auth.py:62
9RemotionCustom fonts never loaded — only Lobster is loaded at module level. Any other font_family in styleConfig silently renders with system sans-serif.Captions.tsx:3,12
10PerformanceSequential S3 frame uploads — 300 frames uploaded one-at-a-time (30s of round-trip time). Should use asyncio.gather() with semaphore (~3s).media/service.py:497
+ + +

High — Fix This Sprint

+

Significant bugs affecting security, correctness, or user experience. These are not immediately exploitable or crash-inducing, but they need prompt attention.

+ +

Security

+ + + + + + + + + + + + + + + + + + + + +
IssueFile(s)
No refresh token rotation — a stolen token grants access for its full 30-day lifetime with no revocation mechanismusers/router.py:211
Remotion has zero authentication — port 3001 exposed, enables SSRF via callbackUrlserver/index.ts:22
IDOR on artifacts/transcriptions/events — any authenticated user can read or modify anyone's data; handlers discard the caller (_ = current_user) and never check ownershipmedia/router.py:205, transcription/router.py:30, jobs/router.py:106
No rate limiting on login/register — unlimited brute forceusers/router.py:176
+ +

Backend

+ + + + + + + + + + + + + + + + + + + + +
IssueFile(s)
Token refresh skips user validation — deactivated users keep generating new access tokensusers/router.py:211
Repository update drops explicit None — impossible to clear nullable fields via PATCH (affects 7 repos)jobs/repository.py:78 + 6 others
Routers bypass service layer — media, transcription, notification routers use repositories directlymedia/router.py:128, transcription/router.py:36, notifications/router.py:63
TaskService couples to 6 cross-module repos — bypasses business rules in other modulestasks/service.py:26
+ +

Frontend

+ + + + + + + + + + + + + + + + + + + + +
IssueFile(s)
Zero error boundaries — any JS error crashes the entire app to a blank white screenapp/ (no error.tsx anywhere)
WebSocket token in URL query string — logged by proxies and browser historySocketProvider.tsx:209
Raw fetch() bypasses auth middleware — 3 notification endpoints use manual cookiesNotificationPopup.tsx:84,94, SocketProvider.tsx:156
FSD layer violation — feature imports from widget layerSubtitleRevisionStep.tsx:24
+ +

Database

+ + + + + + + + + + + + + + + + + + + + +
IssueFile(s)
Missing FK indexes on notificationsjob_id, project_id cause full sequential scansnotifications/models.py:18
No pagination on 8 of 9 list endpoints — unbounded queries load entire tablesAll repository.py list_all() methods
No CHECK constraints on status columns — a typo in a status string is accepted silently and yields an orphaned row that no status filter matchesjobs/models.py, projects/models.py, notifications/models.py
files.path queried without index — sequential scan on every file lookup by pathfiles/repository.py:36
+ +

Remotion

+ + + + + + + + + + + + + + + + + + + + +
IssueFile(s)
No retry on DONE/FAILED webhook — a single missed webhook leaves the user's job stuck in "running" foreverwebhook.ts:13
Empty transcription silently renders with no captions — wasted compute, confusing UXuseCaptions.ts:27
Sync render path has no concurrency limit — N requests spawn N Chromium processes, causes OOMserver/index.ts:42
out/ directory not created at startup — first render fails outside Docker with ENOENTrender_video.ts:134
+ +

Performance

+ + + + + + + + + + + + + + + + +
IssueFile(s)
New psycopg2 connection per cancellation check — 5-20ms overhead + connection churn in Dramatiqtasks/service.py:224
No GZip middleware — transcription JSON (100KB+) sent uncompressed to frontendmain.py
WizardContext subscribes to full notification store — entire wizard re-renders every 3 seconds during task processingWizardContext.tsx:353
+ + +

Medium — Fix Next Sprint

+

Suboptimal patterns, technical debt, and issues that compound under load or scale.

+ +
    +
  • Inconsistent soft-deleteis_active (BaseModelMixin) vs is_deleted (files, media) on different tables. Some tables have both columns.
  • +
  • No password complexity requirements — users can register with password "a" (users/schemas.py)
  • +
  • Connection pool defaults too small — 5+10=15 max; production with 4 workers needs 60+ (settings.py:44)
  • +
  • Redis connection never closed on shutdown — singleton created lazily, no lifespan cleanup (notifications/service.py:44)
  • +
  • No explicit session rollback on failure — uncommitted state can leak between requests (db/session.py:44)
  • +
  • Multiple DB commits per webhook callback — 7+ commits with no atomicity, partial saves on failure (tasks/service.py:1158)
  • +
  • SSR QueryClient singleton — module-level new QueryClient() leaks cache between server requests (shared/lib/query_client.ts)
  • +
  • Unused npm dependencies — lodash, axios, xior = ~85KB dead weight in bundle (package.json)
  • +
  • Redundant 2s polling alongside WebSocket — 30 API requests/min per active wizard, WebSocket already delivers same data (WizardContext.tsx:361)
  • +
  • All JSON columns should be JSONB — 10 columns use plain JSON, can't be indexed or queried efficiently (all models.py)
  • +
  • No server_default on BaseModelMixin — direct SQL/migrations bypass Python-side defaults (db/base.py:20)
  • +
  • S3 filename collision — re-rendering same video overwrites previous captioned version (remotion_service/server/services/s3.ts:76)
  • +
  • lines_per_screen and animation_speed accepted but never used — schema promises features that don't exist (CaptionStyleSchema.ts)
  • +
  • Default JWT secret "dev-secret" — no production guard prevents deployment with guessable secret (settings.py:29)
  • +
  • No file content type validation on upload — extension/MIME/magic bytes not checked (files/router.py:39)
  • +
  • API onError swallows error details — all errors become "Oops, fetch failed", impossible to distinguish 401/404/500 (shared/api/index.ts:49)
  • +
  • Irreversible migration downgradeb3c4d5e6f7a8 downgrade crashes with NOT NULL violation (alembic/versions/)
  • +
  • project_pct column misnaming — DB says "project" but API says "progress", confusing mapping (jobs/models.py:34, notifications/service.py:143)
  • +
No ORM relationships defined — zero relationship() across 11 models, so related rows must be fetched with hand-written queries, which invites N+1 patterns (all models.py)
  • +
  • Double audio file loadingdetect_silence decodes the same file twice, doubling memory and time (media/service.py:86)
  • +
  • StorageService.get_file_info makes 3 sequential S3 calls — could be 1 head_object (storage/base.py:88)
  • +
  • Token logged to server consoleconsole.log("Verifying token:", token) in server action (server.ts:16)
  • +
  • framer-motion in critical path — 32KB gzipped for 2 components, should use CSS animations (Loader.tsx, HomePage.tsx)
  • +
  • Additional dependency CVEs — protobuf, pyasn1, python-multipart have known fixes available (pyproject.toml)
  • +
  • Webhook secrets exposed in API responseWebhookRead includes plaintext secret field (webhooks/schemas.py:16)
  • +
  • No request timing middleware — can't detect performance regressions (main.py)
  • +
  • Redis SCAN in cancellation cleanup — O(n) over entire keyspace instead of direct key lookup (tasks/service.py:1062)
  • +
  • No /health endpoint on Remotion service — Docker/K8s probes have nothing to hit (server/index.ts)
  • +
  • TranscriptionEditor callback churnhandleSave recreated on every keystroke (TranscriptionEditor.tsx:124)
  • +
  • No numeric bounds on Remotion schema fields — negative font_size, fade_duration_frames can crash renderer (CaptionStyleSchema.ts, DocumentSchema.ts)
  • +
+ + +

Low — Technical Debt

+

Code quality issues, missing conventions, and minor inefficiencies.

+ +
    +
  • Inline error strings instead of ERROR_ constants (all routers)
  • +
  • Inconsistent is_active/is_deleted semantics (some models have both columns)
  • +
  • 19 console.log/console.error statements in production frontend code
  • +
  • Missing data-testid on 18 of 21 shared UI components
  • +
  • No Content-Security-Policy or security headers on frontend
  • +
  • OpenAPI/Swagger docs exposed unconditionally (even in production)
  • +
  • Redis without authentication in Docker Compose
  • +
  • Default DB credentials postgres/postgres with no production guard
  • +
  • MinIO default credentials minioadmin/minioadmin
  • +
  • email column has no unique constraint
  • +
  • Webhook secrets stored as plaintext in DB
  • +
  • broker_id on jobs has no index
  • +
  • Duplicate json import in media/service.py
  • +
  • formatBytes duplicated in 3 Remotion files
  • +
  • GET /api/render returns bare string "Hello" (debug leftover)
  • +
  • justifyContent uses "left"/"right" instead of "flex-start"/"flex-end" in Remotion
  • +
  • Module-level mutable regionIdCounter shared across component instances
  • +
  • FragmentsStep component is 843 lines (guideline: 150 max)
  • +
  • Login page shows no error message to user on failure
  • +
  • .env not in backend .gitignore
  • +
  • useBreadcrumbs uses JSON.stringify in dependency array
  • +
  • BreadcrumbsProvider context value not memoized
  • +
  • TranscriptionModal passes queryKey in wrong argument position
  • +
  • Only SIGTERM handled in Remotion, not SIGINT
  • +
  • Short removeOnFail TTL (2h) makes debugging failed renders difficult
  • +
+ + +
+

Top 5 Quick Wins (highest impact, lowest effort)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FixEffortImpact
Path traversal guard — add 3-line is_relative_to() check5 minBlocks arbitrary file read (Critical security fix)
Add GZipMiddleware — single line in main.py2 min5-10x smaller JSON responses
Parallelize S3 frame uploads — asyncio.gather() + semaphore30 min10-60s saved per frame extraction job
Remove unused npm packages (lodash, axios, xior)5 min~85KB bundle size reduction
Fix setState-during-render — wrap in useEffect10 minPrevents browser tab freezes
+
+ + +
+

Contributing Agents

+
+
+ Backend Architect 25 findings
+ API logic, race conditions, service layer patterns, error handling +
+
+ Frontend Architect 24 findings
+ React/Next.js bugs, state management, FSD compliance, type safety +
+
+ Remotion Engineer 22 findings
+ Render pipeline, S3 integration, caption edge cases, webhook reliability +
+
+ DB Architect 23 findings
+ Schema issues, missing indexes, migration risks, query patterns +
+
+ Security Auditor 22 findings
+ OWASP audit, auth/JWT, IDOR, SSRF, dependency CVEs, scanning tools +
+
+ Performance Engineer 25 findings
+ Async blocking, S3 throughput, connection pools, re-renders, bundle size +
+
+ +
    +
  • Path traversal and unauthenticated webhook — confirmed independently by Backend Architect + Security Auditor
  • +
  • Missing pagination — flagged by Backend Architect + DB Architect + Performance Engineer
  • +
  • Inconsistent soft-delete — flagged by Backend Architect + DB Architect
  • +
  • IDOR on artifacts/transcriptions — flagged by Backend Architect + DB Architect + Security Auditor
  • +
  • WizardContext re-renders — flagged by Frontend Architect + Performance Engineer
  • +
+
+ +
+ Generated by Claude Code agent team (Orchestrator + 6 specialists) on 2026-03-22 +
+ +
+ + diff --git a/docs/consults/video-features-roadmap_v1.md b/docs/consults/video-features-roadmap_v1.md new file mode 100644 index 0000000..2373177 --- /dev/null +++ b/docs/consults/video-features-roadmap_v1.md @@ -0,0 +1,416 @@ +# Video Features Roadmap — Technical Consultation v1 + +**Date:** 2026-03-22 +**Specialists consulted:** ML/AI Engineer, Backend Architect, Remotion Engineer, Frontend Architect, DevOps Engineer, Performance Engineer + +--- + +## Feature Overview + +| # | Feature | Complexity | MVP | Full | Additional Infra | +|---|---------|-----------|-----|------|-----------------| +| 1 | Advanced Remotion Templates | Easy-Medium | 3-4 days | 3-4 days | None — ready to implement | +| 2 | Viral Moments Detection | Medium | 5-7 days | 8-12 days | LLM API key only | +| 3 | Auto-Cut & Head Tracking | Very Hard | 12-15 days | 30-45 days | Phase 1: nothing; Phase 2: GPU worker | +| 4 | 9:16 Shorts Conversion | Medium | 6-8 days | +3-4 days after #3 | None | +| **Total** | | | **26-34 days** | **44-65 days** | | + +Realistic for one dev: **6-8 weeks** (all MVPs) or **3-4 months** (full versions). + +--- + +## Feature 1: Advanced Remotion Templates + +**Status:** Spec + implementation plan already written. + +- Spec: `docs/superpowers/specs/2026-03-21-advanced-remotion-templates-design.md` +- Plan: `docs/superpowers/plans/2026-03-21-advanced-remotion-templates.md` + +**Scope:** Extend `CaptionStyleSchema` with 4 new highlight styles (pop_in, karaoke, bounce, glow_pulse), 2 transitions (zoom_in, drop_in), 3 fields (word_entrance, highlight_rotation_deg, text_transform). Seed 2 system presets: "Shorts" and "Podcast". + +**Changes:** Schema extensions in Remotion + backend, rendering logic in `Captions.tsx`, Alembic migration for presets, frontend StyleEditor form controls. + +**No specialist input needed** — fully designed, no new infrastructure. 
+ +--- + +## Feature 2: Viral Moments Detection + +### Architecture + +**LLM API:** Gemini 2.5 Flash (best Russian language support, $0.15/$0.60 per 1M tokens) or GPT-4o-mini (same pricing, slightly weaker Russian). Cost per 30-min video analysis: ~$0.005. + +**Audio augmentation:** `librosa` for RMS energy curves — refines clip boundaries to natural pauses, boosts scoring for high-energy segments. Adds ~20MB dependency, processes 30-min audio in <10 seconds. + +**Pipeline:** +1. Fetch transcription Document from DB +2. librosa computes energy envelope over full audio (100ms resolution) +3. LLM analyzes transcription text with structured JSON output prompt +4. Post-process: snap clip boundaries to low-energy points, compute energy scores +5. Save clips to new `clips` table + +### Backend Design + +**New module:** `clips` (models, schemas, repository, service, router) — stores detected clips with project/file/job relationships. + +**Clip model:** +``` +Clip { + project_id: UUID (FK projects) + source_file_id: UUID (FK files) + job_id: UUID? (FK jobs) + title: str + start_ms: int + end_ms: int + score: float + source_type: "viral_detected" | "user_created" | "auto_generated" + status: "pending" | "approved" | "rejected" | "exported" + meta: JSON? (LLM reasoning, tags, hashtags) +} +``` + +**New job type:** `VIRAL_DETECT` added to `JobTypeEnum`. Actor calls LLM API directly via `httpx` from Dramatiq worker (no separate service needed). 
+ +**LLM integration:** +- Direct HTTP call from actor with retry + exponential backoff on 429 +- Prompts stored in `cpv3/infrastructure/prompts/viral_detection_v1.txt` +- Active version controlled by `LLM_VIRAL_PROMPT_VERSION` env var +- New settings: `LLM_API_URL`, `LLM_API_KEY`, `LLM_MODEL_NAME` + +### Frontend Design + +- New `ViralClipsStep` in project wizard (features/project/) +- Clip list with thumbnails, scores, titles, approve/reject buttons +- Clip edit modal with video preview (scoped playback for start/end range) +- New job type `VIRAL_DETECT` in notification handling (existing WebSocket infrastructure) + +### Key Numbers + +| Metric | Value | +|---|---| +| Accuracy (precision) | 50-70% | +| Accuracy (recall) | 60-80% | +| Processing time | 10-20 seconds | +| Cost per video | ~$0.005 | +| Cost at 1,000 videos/month | ~$5 | +| New dependencies | `google-generativeai` or `openai` (~10MB) + `librosa` (~20MB) | + +### Risks + +- **Prompt engineering quality** determines feature value — iterate based on user feedback +- **Visual-only moments** (facial expressions, physical comedy) cannot be detected from text — ~20-30% of viral moments are missed +- **Transcription quality matters** — Whisper `tiny` has ~25% WER on Russian; use at least `small` for viral detection input +- **LLM hallucinated timestamps** — validate returned timestamps against actual segment boundaries + +### MVP vs Full + +- **MVP:** Text-only LLM analysis, no audio energy. Returns clips with scores. User reviews and accepts/rejects. +- **Full:** Add librosa energy analysis, few-shot prompt examples from user-accepted clips, batch processing, direct clip export to 9:16. + +--- + +## Feature 3: Auto-Cut & Head Tracking + +### Architecture + +**Face detection:** MediaPipe BlazeFace (Apache 2.0, ~2MB model, 30-60 FPS on CPU). Sample at 3 FPS — face positions don't change significantly within 330ms. Dependency: `mediapipe` (~30MB). 
+ +**Speaker diarization:** pyannote.audio 3.1 (MIT, ~10% DER, self-hosted). Runs on CPU at 0.17-0.33x real-time (5-10 min for 30-min audio). GPU accelerates to 1-2 min. Dependencies: `pyannote-audio` (~200MB) + `torchaudio` (~50-80MB). PyTorch already installed via Whisper. + +**Face-speaker mapping:** +- Phase 1: Temporal correlation heuristic — match face tracks to speaker segments by maximum temporal overlap. 70-85% accuracy for 2-speaker videos. Zero additional dependencies. ~100 lines of Python. +- Phase 2: TalkNet-ASD (Active Speaker Detection) — jointly analyzes lip movement + audio to detect who is speaking. 92.3% accuracy. Requires `torchvision` + model weights (~50MB). Needs GPU (2-5 FPS on CPU vs 15-25 FPS on GPU). + +**Video compositing (Remotion approach):** + +Dynamic crop via CSS `transform: scale() translate()` on `