HKUDS
diff --git a/‎README.md‎
Lines changed: 10 additions & 1 deletion b/‎README.md‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎registry.json‎
Lines changed: 12 additions & 0 deletions b/‎registry.json‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎videocaptioner/agent-harness/VIDEOCAPTIONER.md‎
Lines changed: 83 additions & 0 deletions b/‎videocaptioner/agent-harness/VIDEOCAPTIONER.md‎
Lines changed: 83 additions & 0 deletions
diff --git a/‎videocaptioner/agent-harness/cli_anything/__init__.py‎ b/‎videocaptioner/agent-harness/cli_anything/__init__.py‎
diff --git a/‎videocaptioner/agent-harness/cli_anything/videocaptioner/README.md‎
Lines changed: 71 additions & 0 deletions b/‎videocaptioner/agent-harness/cli_anything/videocaptioner/README.md‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎videocaptioner/agent-harness/cli_anything/videocaptioner/__init__.py‎ b/‎videocaptioner/agent-harness/cli_anything/videocaptioner/__init__.py‎
diff --git a/‎videocaptioner/agent-harness/cli_anything/videocaptioner/core/__init__.py‎ b/‎videocaptioner/agent-harness/cli_anything/videocaptioner/core/__init__.py‎
diff --git a/‎videocaptioner/agent-harness/cli_anything/videocaptioner/core/pipeline.py‎
Lines changed: 91 additions & 0 deletions b/‎videocaptioner/agent-harness/cli_anything/videocaptioner/core/pipeline.py‎
Lines changed: 91 additions & 0 deletions
diff --git a/‎videocaptioner/agent-harness/cli_anything/videocaptioner/core/subtitle.py‎
Lines changed: 68 additions & 0 deletions b/‎videocaptioner/agent-harness/cli_anything/videocaptioner/core/subtitle.py‎
Lines changed: 68 additions & 0 deletions
@@ -469,13 +469,14 @@ The catalog auto-updates whenever `registry.json` changes — new community CLIs
 | **🤖 AI/ML Platforms** | Automate model training, inference pipelines, and hyperparameter tuning through structured commands | Stable Diffusion WebUI, ComfyUI, Ollama, InvokeAI, Text-generation-webui, Open WebUI, Fooocus, Kohya_ss, AnythingLLM, SillyTavern |
 | **📊 Data & Analytics** | Enable programmatic data processing, visualization, and statistical analysis workflows | JupyterLab, Apache Superset, Metabase, Redash, DBeaver, KNIME, Orange, OpenSearch Dashboards, Lightdash |
 | **💻 Development Tools** | Streamline code editing, building, testing, and deployment processes via command interfaces | Jenkins, Gitea, Hoppscotch, Portainer, pgAdmin, SonarQube, ArgoCD, OpenLens, Insomnia, Beekeeper Studio, **[iTerm2](https://iterm2.com)** |
-| **🎨 Creative & Media** | Control content creation, editing, and rendering workflows programmatically | Blender, GIMP, OBS Studio, Audacity, Krita, Kdenlive, Shotcut, Inkscape, Darktable, LMMS, Ardour |
+| **🎨 Creative & Media** | Control content creation, editing, and rendering workflows programmatically | Blender, GIMP, OBS Studio, Audacity, Krita, Kdenlive, Shotcut, Inkscape, Darktable, LMMS, Ardour, VideoCaptioner |
 | **🔬 Scientific Computing** | Automate research workflows, simulations, and complex calculations | ImageJ, FreeCAD, QGIS, ParaView, Gephi, LibreCAD, Stellarium, KiCad, JASP, Jamovi |
 | **🏢 Enterprise & Office** | Convert business applications and productivity tools into agent-accessible systems | NextCloud, GitLab, Grafana, Mattermost, LibreOffice, AppFlowy, NocoDB, Odoo (Community), Plane, ERPNext |
 | **📞 Communication & Collaboration** | Automate meeting scheduling, participant management, recording retrieval, and reporting through structured CLI | Zoom, Jitsi Meet, BigBlueButton, Mattermost |
 | **📐 Diagramming & Visualization** | Create and manipulate diagrams, flowcharts, architecture diagrams, and visual documentation programmatically | Draw.io (diagrams.net), Mermaid, PlantUML, Excalidraw, yEd |
 | **🌐 Network & Infrastructure** | Manage network services, DNS, ad-blocking, and infrastructure through structured CLI commands | AdGuardHome |
 | **🔬 Graphics & GPU Debugging** | Analyze GPU frame captures, inspect pipeline state, export shaders, and diff rendering state | RenderDoc |
+| **🎬 Video & Subtitles** | Transcribe speech, translate subtitles, burn styled captions into video — full captioning pipeline | VideoCaptioner |
 | **✨ AI Content Generation** | Generate professional deliverables (slides, docs, diagrams, websites, research reports) through AI-powered cloud APIs | [AnyGen](https://www.anygen.io), Gamma, Beautiful.ai, Tome |
 
 ---
@@ -751,6 +752,13 @@ Each application received complete, production-ready CLI interfaces — not demo
 <td align="center">✅ 98</td>
 </tr>
 <tr>
+<td align="center"><strong>🎬 <a href="videocaptioner/agent-harness/">VideoCaptioner</a></strong></td>
+<td>AI Video Captioning</td>
+<td><code>cli-anything-videocaptioner</code></td>
+<td>videocaptioner CLI (PyPI)</td>
+<td align="center">✅ 26</td>
+</tr>
+<tr>
 <td align="center"><strong>🎨 Sketch</strong></td>
 <td>UI Design</td>
 <td><code>sketch-cli</code></td>
@@ -879,6 +887,7 @@ cli-anything/
 ├── 🦙 ollama/agent-harness/             # Ollama CLI (98 tests)
 ├── 🎨 sketch/agent-harness/             # Sketch CLI (19 tests, Node.js)
 ├── 🔬 renderdoc/agent-harness/          # RenderDoc CLI (59 tests)
+├── 🎬 videocaptioner/agent-harness/     # VideoCaptioner CLI (26 tests)
 └── ☁️ cloudcompare/agent-harness/       # CloudCompare CLI (88 tests)
 ```
 
 
@@ -384,6 +384,18 @@
       "contributor_url": "https://github.com/levishilf"
     },
     {
+      "name": "videocaptioner",
+      "display_name": "VideoCaptioner",
+      "version": "1.0.0",
+      "description": "AI-powered video captioning — transcribe speech, optimize/translate subtitles, burn styled subtitles into video",
+      "requires": "videocaptioner (pip install videocaptioner), ffmpeg",
+      "homepage": "https://github.com/WEIFENG2333/VideoCaptioner",
+      "install_cmd": "pip install git+https://github.com/HKUDS/CLI-Anything.git#subdirectory=videocaptioner/agent-harness",
+      "entry_point": "cli-anything-videocaptioner",
+      "skill_md": "videocaptioner/agent-harness/cli_anything/videocaptioner/skills/SKILL.md",
+      "category": "video",
+      "contributor": "WEIFENG2333",
+      "contributor_url": "https://github.com/WEIFENG2333"
+    },
+    {
       "name": "intelwatch",
       "display_name": "Intelwatch",
       "version": "1.0.0",
 
@@ -0,0 +1,83 @@
+# VideoCaptioner: Project-Specific Analysis & SOP
+
+## Architecture Summary
+
+VideoCaptioner is an AI-powered video captioning tool that provides a complete
+pipeline from speech recognition to styled subtitle synthesis. It ships as a
+standalone CLI (`pip install videocaptioner`) with a well-defined command interface.
+
+```
++----------------------------------------------------------+
+|                   VideoCaptioner CLI                      |
+|  +------------+ +----------+ +-----------+ +-----------+ |
+|  | Transcribe | | Subtitle | | Synthesize| |  Process  | |
+|  | (ASR)      | | (NLP)    | | (FFmpeg)  | | (Pipeline)| |
+|  +-----+------+ +----+-----+ +-----+-----+ +-----+-----+ |
+|        |              |             |             |        |
+|  +-----+--------------+-------------+-------------+-----+ |
+|  |                    Core Engine                       | |
+|  |  ASR engines, LLM optimization, Translation,        | |
+|  |  Subtitle rendering (ASS + Rounded), FFmpeg          | |
+|  +-----------------------------------------------------+ |
++----------------------------------------------------------+
+```
+
+## CLI Strategy: Subprocess Wrapper
+
+Unlike applications that need reverse-engineering of internal formats,
+VideoCaptioner already provides a production CLI. Our harness:
+
+1. **Click wrapper** provides the CLI-Anything standard interface
+2. **Subprocess backend** delegates to `videocaptioner` CLI commands
+3. **JSON mode** (`--json`) returns structured output for agents
+4. **REPL mode** provides interactive session with tab-completion
+
+### Why Subprocess?
+
+VideoCaptioner's CLI is:
+- **Production-tested** with 50+ unit tests and 200+ QA test cases
+- **Feature-complete** with 7 subcommands covering the full pipeline
+- **Well-documented** with clear `--help` text and exit codes
+- **Actively maintained** on PyPI with automated releases
+
+Wrapping via subprocess preserves all these qualities without reimplementation.
+
+## Coverage
+
+### Transcription (4 ASR engines)
+- `bijian` — Free, Chinese & English, no setup needed
+- `jianying` — Free, Chinese & English, no setup needed
+- `whisper-api` — All languages, OpenAI-compatible API
+- `whisper-cpp` — All languages, local model
+
+### Subtitle Processing
+- **Split** — Semantic re-segmentation via LLM
+- **Optimize** — Fix ASR errors, punctuation, formatting via LLM
+- **Translate** — 38 languages, 3 translators (LLM, Bing free, Google free)
+- **Layout** — target-above, source-above, target-only, source-only
+
+### Video Synthesis
+- **Soft subtitles** — Embedded subtitle track (switchable)
+- **Hard subtitles** — Burned into video frames
+- **ASS style** — Traditional outline/shadow with presets (default, anime, vertical)
+- **Rounded style** — Modern rounded background boxes
+- **Customizable** — Inline JSON override for any style parameter
+- **Quality levels** — ultra (CRF 18), high (CRF 23), medium (CRF 28), low (CRF 32)
+
+### Utilities
+- Configuration management (TOML config + env vars)
+- Style preset listing with full parameters
+- Online video download (YouTube, Bilibili, etc.)
+
+## Testing Strategy
+
+- **Unit tests**: Mock subprocess calls, verify argument construction
+- **End-to-end tests**: Real videocaptioner CLI with test media files
+- **Prerequisite**: `videocaptioner` and `ffmpeg` must be installed
+
+## Limitations
+
+- Requires `videocaptioner` package to be installed separately
+- Free ASR engines (bijian/jianying) only support Chinese & English
+- LLM features require an OpenAI-compatible API key
+- Hard subtitle styles require FFmpeg
@@ -0,0 +1,71 @@
+# VideoCaptioner CLI
+
+AI-powered video captioning tool with beautiful customizable subtitle styles.
+
+## Architecture
+
+- **Subprocess backend** delegates to the production `videocaptioner` CLI (`pip install videocaptioner`)
+- **Click** provides the CLI framework with subcommand groups and REPL
+- **JSON output mode** (`--json`) for agent consumption
+- **Free features included**: bijian/jianying ASR (Chinese/English), Bing/Google translation
+
+## Pipeline
+
+```
+Audio/Video → ASR Transcription → Subtitle Splitting → LLM Optimization → Translation → Video Synthesis
+                  (bijian/whisper)      (semantic)         (fix errors)      (38 languages)  (styled subtitles)
+```
+
+## Install
+
+```bash
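+# Install the harness (provides the `cli-anything-videocaptioner` entry point)
+pip install git+https://github.com/HKUDS/CLI-Anything.git#subdirectory=videocaptioner/agent-harness
+
+# Install the backend CLI and supporting libraries
+pip install videocaptioner click prompt-toolkit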
+```
+
+## Run
+
+```bash
+# One-shot: transcribe a Chinese video and add English subtitles
+cli-anything-videocaptioner process video.mp4 --asr bijian --translator bing --target-language en --subtitle-mode hard
+
+# Transcribe only
+cli-anything-videocaptioner transcribe video.mp4 --asr bijian -o output.srt
+
+# Translate existing subtitles
+cli-anything-videocaptioner subtitle input.srt --translator google --target-language ja
+
+# Burn subtitles with anime style
+cli-anything-videocaptioner synthesize video.mp4 -s sub.srt --subtitle-mode hard --style anime
+
+# Custom style (red outline, large font)
+cli-anything-videocaptioner synthesize video.mp4 -s sub.srt --subtitle-mode hard \
+  --style-override '{"outline_color": "#ff0000", "font_size": 48}'
+
+# JSON output mode (for agent consumption)
+cli-anything-videocaptioner --json transcribe video.mp4 --asr bijian
+
+# Interactive REPL
+cli-anything-videocaptioner
+```
+
+## Subtitle Styles
+
+Two rendering modes for beautiful subtitles:
+
+**ASS mode** — traditional outline/shadow:
+- Presets: `default` (white+black), `anime` (warm+orange), `vertical` (portrait videos)
+
+**Rounded mode** — modern rounded background boxes:
+- Preset: `rounded` (dark text on semi-transparent background)
+
+Fully customizable via `--style-override` with inline JSON.
+
+## Coverage
+
+| Feature | Capabilities |
+|---------|----------|
+| Transcription | 4 ASR engines, auto language detection, word timestamps |
+| Subtitle Processing | Split + optimize + translate, 3 translators, 38 languages |
+| Video Synthesis | Soft/hard subtitles, 4 quality levels, 5 style presets |
+| Styles | ASS outline + rounded background, inline JSON customization |
+| Utilities | Config management, style listing, video download |
@@ -0,0 +1,91 @@
+"""Full pipeline — transcribe → optimize → translate → synthesize in one command."""
+
+from cli_anything.videocaptioner.utils.vc_backend import run_quiet
+
+
+def process(
+    input_path: str,
+    output_path: str | None = None,
+    asr: str = "bijian",
+    language: str = "auto",
+    translator: str | None = None,
+    target_language: str | None = None,
+    subtitle_mode: str = "soft",
+    quality: str = "medium",
+    layout: str | None = None,
+    style: str | None = None,
+    style_override: str | None = None,
+    render_mode: str | None = None,
+    no_optimize: bool = False,
+    no_translate: bool = False,
+    no_split: bool = False,
+    no_synthesize: bool = False,
+    reflect: bool = False,
+    prompt: str | None = None,
+    api_key: str | None = None,
+    api_base: str | None = None,
+    model: str | None = None,
+) -> str:
+    """Run the complete captioning pipeline.
+
+    Args:
+        input_path: Video or audio file path.
+        output_path: Output file or directory path.
+        asr: ASR engine.
+        language: Source language.
+        translator: Translation service.
+        target_language: Target language.
+        subtitle_mode: soft or hard.
+        quality: Video quality.
+        layout: Bilingual layout.
+        style: Style preset name.
+        style_override: Inline JSON style override.
+        render_mode: ass or rounded.
+        no_optimize: Skip optimization.
+        no_translate: Skip translation.
+        no_split: Skip re-segmentation.
+        no_synthesize: Skip video synthesis.
+        reflect: Reflective translation.
+        prompt: Custom LLM prompt.
+        api_key: LLM API key.
+        api_base: LLM API base URL.
+        model: LLM model name.
+
+    Returns:
+        Output file path.
+    """
+    args = ["process", input_path, "--asr", asr, "--language", language,
+            "--subtitle-mode", subtitle_mode, "--quality", quality]
+    if output_path:
+        args += ["-o", output_path]
+    if translator:
+        args += ["--translator", translator]
+    if target_language:
+        args += ["--target-language", target_language]
+    if layout:
+        args += ["--layout", layout]
+    if style:
+        args += ["--style", style]
+    if style_override:
+        args += ["--style-override", style_override]
+    if render_mode:
+        args += ["--render-mode", render_mode]
+    if no_optimize:
+        args.append("--no-optimize")
+    if no_translate:
+        args.append("--no-translate")
+    if no_split:
+        args.append("--no-split")
+    if no_synthesize:
+        args.append("--no-synthesize")
+    if reflect:
+        args.append("--reflect")
+    if prompt:
+        args += ["--prompt", prompt]
+    if api_key:
+        args += ["--api-key", api_key]
+    if api_base:
+        args += ["--api-base", api_base]
+    if model:
+        args += ["--model", model]
+    return run_quiet(args)
@@ -0,0 +1,68 @@
+"""Subtitle processing — optimize and translate subtitle files."""
+
+from cli_anything.videocaptioner.utils.vc_backend import run_quiet
+
+
+def process_subtitle(
+    input_path: str,
+    output_path: str | None = None,
+    translator: str | None = None,
+    target_language: str | None = None,
+    format: str = "srt",
+    layout: str | None = None,
+    no_optimize: bool = False,
+    no_translate: bool = False,
+    no_split: bool = False,
+    reflect: bool = False,
+    prompt: str | None = None,
+    api_key: str | None = None,
+    api_base: str | None = None,
+    model: str | None = None,
+) -> str:
+    """Optimize and/or translate a subtitle file.
+
+    Args:
+        input_path: Subtitle file (.srt, .ass, .vtt).
+        output_path: Output file or directory path.
+        translator: Translation service (llm, bing, google).
+        target_language: Target language BCP 47 code.
+        format: Output format (srt, ass, txt, json).
+        layout: Bilingual layout (target-above, source-above, target-only, source-only).
+        no_optimize: Skip LLM optimization.
+        no_translate: Skip translation.
+        no_split: Skip re-segmentation.
+        reflect: Enable reflective translation (LLM only).
+        prompt: Custom LLM prompt.
+        api_key: LLM API key.
+        api_base: LLM API base URL.
+        model: LLM model name.
+
+    Returns:
+        Output file path.
+    """
+    args = ["subtitle", input_path, "--format", format]
+    if output_path:
+        args += ["-o", output_path]
+    if translator:
+        args += ["--translator", translator]
+    if target_language:
+        args += ["--target-language", target_language]
+    if layout:
+        args += ["--layout", layout]
+    if no_optimize:
+        args.append("--no-optimize")
+    if no_translate:
+        args.append("--no-translate")
+    if no_split:
+        args.append("--no-split")
+    if reflect:
+        args.append("--reflect")
+    if prompt:
+        args += ["--prompt", prompt]
+    if api_key:
+        args += ["--api-key", api_key]
+    if api_base:
+        args += ["--api-base", api_base]
+    if model:
+        args += ["--model", model]
+    return run_quiet(args)