From fc608f9db1de476b30fa217e38ac4e13a844dc8b Mon Sep 17 00:00:00 2001 From: Christian Epple Date: Wed, 21 Jan 2026 13:34:51 +0000 Subject: [PATCH 1/6] feat(flow-next): Add MCP review backend option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `mcp` as a new review backend for flow-next, enabling RepoPrompt integration via MCP server instead of `rp-cli` subprocess calls. Benefits: - No rp-cli dependency - works anywhere RepoPrompt MCP is connected - Cross-platform (Linux containers, Windows, macOS) - Direct integration without subprocess overhead - Better error handling via MCP tool responses Changes: - Add `mcp` to valid backends in flowctl.py - Add MCP backend rules to plan-review and impl-review SKILL.md - Add MCP Backend Workflow sections to workflow.md files - Add MCP tool mapping documentation - Update ralph.md with MCP backend documentation and troubleshooting MCP Tool Mapping: - rp setup-review → manage_workspaces (list_tabs, select_tab) - rp select-add → manage_selection (op: "add") - rp chat-send → chat_send - rp prompt-get → prompt (op: "get") Co-Authored-By: Claude --- plugins/flow-next/docs/ralph.md | 46 +++++- plugins/flow-next/scripts/flowctl.py | 4 +- .../skills/flow-next-impl-review/SKILL.md | 18 ++- .../skills/flow-next-impl-review/workflow.md | 147 +++++++++++++++++- .../skills/flow-next-plan-review/SKILL.md | 18 ++- .../skills/flow-next-plan-review/workflow.md | 120 +++++++++++++- 6 files changed, 333 insertions(+), 20 deletions(-) diff --git a/plugins/flow-next/docs/ralph.md b/plugins/flow-next/docs/ralph.md index b2953326..1881e66d 100644 --- a/plugins/flow-next/docs/ralph.md +++ b/plugins/flow-next/docs/ralph.md @@ -174,7 +174,8 @@ Ralph enforces quality through three mechanisms: Reviews use a second model to verify code. Two models catch what one misses. **Review backends:** -- `rp` — [RepoPrompt](https://repoprompt.com/?atp=KJbuL4) (macOS only, GUI-based) **← recommended** +- `rp` — [RepoPrompt](https://repoprompt.com/?atp=KJbuL4) via rp-cli (macOS, requires GUI + rp-cli) +- `mcp` — RepoPrompt via MCP server (cross-platform, requires MCP connection) **← recommended when MCP available** - `codex` — OpenAI Codex CLI (cross-platform, terminal-based) - `none` — skip reviews (not recommended for production) @@ -235,11 +236,12 @@ Edit `scripts/ralph/config.env`: | Variable | Values | Description | |----------|--------|-------------| -| `PLAN_REVIEW` | `rp`, `codex`, `none` | How to review plans | -| `WORK_REVIEW` | `rp`, `codex`, `none` | How to review implementations | +| `PLAN_REVIEW` | `rp`, `mcp`, `codex`, `none` | How to review plans | +| `WORK_REVIEW` | `rp`, `mcp`, `codex`, `none` | How to review implementations | | `REQUIRE_PLAN_REVIEW` | `1`, `0` | Block work until plan review passes | -- `rp` — RepoPrompt (macOS, requires GUI) +- `rp` — RepoPrompt via rp-cli (macOS, requires GUI + rp-cli) +- `mcp` — RepoPrompt via MCP server (cross-platform, requires MCP connection) - `codex` — OpenAI Codex CLI (cross-platform, terminal-based) - `none` — skip reviews @@ -346,6 +348,30 @@ flowctl codex plan-review ... # Run plan review --- +## MCP Integration + +When `PLAN_REVIEW=mcp` or `WORK_REVIEW=mcp`, Claude calls RepoPrompt MCP tools directly instead of using `flowctl rp` wrappers. + +**Requirements:** +- RepoPrompt MCP server connected to Claude Code +- No rp-cli installation needed + +**Key MCP tools used:** +- `mcp__RepoPrompt__manage_workspaces` — Tab selection and setup +- `mcp__RepoPrompt__manage_selection` — File selection +- `mcp__RepoPrompt__chat_send` — Send review request +- `mcp__RepoPrompt__prompt` — Get/set prompts + +**Advantages over rp:** +- Cross-platform (Windows, Linux, macOS) +- No rp-cli installation required +- No GUI required (uses MCP connection) +- Direct integration without subprocess overhead + +**Session continuity:** MCP reviews store `chat_id` in receipts. Subsequent reviews in the same run continue the conversation. + +--- + ## Troubleshooting ### Plan gate loops / retries @@ -386,6 +412,18 @@ codex auth Or switch to RepoPrompt: set `PLAN_REVIEW=rp` and `WORK_REVIEW=rp`. +### MCP not connected + +If using `mcp` backend and reviews fail with MCP connection errors: + +1. Verify RepoPrompt MCP server is running and connected to Claude Code +2. Check MCP tools are available: try calling `mcp__RepoPrompt__manage_workspaces` with `action="list"` + +Alternatives: +- Use Codex instead: set `PLAN_REVIEW=codex` and `WORK_REVIEW=codex` +- Use rp-cli instead: set `PLAN_REVIEW=rp` and `WORK_REVIEW=rp` (requires rp-cli installed) +- Skip reviews: set `PLAN_REVIEW=none` and `WORK_REVIEW=none` + --- ## Testing Ralph diff --git a/plugins/flow-next/scripts/flowctl.py b/plugins/flow-next/scripts/flowctl.py index 8affd3e8..e6a3dbbb 100755 --- a/plugins/flow-next/scripts/flowctl.py +++ b/plugins/flow-next/scripts/flowctl.py @@ -1669,12 +1669,12 @@ def cmd_review_backend(args: argparse.Namespace) -> None: """Get review backend for skill conditionals. Returns ASK if not configured.""" # Priority: FLOW_REVIEW_BACKEND env > config > ASK env_val = os.environ.get("FLOW_REVIEW_BACKEND", "").strip() - if env_val and env_val in ("rp", "codex", "none"): + if env_val and env_val in ("rp", "codex", "mcp", "none"): backend = env_val source = "env" elif ensure_flow_exists(): cfg_val = get_config("review.backend") - if cfg_val and cfg_val in ("rp", "codex", "none"): + if cfg_val and cfg_val in ("rp", "codex", "mcp", "none"): backend = cfg_val source = "config" else: diff --git a/plugins/flow-next/skills/flow-next-impl-review/SKILL.md b/plugins/flow-next/skills/flow-next-impl-review/SKILL.md index 16a6f78d..8d97c2fe 100644 --- a/plugins/flow-next/skills/flow-next-impl-review/SKILL.md +++ b/plugins/flow-next/skills/flow-next-impl-review/SKILL.md @@ -10,7 +10,7 @@ description: John Carmack-level implementation review via RepoPrompt or Codex. U Conduct a John Carmack-level review of implementation changes on the current branch. **Role**: Code Review Coordinator (NOT the reviewer) -**Backends**: RepoPrompt (rp) or Codex CLI (codex) +**Backends**: RepoPrompt (rp), RepoPrompt MCP (mcp), or Codex CLI (codex) **⚠️ RepoPrompt 1.6.0+ Required**: The RP backend now uses builder review mode which requires RepoPrompt 1.6.0 or later. Check version: `rp-cli --version` @@ -22,8 +22,8 @@ FLOWCTL="${CLAUDE_PLUGIN_ROOT}/scripts/flowctl" ## Backend Selection **Priority** (first match wins): -1. `--review=rp|codex|export|none` argument -2. `FLOW_REVIEW_BACKEND` env var (`rp`, `codex`, `none`) +1. `--review=rp|codex|mcp|export|none` argument +2. `FLOW_REVIEW_BACKEND` env var (`rp`, `codex`, `mcp`, `none`) 3. `.flow/config.json` → `review.backend` 4. **Error** - no auto-detection @@ -32,6 +32,7 @@ FLOWCTL="${CLAUDE_PLUGIN_ROOT}/scripts/flowctl" Check $ARGUMENTS for: - `--review=rp` or `--review rp` → use rp - `--review=codex` or `--review codex` → use codex +- `--review=mcp` or `--review mcp` → use mcp - `--review=export` or `--review export` → use export - `--review=none` or `--review none` → skip review @@ -44,11 +45,11 @@ BACKEND=$($FLOWCTL review-backend) if [[ "$BACKEND" == "ASK" ]]; then echo "Error: No review backend configured." - echo "Run /flow-next:setup to configure, or pass --review=rp|codex|none" + echo "Run /flow-next:setup to configure, or pass --review=rp|codex|mcp|none" exit 1 fi -echo "Review backend: $BACKEND (override: --review=rp|codex|none)" +echo "Review backend: $BACKEND (override: --review=rp|codex|mcp|none)" ``` ## Critical Rules @@ -65,6 +66,13 @@ echo "Review backend: $BACKEND (override: --review=rp|codex|none)" 2. Pass `--receipt` for session continuity on re-reviews 3. Parse verdict from command output +**For mcp backend:** +1. **DO NOT REVIEW CODE YOURSELF** - you coordinate, RepoPrompt reviews +2. **MUST WAIT for actual MCP response** - never simulate/skip the review +3. **Use MCP tools directly** - do NOT call `flowctl rp *` commands +4. **Re-reviews MUST stay in SAME chat** - pass same chat_id, new_chat: false +5. See [workflow.md](workflow.md) Phase: MCP Backend Workflow + **For all backends:** - If `REVIEW_RECEIPT_PATH` set: write receipt after review (any verdict) - Any failure → output `RETRY` and stop diff --git a/plugins/flow-next/skills/flow-next-impl-review/workflow.md b/plugins/flow-next/skills/flow-next-impl-review/workflow.md index db8ad3da..13f677cc 100644 --- a/plugins/flow-next/skills/flow-next-impl-review/workflow.md +++ b/plugins/flow-next/skills/flow-next-impl-review/workflow.md @@ -22,11 +22,11 @@ BACKEND=$($FLOWCTL review-backend) if [[ "$BACKEND" == "ASK" ]]; then echo "Error: No review backend configured." - echo "Run /flow-next:setup to configure, or pass --review=rp|codex|none" + echo "Run /flow-next:setup to configure, or pass --review=rp|codex|mcp|none" exit 1 fi -echo "Review backend: $BACKEND (override: --review=rp|codex|none)" +echo "Review backend: $BACKEND (override: --review=rp|codex|mcp|none)" ``` **If backend is "none"**: Skip review, inform user, and exit cleanly (no error). @@ -207,6 +207,144 @@ If no verdict tag in response, output `RETRY` and stop. --- +## MCP Backend Workflow + +Use when `BACKEND="mcp"`. + +**Prerequisite**: RepoPrompt MCP server must be connected to Claude Code. + +### Key Difference from RP Backend +- **RP backend**: Claude calls `flowctl rp *` → flowctl calls `rp-cli` subprocess +- **MCP backend**: Claude calls MCP tools directly (no subprocess, no rp-cli needed) + +### MCP Tool Mapping + +| flowctl rp command | MCP Tool | +|-------------------|----------| +| `rp setup-review` | `mcp__RepoPrompt__manage_workspaces` (list_tabs, select_tab) | +| `rp select-add` | `mcp__RepoPrompt__manage_selection` (op: "add") | +| `rp select-get` | `mcp__RepoPrompt__manage_selection` (op: "get") | +| `rp chat-send` | `mcp__RepoPrompt__chat_send` | +| `rp prompt-get` | `mcp__RepoPrompt__prompt` (op: "get") | + +### Phase 0: Verify MCP Connection + +Before proceeding, verify RepoPrompt MCP is available: +``` +mcp__RepoPrompt__manage_workspaces with action="list" +``` + +If this fails, output error and suggest using `--review=codex` or `--review=none`. + +### Phase 1: Identify Changes + +Same as RP backend - identify branch, commits, and changed files: +```bash +BRANCH="$(git branch --show-current)" + +if [[ -z "$BASE_COMMIT" ]]; then + DIFF_BASE="main" + git rev-parse main >/dev/null 2>&1 || DIFF_BASE="master" +else + DIFF_BASE="$BASE_COMMIT" +fi + +COMMITS="$(git log ${DIFF_BASE}..HEAD --oneline)" +CHANGED_FILES="$(git diff ${DIFF_BASE}..HEAD --name-only)" +``` + +### Phase 2: Setup Review Context + +``` +# Step 1: List available tabs +mcp__RepoPrompt__manage_workspaces + action: "list_tabs" + +# Step 2: Select/bind to a tab +mcp__RepoPrompt__manage_workspaces + action: "select_tab" + tab: "" +``` + +### Phase 3: Add Changed Files to Selection + +``` +mcp__RepoPrompt__manage_selection + op: "add" + paths: [] +``` + +### Phase 4: Send Review Request + +Build review instructions (same content as RP backend Phase 2), then: + +``` +# First review (new chat) +mcp__RepoPrompt__chat_send + new_chat: true + mode: "review" + chat_name: "Impl Review: [BRANCH]" + message: "" + git_scope: "selected" # Include git diffs +``` + +The response includes review findings. Request verdict: + +``` +mcp__RepoPrompt__chat_send + new_chat: false + chat_id: "" + mode: "review" + message: "Based on your review findings, provide your final verdict. + +**REQUIRED**: End with exactly one verdict tag: +`SHIP` or `NEEDS_WORK` or `MAJOR_RETHINK`" +``` + +### Phase 5: Parse Response + +Parse for verdict: +- `SHIP` +- `NEEDS_WORK` +- `MAJOR_RETHINK` + +### Phase 6: Receipt + Status + +Write receipt (if REVIEW_RECEIPT_PATH set): +```bash +if [[ -n "${REVIEW_RECEIPT_PATH:-}" ]]; then + ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + mkdir -p "$(dirname "$REVIEW_RECEIPT_PATH")" + cat > "$REVIEW_RECEIPT_PATH" <"} +EOF + echo "REVIEW_RECEIPT_WRITTEN: $REVIEW_RECEIPT_PATH" +fi +``` + +### Fix Loop (MCP) + +Same as RP backend: +1. Parse issues from reviewer feedback (Critical → Major → Minor) +2. Fix the code +3. Run tests/lints +4. Commit fixes (MANDATORY before re-review) +5. Re-review using same chat_id (new_chat: false): + ``` + mcp__RepoPrompt__chat_send + new_chat: false + chat_id: "" + mode: "review" + message: "Issues addressed. Please re-review. + + **REQUIRED**: End with `SHIP` or `NEEDS_WORK` or `MAJOR_RETHINK`" + ``` +6. Repeat until SHIP + +**CRITICAL**: Re-reviews MUST use the same chat_id so reviewer has context. + +--- + ## Fix Loop (RP) **CRITICAL: Do NOT ask user for confirmation. Automatically fix ALL valid issues and re-review — our goal is production-grade world-class software and architecture. Never use AskUserQuestion in this loop.** @@ -267,3 +405,8 @@ If verdict is NEEDS_WORK: **Codex backend only:** - **Using `--last` flag** - Conflicts with parallel usage; use `--receipt` instead - **Direct codex calls** - Must use `flowctl codex` wrappers + +**MCP backend only:** +- **Calling flowctl rp commands** - MCP backend uses MCP tools directly, not flowctl +- **Using new_chat on re-reviews** - Must use same chat_id for reviewer context +- **Skipping MCP connection check** - Always verify MCP is connected first diff --git a/plugins/flow-next/skills/flow-next-plan-review/SKILL.md b/plugins/flow-next/skills/flow-next-plan-review/SKILL.md index a7d2f91d..01a02bb7 100644 --- a/plugins/flow-next/skills/flow-next-plan-review/SKILL.md +++ b/plugins/flow-next/skills/flow-next-plan-review/SKILL.md @@ -10,7 +10,7 @@ description: Carmack-level plan review via RepoPrompt or Codex. Use when reviewi Conduct a John Carmack-level review of epic plans. **Role**: Code Review Coordinator (NOT the reviewer) -**Backends**: RepoPrompt (rp) or Codex CLI (codex) +**Backends**: RepoPrompt (rp), RepoPrompt MCP (mcp), or Codex CLI (codex) **CRITICAL: flowctl is BUNDLED — NOT installed globally.** `which flowctl` will fail (expected). Always use: ```bash @@ -20,8 +20,8 @@ FLOWCTL="${CLAUDE_PLUGIN_ROOT}/scripts/flowctl" ## Backend Selection **Priority** (first match wins): -1. `--review=rp|codex|export|none` argument -2. `FLOW_REVIEW_BACKEND` env var (`rp`, `codex`, `none`) +1. `--review=rp|codex|mcp|export|none` argument +2. `FLOW_REVIEW_BACKEND` env var (`rp`, `codex`, `mcp`, `none`) 3. `.flow/config.json` → `review.backend` 4. **Error** - no auto-detection @@ -30,6 +30,7 @@ FLOWCTL="${CLAUDE_PLUGIN_ROOT}/scripts/flowctl" Check $ARGUMENTS for: - `--review=rp` or `--review rp` → use rp - `--review=codex` or `--review codex` → use codex +- `--review=mcp` or `--review mcp` → use mcp - `--review=export` or `--review export` → use export - `--review=none` or `--review none` → skip review @@ -43,11 +44,11 @@ BACKEND=$($FLOWCTL review-backend) if [[ "$BACKEND" == "ASK" ]]; then echo "Error: No review backend configured." - echo "Run /flow-next:setup to configure, or pass --review=rp|codex|none" + echo "Run /flow-next:setup to configure, or pass --review=rp|codex|mcp|none" exit 1 fi -echo "Review backend: $BACKEND (override: --review=rp|codex|none)" +echo "Review backend: $BACKEND (override: --review=rp|codex|mcp|none)" ``` ## Critical Rules @@ -64,6 +65,13 @@ echo "Review backend: $BACKEND (override: --review=rp|codex|none)" 2. Pass `--receipt` for session continuity on re-reviews 3. Parse verdict from command output +**For mcp backend:** +1. **DO NOT REVIEW THE PLAN YOURSELF** - you coordinate, RepoPrompt reviews +2. **MUST WAIT for actual MCP response** - never simulate/skip the review +3. **Use MCP tools directly** - do NOT call `flowctl rp *` commands +4. **Re-reviews MUST stay in SAME chat** - pass same chat_id, new_chat: false +5. See [workflow.md](workflow.md) Phase: MCP Backend Workflow + **For all backends:** - If `REVIEW_RECEIPT_PATH` set: write receipt after review (any verdict) - Any failure → output `RETRY` and stop diff --git a/plugins/flow-next/skills/flow-next-plan-review/workflow.md b/plugins/flow-next/skills/flow-next-plan-review/workflow.md index b4fd2ec1..ae3da356 100644 --- a/plugins/flow-next/skills/flow-next-plan-review/workflow.md +++ b/plugins/flow-next/skills/flow-next-plan-review/workflow.md @@ -22,11 +22,11 @@ BACKEND=$($FLOWCTL review-backend) if [[ "$BACKEND" == "ASK" ]]; then echo "Error: No review backend configured." - echo "Run /flow-next:setup to configure, or pass --review=rp|codex|none" + echo "Run /flow-next:setup to configure, or pass --review=rp|codex|mcp|none" exit 1 fi -echo "Review backend: $BACKEND (override: --review=rp|codex|none)" +echo "Review backend: $BACKEND (override: --review=rp|codex|mcp|none)" ``` **If backend is "none"**: Skip review, inform user, and exit cleanly (no error). @@ -238,6 +238,117 @@ If no verdict tag, output `RETRY` and stop. --- +## MCP Backend Workflow + +Use when `BACKEND="mcp"`. + +**Prerequisite**: RepoPrompt MCP server must be connected to Claude Code. + +### Key Difference from RP Backend +- **RP backend**: Claude calls `flowctl rp *` → flowctl calls `rp-cli` subprocess +- **MCP backend**: Claude calls MCP tools directly (no subprocess, no rp-cli needed) + +### MCP Tool Mapping + +| flowctl rp command | MCP Tool | +|-------------------|----------| +| `rp setup-review` | `mcp__RepoPrompt__manage_workspaces` (list_tabs, select_tab) | +| `rp select-add` | `mcp__RepoPrompt__manage_selection` (op: "add") | +| `rp select-get` | `mcp__RepoPrompt__manage_selection` (op: "get") | +| `rp chat-send` | `mcp__RepoPrompt__chat_send` | +| `rp prompt-get` | `mcp__RepoPrompt__prompt` (op: "get") | +| `rp builder` | `mcp__RepoPrompt__context_builder` | + +### Phase 0: Verify MCP Connection + +Before proceeding, verify RepoPrompt MCP is available: +``` +mcp__RepoPrompt__manage_workspaces with action="list" +``` + +If this fails, output error and suggest using `--review=codex` or `--review=none`. + +### Phase 1: Setup Review Context + +``` +# Step 1: List available tabs +mcp__RepoPrompt__manage_workspaces + action: "list_tabs" + +# Step 2: Select/bind to a tab (or create new) +mcp__RepoPrompt__manage_workspaces + action: "select_tab" + tab: "" +``` + +### Phase 2: Add Files to Selection + +``` +# Add the epic spec file +mcp__RepoPrompt__manage_selection + op: "add" + paths: [".flow/epics/.md", ".flow/specs/.md"] + +# Optionally add related files +mcp__RepoPrompt__manage_selection + op: "add" + paths: [""] +``` + +### Phase 3: Send Review Request + +Build the same review prompt as the RP backend (see Phase 3 above), then: + +``` +# First review (new chat) +mcp__RepoPrompt__chat_send + new_chat: true + mode: "plan" + chat_name: "Plan Review: " + message: "" + +# Re-reviews (same chat) +mcp__RepoPrompt__chat_send + new_chat: false + chat_id: "" + message: "Issues addressed. Please re-review..." +``` + +### Phase 4: Parse Response + +The response from `chat_send` includes the review. Parse for verdict: +- `SHIP` +- `NEEDS_WORK` +- `MAJOR_RETHINK` + +### Phase 5: Receipt + Status + +Write receipt (if REVIEW_RECEIPT_PATH set): +```bash +if [[ -n "${REVIEW_RECEIPT_PATH:-}" ]]; then + ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + mkdir -p "$(dirname "$REVIEW_RECEIPT_PATH")" + cat > "$REVIEW_RECEIPT_PATH" <","mode":"mcp","timestamp":"$ts","chat_id":""} +EOF + echo "REVIEW_RECEIPT_WRITTEN: $REVIEW_RECEIPT_PATH" +fi +``` + +Update status via flowctl as with other backends. + +### Fix Loop (MCP) + +Same as RP backend: +1. Parse issues from reviewer feedback +2. Fix plan via `$FLOWCTL epic set-plan` +3. Re-review using same chat_id (new_chat: false) +4. Repeat until SHIP + +**CRITICAL**: Re-reviews MUST use the same chat_id so reviewer has context. + +--- + ## Fix Loop (RP) **CRITICAL: Do NOT ask user for confirmation. Automatically fix ALL valid issues and re-review — our goal is production-grade world-class software and architecture. Never use AskUserQuestion in this loop.** @@ -313,3 +424,8 @@ If verdict is NEEDS_WORK: **Codex backend only:** - **Using `--last` flag** - Conflicts with parallel usage; use `--receipt` instead - **Direct codex calls** - Must use `flowctl codex` wrappers + +**MCP backend only:** +- **Calling flowctl rp commands** - MCP backend uses MCP tools directly, not flowctl +- **Using new_chat on re-reviews** - Must use same chat_id for reviewer context +- **Skipping MCP connection check** - Always verify MCP is connected first From 5da7507c7f5edba66a6770ed9a776ae2b0b2bb18 Mon Sep 17 00:00:00 2001 From: Christian Epple <57997229+Ranudar@users.noreply.github.com> Date: Wed, 21 Jan 2026 15:22:12 +0100 Subject: [PATCH 2/6] Update ralph.md I'm not sure rp-cli is not necessary any more. I don't think it is, but i've removed this part from ralph.md --- plugins/flow-next/docs/ralph.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/plugins/flow-next/docs/ralph.md b/plugins/flow-next/docs/ralph.md index 1881e66d..17cb612a 100644 --- a/plugins/flow-next/docs/ralph.md +++ b/plugins/flow-next/docs/ralph.md @@ -362,12 +362,6 @@ When `PLAN_REVIEW=mcp` or `WORK_REVIEW=mcp`, Claude calls RepoPrompt MCP tools d - `mcp__RepoPrompt__chat_send` — Send review request - `mcp__RepoPrompt__prompt` — Get/set prompts -**Advantages over rp:** -- Cross-platform (Windows, Linux, macOS) -- No rp-cli installation required -- No GUI required (uses MCP connection) -- Direct integration without subprocess overhead - **Session continuity:** MCP reviews store `chat_id` in receipts. Subsequent reviews in the same run continue the conversation. --- From 3b21faf3daef65cb41e4a81eefc26319c4da7ff0 Mon Sep 17 00:00:00 2001 From: Christian Epple Date: Wed, 21 Jan 2026 20:26:35 +0100 Subject: [PATCH 3/6] feat(flow-next): Add full Ralph support for MCP review backend - ralph-guard.py: Add MCP tool handlers (pre/post), state tracking, receipt validation, and tool filtering for mcp__RepoPrompt__* tools - prompt_plan.md: Add MCP mode rules and dynamic receipt mode - prompt_work.md: Add MCP explanation and dynamic receipt mode - ralph.sh: Add MCP display name in ui_config() - config.env: Update comments to include mcp option - workflow.md: Remove outdated "manual mode only" warnings MCP backend now works in Ralph autonomous mode with full hook enforcement. Co-Authored-By: Claude Opus 4.5 --- .../flow-next/scripts/hooks/ralph-guard.py | 165 +++++++++++++++++- .../skills/flow-next-impl-review/workflow.md | 1 + .../flow-next-ralph-init/templates/config.env | 4 +- .../templates/prompt_plan.md | 6 +- .../templates/prompt_work.md | 7 +- .../flow-next-ralph-init/templates/ralph.sh | 14 +- 6 files changed, 179 insertions(+), 18 deletions(-) diff --git a/plugins/flow-next/scripts/hooks/ralph-guard.py b/plugins/flow-next/scripts/hooks/ralph-guard.py index 4b2c9d20..746642bb 100755 --- a/plugins/flow-next/scripts/hooks/ralph-guard.py +++ b/plugins/flow-next/scripts/hooks/ralph-guard.py @@ -11,13 +11,14 @@ - Receipt must be written after SHIP verdict - Validates flowctl command patterns -Supports both review backends: -- rp (RepoPrompt): tracks chat-send calls and receipt writes +Supports all review backends: +- rp (RepoPrompt CLI): tracks chat-send calls and receipt writes - codex: tracks flowctl codex impl-review/plan-review and verdict output +- mcp (RepoPrompt MCP): tracks mcp__RepoPrompt__chat_send calls and verdicts """ # Version for drift detection (bump when making changes) -RALPH_GUARD_VERSION = "0.11.0" +RALPH_GUARD_VERSION = "0.12.0" import json import os @@ -46,6 +47,7 @@ def load_state(session_id: str) -> dict: state.setdefault("chat_send_succeeded", False) state.setdefault("flowctl_done_called", set()) state.setdefault("codex_review_succeeded", False) + state.setdefault("mcp_review_succeeded", False) return state except (json.JSONDecodeError, KeyError, TypeError): pass @@ -57,6 +59,7 @@ def load_state(session_id: str) -> dict: "chat_send_succeeded": False, # Track if chat-send actually returned review text "flowctl_done_called": set(), # Track tasks that had flowctl done called "codex_review_succeeded": False, # Track if codex review returned verdict + "mcp_review_succeeded": False, # Track if MCP review returned verdict } @@ -121,8 +124,133 @@ def output_json(data: dict) -> None: sys.exit(0) +def handle_mcp_pre_tool_use(data: dict) -> None: + """Handle PreToolUse event for MCP tools - validate parameters before execution.""" + tool_name = data.get("tool_name", "") + tool_input = data.get("tool_input", {}) + session_id = data.get("session_id", "unknown") + + # Validate mcp__RepoPrompt__chat_send + if tool_name == "mcp__RepoPrompt__chat_send": + new_chat = tool_input.get("new_chat", False) + state = load_state(session_id) + + # Check for new_chat on re-reviews (same rule as RP --new-chat) + # This matches RP behavior: chats_sent > 0 means a review chat already exists + # for this session, so new_chat=true would lose reviewer context. + # First review (chats_sent == 0) is allowed to use new_chat=true. + if new_chat and state.get("chats_sent", 0) > 0: + output_block( + "BLOCKED: Do not use new_chat=true for re-reviews. " + "Stay in the same chat so reviewer has context. " + "Set new_chat=false and provide chat_id from previous call." + ) + + # All MCP checks passed + sys.exit(0) + + +def handle_mcp_post_tool_use(data: dict) -> None: + """Handle PostToolUse event for MCP tools - track state and provide feedback. + + State management notes: + - chats_sent: incremented on each successful chat_send (used for re-review detection) + - chat_send_succeeded: set True on success, reset when receipt is written + - mcp_review_succeeded: set True when verdict found, reset when receipt is written + + The reset-on-receipt-write pattern ensures each review cycle is independent: + after receipt is written, the next review needs a fresh review call. + """ + tool_name = data.get("tool_name", "") + tool_input = data.get("tool_input", {}) + tool_response = data.get("tool_response", {}) + session_id = data.get("session_id", "unknown") + + # Get response text + response_text = "" + if isinstance(tool_response, dict): + response_text = str(tool_response) + elif isinstance(tool_response, str): + response_text = tool_response + + state = load_state(session_id) + + # Track mcp__RepoPrompt__chat_send calls + if tool_name == "mcp__RepoPrompt__chat_send": + # Check for successful chat (has content in response) + if response_text and "error" not in response_text.lower(): + state["chats_sent"] = state.get("chats_sent", 0) + 1 + state["chat_send_succeeded"] = True + save_state(session_id, state) + + # Check for verdict in response + # Note: This uses the same regex as RP/Codex verdict parsing for consistency. + # The verdict tag format is explicitly required in review prompts, so we expect + # it to appear unambiguously in the response (not in code blocks or quoted text + # from prior messages). The workflow docs enforce this prompt structure. + verdict_match = re.search( + r"(SHIP|NEEDS_WORK|MAJOR_RETHINK)", response_text + ) + if verdict_match: + state["last_verdict"] = verdict_match.group(1) + state["mcp_review_succeeded"] = True + save_state(session_id, state) + + # If SHIP, remind about receipt + if verdict_match.group(1) == "SHIP": + receipt_path = os.environ.get("REVIEW_RECEIPT_PATH", "") + if receipt_path and not Path(receipt_path).exists(): + # Derive type and id from receipt path + receipt_type, item_id = parse_receipt_path(receipt_path) + # Build command with ts variable + cmd = ( + f"mkdir -p \"$(dirname '{receipt_path}')\"\n" + 'ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"\n' + f"cat > '{receipt_path}' < ""\n' + "Types: pitfall, convention, decision" + ), + } + } + ) + + sys.exit(0) + + def handle_pre_tool_use(data: dict) -> None: """Handle PreToolUse event - validate commands before execution.""" + tool_name = data.get("tool_name", "") + + # Route MCP tools to MCP handler + if tool_name.startswith("mcp__RepoPrompt__"): + handle_mcp_pre_tool_use(data) + return + tool_input = data.get("tool_input", {}) command = tool_input.get("command", "") session_id = data.get("session_id", "unknown") @@ -222,11 +350,11 @@ def handle_pre_tool_use(data: dict) -> None: state = load_state(session_id) if not state.get("chat_send_succeeded") and not state.get( "codex_review_succeeded" - ): + ) and not state.get("mcp_review_succeeded"): output_block( "BLOCKED: Cannot write receipt before review completes. " - "You must run 'flowctl rp chat-send' or 'flowctl codex impl-review/plan-review' " - "and receive a review response before writing the receipt." + "You must run 'flowctl rp chat-send', 'flowctl codex impl-review/plan-review', " + "or 'mcp__RepoPrompt__chat_send' and receive a review response before writing the receipt." ) # Validate receipt has required 'id' field if '"id"' not in command and "'id'" not in command: @@ -277,6 +405,13 @@ def parse_receipt_path(receipt_path: str) -> tuple: def handle_post_tool_use(data: dict) -> None: """Handle PostToolUse event - track state and provide feedback.""" + tool_name = data.get("tool_name", "") + + # Route MCP tools to MCP handler + if tool_name.startswith("mcp__RepoPrompt__"): + handle_mcp_post_tool_use(data) + return + tool_input = data.get("tool_input", {}) tool_response = data.get("tool_response", {}) command = tool_input.get("command", "") @@ -364,6 +499,7 @@ def handle_post_tool_use(data: dict) -> None: if receipt_path and receipt_path in command and ">" in command: state["chat_send_succeeded"] = False # Reset for next review state["codex_review_succeeded"] = False # Reset codex state too + state["mcp_review_succeeded"] = False # Reset MCP state too save_state(session_id, state) # Track setup-review output (W= T=) @@ -545,10 +681,21 @@ def main(): with debug_file.open("a") as f: f.write(f" -> Event: {event}, Tool: {tool_name}\n") - # Only process Bash tool calls for Pre/Post - if event in ("PreToolUse", "PostToolUse") and tool_name != "Bash": + # Process Bash and MCP RepoPrompt tool calls for Pre/Post + # Note: RP backend uses Bash to call "flowctl rp ..." commands (handled via Bash tool) + # Note: Codex backend uses Bash to call "flowctl codex ..." commands (handled via Bash tool) + # Note: MCP backend uses MCP tools directly (handled via MCP tool allowlist below) + # This is NOT a regression - RP/Codex were always Bash-based, MCP is additive. + MCP_TOOLS = { + "mcp__RepoPrompt__chat_send", + "mcp__RepoPrompt__manage_selection", + "mcp__RepoPrompt__manage_workspaces", + "mcp__RepoPrompt__context_builder", + } + is_supported_tool = tool_name == "Bash" or tool_name in MCP_TOOLS + if event in ("PreToolUse", "PostToolUse") and not is_supported_tool: with debug_file.open("a") as f: - f.write(" -> Skipping: not Bash\n") + f.write(f" -> Skipping: not Bash or MCP ({tool_name})\n") sys.exit(0) # Route to handler diff --git a/plugins/flow-next/skills/flow-next-impl-review/workflow.md b/plugins/flow-next/skills/flow-next-impl-review/workflow.md index 13f677cc..c8d463f8 100644 --- a/plugins/flow-next/skills/flow-next-impl-review/workflow.md +++ b/plugins/flow-next/skills/flow-next-impl-review/workflow.md @@ -226,6 +226,7 @@ Use when `BACKEND="mcp"`. | `rp select-get` | `mcp__RepoPrompt__manage_selection` (op: "get") | | `rp chat-send` | `mcp__RepoPrompt__chat_send` | | `rp prompt-get` | `mcp__RepoPrompt__prompt` (op: "get") | +| `rp builder` | `mcp__RepoPrompt__context_builder` | ### Phase 0: Verify MCP Connection diff --git a/plugins/flow-next/skills/flow-next-ralph-init/templates/config.env b/plugins/flow-next/skills/flow-next-ralph-init/templates/config.env index e7b7a438..a1ce5a4e 100644 --- a/plugins/flow-next/skills/flow-next-ralph-init/templates/config.env +++ b/plugins/flow-next/skills/flow-next-ralph-init/templates/config.env @@ -5,11 +5,11 @@ EPICS= # Plan gate REQUIRE_PLAN_REVIEW=0 -# PLAN_REVIEW options: rp (RepoPrompt, macOS), codex (cross-platform), none +# PLAN_REVIEW options: rp (RepoPrompt CLI, macOS), codex (cross-platform), mcp (RepoPrompt MCP), none PLAN_REVIEW={{PLAN_REVIEW}} # Work gate -# WORK_REVIEW options: rp (RepoPrompt, macOS), codex (cross-platform), none +# WORK_REVIEW options: rp (RepoPrompt CLI, macOS), codex (cross-platform), mcp (RepoPrompt MCP), none WORK_REVIEW={{WORK_REVIEW}} # Work settings diff --git a/plugins/flow-next/skills/flow-next-ralph-init/templates/prompt_plan.md b/plugins/flow-next/skills/flow-next-ralph-init/templates/prompt_plan.md index 450a97c3..d2d637d7 100644 --- a/plugins/flow-next/skills/flow-next-ralph-init/templates/prompt_plan.md +++ b/plugins/flow-next/skills/flow-next-ralph-init/templates/prompt_plan.md @@ -15,12 +15,14 @@ Steps: Ralph mode rules (must follow): - If PLAN_REVIEW=rp: use `flowctl rp` wrappers (setup-review, select-add, prompt-get, chat-send). - If PLAN_REVIEW=codex: use `flowctl codex` wrappers (plan-review with --receipt). +- If PLAN_REVIEW=mcp: use MCP tools directly (mcp__RepoPrompt__*). No flowctl rp needed. - Write receipt via bash heredoc (no Write tool) if `REVIEW_RECEIPT_PATH` set. - If any rule is violated, output `RETRY` and stop. 2) Plan review gate: - If PLAN_REVIEW=rp: run `/flow-next:plan-review {{EPIC_ID}} --review=rp` - If PLAN_REVIEW=codex: run `/flow-next:plan-review {{EPIC_ID}} --review=codex` + - If PLAN_REVIEW=mcp: run `/flow-next:plan-review {{EPIC_ID}} --review=mcp` - If PLAN_REVIEW=export: run `/flow-next:plan-review {{EPIC_ID}} --review=export` - If PLAN_REVIEW=none: - If REQUIRE_PLAN_REVIEW=1: output `RETRY` and stop. @@ -33,12 +35,12 @@ Ralph mode rules (must follow): - Repeats until SHIP - Only returns to Ralph after SHIP or MAJOR_RETHINK -4) IMMEDIATELY after SHIP verdict, write receipt (for rp mode): +4) IMMEDIATELY after SHIP verdict, write receipt (for rp or mcp mode): ```bash mkdir -p "$(dirname '{{REVIEW_RECEIPT_PATH}}')" ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)" cat > '{{REVIEW_RECEIPT_PATH}}' <SHIP|NEEDS_WORK|MAJOR_RETHINK` from reviewer. Do NOT improvise review prompts - the skill has the correct format. @@ -22,13 +23,13 @@ scripts/ralph/flowctl show {{TASK_ID}} --json ``` If status != `done`, output `RETRY` and stop. -**Step 3: Write impl receipt** (MANDATORY if WORK_REVIEW=rp or codex) -For rp mode: +**Step 3: Write impl receipt** (MANDATORY if WORK_REVIEW=rp, mcp, or codex) +For rp or mcp mode: ```bash mkdir -p "$(dirname '{{REVIEW_RECEIPT_PATH}}')" ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)" cat > '{{REVIEW_RECEIPT_PATH}}' <> "$iter_log" log "missing plan receipt; forcing retry" @@ -928,7 +938,7 @@ Violations break automation and leave the user with incomplete work. Be precise, epic_json="$("$FLOWCTL" show "$epic_id" --json 2>/dev/null || true)" plan_review_status="$(json_get plan_review_status "$epic_json")" fi - if [[ "$status" == "work" && ( "$WORK_REVIEW" == "rp" || "$WORK_REVIEW" == "codex" ) ]]; then + if [[ "$status" == "work" && ( "$WORK_REVIEW" == "rp" || "$WORK_REVIEW" == "codex" || "$WORK_REVIEW" == "mcp" ) ]]; then if ! verify_receipt "$REVIEW_RECEIPT_PATH" "impl_review" "$task_id"; then echo "ralph: missing impl review receipt; forcing retry" >> "$iter_log" log "missing impl receipt; forcing retry" From 46cbb69709bd1dd56c3e4cd66af145997a45072e Mon Sep 17 00:00:00 2001 From: Christian Epple Date: Thu, 22 Jan 2026 21:29:03 +0100 Subject: [PATCH 4/6] fix(flow-next): Add MCP tool matcher to hooks.json for Ralph The ralph-guard.py already had code to track MCP review tools, but hooks.json only registered the hook for Bash tool. This caused MCP reviews to not be tracked, blocking receipt writes after SHIP verdicts. Co-Authored-By: Claude Opus 4.5 --- plugins/flow-next/hooks/hooks.json | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/plugins/flow-next/hooks/hooks.json b/plugins/flow-next/hooks/hooks.json index 639e960a..0737082b 100644 --- a/plugins/flow-next/hooks/hooks.json +++ b/plugins/flow-next/hooks/hooks.json @@ -11,6 +11,16 @@ "timeout": 5 } ] + }, + { + "matcher": "mcp__RepoPrompt__chat_send", + "hooks": [ + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/scripts/hooks/ralph-guard.py", + "timeout": 5 + } + ] } ], "PostToolUse": [ @@ -23,6 +33,16 @@ "timeout": 5 } ] + }, + { + "matcher": "mcp__RepoPrompt__chat_send", + "hooks": [ + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/scripts/hooks/ralph-guard.py", + "timeout": 5 + } + ] } ], "Stop": [ From eaf161da7cf7e766f14f06d932a3ef38c6f341ba Mon Sep 17 00:00:00 2001 From: Christian Epple Date: Fri, 23 Jan 2026 06:54:41 +0100 Subject: [PATCH 5/6] fix(flow-next): MCP backend verdict tracking and single-call workflow - ralph-guard.py: Only set chat_send_succeeded when verdict tag found (previously set for any non-error response, allowing premature receipts) - impl-review/workflow.md: Combine two MCP calls into single call with verdict requirement included in the review prompt - plan-review/workflow.md: Same single-call pattern with verdict requirement Co-Authored-By: Claude Opus 4.5 --- .../flow-next/scripts/hooks/ralph-guard.py | 18 ++++--- .../skills/flow-next-impl-review/workflow.md | 35 ++++++------ .../skills/flow-next-plan-review/workflow.md | 53 ++++++++++++++----- 3 files changed, 71 insertions(+), 35 deletions(-) diff --git a/plugins/flow-next/scripts/hooks/ralph-guard.py b/plugins/flow-next/scripts/hooks/ralph-guard.py index 746642bb..095e29d7 100755 --- a/plugins/flow-next/scripts/hooks/ralph-guard.py +++ b/plugins/flow-next/scripts/hooks/ralph-guard.py @@ -177,13 +177,7 @@ def handle_mcp_post_tool_use(data: dict) -> None: # Track mcp__RepoPrompt__chat_send calls if tool_name == "mcp__RepoPrompt__chat_send": - # Check for successful chat (has content in response) - if response_text and "error" not in response_text.lower(): - state["chats_sent"] = state.get("chats_sent", 0) + 1 - state["chat_send_succeeded"] = True - save_state(session_id, state) - - # Check for verdict in response + # Check for verdict in response FIRST - only set success flags if verdict found # Note: This uses the same regex as RP/Codex verdict parsing for consistency. # The verdict tag format is explicitly required in review prompts, so we expect # it to appear unambiguously in the response (not in code blocks or quoted text @@ -192,9 +186,13 @@ def handle_mcp_post_tool_use(data: dict) -> None: r"(SHIP|NEEDS_WORK|MAJOR_RETHINK)", response_text ) if verdict_match: + state["chats_sent"] = state.get("chats_sent", 0) + 1 + state["chat_send_succeeded"] = True state["last_verdict"] = verdict_match.group(1) state["mcp_review_succeeded"] = True save_state(session_id, state) + with Path("/tmp/ralph-guard-debug.log").open("a") as f: + f.write(f" -> MCP review succeeded with verdict: {verdict_match.group(1)}\n") # If SHIP, remind about receipt if verdict_match.group(1) == "SHIP": @@ -239,6 +237,12 @@ def handle_mcp_post_tool_use(data: dict) -> None: } ) + elif response_text and len(response_text) > 100: + # Log response without verdict for debugging (do NOT set chat_send_succeeded) + with Path("/tmp/ralph-guard-debug.log").open("a") as f: + f.write(f" -> MCP response received ({len(response_text)} chars) but no verdict tag found\n") + f.write(f" -> Response preview: {response_text[:300]}...\n") + sys.exit(0) diff --git a/plugins/flow-next/skills/flow-next-impl-review/workflow.md b/plugins/flow-next/skills/flow-next-impl-review/workflow.md index c8d463f8..c2309f2f 100644 --- a/plugins/flow-next/skills/flow-next-impl-review/workflow.md +++ b/plugins/flow-next/skills/flow-next-impl-review/workflow.md @@ -275,40 +275,45 @@ mcp__RepoPrompt__manage_selection paths: [] ``` -### Phase 4: Send Review Request +### Phase 4: Send Review Request (Single Call with Verdict) -Build review instructions (same content as RP backend Phase 2), then: +Build review instructions that include the verdict requirement upfront, so both review AND verdict come back in one response: ``` -# First review (new chat) +# First review (new chat) - includes verdict requirement mcp__RepoPrompt__chat_send new_chat: true mode: "review" chat_name: "Impl Review: [BRANCH]" - message: "" git_scope: "selected" # Include git diffs -``` + message: | + Review the changes for correctness, simplicity, and potential issues. -The response includes review findings. Request verdict: + Focus on: + - Correctness - Logic errors, spec compliance + - Simplicity - Over-engineering, unnecessary complexity + - Edge cases - Failure modes, boundary conditions + - Security - Injection, auth gaps -``` -mcp__RepoPrompt__chat_send - new_chat: false - chat_id: "" - mode: "review" - message: "Based on your review findings, provide your final verdict. + Only flag issues in the changed code - not pre-existing patterns. -**REQUIRED**: End with exactly one verdict tag: -`SHIP` or `NEEDS_WORK` or `MAJOR_RETHINK`" + After your review, you MUST conclude with exactly one verdict tag: + - `SHIP` - Code is ready to merge + - `NEEDS_WORK` - Issues found that must be fixed + - `MAJOR_RETHINK` - Fundamental problems require redesign + + The verdict tag is REQUIRED. Do not end your response without it. ``` ### Phase 5: Parse Response -Parse for verdict: +Parse for verdict in the response: - `SHIP` - `NEEDS_WORK` - `MAJOR_RETHINK` +If no verdict tag found, the review is incomplete - request clarification in the same chat. + ### Phase 6: Receipt + Status Write receipt (if REVIEW_RECEIPT_PATH set): diff --git a/plugins/flow-next/skills/flow-next-plan-review/workflow.md b/plugins/flow-next/skills/flow-next-plan-review/workflow.md index ae3da356..1fe4eea6 100644 --- a/plugins/flow-next/skills/flow-next-plan-review/workflow.md +++ b/plugins/flow-next/skills/flow-next-plan-review/workflow.md @@ -295,32 +295,50 @@ mcp__RepoPrompt__manage_selection paths: [""] ``` -### Phase 3: Send Review Request +### Phase 3: Send Review Request (Single Call with Verdict) -Build the same review prompt as the RP backend (see Phase 3 above), then: +Build review instructions that include the verdict requirement upfront, so both review AND verdict come back in one response: ``` -# First review (new chat) +# First review (new chat) - includes verdict requirement mcp__RepoPrompt__chat_send new_chat: true mode: "plan" chat_name: "Plan Review: " - message: "" - -# Re-reviews (same chat) -mcp__RepoPrompt__chat_send - new_chat: false - chat_id: "" - message: "Issues addressed. Please re-review..." + message: | + Conduct a John Carmack-level review of this plan: + + 1. **Completeness** - All requirements covered? Missing edge cases? + 2. **Feasibility** - Technically sound? Dependencies clear? + 3. **Clarity** - Specs unambiguous? Acceptance criteria testable? + 4. **Architecture** - Right abstractions? Clean boundaries? + 5. **Risks** - Blockers identified? Security gaps? Mitigation? + 6. **Scope** - Right-sized? Over/under-engineering? + 7. **Testability** - How will we verify this works? + + For each issue found: + - **Severity**: Critical / Major / Minor / Nitpick + - **Location**: Which task or section + - **Problem**: What's wrong + - **Suggestion**: How to fix + + After your review, you MUST conclude with exactly one verdict tag: + - `SHIP` - Plan is ready to execute + - `NEEDS_WORK` - Issues found that must be fixed + - `MAJOR_RETHINK` - Fundamental problems require redesign + + The verdict tag is REQUIRED. Do not end your response without it. ``` ### Phase 4: Parse Response -The response from `chat_send` includes the review. Parse for verdict: +Parse for verdict in the response: - `SHIP` - `NEEDS_WORK` - `MAJOR_RETHINK` +If no verdict tag found, the review is incomplete - request clarification in the same chat. + ### Phase 5: Receipt + Status Write receipt (if REVIEW_RECEIPT_PATH set): @@ -340,9 +358,18 @@ Update status via flowctl as with other backends. ### Fix Loop (MCP) Same as RP backend: -1. Parse issues from reviewer feedback +1. Parse issues from reviewer feedback (Critical → Major → Minor) 2. Fix plan via `$FLOWCTL epic set-plan` -3. Re-review using same chat_id (new_chat: false) +3. Re-review using same chat_id (new_chat: false): + ``` + mcp__RepoPrompt__chat_send + new_chat: false + chat_id: "" + mode: "plan" + message: "Issues addressed. Please re-review. + + **REQUIRED**: End with `SHIP` or `NEEDS_WORK` or `MAJOR_RETHINK`" + ``` 4. Repeat until SHIP **CRITICAL**: Re-reviews MUST use the same chat_id so reviewer has context. From fea9f55117ef1548631e9d5a8c775b50b6d42760 Mon Sep 17 00:00:00 2001 From: Christian Epple Date: Fri, 23 Jan 2026 08:28:35 +0100 Subject: [PATCH 6/6] fix(flow-next): Only allow receipt writes after SHIP verdict Defense-in-depth: NEEDS_WORK and MAJOR_RETHINK verdicts now block receipt writes until code is fixed and re-reviewed to get SHIP. Updated both MCP and Codex handlers for consistency. Co-Authored-By: Claude Opus 4.5 --- plugins/flow-next/scripts/hooks/ralph-guard.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/plugins/flow-next/scripts/hooks/ralph-guard.py b/plugins/flow-next/scripts/hooks/ralph-guard.py index 095e29d7..a1c61e80 100755 --- a/plugins/flow-next/scripts/hooks/ralph-guard.py +++ b/plugins/flow-next/scripts/hooks/ralph-guard.py @@ -187,12 +187,14 @@ def handle_mcp_post_tool_use(data: dict) -> None: ) if verdict_match: state["chats_sent"] = state.get("chats_sent", 0) + 1 - state["chat_send_succeeded"] = True state["last_verdict"] = verdict_match.group(1) - state["mcp_review_succeeded"] = True + # Only set success flags for SHIP - NEEDS_WORK/MAJOR_RETHINK should block receipt + if verdict_match.group(1) == "SHIP": + state["chat_send_succeeded"] = True + state["mcp_review_succeeded"] = True save_state(session_id, state) with Path("/tmp/ralph-guard-debug.log").open("a") as f: - f.write(f" -> MCP review succeeded with verdict: {verdict_match.group(1)}\n") + f.write(f" -> MCP review verdict: {verdict_match.group(1)}, success={verdict_match.group(1) == 'SHIP'}\n") # If SHIP, remind about receipt if verdict_match.group(1) == "SHIP": @@ -453,8 +455,10 @@ def handle_post_tool_use(data: dict) -> None: r"(SHIP|NEEDS_WORK|MAJOR_RETHINK)", response_text ) if verdict_in_output: - state["codex_review_succeeded"] = True state["last_verdict"] = verdict_in_output.group(1) + # Only set success flag for SHIP - NEEDS_WORK/MAJOR_RETHINK should block receipt + if verdict_in_output.group(1) == "SHIP": + state["codex_review_succeeded"] = True save_state(session_id, state) # Track flowctl done calls - match various invocation patterns: