diff --git a/.agents/plugins/marketplace.json b/.agents/plugins/marketplace.json index bc2559de..230e4844 100644 --- a/.agents/plugins/marketplace.json +++ b/.agents/plugins/marketplace.json @@ -6,7 +6,7 @@ "plugins": [ { "name": "flow-next", - "version": "2.1.1", + "version": "2.1.2", "source": { "source": "local", "path": "./plugins/flow-next" diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 177f5ca8..c8136470 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -6,13 +6,13 @@ }, "metadata": { "description": "Plan-first workflows for Claude Code and Factory Droid. Ships flow-next: zero-dep, spec-driven, Ralph autonomous mode.", - "version": "2.1.1" + "version": "2.1.2" }, "plugins": [ { "name": "flow-next", "description": "Zero-dependency planning + execution with .flow/ task tracking and Ralph autonomous mode (multi-model review gates). Worker subagent per task for context isolation. Includes 21 subagents, 24 commands, 28 skills.", - "version": "2.1.1", + "version": "2.1.2", "author": { "name": "Gordon Mickel", "email": "gordon@mickel.tech", diff --git a/.flow/bin/flowctl.py b/.flow/bin/flowctl.py index 7725015f..291092d0 100755 --- a/.flow/bin/flowctl.py +++ b/.flow/bin/flowctl.py @@ -1021,8 +1021,11 @@ def save_task_definition(task_id: str, definition: dict) -> None: # because `load_flow_config()` always merges this default block in, an absent / # null / unrelated write must NOT activate the bridge. The bridge is active iff # raw `tracker.enabled == true` OR raw `tracker.type ∈ {linear, github}` (see -# `tracker_sync_active`). All `perEvent` leaves default `off`, so even a stray -# `enabled=true` does nothing until a specific event is opted in. +# `tracker_sync_active`). All `perEvent` leaves default `off`, so a stray +# `enabled=true` opts in no PER-EVENT lifecycle sync until a specific event is +# enabled — EXCEPT the two fn-66 unconditional bridge-active paths: make-pr's +# PR↔issue link + In Review push, and `land.merged`'s Done-on-merge (both ride the +# bridge-active predicate alone, not a perEvent leaf — see SKILL.md / steps.md). TRACKER_TYPES = {"linear", "github"} TRACKER_PER_EVENT_LEAVES = {"off", "pull", "push", "reconcile", "comment"} TRACKER_TIEBREAKS = {"flow-wins", "tracker-wins", "always-ask"} @@ -1068,9 +1071,17 @@ def get_default_tracker_config() -> dict: # /flow-next:qa skill treats any non-`off` value as `comment`. # Default `off` keeps every existing repo silent until opted in. "qa": "off", - # fn-60 (R13) — opt-in post-merge touchpoint for /flow-next:land - # (flip linked issue terminal + release/verdict comment). Nested - # like work.*; default off keeps non-land repos at zero overhead. + # fn-60 (R13) / fn-66 (R10) — post-merge touchpoint for /flow-next:land. + # fn-66 makes land.merged the SOLE `Done` driver and ACTIVE-BY-DEFAULT + # whenever the bridge is active (like make-pr's unconditional PR-link + # path) — the merge→Done projection rides the bridge-active predicate + # alone, NOT this leaf. A real merge is the only event that legitimately + # projects terminal Done (gated on the GitHub MERGED probe), so leaving + # it opt-in would strand boards at In Review post-merge. The schema + # default stays `off` (a bare enabled=true activates no lifecycle sync + # — fn-52.1 invariant); when the bridge IS active the land skill fires + # this touchpoint regardless of the leaf, which then only tunes the + # optional verdict comment, never the (MERGED-gated) status write. "land": {"merged": "off"}, }, "perTracker": { diff --git a/.flow/memory/bug/build-errors/policy-claim-inversion-sweep-all-2026-06-18.md b/.flow/memory/bug/build-errors/policy-claim-inversion-sweep-all-2026-06-18.md new file mode 100644 index 00000000..62b68120 --- /dev/null +++ b/.flow/memory/bug/build-errors/policy-claim-inversion-sweep-all-2026-06-18.md @@ -0,0 +1,28 @@ +--- +title: "Policy-claim inversion: sweep ALL surfaces (both ceremony copies, docs, CLI head" +date: "2026-06-18" +track: bug +category: build-errors +module: plugins/flow-next/skills/flow-next-tracker-sync/steps.md +tags: [fn-66, tracker-sync, ceremony-duplicate, dispatch-grammar, docs-parity, steps.md, SKILL.md] +problem_type: build-error +symptoms: 3 NEEDS_WORK rounds — each surfaced the next un-updated duplicate of an inverted policy claim (completionReview/land.merged Done ownership) +root_cause: "discovery ceremony + 'owns Done' prose duplicated across SKILL.md, steps.md, 3 docs pages, github.md, and flowctl.py header; first pass touched only the primary site" +resolution_type: fix +related_to: [bug/build-errors/id-grammar-widening-must-cover-the-full-2026-06-03, bug/build-errors/status-policy-map-needs-a-matching-2026-06-18] +--- + +## Problem +Re-scoping the tracker-sync lifecycle touchpoints (fn-66.2) took three NEEDS_WORK rounds because a policy claim ("completionReview flips Done/verified", "the one exception is make-pr") was duplicated across MORE surfaces than the first edit pass touched. The discovery ceremony exists in TWO places (`flow-next-tracker-sync/SKILL.md:87-94` AND `steps.md:49-60`); the stale "owns Done" / "one exception" prose also lived in `docs/teams.md`, `docs/flowctl.md`, `references/github.md`, and the `flowctl.py` activation header. Each review round surfaced the next un-updated copy. + +## What Didn't Work +Editing the "primary" site (SKILL.md ceremony + the three touchpoint files) and assuming the policy was now consistent. The reviewer kept (correctly) citing a stale duplicate — most notably `steps.md`'s second copy of the ceremony, which I'd missed entirely. + +## Solution +- Grep the ENTIRE skill + docs tree for the policy claim's keywords BEFORE the first review, not after: `grep -rn "flip Done\|owns the Done\|one exception\|completionReview reconcile\|enabled=true does nothing"` across `skills/`, `docs/`, and `scripts/flowctl.py`. +- The discovery ceremony is mirrored in `SKILL.md` AND `steps.md` — any ceremony-seed or activation-exception change must touch BOTH. +- Lifecycle dispatch grammar must stay canonical `operation: , event: ` — descriptors like "(In Review)", "+ link $PR_URL", "verdict + R-ID coverage" belong in surrounding prose, NEVER inside the operation token (memory mirror-regen-exposes-latent-canonical). Both the touchpoint dispatch AND its retro-fire copy carry the grammar. + +## Prevention +- When inverting a policy claim, treat it as a repo-wide string sweep: enumerate every surface (canonical skills, both ceremony copies, all docs pages, the CLI header comment) before sending to review. +- A dispatch-grammar change needs a grep for the op token across the touchpoint file's main path AND its end-of-run retro-fire path. diff --git a/.flow/memory/bug/build-errors/status-policy-map-needs-a-matching-2026-06-18.md b/.flow/memory/bug/build-errors/status-policy-map-needs-a-matching-2026-06-18.md new file mode 100644 index 00000000..400df2ef --- /dev/null +++ b/.flow/memory/bug/build-errors/status-policy-map-needs-a-matching-2026-06-18.md @@ -0,0 +1,25 @@ +--- +title: Status-policy map needs a matching reconcile-loop branch per rung (map ≠ write) +date: "2026-06-18" +track: bug +category: build-errors +module: plugins/flow-next/skills/flow-next-tracker-sync/references/status-sync.md +tags: [fn-66, tracker-sync, status, reconcile, who-wins, in-review, merge-evidence, rp-review] +problem_type: build-error +symptoms: "new normalized rung mapped in the table but unreachable — reconcile loop had no branch, setStatus never fired" +root_cause: edited the flow→normalized table + fixtures without adding the matching reconcile-loop branch; fixture asserted a setStatus the loop couldn't produce +resolution_type: fix +related_to: [bug/build-errors/id-grammar-widening-must-cover-the-full-2026-06-03, bug/build-errors/skill-prose-must-match-real-flowctl-2026-06-10] +--- + +## Problem +A status-policy reference table can map a flow state to a normalized rung (e.g. open PR → `in-review`) while the *reconcile loop* that actually writes status has NO branch for that rung — so the projected status silently falls through to the conflictTiebreak default and never drives a `setStatus`. fn-66.1 added `flowToNormalized(spec, prEvidence)` mapping open-PR → `in-review`, but the `reconcileStatus` if/elif ladder had no `flowNorm == in-review` branch. Fixture S-H asserted `setStatus(in-review)`, contradicting the loop. RP impl-review caught it (NEEDS_WORK). + +## What Didn't Work +Editing only the flow→normalized lookup table and the fixtures. A who-wins/reconcile policy has TWO layers — the mapping (what a state *means*) and the loop (what to *write*). Changing the map without adding the matching loop branch leaves the new rung unreachable. + +## Solution +For every new normalized rung the map can emit, add a matching explicit branch in the reconcile loop, ordered correctly against the existing deadlock-first / terminal-wins rules. Also: a "preserve existing state" nuance that lives only in a fixture must be promoted to a real loop branch (S-G: prEvidence=none projects in-review but must NOT force a downgrade — needs its own guarded branch checked BEFORE the generic in-review push). And model ALL probe outcomes as explicit enum values (added `ambiguous` + `probe-error` alongside merged/open/closed-unmerged/none) so unknown-branch / gh-failure routes to non-terminal+NEEDS_HUMAN rather than being mistaken for `none`/`merged`. status-sync.md reconcile loop + flow→normalized table + fixtures S-G/S-J. + +## Prevention +When editing a status/who-wins policy that has a mapping table AND a reconcile loop: every rung the table can emit needs a corresponding loop branch that decides whether/what to write, ordered against the collision-first rules. A fixture asserting a `setStatus(X)` is the oracle — grep the loop for a branch that produces X before shipping. Model probe/evidence inputs as an exhaustive enum (including the failure/ambiguous cases), never just the happy buckets. diff --git a/.flow/memory/knowledge/decisions/tracker-sync-is-projection-not-2026-06-01.md b/.flow/memory/knowledge/decisions/tracker-sync-is-projection-not-2026-06-01.md index 7207bfc2..ccf5fa8c 100644 --- a/.flow/memory/knowledge/decisions/tracker-sync-is-projection-not-2026-06-01.md +++ b/.flow/memory/knowledge/decisions/tracker-sync-is-projection-not-2026-06-01.md @@ -28,6 +28,7 @@ Build the tracker sync bridge as PROJECTION / visibility, not coordination. The - Body, status, and comments all sync two-way. Body reconciliation is agentic (host-agent semantic 3-way merge against a `lastSyncedAt` merge-base snapshot, translating between flow's structured spec and the tracker's free-form issue); only genuine contradictions surface for a human, and in Ralph mode they queue while confident merges proceed. - Linear-first ordering accepts the headless/Codex/cron MCP-availability caveat as a documented constraint (GitHub-via-gh is the headless-robust path, shipped second). - A future /flow-next:strategy run should fold this into an explicit track; this decision feeds that conversation. +- **fn-66 (FLOW-15) refines the status projection, not the projection-vs-coordination stance.** Two of the two-way status states are now evidence-gated: `In Review` is **actively projected** at make-pr (an open PR is the In Review lifecycle rung, projected unconditionally whenever the bridge is active), and terminal `Done` is **gated on a GitHub-confirmed `MERGED` probe** for the spec branch — local Flow completion (`done` + completion-review `ship`) is necessary, never sufficient, and `land.merged` is the sole Done driver (active-by-default when the bridge is active). This keeps the tracker an honest mirror of reality ("In Review" / "shipped") without making it a control plane: flow still drives state, the merge probe is read-only evidence, and a human's manual board edit still wins per the who-wins tiebreak. Projection-not-coordination is unchanged; the projection is just more lifecycle-accurate. ## Future extension (NOT in fn-52): board-triggered per-spec executor A Symphony-shaped trigger layer can sit on top of the fn-52 sync bridge — but it is a SEPARATE strategy decision (it makes the tracker a trigger, revisiting projection-not-coordination). diff --git a/.flow/specs/fn-66-tracker-sync-reserve-linear-done-for.json b/.flow/specs/fn-66-tracker-sync-reserve-linear-done-for.json index cebb650b..491b1255 100644 --- a/.flow/specs/fn-66-tracker-sync-reserve-linear-done-for.json +++ b/.flow/specs/fn-66-tracker-sync-reserve-linear-done-for.json @@ -1,7 +1,7 @@ { "branch_name": "fn-66-tracker-sync-reserve-linear-done-for", - "completion_review_status": "unknown", - "completion_reviewed_at": null, + "completion_review_status": "ship", + "completion_reviewed_at": "2026-06-18T09:51:21.220686Z", "created_at": "2026-06-17T11:26:35.802051Z", "default_impl": null, "default_review": null, @@ -29,5 +29,5 @@ "mergeBaseTracker": "\n> **Origin:** Linear issue [FLOW-15](https://linear.app/gmickel/issue/FLOW-15) (team Flow Next, project Development, `Bug`, High), filed 2026-06-17 after a SapienXT incident. Grabbed via `/flow-next:tracker-sync`; this spec is the canonical source of truth and FLOW-15 is its co-editable mirror.\n\n## Goal & Context\n\ntracker-sync can push a projected tracker issue to **`Done`** when a Flow spec is locally complete (all tasks `done` + `completion_review_status == ship`) **even though no PR exists or the PR has not merged**. The repo convention (SapienXT incident, 2026-06-17) is unambiguous: **Linear `Done` means *merged*.** Local Flow completion means implementation + review are done, NOT that the change shipped.\n\n**Observed failure:** SapienXT `fn-29` / `WOR-27` reached all-tasks-done + completion-review `SHIP`; closeout/tracker-sync pushed `WOR-27` \u2192 `Done` with **no GitHub PR** and **unmerged commits**; a human had to drag it back to `In Progress`.\n\n**Goal:** make the tracker-sync status policy lifecycle-accurate \u2014 `Done` is reserved for *merge-confirmed* state, and an open PR maps to `In Review` \u2014 so the board never claims \"shipped\" for work that isn't.\n\n## Architecture & Data Models\n\nThe status projection lives in the tracker-sync **status who-wins** layer (`skills/flow-next-tracker-sync/references/status-sync.md`) plus the per-event touchpoints the host skills fire (the `tracker.perEvent.*` table in `flow-next-tracker-sync/SKILL.md`; current seeded values: `work.firstClaim=push`, `work.done=comment`, `makePr=comment`, `completionReview=reconcile`, `land.merged=off`). The fix is a **status mapping policy** keyed off lifecycle phase + **PR/merge evidence**, applied consistently at every touchpoint that can write status.\n\n**The canonical lifecycle \u2192 tracker-state map** (this is the spec's core contract):\n\n| Flow lifecycle phase | Evidence | Tracker state |\n|---|---|---|\n| Task first-claimed / work underway | task `in_progress` | `In Progress` |\n| make-pr created/found an **open** PR | open PR exists for `branch_name` | `In Review` |\n| PR **merged** | GitHub reports the linked PR `MERGED` | `Done` (terminal) |\n| All tasks done + completion `ship`, **no merged PR** | no PR / open PR / closed-unmerged | **NEVER `Done`** \u2014 stay `In Review` (open PR) or non-terminal + a comment; ambiguous \u2192 `NEEDS_HUMAN` |\n\n- **Merge-evidence gate (the central rule).** The flow\u2192normalized mapping becomes a function of **`(spec status, completion_review_status, PR-merge-evidence)`** \u2014 `flowToNormalized(spec, prEvidence)` \u2014 NOT of spec state alone. No write path (automatic touchpoint OR a manual `/flow-next:tracker-sync` reconcile) may set the terminal `Done`/completed state without **GitHub-confirmed `MERGED`** evidence for the spec's `branch_name` (`gh pr list --head --state all` \u2192 `MERGED`). Flow `done` + completion `ship` is necessary, never sufficient. The gate is an **invariant on the outbound terminal write**, not a property of one touchpoint \u2014 so a manual merge-evidenced reconcile is the legitimate recovery path (it MAY terminal-write when `MERGED` exists), while any local-completion-only write may not.\n- **No-PR all-done \u2014 exact state.** All tasks done + completion `ship` + **no PR at all**: tracker-sync makes **no status advance** \u2014 the issue stays at its current non-terminal state (typically `In Progress`); it is NOT moved to `In Review` (nothing is in review) and NOT to `Done`. Optionally a one-line comment notes \"locally complete, no PR yet.\" Pilot (below) is what then drives `make-pr`.\n- **make-pr \u2192 `In Review` (on the unconditional PR-link path).** make-pr's PR\u2194issue link is **unconditional whenever the bridge is active** (not perEvent-gated \u2014 it powers Linear Diffs). The `In Review` status push rides that same unconditional path: when make-pr creates/finds an **open** PR, reconcile the linked issue to `In Review` (the existing Flow Next `type: started` state) alongside the PR-link comment \u2014 so In Review is not gated behind `perEvent.makePr` being non-`off`.\n- **completionReview \u2192 not terminal (dispatch becomes `comment`-shaped).** The `completionReview` touchpoint stops pushing status: re-scope its dispatch from `reconcile` (which implies status+body) to a **`comment`-shaped** effect (verdict + R-ID coverage), and at most leave the issue at `In Review` if an open PR exists. Never `Done`.\n- **merge/land \u2192 `Done` (the automatic driver, active by default).** `land.merged` is the **automatic** Done driver, gated on the `MERGED` probe. To avoid boards sticking at `In Review` after a real merge, `land.merged` must be **active by default when the bridge is active** (resolve in T2: default-on via the discovery ceremony, or unconditional-when-bridge-active \u2014 NOT left `off`, which would never project Done at all). The merge-evidence invariant means even this write self-checks `MERGED` rather than trusting the caller.\n- **GitHub adapter parity.** The same `flowToNormalized(spec, prEvidence)` gate applies on the GitHub adapter (`references/github.md` terminal/closed mapping): its reduced-fidelity `setStatus(done|verified)` path must also require `MERGED` evidence, or it regresses the bug via the back door. Note the transport-blind invariant in `references/adapter-interface.md` (terminal outbound writes require merge evidence).\n- **pilot classification gap.** Pilot must not return terminal `NO_WORK` for an all-done / completion-`ship` spec that has **no open and no merged PR** \u2014 that state means make-pr never ran (or its PR was lost). Pilot already has an all-done PR-probe branch (classifies `make-pr` when no PR exists); this spec hardens it so the no-PR all-done case dispatches `make-pr` or reports `NEEDS_HUMAN`, never silently `NO_WORK`.\n\n## API Contracts\n\n- **status-sync policy** (`references/status-sync.md`): add the lifecycle\u2192state map + the merge-evidence gate to the who-wins ladder. The mapping is **transport-blind** (Linear state names / GitHub labels resolved per adapter); `Done`/closed terminal is gated on merge evidence regardless of adapter.\n- **perEvent semantics** unchanged as keys, but their *status effect* is re-specified: `makePr` may push `In Review` (open-PR evidence); `completionReview` never pushes terminal; a merge touchpoint pushes `Done` (merge evidence). No new config key required unless a `tracker.statePolicy` knob proves necessary during planning \u2014 default behavior is the corrected map.\n- **Merge probe** (shared): `gh pr list --head \"$BRANCH_NAME\" --state all --json state` \u2192 `MERGED` present \u21d2 merge evidence; `OPEN` \u21d2 In Review; `CLOSED` (unmerged) / none \u21d2 non-terminal + NEEDS_HUMAN/comment. Reuse land's existing probe shape.\n- **pilot**: the all-done classification branch returns `make-pr` (no PR) or `NEEDS_HUMAN` (closed-unmerged / merged-but-open-spec inconsistency) \u2014 never `NO_WORK` for an all-done spec lacking a merged PR.\n\n## Edge Cases & Constraints\n\n- **No PR at all** (the FLOW-15 incident): all-done + completion-ship, no PR \u2192 tracker stays non-terminal; pilot dispatches make-pr; never `Done`.\n- **Open PR:** \u2192 `In Review`; not `Done` until merged.\n- **Merged PR:** \u2192 `Done` (terminal) \u2014 the only path to Done.\n- **Closed-unmerged PR / missing branch / ambiguous PR state:** do NOT mark `Done`; emit `NEEDS_HUMAN` or leave non-terminal with an explanatory comment.\n- **GitHub adapter parity:** the same merge-evidence rule applies \u2014 `Done`/closed label only on a merged PR; the GitHub adapter's reduced-fidelity status must still gate terminal on merge.\n- **Idempotency / re-entry:** re-running a touchpoint must not thrash status (In Review \u2192 In Review is a no-op; a merged spec re-synced stays Done).\n- **Backward-safety:** a spec already at `Done` from a real merge is untouched; the change only prevents *premature* Done.\n- **Who-wins interaction:** a human manually moving the issue must still win per the existing tiebreak \u2014 the merge-evidence gate constrains tracker-sync's *own* writes, not human edits.\n\n## Acceptance Criteria\n\n- **R1:** tracker-sync status policy never maps Flow `done` + completion-review `ship` directly to tracker `Done` unless **merged-PR evidence** exists (the GitHub `MERGED` probe for the spec's `branch_name`).\n- **R2:** The make-pr lifecycle touchpoint moves the linked issue to `In Review` when an open PR exists for the branch (in addition to the existing PR-link comment).\n- **R3:** The merge/land reconciliation moves the linked issue to `Done` only when GitHub reports the linked PR `MERGED`.\n- **R4:** completion-review (`SHIP`) never pushes a terminal `Done`; it posts its verdict/evidence and at most ensures `In Review`.\n- **R5:** Pilot never returns terminal `NO_WORK` for an all-done / completion-`ship` spec with no open and no merged PR \u2014 it dispatches `make-pr` or returns `NEEDS_HUMAN`.\n- **R6:** Closed-unmerged PR, missing branch, or ambiguous PR state never produces `Done` \u2014 `NEEDS_HUMAN` or non-terminal + comment.\n- **R7:** Regression coverage exercises the full matrix: **no PR, open PR, merged PR, closed-unmerged PR** (and the pilot all-done-no-PR case), asserting the resulting tracker state / verdict for each.\n- **R8:** GitHub adapter honors the same merge-evidence gate for its terminal state.\n- **R9:** Docs updated \u2014 `references/status-sync.md`, `references/github.md` (terminal mapping), `references/adapter-interface.md` (transport-blind merge-evidence note), the `perEvent` status semantics in `flow-next-tracker-sync/SKILL.md` + `docs/tracker-sync.md` (incl. inverting the `:131` \"spec-completion-review owns Done\" claim) + `docs/teams.md` (Linear Diffs paragraph), the make-pr / completion-review / land touchpoint prose, the pilot classification note; Codex mirror regenerated; flow-next.dev tracker-sync / land pages updated in the same workstream.\n- **R10:** `land.merged` is active by default when the bridge is active (so a real merge actually projects `Done`), and the terminal-write merge-evidence invariant holds on **every** path \u2014 automatic touchpoints AND a manual `/flow-next:tracker-sync` reconcile (which may terminal-write iff `MERGED` evidence exists). The flow\u2192normalized mapping is `flowToNormalized(spec, prEvidence)`.\n\n## Boundaries\n\n- **No new lifecycle phases** \u2014 only the state *mapping* + the merge-evidence gate change; the touchpoints themselves (firstClaim / makePr / work.done / completionReview / merged) stay where they are.\n- **No change to merge mechanics** \u2014 land still owns merging; this constrains the *status write* to require merge evidence, it doesn't move the merge.\n- **No override of human edits** \u2014 the existing who-wins tiebreak for human status changes is preserved.\n- **Not dependency projection** \u2014 distinct from FLOW-14 / fn-64 (explicitly noted in FLOW-15).\n- **Tracker stays a projection** \u2014 flow remains the source of truth; this only corrects what state the projection writes.\n\n## Decision Context\n\n`Done` is a *claim about reality* (\"this shipped\"), and the board is read by people who don't see the repo \u2014 so a premature `Done` is a correctness bug, not a cosmetic one (the SapienXT human had to manually undo it). The repo already has the exact primitive needed: land's `gh pr list --head --state all` MERGED probe. The fix is to make **every** terminal-status write depend on that evidence instead of trusting local Flow completion, and to give the open-PR state its own honest rung (`In Review`).\n\nReserving `Done` for merge evidence also closes the pilot gap symmetrically: an all-done spec with no merged PR is *unfinished from the board's perspective*, so pilot must keep driving it (make-pr) or surface it (`NEEDS_HUMAN`) rather than declaring `NO_WORK`. The lifecycle is only consistent if \"done locally,\" \"in review,\" and \"shipped\" are three distinct, evidence-backed states end to end.\n\n## Requirement coverage\n\n| R-ID | Task |\n|---|---|\n| R1, R6, R7, R8 | fn-66.1 \u2014 status-sync policy: `flowToNormalized(spec, prEvidence)` merge-evidence gate + `In Review` rung + worked-fixture matrix (S-G..S-J, exact states) + linear-ladder + github.md + adapter-interface sync |\n| R2, R3, R4, R10 | fn-66.2 \u2014 touchpoint re-scoping: completionReview (dispatch\u2192`comment`, never terminal) / makePr (\u2192 In Review on the unconditional PR-link path) / land.merged (\u2192 Done on MERGED evidence, **active-by-default when bridge active**) + manual merge-evidenced recovery + flowctl perEvent default/test |\n| R5, R6 | fn-66.3 \u2014 pilot all-done hardening: never NO_WORK for an all-done spec lacking a merged PR (make-pr / defer-to-land / NEEDS_HUMAN) |\n| R9 | fn-66.4 \u2014 docs (tracker-sync.md, SKILL prose, decision-record note) + CHANGELOG + version bump + Codex mirror regen + flow-next.dev (tracker-sync + land) |\n", "url": "https://linear.app/gmickel/issue/FLOW-15" }, - "updated_at": "2026-06-17T11:43:49.215053Z" + "updated_at": "2026-06-18T09:51:21.220836Z" } diff --git a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.1.md b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.1.md index 2d2fb526..a6414431 100644 --- a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.1.md +++ b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.1.md @@ -27,9 +27,8 @@ This task is the POLICY core; the touchpoints (fn-66.2) and pilot (fn-66.3) enfo - [ ] `references/github.md` terminal/closed mapping requires `MERGED` evidence; `references/adapter-interface.md` notes the transport-blind "terminal outbound write requires merge evidence" invariant. - [ ] Fixtures S-G..S-J assert EXACT states (not "non-terminal"): no-PR all-done → stays `In Progress` (no advance); open-PR → `In Review`; merged-PR → `Done`; closed-unmerged → `In Progress`/non-terminal + NEEDS_HUMAN. ## Done summary -TBD - +Gated terminal tracker Done/verified on a MERGED merge-evidence probe (flow→normalized map is now flowToNormalized(spec, prEvidence)), added an In Review rung for open PRs wired into the reconcile loop, and routed closed-unmerged/ambiguous/probe-error to non-terminal + NEEDS_HUMAN. Updated status-sync.md, linear-ladder.md, github.md, adapter-interface.md in lockstep + Codex mirror, with worked fixtures S-G..S-J asserting exact projected states. who-wins/deadlock-first ordering untouched. ## Evidence -- Commits: -- Tests: -- PRs: +- Commits: 0480e82, 6948fe8 +- Tests: plugins/flow-next/scripts/ci_test.sh (67/0), python3 -m unittest discover -s plugins/flow-next/tests -p test_*.py (1104 OK, skipped=2), scripts/sync-codex.sh (mirror regenerated, all gates pass) +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.2.json b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.2.json index 700ee5da..ffd9ea74 100644 --- a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.2.json +++ b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.2.json @@ -3,12 +3,14 @@ "claim_note": "", "claimed_at": null, "created_at": "2026-06-17T11:31:22.145573Z", - "depends_on": [], + "depends_on": [ + "fn-66-tracker-sync-reserve-linear-done-for.1" + ], "id": "fn-66-tracker-sync-reserve-linear-done-for.2", "priority": null, "spec": "fn-66-tracker-sync-reserve-linear-done-for", "spec_path": ".flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.2.md", "status": "todo", "title": "Touchpoint re-scoping: completionReview never-terminal / makePr to In Review / land.merged Done-on-merge-evidence", - "updated_at": "2026-06-17T11:41:04.608961Z" + "updated_at": "2026-06-18T07:50:21.366899Z" } diff --git a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.2.md b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.2.md index 61b35bad..e7ae05a9 100644 --- a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.2.md +++ b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.2.md @@ -25,9 +25,8 @@ Depends on fn-66.1 (the policy/map). This task wires the callers to it. No merge - [ ] make-pr's `In Review` push rides the UNCONDITIONAL PR-link path (active whenever the bridge is active), not gated behind `perEvent.makePr != off` (R2). - [ ] A manual `/flow-next:tracker-sync` reconcile MAY terminal-write `Done` iff `MERGED` evidence exists (the merge-evidence invariant is per-write, not per-touchpoint) (R10). ## Done summary -TBD - +Re-scoped the three tracker-sync status touchpoints to the corrected lifecycle policy: completionReview is now comment-shaped (verdict + R-ID coverage, never terminal Done), make-pr pushes In Review on the unconditional bridge-active PR-link path, and land.merged is the sole active-by-default Done driver that self-checks the GitHub MERGED probe. Added a flowctl perEvent round-trip test, a steps.md setStatus merge-evidence gate, and updated docs + the Codex mirror. RepoPrompt impl-review reached SHIP (R2/R3/R4/R10 all met) after 3 fix rounds. ## Evidence -- Commits: -- Tests: -- PRs: +- Commits: 0ac5dae, d78f505, fe62787, 5d022d4 +- Tests: python3 -m unittest discover -s plugins/flow-next/tests (1110 tests, OK), bash plugins/flow-next/scripts/ci_test.sh (67 passed, 0 failed) +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.3.md b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.3.md index 56fb5f4e..17ef753a 100644 --- a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.3.md +++ b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.3.md @@ -18,9 +18,8 @@ Independent of fn-66.1/.2 (edits only pilot workflow.md), but conceptually the p - [ ] Closed-unmerged PR, missing branch, or merged-but-still-open-spec → `NEEDS_HUMAN` (R6), never Done/NO_WORK. - [ ] The classification table + crash-class note in pilot workflow.md reflect the new cases; reasons are greppable in the verdict line. ## Done summary -TBD - +Hardened pilot's all-done classification: an all-done spec with an open PR now emits an explicit greppable PILOT_VERDICT=DEFERRED_TO_LAND (stage=land) instead of silently collapsing to terminal NO_WORK; no-PR all-done always classifies make-pr (FLOW-15 case); closed-unmerged/missing-branch/merged-but-open-spec stay NEEDS_HUMAN. Registered the new DEFERRED_TO_LAND verdict token + land stage in the SKILL.md grammar and ralph.md /goal driver recipes; regenerated the Codex mirror. ## Evidence -- Commits: -- Tests: -- PRs: +- Commits: 17e58dccc80b9ae73955a7381c3fa18e8e1b5da9, 6f69001c95676ad9ac3bd985a6216131ad28e69b +- Tests: bash plugins/flow-next/scripts/ci_test.sh (67 passed, 0 failed), bash scripts/sync-codex.sh (mirror regen, all gates pass) +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.4.json b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.4.json index 9f0b2000..3cc571df 100644 --- a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.4.json +++ b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.4.json @@ -3,12 +3,16 @@ "claim_note": "", "claimed_at": null, "created_at": "2026-06-17T11:31:22.928310Z", - "depends_on": [], + "depends_on": [ + "fn-66-tracker-sync-reserve-linear-done-for.1", + "fn-66-tracker-sync-reserve-linear-done-for.2", + "fn-66-tracker-sync-reserve-linear-done-for.3" + ], "id": "fn-66-tracker-sync-reserve-linear-done-for.4", "priority": null, "spec": "fn-66-tracker-sync-reserve-linear-done-for", "spec_path": ".flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.4.md", "status": "todo", "title": "Docs + CHANGELOG + version bump + Codex mirror + flow-next.dev", - "updated_at": "2026-06-17T11:41:04.988581Z" + "updated_at": "2026-06-18T07:50:21.634711Z" } diff --git a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.4.md b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.4.md index c54a7d54..14d709e3 100644 --- a/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.4.md +++ b/.flow/tasks/fn-66-tracker-sync-reserve-linear-done-for.4.md @@ -24,9 +24,8 @@ Docs-last; depends on fn-66.1/.2/.3 settling so prose matches reality. GLOSSARY: - [ ] `plugins/flow-next/docs/teams.md` Linear Diffs paragraph updated — "spec-completion-review owns Done" → "land/merged evidence owns Done". - [ ] `references/github.md` + `adapter-interface.md` doc notes shipped (terminal-requires-merge-evidence). ## Done summary -TBD - +fn-66.4 docs/release pass: audited that fn-66.1/.2/.3 fully inverted the tracker-sync.md/teams.md/status-sync.md "completion-review owns Done" prose to "land.merged owns Done" + In Review (no fix needed); added the decision-record Consequences note, the CHANGELOG 2.1.2 entry, and bumped flow-next 2.1.1→2.1.2 across all manifests + README + Codex/Cursor mirrors. RepoPrompt impl-review SHIP (2 rounds; the one NEEDS_WORK was a stale installed-copy artifact outside the repo/diff, not a real defect). ## Evidence -- Commits: -- Tests: -- PRs: +- Commits: 8decd4a6ba4df1752e014cc650972acaa64d39d6 +- Tests: bash plugins/flow-next/scripts/ci_test.sh (67 passed), python3 -m unittest discover -s tests (1110 passed, 2 skipped) +- PRs: \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 141e2c25..9207cf66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,22 @@ All notable changes to the flow-next. +## [flow-next 2.1.2] - 2026-06-18 + +### Fixed +- **tracker-sync reserves Linear `Done` for *merged* PRs; an open PR maps to `In Review`** (fn-66 / FLOW-15). tracker-sync could push a projected tracker issue to **`Done`** when a Flow spec was *locally* complete (all tasks `done` + completion-review `ship`) **even though no PR existed or the PR had not merged** — the SapienXT incident (`fn-29` / `WOR-27` reached all-done + `SHIP`, closeout pushed `WOR-27` → `Done` with no GitHub PR and unmerged commits, and a human had to drag it back). `Done` is a claim about reality ("this shipped"), read by people who don't see the repo, so a premature `Done` is a correctness bug, not cosmetic. + - **Merge-evidence gate** (fn-66.1): the flow→normalized mapping is now `flowToNormalized(spec, prEvidence)` — a function of `(spec status, completion_review_status, PR-merge-evidence)`, not spec state alone. **No** write path — automatic touchpoint OR a manual `/flow-next:tracker-sync` reconcile — may set the terminal `Done`/completed state without a GitHub-confirmed `MERGED` probe for the spec's `branch_name` (`gh pr list --head --state all` → `MERGED`). Local Flow completion is necessary, never sufficient. The invariant is **transport-blind** ([`references/status-sync.md`](plugins/flow-next/skills/flow-next-tracker-sync/references/status-sync.md), [`references/adapter-interface.md`](plugins/flow-next/skills/flow-next-tracker-sync/references/adapter-interface.md)) — every adapter receives a terminal normalized status only after the gate. Worked-fixture matrix (no-PR / open / merged / closed-unmerged) added. + - **make-pr → `In Review`, unconditional when bridge active** (fn-66.2): an open PR *is* the In Review lifecycle rung. make-pr now moves the linked issue to `In Review` (alongside the existing PR-link comment) on the same unconditional path that powers Linear Diffs — not gated behind `perEvent.makePr`. + - **completion-review never terminal** (fn-66.2): the `completionReview` touchpoint is re-scoped from `reconcile` to a `comment`-shaped effect — it posts its verdict + R-ID coverage and at most leaves the issue at `In Review`, **never `Done`**. + - **land/merge → `Done`, active-by-default + self-checked** (fn-66.2): `land.merged` is the **sole** Done driver and is **active-by-default when the bridge is active** (leaving it opt-in would strand boards at `In Review` after a real merge). The terminal write self-checks the `MERGED` probe; the `perEvent.land.merged` leaf, if set, only tunes the optional verdict comment, never the status. + - **GitHub adapter parity** (fn-66.1): the GitHub adapter's reduced-fidelity terminal `setStatus(done|verified)` honors the same `MERGED` gate, so the bug can't regress via the back door. + +### Changed +- **Pilot never returns terminal `NO_WORK` for an all-done spec lacking a merged PR** (fn-66.3). An all-done / completion-`ship` spec with **no** PR classifies `make-pr` and dispatches it; one with an **open** PR is land's work, so pilot records it as a *deferred candidate* and — if no other candidate is selectable — terminates with the new distinct, greppable `PILOT_VERDICT=DEFERRED_TO_LAND` line (registered in the `ralph.md` `/goal` driver grammar), never collapsing to `NO_WORK`. Closed-unmerged / missing-branch / merged-but-open-spec all-done states surface `NEEDS_HUMAN`. + +### Notes +- Patch release — a behavior **fix** to the status projection, not a new capability. Boundaries hold: no new lifecycle phases, no change to merge mechanics (land still owns merging — this only constrains the *status write* to require merge evidence), no override of human board edits (the who-wins tiebreak is preserved), tracker stays a projection. Docs updated across `tracker-sync.md`, `teams.md`, the tracker-sync / work / pilot / land skill prose, the `references/{status-sync,github,adapter-interface}.md` notes, and the projection decision record; Codex mirror regenerated + audited; flow-next.dev ships the counterpart tracker-sync + land pass. + ## [flow-next 2.1.1] - 2026-06-17 ### Added diff --git a/README.md b/README.md index c3dbe5aa..8aaa189f 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # Flow-Next [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) -[![Flow-next](https://img.shields.io/badge/Flow--next-v2.1.1-green)](CHANGELOG.md) +[![Flow-next](https://img.shields.io/badge/Flow--next-v2.1.2-green)](CHANGELOG.md) [![Docs](https://img.shields.io/badge/Docs-📖-informational)](plugins/flow-next/docs/README.md) [![Author](https://img.shields.io/badge/Author-Gordon_Mickel-orange)](https://mickel.tech) diff --git a/plugins/flow-next/.claude-plugin/plugin.json b/plugins/flow-next/.claude-plugin/plugin.json index 9bf3f62c..a98988cc 100644 --- a/plugins/flow-next/.claude-plugin/plugin.json +++ b/plugins/flow-next/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "flow-next", - "version": "2.1.1", + "version": "2.1.2", "description": "Zero-dependency planning + execution with .flow/ task tracking and Ralph autonomous mode (multi-model review gates). Worker subagent per task for context isolation. Prime assesses 8 pillars (48 criteria) with GitHub API integration. Includes 21 subagents, 24 commands, 28 skills.", "author": { "name": "Gordon Mickel", diff --git a/plugins/flow-next/.codex-plugin/plugin.json b/plugins/flow-next/.codex-plugin/plugin.json index 72971425..190014b8 100644 --- a/plugins/flow-next/.codex-plugin/plugin.json +++ b/plugins/flow-next/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "flow-next", - "version": "2.1.1", + "version": "2.1.2", "description": "Zero-dependency planning + execution with .flow/ task tracking and Ralph autonomous mode. Worker subagent per task for context isolation. Compatible with Codex, Claude Code, and Factory Droid.", "author": { "name": "Gordon Mickel", diff --git a/plugins/flow-next/.cursor-plugin/plugin.json b/plugins/flow-next/.cursor-plugin/plugin.json index d4f49c28..319bedcb 100644 --- a/plugins/flow-next/.cursor-plugin/plugin.json +++ b/plugins/flow-next/.cursor-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "flow-next", - "version": "2.1.1", + "version": "2.1.2", "description": "Zero-dependency, spec-driven agentic SDLC: durable specs, context-fit plans, re-anchored workers, adversarial cross-model review, receipts, and Ralph autonomous mode.", "author": { "name": "Gordon Mickel", diff --git a/plugins/flow-next/codex/skills/flow-next-land/workflow.md b/plugins/flow-next/codex/skills/flow-next-land/workflow.md index cec0759c..ea213a30 100644 --- a/plugins/flow-next/codex/skills/flow-next-land/workflow.md +++ b/plugins/flow-next/codex/skills/flow-next-land/workflow.md @@ -455,24 +455,47 @@ git log --oneline -1 # evidence echo: the squash commit referencing the PR - Clean non-`.flow/` tree required before starting and asserted after. - **Idempotency probe BEFORE acting**: check for an existing tag/GitHub release for the target version (`git tag -l `, `gh release view `); already present → resume past completed steps, never re-tag. - Release-step failure AFTER the successful merge → verdict `NEEDS_HUMAN` + durable label on the (merged) PR via 3.4 — the merge is NEVER retried, and later ticks never blindly re-run the failed step (re-entry only resumes via the idempotency probe). - - Release completed → verdict `RELEASED`.4. **Tracker touchpoint (opt-in, best-effort)** — deliberately AFTER release-follow so the verdict comment can carry the release outcome — — gated exactly like every fn-57 touchpoint (active AND leaf ≠ off/null; an unseeded leaf reads `null` = off): + - Release completed → verdict `RELEASED`. +4. **Tracker touchpoint — the SOLE `Done` driver (fn-66, R3/R10)** — deliberately AFTER release-follow so the verdict comment can carry the release outcome. **`land.merged` is active-by-default whenever the bridge is active** — it is NOT gated behind `tracker.perEvent.land.merged != off`. This is deliberate (fn-66, R10): a real merge is the ONLY event that legitimately projects `Done`, so leaving it opt-in would let boards stick at `In Review` forever after a merge. Like make-pr's unconditional PR-link path, the merge→Done projection rides the bridge-active predicate alone (the `land.merged` leaf, if a repo set it, only tunes the optional verdict comment — never gates the status): ```bash - LEAF="$("$FLOWCTL" config get tracker.perEvent.land.merged --json | jq -r '.value')" TRACKER_FIRE=0 - if [ "$("$FLOWCTL" sync active --json | jq -r '.active')" = "true" ] \ - && [ "$LEAF" != "off" ] && [ "$LEAF" != "null" ]; then - TRACKER_FIRE=1 + if [ "$("$FLOWCTL" sync active --json | jq -r '.active')" = "true" ]; then + TRACKER_FIRE=1 # active-by-default — no perEvent gate (fn-66, R10) fi ``` - `TRACKER_FIRE == 1` → you MUST dispatch the tracker-sync skill via the Skill tool — this is a real skill invocation, not narration, and it uses the fn-57 lifecycle dispatch grammar (operation token + event tag, same shape as work/make-pr touchpoints): + **Self-check the `MERGED` probe before dispatching the terminal push (fn-66, R3).** This touchpoint runs from the post-merge tail, but it must NOT trust the caller's claim of a merge — it re-confirms GitHub reports the linked PR `MERGED` for the spec branch. **Critically, do NOT reuse the Phase 3.5 `MERGED_PR_NUM`** — on the normal babysit path the PR was `OPEN` at discovery, so that variable is empty even though land just merged it. **Re-probe fresh, after the `gh pr merge` succeeded and before deciding `TRACKER_TERMINAL_OK`** (this also covers the re-entry path, where the pre-merge probe already saw `MERGED`). The merge-evidence invariant is an **invariant on the outbound terminal write**, enforced at the source — if the fresh probe is not a clean `MERGED`, do NOT dispatch a `Done` push (fall back to a non-terminal comment or `NEEDS_HUMAN`), so no path writes `Done` without merge evidence: + + ```bash + # Fresh post-merge re-probe — NEVER the stale Phase 3.5 MERGED_PR_NUM (empty on the + # normal OPEN→merge path). This is the authoritative merge-evidence read for the gate. + MERGED_CONFIRMED="$(gh pr list --head "$BRANCH_NAME" --state all --json state \ + | jq -r '[.[] | select(.state == "MERGED")] | length')" + if [ "$TRACKER_FIRE" = "1" ] && [ "${MERGED_CONFIRMED:-0}" -gt 0 ]; then + TRACKER_TERMINAL_OK=1 # GitHub-confirmed MERGED → the gate is satisfied + else + TRACKER_TERMINAL_OK=0 # no clean MERGED → comment only, never terminal + fi + ``` + + `TRACKER_FIRE == 1` → you MUST dispatch the tracker-sync skill via the Skill tool — this is a real skill invocation, not narration, and it uses the fn-57 lifecycle dispatch grammar (operation token + event tag, same shape as work/make-pr touchpoints). **The `TRACKER_TERMINAL_OK` self-check selects the operation** — land does NOT trust the caller's merge claim, it branches on its own GitHub-`MERGED` probe (fn-66, R3): + + - **`TRACKER_TERMINAL_OK == 1`** (clean GitHub `MERGED`) → dispatch the terminal `push`: ```text skill: flow-next-tracker-sync (operation: push , event: land.merged) ``` - The push projects the just-closed spec onto the linked issue — status who-wins flips it to the configured terminal state — and posts the merge/release verdict comment (include `merged PR: ` and, when the release step ran, the released version). The tracker-sync skill owns the transport, status who-wins, comment dedup, and emits its own receipt event-tagged `land.merged` (the fn-57 layer). Best-effort: a dispatch failure or tracker error surfaces as a stderr warning in the PR's evidence block and NEVER changes the PR's verdict (the close and any release already stand). Persist any tracked sync state the touchpoint updated with a best-effort follow-up commit (`git add ".flow/specs/${spec}.json" .flow/sync-runs && git commit -m "chore(flow): sync state for ${spec} land.merged touchpoint" && git push` — file-scoped so pre-existing .flow dirtiness never rides along; no rollback needed, it carries this spec's sync state + receipts only). + The `push` projects the just-closed spec; the tracker-sync skill's own `flowToNormalized(spec, merged)` gate (status-sync.md S-I) resolves the terminal rung — `verified` if completion review shipped, else `done` — so status who-wins flips the issue to the merge-confirmed terminal state, ALSO posting the merge/release verdict comment (include `merged PR: ` and, when the release step ran, the released version). The Done write is **double-gated** (this caller's probe AND the skill's own merge-evidence gate). + + - **`TRACKER_TERMINAL_OK == 0`** (no clean `MERGED` — e.g. the post-merge probe came back empty/ambiguous, a corruption signal) → do NOT dispatch a terminal `push`. Dispatch the **comment-only** path instead and surface `NEEDS_HUMAN`, so no path writes `Done` without merge evidence: + + ```text + skill: flow-next-tracker-sync (operation: comment , event: land.merged) # verdict comment only, no terminal status + ``` + + Best-effort either way: a dispatch failure or tracker error surfaces as a stderr warning in the PR's evidence block and NEVER changes the PR's verdict (the close and any release already stand). Persist any tracked sync state the touchpoint updated with a best-effort follow-up commit (`git add ".flow/specs/${spec}.json" .flow/sync-runs && git commit -m "chore(flow): sync state for ${spec} land.merged touchpoint" && git push` — file-scoped so pre-existing .flow dirtiness never rides along; no rollback needed, it carries this spec's sync state + receipts only). Verdict `MERGED` (or `RELEASED`). On success, drop the PR's ledger entry (atomic `jq 'del(.[$pr])'` + `mv`). End on the base branch with a clean tree (the original branch may have been the now-deleted PR branch — the base IS the restore target after a merge). diff --git a/plugins/flow-next/codex/skills/flow-next-make-pr/workflow.md b/plugins/flow-next/codex/skills/flow-next-make-pr/workflow.md index 302c369b..2545f643 100644 --- a/plugins/flow-next/codex/skills/flow-next-make-pr/workflow.md +++ b/plugins/flow-next/codex/skills/flow-next-make-pr/workflow.md @@ -1664,10 +1664,12 @@ fi R24 invariant: under Ralph the PR URL is the **sole stdout artefact** in machine-parseable form (`PR_URL=`), so the harness can capture it via `eval "$(/flow-next:make-pr ...)"` or by grep / tail. Everything else (memory write notes, recovery hints, breadcrumbs, the §5.7 tracker-sync check + `Tracker sync:` summary line) routes through stderr where the harness logs it but doesn't parse it. -### 5.6 — Tracker sync (opt-in) — link the PR to the issue (Diffs-ready) +### 5.6 — Tracker sync (opt-in) — link the PR to the issue + move it to In Review (Diffs-ready) **Runs whenever the tracker bridge is active, after `gh pr create` returned a `$PR_URL` in §4.6 (never under `--dry-run` — Phase 4.0 short-circuits before Phase 5).** No separate `makePr` opt-in — linking a PR to its issue is zero-/near-zero-cost hygiene and is the whole value (Linear Diffs). Links the PR to the tracker issue (R10), append-only and conflict-free (R8). **Not Ralph-blocked** (attaching a link is a confident, conflict-free op). +**In Review status push rides this SAME unconditional bridge-active path (fn-66, R2).** Because an open PR for the branch is by definition the *In Review* lifecycle rung, moving the linked issue to `In Review` is part of the same PR↔issue linkage that powers Linear Diffs — it is **NOT gated behind `tracker.perEvent.makePr != off`** (that leaf gates only the optional breadcrumb comment, not the link/status that make the bridge useful). A just-created PR is `OPEN`, so the merge-evidence probe yields `open` and `reconcileStatus(spec, issue, open)` → `in-review` (status-sync.md row 4); the dispatch below reconciles the issue to that non-terminal rung (never terminal — a freshly-opened PR has no merge evidence). The dispatch uses the **`reconcile`** op (not `push`) precisely so this In Review nudge rides the body-preserving 3-way merge — a `push` would re-render and overwrite the issue body first (steps.md push() lines 134-136), clobbering human tracker-side edits. + The **primary linkage already happened in §4.6a** — the `Ref ` line in the PR body, which makes the host's tracker integration auto-link the PR. §5.6 is the **enhancement layer** and is **transport- and tracker-type-aware**: - **Linear (`tracker.type == linear`):** the §4.6a body ref is what makes **Linear Diffs** render the PR inside the issue (Linear's GitHub integration auto-links on the `WOR-N` identifier). On the **GraphQL rung**, additionally create the *rich* GitHub-PR attachment + status sync via `attachmentLinkURL(issueId, $PR_URL)` (Linear auto-detects the GitHub URL; do NOT use `attachmentCreate` — that yields a dumb attachment with no diff/status). On the **MCP rung** there is no URL-attach tool, so it relies entirely on the §4.6a auto-link (sufficient — the integration does the rest). Optionally also post a one-line breadcrumb comment. @@ -1677,20 +1679,35 @@ The **primary linkage already happened in §4.6a** — the `Ref ` li ```bash if [[ -n "$PR_URL" ]] \ && [ "$("$FLOWCTL" sync active --json | jq -r '.active')" = "true" ]; then - # Invoke the flow-next-tracker-sync skill: link $PR_URL to the issue. - # skill: flow-next-tracker-sync (operation: link $PR_URL, event: makePr) - # linear → rich attachment via attachmentLinkURL (GraphQL rung) + optional breadcrumb comment; - # the §4.6a body ref already enabled the auto-link + Diffs. - # github → native `Refs #N` (github.md). - # Unlinked spec → flow-first push (create + link) first, then link the PR / Diff - # (tracker-sync §Phase 3 create-if-unlinked). No-op only if no transport reachable. + # Invoke the flow-next-tracker-sync skill with the canonical lifecycle dispatch + # grammar — `operation: , event: ` (verbatim, no descriptors in + # the operation token): + # skill: flow-next-tracker-sync (operation: reconcile , event: makePr) + # The `reconcile` op (open-PR evidence) moves the issue to In Review AND links $PR_URL — + # BOTH ride this unconditional bridge-active path (NOT gated behind perEvent.makePr): + # the link powers Diffs and In Review is the honest lifecycle state for an open PR. + # WHY `reconcile`, NOT `push` (fn-66 regression fix): `push` renders the COMPLETE + # spec body and writeIssue's it BEFORE setStatus (steps.md push() lines 134-136), so + # opening a PR just to nudge In Review would CLOBBER any human tracker-side body edits + # made since the last sync. `reconcile` runs the 3-way body merge (steps.md reconcile() + # lines 177-185) that PRESERVES tracker-side edits, and sets In Review as part of the + # SAME op via reconcileStatus(spec, issue, open) → in-review (status-sync.md row 4 / R2). + # A genuine body conflict queues (sync defer) or asks — it NEVER blocks the open PR. + # linear → rich attachment via attachmentLinkURL (GraphQL rung) + setStatus(in-review) + # via reconcileStatus (open prEvidence → in-review, non-terminal, status-sync.md + # row 4); the §4.6a body ref already enabled the auto-link + Diffs. Optional breadcrumb comment. + # github → native `Refs #N` (github.md) + status:in-review label. + # (the PR URL itself rides as evidence in the comment/attachment, not the op token.) + # The open PR is the merge-evidence `open` bucket → In Review, NEVER terminal (no MERGED). + # Unlinked spec → flow-first link (create + base-snapshot) first, then reconcile the now-linked + # spec → link the PR / Diff + In Review (tracker-sync §Phase 3 create-if-unlinked). No-op only if no transport reachable. # Best-effort — the PR is already open; a tracker failure must NOT exit non-zero. # Under Ralph, framing routes to stderr (keeps the PR_URL= stdout invariant). : fi ``` -The PR is already open before this step; a tracker failure surfaces as a stderr warning and never changes the exit code (same non-fatal discipline as the `--memory` write in §5.1). The skill emits its own receipt, event-tagged `--event makePr` — the tag §5.7's end-of-run `sync check` audits. +The PR is already open before this step; a tracker failure surfaces as a stderr warning and never changes the exit code (same non-fatal discipline as the `--memory` write in §5.1). The skill emits its own receipt, event-tagged `--event makePr` — the tag §5.7's end-of-run `sync check` audits. The `In Review` status push is non-terminal (`reconcileStatus(spec, issue, open) → in-review`, status-sync.md row 4) — an open PR is never `Done`. **`reconcile` (not `push`) is deliberate (fn-66):** the body-preserving 3-way merge means moving the issue to In Review on PR open can never overwrite human tracker-side body edits — the prior conflict-free guarantee of the old `link $PR_URL` path, now extended to the status nudge. ### 5.7 — Tracker-sync end-of-run check — LAST action before exit (fn-57) @@ -1711,7 +1728,7 @@ SINCE=$(gh pr view "$PR_URL" --json createdAt --jq .createdAt 2>/dev/null || tru **Retro-fire on MISSING — exactly ONE cycle, never blocking:** 1. Record the retro-fire start anchor (the re-check needs it as `--since`): `date -u +%Y-%m-%dT%H:%M:%SZ` -2. Invoke the **flow-next-tracker-sync skill directly** — the same dispatch as §5.6, with its `event:` tag: `skill: flow-next-tracker-sync (operation: link $PR_URL, event: makePr)` — NEVER this check block as a wrapper (no recursion). +2. Invoke the **flow-next-tracker-sync skill directly** — the same dispatch as §5.6, with its `event:` tag, in the canonical `operation: , event: ` grammar: `skill: flow-next-tracker-sync (operation: reconcile , event: makePr)` (the `reconcile` op links $PR_URL + moves the issue to In Review via the body-preserving 3-way merge — never clobbers tracker-side edits, fn-66; the PR URL rides as evidence, not in the op token) — NEVER this check block as a wrapper (no recursion). 3. Re-check with `--since` = the step-1 anchor: `"$FLOWCTL" sync check "$SPEC_ID" --events makePr --since "" --json` 4. Record the final state in the summary slot. Still MISSING after the one cycle is a recorded, visible outcome — never a second retro-fire, never a block (the PR is already open; a tracker hiccup must not become a hard stop). Recovery guidance lives in the receipt note + `docs/tracker-sync.md`. diff --git a/plugins/flow-next/codex/skills/flow-next-pilot/SKILL.md b/plugins/flow-next/codex/skills/flow-next-pilot/SKILL.md index 2b3ad4dc..f0170f23 100644 --- a/plugins/flow-next/codex/skills/flow-next-pilot/SKILL.md +++ b/plugins/flow-next/codex/skills/flow-next-pilot/SKILL.md @@ -102,10 +102,12 @@ The `/goal` validator is transcript-blind: it reads conversation output only and Every tick ends with exactly one terminal line, the last line of the response, with nothing after it: ```text -PILOT_VERDICT= spec= stage= reason="" +PILOT_VERDICT= spec= stage= reason="" ``` -Use `spec=-` and `stage=-` when no spec was selected. Stage values are exactly `plan`, `plan-review`, `work`, `make-pr`, or `-`. +Use `spec=-` and `stage=-` when no spec was selected. Stage values are exactly `plan`, `plan-review`, `work`, `make-pr`, `land`, or `-`. + +`DEFERRED_TO_LAND` is a distinct *non-terminal-work* verdict (stage `land`): every remaining all-done candidate has an open PR that land — not pilot — owns. It is deliberately separated from `NO_WORK` so a driver can route it to `/flow-next:land` instead of stopping; an all-done spec with an open PR is real outstanding work, never absence of work. Driver condition examples: diff --git a/plugins/flow-next/codex/skills/flow-next-pilot/workflow.md b/plugins/flow-next/codex/skills/flow-next-pilot/workflow.md index 2bcf6642..35d41cf9 100644 --- a/plugins/flow-next/codex/skills/flow-next-pilot/workflow.md +++ b/plugins/flow-next/codex/skills/flow-next-pilot/workflow.md @@ -115,7 +115,7 @@ Classify from `SPEC_JSON` plus `TASKS_JSON`; first match wins: | any task is `todo` or `blocked` (canonical task statuses are `todo`, `in_progress`, `blocked`, `done`) | `work` | | the only non-`done` tasks are `in_progress` own/unassigned (other-actor claims were already skipped at SELECT) | `NEEDS_HUMAN`, reason `stale in-progress claim — work's ready-driven loop cannot resume it` | | all tasks done and `completion_review_status != "ship"` and review backend is configured | `work` | -| all tasks done and completion is ship-or-ungated | PR probe, then `make-pr`, skip, or `NEEDS_HUMAN` | +| all tasks done and completion is ship-or-ungated | PR probe, then `make-pr` (no PR), defer-to-land (open PR), or `NEEDS_HUMAN` (closed-unmerged / missing-branch / merged-but-open-spec) | A spec whose only remaining tasks are `blocked` still classifies as `work`; if work cannot advance it, the healthy-no-advance strike path handles it. An in-progress-only spec is different: work's Phase 3a drives off `flowctl ready --spec`, which never returns an `in_progress` task, so dispatching would burn strikes or wrongly enter the completion-review path — the stale-claim `NEEDS_HUMAN` is crash-class (no dispatch, no strike). @@ -132,13 +132,13 @@ CLOSED_PR=$(printf '%s\n' "${PR_JSON:-[]}" | jq -r '.[] | select(.state == "CLOS MERGED_PR=$(printf '%s\n' "${PR_JSON:-[]}" | jq -r '.[] | select(.state == "MERGED") | .url' | head -1) ``` -Classification outcomes for the all-done branch: +Classification outcomes for the all-done branch (the all-done invariant: an all-done / completion-`ship` spec lacking a **merged** PR is *unfinished from the board's perspective* — pilot keeps driving it (`make-pr`), defers it to land (open PR), or surfaces it (`NEEDS_HUMAN`); it never collapses to terminal `NO_WORK`): - gh missing, unauthenticated, or API failure: `PILOT_VERDICT=NEEDS_HUMAN spec= stage=make-pr reason="gh probe failed at all-done branch"`. -- OPEN PR exists: this spec is finished from pilot's perspective; skip to the next SELECT candidate. +- OPEN PR exists (and no MERGED PR): this spec is **deferred to land** — land owns the open PR, not pilot — so record it as a *deferred candidate* and skip to the next SELECT candidate. This is an explicit defer, never a silent finish: if no later candidate is selectable, the tick terminates with the distinct, greppable `PILOT_VERDICT=DEFERRED_TO_LAND` line (Phase 6), never `NO_WORK`. Track the deferred spec id + open-PR url so the terminal line can name it. +- No PR exists: stage is `make-pr`. This is the FLOW-15 case (all-done, no PR — make-pr never ran or its PR was lost); it MUST classify `make-pr` and never fall through to `NO_WORK`. - CLOSED PR exists and no OPEN PR exists: `NEEDS_HUMAN`, because the PR was closed without merge and pilot never silently reopens human-rejected work. - MERGED PR exists while the spec is still open: `NEEDS_HUMAN`, because the state is inconsistent and pilot must not create a second PR. -- No PR exists: stage is `make-pr`. Dry-run stops after classification. It prints selected spec, stage, review backend, task counts, consulted status fields, PR probe result if any, skipped candidates, and any would-clear ledger entries. It writes no ledger (the ledger file is never created or modified on a dry-run tick), checks out no branch, and dispatches nothing: @@ -307,10 +307,22 @@ Crash-class outcomes are `NEEDS_HUMAN`: sub-skill crash, dirty non-`.flow/` tree PILOT_VERDICT=NEEDS_HUMAN spec= stage= reason="" ``` -No selectable candidate or all candidates finished with existing OPEN PRs yields: +An all-done spec with an **open** PR is *not* crash-class — it is the benign `DEFERRED_TO_LAND` terminal below (land owns the merge). Only the closed-unmerged, missing-branch, and merged-but-open-spec all-done states are `NEEDS_HUMAN`. An all-done spec with **no** PR is never terminal at all — it classifies `make-pr` and dispatches. -```text -PILOT_VERDICT=NO_WORK spec=- stage=- reason="no ready spec with satisfied deps" -``` +Terminal verdict when no spec was dispatched, split by why — the two cases are distinct and must never be conflated: + +- **No selectable candidate at all** (none open+ready, or all skipped for unsatisfied deps / other-actor claims) yields `NO_WORK`: + + ```text + PILOT_VERDICT=NO_WORK spec=- stage=- reason="no ready spec with satisfied deps" + ``` + +- **Every remaining candidate was deferred to land** (each all-done with an existing OPEN PR — the only reason they weren't dispatched) yields the distinct, greppable `DEFERRED_TO_LAND` verdict, naming the deferred spec so a transcript-only driver can hand it to `/flow-next:land`. This case MUST NOT collapse to `NO_WORK`: a `DONE`-but-open-PR spec is real outstanding work that land owns, not absence of work. + + ```text + PILOT_VERDICT=DEFERRED_TO_LAND spec= stage=land reason="all tasks done, open PR — land owns the merge" + ``` + + When more than one candidate was deferred, name the first deferred spec (stable id order) in the line; the reason still reads `defer to land`. The `PILOT_VERDICT` line is always the last line of the tick output. Print nothing after it. diff --git a/plugins/flow-next/codex/skills/flow-next-tracker-sync/SKILL.md b/plugins/flow-next/codex/skills/flow-next-tracker-sync/SKILL.md index 1c345973..4007ddff 100644 --- a/plugins/flow-next/codex/skills/flow-next-tracker-sync/SKILL.md +++ b/plugins/flow-next/codex/skills/flow-next-tracker-sync/SKILL.md @@ -64,7 +64,7 @@ Never reimplement a flowctl helper inline; never push a merge/judgment decision ## Discovery ceremony (R2) — detect / surface / ask / never-assume -The bridge is **off until explicitly enabled**. The ceremony probes four signals, surfaces present AND absent, ASKS, and writes config **only on confirmation** — with provenance. No-signal ⇒ nothing written; `enabled` stays `false`. Never assume. But **once the user confirms, enabling is opt-OUT, not opt-in**: the ceremony activates the whole pipeline (every `perEvent` event) by default — hooking up the bridge means you want it to sync. The user excludes events at ceremony time or turns any off later (`flowctl config set tracker.perEvent. off`). The `get_default_config()` schema default stays `off`, so a bare `enabled=true` set WITHOUT the ceremony activates **no lifecycle-event sync** (every `perEvent` event stays dormant) — only the ceremony's explicit writes activate them. (The lone exception: make-pr's PR↔issue link is unconditional whenever the bridge is active — no per-event gate, by design.) +The bridge is **off until explicitly enabled**. The ceremony probes four signals, surfaces present AND absent, ASKS, and writes config **only on confirmation** — with provenance. No-signal ⇒ nothing written; `enabled` stays `false`. Never assume. But **once the user confirms, enabling is opt-OUT, not opt-in**: the ceremony activates the whole pipeline (every `perEvent` event) by default — hooking up the bridge means you want it to sync. The user excludes events at ceremony time or turns any off later (`flowctl config set tracker.perEvent. off`). The `get_default_config()` schema default stays `off`, so a bare `enabled=true` set WITHOUT the ceremony activates **no lifecycle-event sync** (every `perEvent` event stays dormant) — only the ceremony's explicit writes activate them. (Two exceptions are unconditional whenever the bridge is active — no per-event gate, by design: (1) make-pr's PR↔issue link **and its In Review status push** (fn-66, R2 — an open PR is the In Review rung, riding the same Diffs-powering link path); (2) **`land.merged`** (fn-66, R10 — a real merge is the SOLE event that projects terminal `Done`, gated on the GitHub `MERGED` probe; leaving it opt-in would strand boards at In Review post-merge).) Probe these four signals (detection lives in the skill, not flowctl — same shape as fn-51's driver-ladder detection): @@ -91,7 +91,7 @@ $FLOWCTL config set tracker.perEvent.work.firstClaim push $FLOWCTL config set tracker.perEvent.work.done comment $FLOWCTL config set tracker.perEvent.makePr comment $FLOWCTL config set tracker.perEvent.resolvePr comment -$FLOWCTL config set tracker.perEvent.completionReview reconcile +$FLOWCTL config set tracker.perEvent.completionReview comment # fn-66: comment-shaped (verdict + R-ID coverage) — NEVER terminal Done; land.merged is the sole Done driver (active-by-default, no perEvent seed needed) ``` Confirm the result with `flowctl sync active --json` (must report `active: true` once enabled/type are set). Negative path: user declines ⇒ write nothing; `sync active` stays `active: false`. diff --git a/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/adapter-interface.md b/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/adapter-interface.md index 56a568da..423ce9cb 100644 --- a/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/adapter-interface.md +++ b/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/adapter-interface.md @@ -84,6 +84,8 @@ The wire-agnostic shapes the transports produce and reconcile consumes. These ar Who-wins (R7, implemented in fn-52.5 — [status-sync.md](status-sync.md)): tracker wins `done`/`verified`; flow wins `in-progress`; `priority` + `deferred`/`wontfix` surface to the user, never auto-changed. Comments/evidence two-way append + dedup (R8) is [comments-sync.md](comments-sync.md). +**Transport-blind terminal invariant (R1, R8).** A **terminal outbound write** (mapping a spec to normalized `done`/`verified`, i.e. closing/completing the tracker issue) **requires merge evidence**: the flow→normalized map is `flowToNormalized(spec, prEvidence)` ([status-sync.md](status-sync.md)) and emits a terminal status ONLY when a `MERGED` PR probe for the spec branch is present. Local completion (spec `done` + completion review shipped) is necessary, not sufficient. This invariant is **transport-blind** — every adapter (Linear fn-52.3, GitHub fn-52.7, any future tracker) receives a terminal normalized status only after the gate, and maps it DOWN to its native closed state without re-deciding. No adapter ever closes an issue from local spec state alone. + ### `relation` The wire-agnostic edge shape `listIssueRelations` produces. One entry per directed blocked-by edge the transport can see for the inspected issue. diff --git a/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/github.md b/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/github.md index 3292c386..711137b5 100644 --- a/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/github.md +++ b/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/github.md @@ -142,6 +142,19 @@ edited directly on GitHub by a human who didn't touch the label): | `done` / `verified` | `gh issue close --reason completed` | set `status:` (so `verified` vs `done` is recoverable) | | `deferred` / `wontfix` | **do NOT auto-apply** — these are R7 surface-only | — | +**Terminal close requires merge evidence (R1, R8 — same gate as Linear).** The +`done` / `verified` row above is a *transport* mapping: the adapter only ever +*receives* a terminal normalized status to write because the upstream +`flowToNormalized(spec, prEvidence)` map ([status-sync.md](status-sync.md)) gated it +on a `MERGED` merge-evidence probe for the spec branch. A locally-`done` spec with no +merged PR normalizes to **`in-review`** (→ open issue + `status:in-review`), so this +adapter **never closes the issue** for a spec that lacks a merged PR. The +merge-evidence gate is transport-blind — it applies identically on the GitHub adapter +and the Linear adapter (R8); this firewall just maps the already-gated normalized +status DOWN to GitHub's `OPEN`/`CLOSED`+reason. A `closed-unmerged` / missing-branch +probe never produces a terminal normalized status, so no `gh issue close --reason +completed` is ever driven from local completion alone. + **`deferred` / `wontfix` are surfaced, never auto-applied** (R7 semantics, same as Linear's `canceled`-type states): the adapter reports the desired transition to the user (or queues it in Ralph) rather than closing the issue as `not planned` @@ -435,7 +448,9 @@ attachment, no Linear Diffs (GitHub has its own PR review UI). make-pr §4.6a's Linear branch is GitHub-typed-skipped; instead the GitHub adapter ensures the PR body carries a **non-closing** reference to the issue — `Refs #` (NOT `Fixes #`, which would auto-close the issue on merge and bypass -spec-completion-review; flow-next owns the lifecycle, R7/R10). GitHub then renders +flow-next's `land.merged` Done projection — the merge-evidence-gated lifecycle +that owns the terminal `Done` transition post-fn-66; flow-next owns the lifecycle, +R7/R10). GitHub then renders the PR↔issue cross-link automatically. Gate is the same as Linear: bridge **active AND tracker.type == github** — no separate `makePr` opt-in. There is no rich-attach step (the cross-reference IS the link). diff --git a/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/linear-ladder.md b/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/linear-ladder.md index 7cb49b99..0671cb08 100644 --- a/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/linear-ladder.md +++ b/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/linear-ladder.md @@ -106,10 +106,21 @@ taxonomy) plus an optional config name-override (`tracker.perTracker.statusMap`) |---|---|---| | `backlog` | `backlog` | — | | `unstarted` | `planned` | — | -| `started` | `in-progress` | flow wins | +| `started` | `in-progress` / `in-review` (the In Review rung — a `started`-type state named "In Review", resolved via `statusMap`) | flow wins (`in-progress`); In Review is the open-PR rung | | `completed` | `done` (or `verified` via name-map, e.g. a "Verified" state) | tracker wins | | `canceled` | `wontfix` (or `deferred` via name-map) | surface to user, never auto-change | +**Outbound terminal requires merge evidence (R1, lockstep with +[status-sync.md](status-sync.md) flow→normalized).** This table is the *inbound* +(tracker→normalized) read map. The *outbound* flow→normalized map gates terminal +`done`/`verified` on a `MERGED` merge-evidence probe: a locally-`done` spec maps to +**`in-review`** (Linear In Review — a `started`-type "In Review" state) until a +`MERGED` PR is observed for its branch; `setStatus(done|verified)` is driven ONLY +once `prEvidence == merged`. The In Review state resolves through the same +name/type → `stateId` map below (a `started`-type state whose name matches the +configured In Review rung). Keep this in lockstep with the +`flowToNormalized(spec, prEvidence)` table in status-sync.md — they must not drift. + Build the **name/type → `stateId` map once at config time** (MCP: `list_issue_statuses`; GraphQL: `workflowStates(first:100, filter:{team:…}){ id name type }`) so `setStatus` can resolve a normalized status back to the team's concrete diff --git a/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/status-sync.md b/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/status-sync.md index 04e628f5..aa000d55 100644 --- a/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/status-sync.md +++ b/plugins/flow-next/codex/skills/flow-next-tracker-sync/references/status-sync.md @@ -42,15 +42,99 @@ vocabulary in the adapter ([linear-ladder.md](linear-ladder.md) status table). T ### flow → normalized (what the spec's status *means* on the tracker) The spec's tracker-facing status is derived from the spec + its tasks (one spec ↔ -one issue, R3 — there are no per-task sub-issues): +one issue, R3 — there are no per-task sub-issues) **and a merge-evidence probe**. +The signature is **`flowToNormalized(spec, prEvidence)`** — it takes PR-merge +evidence as a second input, NOT spec state alone. **Local completion is necessary, +not sufficient, for a terminal status (R1).** A spec that is locally `done` with a +shipped completion review is still only `in-review` on the tracker until a +`MERGED`-state PR for its branch is observed; `done`/`verified` are reserved for +merge-confirmed work. -| flow condition | normalized | Rationale | -|---|---|---| -| spec `open`, **no** task `in_progress`/`done` yet (all `todo`) | `planned` (or `backlog` if no tasks exist) | authored, not started | -| spec `open`, **any** task `in_progress` or some `done` | `in-progress` | work underway | -| spec `done`, `completion_review_status != ship` | `in-review` | implementation complete, awaiting completion review | -| spec `done`, `completion_review_status == ship` | `verified` | completion review shipped | -| spec `done`, no completion-review configured | `done` | terminal, no review gate | +**`prEvidence`** is the result of the merge-evidence probe for the spec branch +(reuse verbatim from land `workflow.md:99-104` / pilot `:126-132`): + +```bash +BRANCH_NAME=$(.flow/bin/flowctl show "$SPEC_ID" --json | jq -r .branch_name) +# Bare `gh pr view` returns rc 0 even for CLOSED/MERGED — ALWAYS filter .state via jq. +PR_JSON=$(gh pr list --head "$BRANCH_NAME" --state all \ + --json url,state,number,isDraft 2>/dev/null) +MERGED=$(printf '%s' "$PR_JSON" | jq '[.[] | select(.state=="MERGED")] | length') +OPEN=$(printf '%s' "$PR_JSON" | jq '[.[] | select(.state=="OPEN")] | length') +# prEvidence ∈ { +# merged ≥1 MERGED +# open ≥1 OPEN, 0 MERGED +# closed-unmerged ≥1 CLOSED, 0 MERGED/OPEN +# none no PR for branch (probe succeeded, empty result) +# ambiguous a state the four buckets above don't cleanly cover — e.g. a +# branch with BOTH an open AND a closed-unmerged PR, or a draft- +# only result where no clear merge/open/closed signal dominates +# probe-error the gh probe itself failed (non-zero rc, no auth, network) — +# branch_name unknown counts here (cannot probe) +# } +``` + +**Probe failure / unknown branch is NOT merge evidence.** If `spec.branch_name` is +empty/unknown, or the `gh pr list` probe errors (rc≠0, no `gh`/auth, network), treat +`prEvidence` as `probe-error` — never as `none` and never as `merged`. Both +`probe-error` and `ambiguous` are non-terminal and route to NEEDS_HUMAN (below); +terminal is reachable ONLY from an unambiguous `merged`. + +> Use `-F` not `-f` for numeric `gh api` fields (a `-f number=…` stringifies the +> JSON value — memory `gh-api-f-stringifies`). The probe above uses `gh pr list`, +> not `gh api`, so it is unaffected; the note is for any follow-on `gh api` call. + +`flowToNormalized` maps to the normalized rung the spec *would* project. **It never +forces a rung downgrade by itself** — the reconcile loop (below) decides whether to +*write* it. In particular, `prEvidence == none` projects `in-review` but the loop's +**no-PR preserve rule** keeps an already-valid non-terminal tracker state (S-G); the +`in-review` projection only drives a `setStatus` when there is a real open-PR +(`open`) signal behind it. + +**Row-order discipline (load-bearing).** `prEvidence` is evaluated **FIRST** — a PR +signal (`merged` / `open`) decides the rung *before* any local task/completion-status +row gets a vote. The local-status rows (the `no PR evidence` block) apply **only** +when there is no PR signal (`none` / `closed-unmerged` / `ambiguous` / `probe-error`). +This ordering is what makes an all-tasks-done OPEN spec with an open PR project +`in-review` (not `in-progress`), and a merged ungated/`unknown`-completion spec reach +terminal Done (not stay `in-review`). The merge-evidence INVARIANT is intact: terminal +(`done`/`verified`) is reachable ONLY from `prEvidence == merged`. + +| # | flow condition | `prEvidence` | normalized | Rationale | +|---|---|---|---|---| +| 1 | spec `done`, no completion-review configured | `merged` | **`done`** | terminal, no review gate, **merge-confirmed** — a merge is a merge | +| 2 | spec `done`, `completion_review_status == ship` | `merged` | **`verified`** | completion review shipped **and** PR merged — terminal, verified | +| 3 | spec `done`, `completion_review_status != ship` (incl. `unknown`) | `merged` | `in-review` | PR merged but a configured completion review is not yet `ship` — stay in review until verified | +| 4 | spec at any local status (incl. all-tasks-done OPEN, or spec `done`) | `open` | `in-review` | open PR awaiting merge — the In Review rung, drives `setStatus(in-review)` (R2). The open-PR signal wins over the local task rows | +| 5 | spec `done` | `none` | **`in-review`** projection (NOT terminal); loop **preserves** an existing non-terminal state (S-G) | no PR exists — no merge evidence, no open-PR signal → never terminal, never a forced advance (R1) | +| 6 | spec `done` | `closed-unmerged` / `ambiguous` / `probe-error` | **`in-review`** (NOT terminal) **+ surface NEEDS_HUMAN** | locally shipped but the probe is not a clean MERGED — never terminal; the conflict goes to a human (R6) | +| 7 | spec `open`, **any** task `in_progress` or some `done` | `none` / `closed-unmerged` / `ambiguous` / `probe-error` | `in-progress` | work underway, no open/merged PR signal | +| 8 | spec `open`, **no** task `in_progress`/`done` yet (all `todo`) | `none` / `closed-unmerged` / `ambiguous` / `probe-error` | `planned` (or `backlog` if no tasks exist) | authored, not started | + +> **Why row 3 catches `unknown`.** flowctl normalizes a missing completion-review +> field to `unknown` (`flowctl.py:2296-2297`), and for repos without a +> completion-review backend pilot treats `completion_review_status != ship` as +> ungated. Row 3 is the `merged` + non-`ship` rung **only when a completion review is +> actually configured** — when no completion-review backend is configured at all, +> row 1 fires first (a spec with no review gate has nothing to wait on, so a merge is +> terminal `done`). Distinguish the two by `flowctl config`: "no completion-review +> configured" ⇒ row 1; "configured but the spec's review isn't `ship`/`unknown`-while- +> backend-present" ⇒ row 3. + +> **Why rows 4–6 sit above rows 7–8.** A PR signal — open or merged — is stronger +> evidence of where the work *is* than the local task ledger. In the normal make-pr +> path the spec is still `open` with all tasks `done` (flow-next-work/phases.md:488 +> says not to close the spec before the PR; land later discovers `status==open && +> tasks==done`). Evaluating the open-PR row (4) before the broad "some task done → +> in-progress" row (7) is what moves that issue to **In Review** on the make-pr push +> (flow-next-make-pr/workflow.md:1685-1690) instead of leaving it at In Progress. + +**Terminal (`done`/`verified`) is impossible without a `MERGED` probe result.** The +old map (pre-fn-66) mapped `spec done + completion ship → verified` and `spec done, +no review → done` with NO merge check, so a locally-completed spec auto-closed its +tracker issue before the PR merged. The merge-evidence gate fixes that at the root, +upstream of the who-wins ladder (which is unchanged). A `closed-unmerged` / +missing-branch / ambiguous probe NEVER yields terminal — it stays `in-review` (a +non-terminal rung) and surfaces NEEDS_HUMAN for the closed-unmerged case (R6). `deferred` / `wontfix` have no native flow status — they only ever arrive **from** the tracker side and are **surfaced, never auto-applied** (see the who-wins table). @@ -91,7 +175,8 @@ the two current normalized statuses, which are always available). ``` reconcileStatus(spec, issue): - flowNorm = flowToNormalized(spec) # table above + prEvidence = mergeEvidenceProbe(spec.branch_name) # merged|open|closed-unmerged|none|ambiguous + flowNorm = flowToNormalized(spec, prEvidence) # table above — terminal needs MERGED trackerNorm = issue.status.normalized # adapter already mapped it if flowNorm == trackerNorm: @@ -106,6 +191,17 @@ reconcileStatus(spec, issue): # genuine status deadlock (tracker=done × flow=in-progress, simultaneously) → # R1 conflictTiebreak fallback (next section). NOT a silent auto-close. + # ── CLOSED-UNMERGED / AMBIGUOUS / PROBE-ERROR — flow is locally done but the + # merge probe is NOT a clean MERGED. flowNorm is in-review (non-terminal — the + # gate forbade terminal), but the closed-without-merge / missing-branch / + # ambiguous / probe-error condition is a conflict a human must judge: surface + # NEEDS_HUMAN and do NOT write any status. Caught before the in-review + # advancement so it never silently pushes a rung. ── + elif spec.status == done and prEvidence ∈ {closed-unmerged, ambiguous, probe-error}: + # R6: locally shipped, but no merged PR and the probe is not clean → + # surface NEEDS_HUMAN (interactive ask / Ralph `sync defer --reason `). + # NO setStatus, NO terminal, NO spec change. Tracker keeps its current state. + elif trackerNorm ∈ {done, verified} (terminal, flow NOT in-progress): # tracker wins terminal — flow is at backlog/planned/done, so the tracker's # closure folds in cleanly (no live in-progress work to contradict it) @@ -116,12 +212,28 @@ reconcileStatus(spec, issue): # flow wins in-progress — push flow's progress to the tracker setStatus(trackerId, in-progress) [transport — linear-ladder.md] + # ── NO-PR PRESERVE RULE (S-G) — flow is locally done but prEvidence is `none` + # (no PR exists). flowNorm is in-review, but if the tracker is ALREADY at a + # valid non-terminal state (backlog/planned/in-progress/in-review) we do NOT + # force a rung change: a locally-shipped spec with no PR has no merge evidence + # and no open-PR signal, so we KEEP the current non-terminal state (no advance, + # no terminal). This is checked before the generic in-review push so `none` + # never drives an unconditional in-progress→in-review downgrade. ── + elif flowNorm == in-review and prEvidence == none + and trackerNorm ∈ {backlog, planned, in-progress, in-review}: + # S-G: preserve the existing valid non-terminal state — no setStatus, no advance. + + elif flowNorm == in-review and trackerNorm ∈ {backlog, planned, in-progress}: + # flow is in review (open PR — prEvidence=open), tracker behind → push the + # In Review rung (R2). Non-terminal advance; issue stays OPEN. + setStatus(trackerId, in-review) [transport — linear-ladder.md / github.md] + elif trackerNorm ∈ {deferred, wontfix} OR priority differs: # surface, never auto-change (interactive ask / Ralph queue) — see below - elif flowNorm ∈ {done, verified} and trackerNorm ∈ {backlog, planned}: - # flow reached terminal, tracker still pre-active (not in-progress → not a - # deadlock) — push flow's closure out + elif flowNorm ∈ {done, verified} and trackerNorm ∈ {backlog, planned, in-review}: + # flow reached terminal (prEvidence=merged — the gate passed), tracker still + # pre-terminal (not in-progress → not a deadlock) — push flow's closure out setStatus(trackerId, flowNorm) [transport] else: @@ -202,7 +314,7 @@ detail lives in [linear-ladder.md](linear-ladder.md)): | `triage` | `backlog` | spec stays `open`/unstarted | — | | `backlog` | `backlog` | spec stays `open`/unstarted | — | | `unstarted` | `planned` | spec stays `open` (planned) | — | -| `started` | `in-progress` | spec `open`, work underway | **flow wins** (flow drives the loop) | +| `started` | `in-progress` (or `in-review` for a `started`-type state named "In Review" — the open-PR rung, resolved via `statusMap`) | spec `open`, work underway / in review | **flow wins** (flow drives the loop) | | `completed` | `done` (or `verified` via a "Verified" name-override) | spec → `done` | **tracker wins terminal** | | `canceled` | `wontfix` (or `deferred` via a name-override) | **surface — do NOT auto-change** | surface to user | @@ -432,6 +544,119 @@ reconciles** — the run does **not** crash or abort. comments reconcile proceeds, and no `setStatus` is driven from the unmapped status. PASS iff no crash and the rest of the sync completes. +### Fixture S-G — no-PR all-done → stays In Progress, NO terminal advance (R1) + +**Flow:** spec `done`, all tasks `done`, `completion_review_status == ship`. +**`prEvidence`:** `none` (no PR exists for the spec branch). +**Tracker:** `status.normalized = "in-progress"` (board shows work underway — a valid +non-terminal state). + +**Expected:** `flowToNormalized(spec, none)` → **`in-review`** (terminal is gated on +`MERGED`; local ship is necessary, not sufficient). Tracker is already `in-progress`, +a valid non-terminal state. The reconcile **keeps the current non-terminal state / +does NOT advance to terminal** — it does NOT downgrade `in-progress`→`in-review` +unconditionally, and it does NOT close the issue. (`in-review` is "ahead of" +`in-progress`; with no merge evidence the bridge leaves the live non-terminal state +as-is rather than forcing a rung change. The point of the fixture: a locally-shipped +spec with no merged PR NEVER advances the tracker to `Done`.) + +**Oracle:** the spec/issue stay **non-terminal** — the issue stays **In Progress**; +**no** `setStatus(done|verified)` and **no** `gh issue close` is driven; no terminal +advance. PASS iff the locally-`done`+shipped spec does NOT close the tracker issue +absent a merged PR, and the existing valid non-terminal state is preserved. + +### Fixture S-H — open (unmerged) PR → In Review (R2) + +**Flow:** spec `done`, `completion_review_status == ship`. +**`prEvidence`:** `open` (one `OPEN` PR for the spec branch, 0 `MERGED`). +**Tracker:** `status.normalized = "in-progress"`. + +**Expected:** `flowToNormalized(spec, open)` → **`in-review`**. The open PR is the In +Review rung (R2): `setStatus(trackerId, in-review)` → Linear `In Review` (`state.type: +started`-family rung) / GitHub `status:in-review` label, issue stays **OPEN**. NOT +terminal — the PR has not merged. + +**Oracle:** exactly one `setStatus(in-review)`; the issue is **In Review** and stays +open; no close. PASS iff the open-PR spec projects to In Review, never to Done. + +### Fixture S-I — merged PR → Done (terminal, merge-confirmed) (R1) + +**Flow:** spec `done`, no completion-review configured. +**`prEvidence`:** `merged` (≥1 `MERGED` PR for the spec branch). +**Tracker:** `status.normalized = "in-review"`. + +**Expected:** `flowToNormalized(spec, merged)` → **`done`** (terminal — the `MERGED` +probe is present, so the gate is satisfied). `setStatus(trackerId, done)` → Linear +`completed`-type Done / GitHub `gh issue close --reason completed` + `status:done`. +(Had `completion_review_status == ship`, it would be **`verified`** instead.) + +**Oracle:** exactly one terminal `setStatus(done)` (issue closed/Done). PASS iff a +merge-confirmed spec — and ONLY a merge-confirmed spec — reaches terminal Done. + +### Fixture S-J — closed-unmerged PR → non-terminal + NEEDS_HUMAN (R6) + +**Flow:** spec `done`, `completion_review_status == ship`. +**`prEvidence`:** `closed-unmerged` (a PR for the branch is `CLOSED` with 0 `MERGED` +and 0 `OPEN` — closed without merging). +**Tracker:** `status.normalized = "in-progress"`. + +**Expected:** `flowToNormalized(spec, closed-unmerged)` → **`in-review`** (NON-terminal +— a closed-without-merge PR is NOT merge evidence, so terminal is forbidden). The +ambiguity (locally shipped, but the PR was closed unmerged) **surfaces NEEDS_HUMAN** +(interactive ask / Ralph `sync defer --reason closed-unmerged`). The issue stays +**non-terminal** (In Progress preserved); no terminal write. + +**Oracle:** **no** `setStatus(done|verified)` / no close; exactly one NEEDS_HUMAN +surfaced/queued entry naming the closed-unmerged branch; the issue stays +non-terminal. PASS iff a closed-unmerged spec never auto-closes the tracker issue and +the conflict reaches a human. + +> **`ambiguous` and `probe-error` share S-J's path.** A `prEvidence` of `ambiguous` +> (e.g. both an open AND a closed-unmerged PR on the branch) or `probe-error` +> (`gh` failed / no auth / unknown `branch_name`) is handled by the **same** +> reconcile branch as `closed-unmerged`: `flowToNormalized` → `in-review` +> (non-terminal), and the loop surfaces NEEDS_HUMAN (`sync defer --reason ambiguous` +> / `--reason probe-error`) with **no** status write. Terminal is reachable ONLY +> from an unambiguous `merged` (S-I) — a failed or ambiguous probe never closes the +> issue. + +### Fixture S-K — all-tasks-done OPEN spec + open PR → In Review (row-order, Thread A) + +**Flow:** spec **`open`** (NOT yet `done` — the normal make-pr path leaves the spec +`open` after all tasks finish; flow-next-work/phases.md:488), **all tasks `done`**. +**`prEvidence`:** `open` (one `OPEN` PR for the spec branch, 0 `MERGED`). +**Tracker:** `status.normalized = "in-progress"`. + +**Expected:** `flowToNormalized(spec, open)` → **`in-review`** (row 4 — the open-PR +signal is evaluated **before** the "some task done → in-progress" local row, so it +wins). The make-pr push (flow-next-make-pr/workflow.md:1685-1690) drives +`setStatus(trackerId, in-review)` → the issue moves to **In Review**, stays OPEN, NOT +terminal. + +**Oracle:** exactly one `setStatus(in-review)`; the issue is **In Review** (NOT left +at In Progress). PASS iff an all-tasks-done OPEN spec with an open PR projects to In +Review — the pre-fn-66 row order returned `in-progress` here and the make-pr push +never advanced the issue. Regression guard for Thread A. + +### Fixture S-L — merged ungated / `unknown`-completion spec → terminal Done (row-order, Thread B) + +**Flow:** spec `done`, **`completion_review_status == unknown`** (no completion-review +backend configured — flowctl normalizes the missing field to `unknown`, +flowctl.py:2296-2297; pilot treats `!= ship` as ungated, flow-next-pilot/workflow.md:117-122). +**`prEvidence`:** `merged` (≥1 `MERGED` PR for the spec branch). +**Tracker:** `status.normalized = "in-review"`. + +**Expected:** `flowToNormalized(spec, merged)` → **`done`** (terminal — row 1 fires +because no completion-review backend is configured; a merge is a merge for an ungated +repo). `setStatus(trackerId, done)` → Linear `completed`-type Done / GitHub +`gh issue close --reason completed`. The `unknown` completion status does **not** +trap the spec in `in-review`. + +**Oracle:** exactly one terminal `setStatus(done)` (issue closed/Done). PASS iff a +merged ungated/`unknown`-completion spec reaches terminal Done — the pre-fn-66 row +order let row 3 (`!= ship → in-review`) catch `unknown` first, so `land.merged` never +wrote Done for ungated projects. Regression guard for Thread B. + ## Boundaries - **This is the status/metadata layer, not the body merge or the transport.** The diff --git a/plugins/flow-next/codex/skills/flow-next-tracker-sync/steps.md b/plugins/flow-next/codex/skills/flow-next-tracker-sync/steps.md index c8c4c860..89847ad4 100644 --- a/plugins/flow-next/codex/skills/flow-next-tracker-sync/steps.md +++ b/plugins/flow-next/codex/skills/flow-next-tracker-sync/steps.md @@ -55,11 +55,11 @@ Only when the bridge is not yet active (`flowctl sync active --json` → `active $FLOWCTL config set tracker.perEvent.work.done comment # status comment + evidence $FLOWCTL config set tracker.perEvent.makePr comment # PR link is unconditional; extra status comment $FLOWCTL config set tracker.perEvent.resolvePr comment # resolution comment - $FLOWCTL config set tracker.perEvent.completionReview reconcile # flip Done/verified + verdict / R-ID coverage + $FLOWCTL config set tracker.perEvent.completionReview comment # fn-66: verdict + R-ID coverage comment; NEVER terminal Done (land.merged is the sole Done driver) $FLOWCTL config set tracker.perTracker.teamId "" # if the user named one $FLOWCTL sync active --json # confirm active: true ``` - **Never assume — but default-on is not assuming.** No signal / user declines the bridge ⇒ write nothing; `enabled` stays `false`; `sync active` stays `active: false`. Confirming the bridge IS the consent to sync the pipeline. The **config schema default stays `off`** (in `get_default_config()`), so a bare `tracker.enabled=true` set by hand or a script — WITHOUT this ceremony — activates **no lifecycle-event sync** (every `perEvent` event stays dormant). The **one exception** is make-pr's PR↔issue link, which is unconditional whenever the bridge is active (no per-event gate, by design — it powers Linear Diffs); only the ceremony's explicit per-event writes activate the events themselves. Users opt out per event afterward via `flowctl config set tracker.perEvent. off`. + **Never assume — but default-on is not assuming.** No signal / user declines the bridge ⇒ write nothing; `enabled` stays `false`; `sync active` stays `active: false`. Confirming the bridge IS the consent to sync the pipeline. The **config schema default stays `off`** (in `get_default_config()`), so a bare `tracker.enabled=true` set by hand or a script — WITHOUT this ceremony — activates **no lifecycle-event sync** (every `perEvent` event stays dormant). The **two exceptions** are unconditional whenever the bridge is active (no per-event gate, by design): (1) make-pr's PR↔issue link **and its In Review status push** (fn-66, R2 — an open PR is the In Review rung, riding the same Diffs-powering link path); (2) **`land.merged`** (fn-66, R10 — a real merge is the SOLE event that projects terminal `Done`, gated on the GitHub `MERGED` probe; leaving it opt-in would strand boards at In Review post-merge). Only the ceremony's explicit per-event writes activate the other events themselves. Users opt out per event afterward via `flowctl config set tracker.perEvent. off`. 5. **Readiness state (fn-58, R4 — optional, skippable).** After the config writes, ask one more question via `plain-text numbered prompt`: *which tracker workflow state means "ready for work"?* When set, every pull-side sync projects that state onto the local spec `ready` flag ([→ ref: status-sync.md] § Readiness projection) — **the tracker becomes authoritative for readiness** (a local `flowctl spec ready` is overwritten on the next sync). Readiness is optional: always offer **skip** (and lead with it when no candidate state exists); skipping writes nothing and leaves `tracker.readyState: null` — the readiness gate stays dormant (R7). - **Linear** — discover the team's states first: MCP `list_issue_statuses(team:)` → `{id, name, type}` per state (GraphQL rung: `workflowStates(first:100, filter:{team:{name:{eq:$team}}}){ nodes { id name type } }` — explicit `first:` on every connection). Lead with a recommendation: a state whose **name** looks like "Ready" / "Next" / "Ready for Dev" (case-insensitive); if none, lead with skip. Validate the chosen state resolves (`get_issue_status()`) before writing. Store the state **name** (names are what humans see; the projection matches case-insensitive/trimmed). - **GitHub** — issues have no workflow states; readiness resolves to a **label** name (suggest `ready`). Pre-create it so the projection never trips on a missing label — tolerate **only** already-exists (the create fails with a 422 when the label exists; that is fine, idempotent). Any other failure (auth / permissions / wrong repo / API) must surface — never write `tracker.readyState` for a label that isn't confirmed to exist, or every later pull hits the stale-config warn/noop (github.md § Readiness label) and the flag never moves: @@ -132,9 +132,16 @@ Route the operation; each layer calls hooks that operate on the normalized struc ``` push(spec): + prEvidence = mergeEvidenceProbe(spec.branch_name) → status-sync.md (merged|open|closed-unmerged|none|ambiguous|probe-error) body = renderFlowToTracker(spec) → body-merge.md Step 3 (flow→tracker) — COMPLETE spec, ALL sections; never summarize/truncate writeIssue(issue{... body ...}) [→ ref: transport] # GitHub UPDATE preserves the flow-owned region (github.md § writeIssue) — body=renderFlowToTracker output never contains it, so a raw full-body replace would wipe it and make projectDepRelations misread the ledgered edge as a remote removal → false collision. Write retains; merge strips (body-merge.md Step 0.5). - setStatus(map flow status → tracker status) → status-sync.md (who-wins) + setStatus(flowToNormalized(spec, prEvidence) → tracker status) → status-sync.md (who-wins) + # MERGE-EVIDENCE GATE (fn-66): the terminal rung (done/verified) is reachable + # ONLY when prEvidence == merged. flowToNormalized refuses terminal for + # none/open/closed-unmerged/ambiguous/probe-error — so NO push (automatic land.merged + # OR a manual reconcile) ever writes Done without a GitHub MERGED probe. The gate is a + # per-WRITE invariant (status-sync.md): a manual merge-evidenced push MAY terminal-write; + # a local-completion-only push never does. postComment(lifecycle event marker) → comments-sync.md (append + dedup) projectDepRelations(spec, issue) → § projectDepRelations below — depends_on_epics → blocked-by relations (additive, ledger-tracked, never advances lastSyncedAt; warns on unlinked dep; skipped when no transport) sync set-merge-base (BOTH halves) + set-last-synced # snapshot the pushed pair (body-merge.md Step 5) @@ -166,6 +173,7 @@ conflict) lives in that reference: reconcile(spec): base = sync get-state → merge-base snapshot (BOTH forms: mergeBaseFlow + mergeBaseTracker) issue = fetchIssue(trackerId) [→ ref: transport] + prEvidence = mergeEvidenceProbe(spec.branch_name) → status-sync.md (feeds reconcileStatus; gates terminal) projectReadiness(spec, issue) → status-sync.md § Readiness projection (rides every issue read; independent of the body merge — runs even when the body diverges) projectDepRelations(spec, issue) → § projectDepRelations below (rides every issue read like projectReadiness; independent of the body merge — runs even when the body diverges or conflicts) merged = threeWayMergeBody(base, flowBody, issue.body) → body-merge.md @@ -176,7 +184,12 @@ reconcile(spec): Ralph → sync defer (queue the scoped conflict, never block) [R9/R11] receipt: diverged else: - writeIssue(merged) + setStatus(who-wins) [transport → fn-52.3/.7] + status-sync.md (who-wins) + writeIssue(merged) + setStatus(reconcileStatus(spec, issue, prEvidence)) [transport → fn-52.3/.7] + status-sync.md (who-wins) + # MERGE-EVIDENCE GATE (fn-66): reconcileStatus runs flowToNormalized(spec, prEvidence) + # first — a manual reconcile MAY terminal-write Done/verified IFF prEvidence == merged + # (status-sync.md S-I); closed-unmerged/ambiguous/probe-error → in-review + NEEDS_HUMAN + # (S-J), none → preserve non-terminal (S-G). The terminal-write merge-evidence invariant + # holds on this manual path exactly as on the automatic land.merged touchpoint. sync set-merge-base (BOTH halves) + sync set-last-synced # body-merge.md Step 5 — ONLY on success receipt: merged | updated # no-base bootstrap (first link): body-merge.md "First-sync / no-base bootstrap" — diff --git a/plugins/flow-next/codex/skills/flow-next-work/SKILL.md b/plugins/flow-next/codex/skills/flow-next-work/SKILL.md index f0d12855..614f06f6 100644 --- a/plugins/flow-next/codex/skills/flow-next-work/SKILL.md +++ b/plugins/flow-next/codex/skills/flow-next-work/SKILL.md @@ -168,7 +168,7 @@ If user chose review, pass the review mode to the worker. The worker agent invok |---|---|---| | first task claimed (phases.md 3b.1) | `tracker.perEvent.work.firstClaim` | move the linked issue In-Progress | | task done (phases.md 3d.1) | `tracker.perEvent.work.done` | post a status comment + evidence (tests / commits / PR) | -| spec-completion-review SHIP (phases.md 3g) | `tracker.perEvent.completionReview` | flip the issue Done/verified + post verdict / R-ID coverage | +| spec-completion-review SHIP (phases.md 3g) | `tracker.perEvent.completionReview` | post verdict / R-ID coverage as a comment; NEVER terminal Done (fn-66 — Done is reserved for a MERGED PR, driven by land.merged) — at most leaves the issue at In Review | (capture / interview / plan / make-pr / resolve-pr carry their own touchpoints in those skills, gated identically on `tracker.perEvent.{capture,interview,plan,makePr,resolvePr}`.) diff --git a/plugins/flow-next/codex/skills/flow-next-work/phases.md b/plugins/flow-next/codex/skills/flow-next-work/phases.md index 34ac66d8..d89bb2ce 100644 --- a/plugins/flow-next/codex/skills/flow-next-work/phases.md +++ b/plugins/flow-next/codex/skills/flow-next-work/phases.md @@ -390,25 +390,29 @@ $FLOWCTL show --json | jq -r '.completion_review_status' 2. After skill returns with SHIP: - Set status: `$FLOWCTL spec set-completion-review-status --status ship --json` - - **Tracker sync (opt-in) — SHIP → issue Done/verified + verdict:** runs only when the tracker bridge is active AND `completionReview` is opted in. With no tracker configured this is a no-op. Hooked **here at the caller** (not inside the review skill) because this is where `completion_review_status=ship` lands: + - **Tracker sync (opt-in) — SHIP → verdict comment, NEVER terminal Done (fn-66):** runs only when the tracker bridge is active AND `completionReview` is opted in. With no tracker configured this is a no-op. Hooked **here at the caller** (not inside the review skill) because this is where `completion_review_status=ship` lands. **Local completion review is NOT merge evidence** — `Done` is reserved for a `MERGED` PR (fn-66 status-sync `flowToNormalized`), so this touchpoint is **comment-shaped only**: it posts the verdict + R-ID coverage and at most leaves the issue at `In Review` (if an open PR exists). It NEVER pushes `Done`/`verified`: ```bash if [ "$($FLOWCTL sync active --json | jq -r '.active')" = "true" ] \ && [ "$($FLOWCTL config get tracker.perEvent.completionReview --json | jq -r '.value')" != "off" ] \ && [ "$($FLOWCTL config get tracker.perEvent.completionReview --json | jq -r '.value')" != "null" ]; then - # Invoke the flow-next-tracker-sync skill: flip the linked issue to Done/verified - # (status who-wins — tracker wins `done`/`verified`, R7) and post the verdict + - # R-ID coverage as a comment. - # skill: flow-next-tracker-sync (operation: push , status + verdict comment, event: work.completionReview) - # Unlinked spec → flow-first push (create + link) first, then status + verdict - # comment (tracker-sync §Phase 3 create-if-unlinked). No-op only if no transport; Ralph queues. + # Invoke the flow-next-tracker-sync skill: post the completion-review verdict + + # R-ID coverage as a comment (comment-shaped — NEVER a terminal status push). + # The skill's reconcileStatus gate (status-sync.md flowToNormalized) refuses + # terminal `Done`/`verified` without a MERGED probe, so even if a stale config + # set this leaf to `reconcile` the gate keeps it non-terminal: at most it leaves + # the issue at `In Review` (open-PR evidence). land.merged is the SOLE Done driver. + # skill: flow-next-tracker-sync (operation: comment , event: work.completionReview) + # (the comment carries the verdict + R-ID coverage as evidence — never a status push) + # Unlinked spec → flow-first push (create + link) first, then the verdict comment + # (tracker-sync §Phase 3 create-if-unlinked). No-op only if no transport; Ralph queues. # The skill's receipts carry --event work.completionReview — audited by Phase 5's sync check. : fi ``` - Go to Phase 4 (Quality) -**Note:** The spec-completion-review skill gets SHIP from the reviewer but does NOT set the status itself. The caller (work skill or Ralph) sets `completion_review_status=ship` after successful review — and (when opted in) flips the linked tracker issue Done/verified here. The review skill (`flow-next-spec-completion-review`) is NOT edited; the touchpoint lives at this caller. +**Note:** The spec-completion-review skill gets SHIP from the reviewer but does NOT set the status itself. The caller (work skill or Ralph) sets `completion_review_status=ship` after successful review — and (when opted in) posts the verdict / R-ID-coverage comment to the linked tracker issue here. It does **NOT** flip the issue to `Done`/`verified` (fn-66: that is gated on a `MERGED` PR and driven solely by `land.merged`). The review skill (`flow-next-spec-completion-review`) is NOT edited; the touchpoint lives at this caller. **Fix loop behavior**: Same as impl-review. If reviewer returns NEEDS_WORK: 1. Skill parses issues @@ -503,7 +507,7 @@ EVENTS="work.firstClaim,work.done" # ← substitute the actual triggered set 2. For each MISSING event, invoke the **flow-next-tracker-sync skill directly** — the same dispatch as the touchpoint that missed, with its `event:` tag — NEVER this check block as a wrapper (no recursion): - `work.firstClaim` → `skill: flow-next-tracker-sync (operation: push , status-only, event: work.firstClaim)` - `work.done` → `skill: flow-next-tracker-sync (operation: comment , event: work.done)` - - `work.completionReview` → `skill: flow-next-tracker-sync (operation: push , status + verdict comment, event: work.completionReview)` + - `work.completionReview` → `skill: flow-next-tracker-sync (operation: comment , event: work.completionReview)` — comment-shaped (verdict + R-ID coverage as evidence), NEVER terminal (fn-66) 3. Re-check the missed events only, `--since` = the step-1 anchor: `"$FLOWCTL" sync check "$SPEC_ID" --events "" --since "" --json` 4. Record the final state in the summary slot. Still MISSING after the one cycle is a recorded, visible outcome — never a second retro-fire, never a block (the work is already done; a tracker hiccup must not become a hard stop). Recovery guidance lives in the receipt note + `docs/tracker-sync.md`. diff --git a/plugins/flow-next/docs/flowctl.md b/plugins/flow-next/docs/flowctl.md index 28a1b3c5..26f27fde 100644 --- a/plugins/flow-next/docs/flowctl.md +++ b/plugins/flow-next/docs/flowctl.md @@ -554,9 +554,9 @@ flowctl config toggle memory.enabled [--json] | `tracker.enabled` | bool | `false` | Enable the tracker-sync bridge (see [`sync`](#sync)). The bridge is active iff raw `tracker.enabled == true` OR raw `tracker.type ∈ {linear, github}`. | | `tracker.type` | string | `null` | Tracker backend: `linear` or `github`. | | `tracker.provenance` | string | `null` | Free-form provenance written by the discovery ceremony on confirmation (who/when/signals). | -| `tracker.perEvent.` | string | `off` | Per-lifecycle-event sync op. Events: `capture`, `interview`, `plan`, `work.firstClaim`, `work.done`, `makePr`, `resolvePr`, `completionReview`. Leaf values: `off | pull | push | reconcile | comment`. **Schema default `off`** — so a bare `enabled=true` set without the ceremony fires no lifecycle-event sync (accidental-enable guard; make-pr's unconditional PR↔issue link is the one exception — see [`tracker-sync.md`](tracker-sync.md)). But the `/flow-next:tracker-sync` discovery ceremony **activates all events by default (opt-out)** when you hook up the bridge; you turn any off with `config set tracker.perEvent. off`. | +| `tracker.perEvent.` | string | `off` | Per-lifecycle-event sync op. Events: `capture`, `interview`, `plan`, `work.firstClaim`, `work.done`, `makePr`, `resolvePr`, `completionReview`. Leaf values: `off | pull | push | reconcile | comment`. **Schema default `off`** — so a bare `enabled=true` set without the ceremony fires no lifecycle-event sync (accidental-enable guard; two paths are unconditional whenever the bridge is active — make-pr's PR↔issue link + `In Review` push, and `land.merged`'s `Done`-on-merge — see [`tracker-sync.md`](tracker-sync.md)). But the `/flow-next:tracker-sync` discovery ceremony **activates all events by default (opt-out)** when you hook up the bridge; you turn any off with `config set tracker.perEvent. off`. `completionReview` is seeded `comment` (verdict + R-ID coverage; **never terminal `Done`** — fn-66). | | `tracker.perEvent.qa` | string | `off` | **`/flow-next:qa` verdict post (fn-53, R9).** Posts the live-app QA ship verdict (`type: qa_verdict`) as a tracker comment when set non-`off` AND the bridge is active. Leaf values: **`off | comment` only** — `comment` is the only sensible verb for a verdict; `push`/`pull`/`reconcile` operate on the issue body/status and don't apply, so the QA skill treats any non-`off` value as `comment`. **Default `off`, and — unlike the other events — NOT switched on by the discovery ceremony's opt-out default-on set**: a QA verdict post is QA-specific opt-in, enabled explicitly with `config set tracker.perEvent.qa comment`. The post is best-effort and never blocks the verdict. | -| `tracker.perEvent.land.merged` | string | `off` | **`/flow-next:land` post-merge touchpoint (fn-60, 1.14.0+).** After land merges a PR and closes its spec, dispatches tracker-sync (`operation: push `, event tag `land.merged`) — status flips to the configured terminal state and the merge/release verdict comment is posted. **Default `off` and NOT switched on by the discovery ceremony's opt-out default-on set** (like `qa`): enable explicitly with `config set tracker.perEvent.land.merged push`. Best-effort — a tracker failure never blocks land's tail or changes the PR's verdict. | +| `tracker.perEvent.land.merged` | string | `off` | **`/flow-next:land` post-merge touchpoint (fn-60, 1.14.0+; fn-66 made it the sole `Done` driver).** After land merges a PR and closes its spec, dispatches tracker-sync (`operation: push `, event tag `land.merged`) — status flips to the **merge-confirmed** terminal state (`done`/`verified`, gated on the GitHub `MERGED` probe) and the merge/release verdict comment is posted. **fn-66: active-by-default whenever the bridge is active** — a real merge is the ONLY event that legitimately projects terminal `Done`, so this touchpoint rides the bridge-active predicate alone (NOT gated behind this leaf, which then only tunes the optional verdict comment). The schema default stays `off` (accidental-enable guard); the land skill fires it on bridge-active regardless. Best-effort — a tracker failure never blocks land's tail or changes the PR's verdict. | | `tracker.perTracker.teamId` / `projectId` / `labelMap` / `priorityMap` | mixed | `null` / `{}` | Per-tracker linkage hints (Linear team/project ids; label/priority maps). | | `tracker.staleAfterHours` | int | `24` | Staleness threshold (hours) consumed by `sync list-stale`. | | `tracker.conflictTiebreak` | string | `always-ask` | Status who-wins tiebreak: `flow-wins | tracker-wins | always-ask`. In Ralph mode `always-ask` resolves to *queue*, not prompt. | diff --git a/plugins/flow-next/docs/ralph.md b/plugins/flow-next/docs/ralph.md index 9ab65902..17782adf 100644 --- a/plugins/flow-next/docs/ralph.md +++ b/plugins/flow-next/docs/ralph.md @@ -193,8 +193,8 @@ Both drivers deliberately stop at a **draft PR**. The third loop, [`/flow-next:l Driver recipes: - Claude Code `/loop` v2.1.72+ (loops expire after 7 days): `/loop 10m /flow-next:pilot` -- Claude Code `/goal` v2.1.139+ (`/goal` validators are transcript-blind, so phrase the stop condition against the verdict grammar): `/goal keep running /flow-next:pilot until it prints PILOT_VERDICT=NO_WORK, or stop after 20 turns` -- Codex `/goal`: opt-in `[features] goals = true`, CLI >= 0.128.0. No `$skill-in-goal` syntax — write a plain-text objective that names pilot behavior and `PILOT_VERDICT=`. +- Claude Code `/goal` v2.1.139+ (`/goal` validators are transcript-blind, so phrase the stop condition against the verdict grammar): `/goal keep running /flow-next:pilot until it prints PILOT_VERDICT=NO_WORK, or stop after 20 turns` — note `PILOT_VERDICT=DEFERRED_TO_LAND` is its own terminal (an all-done spec whose open PR land owns); route it to `/flow-next:land`, not a pilot re-run. +- Codex `/goal`: opt-in `[features] goals = true`, CLI >= 0.128.0. No `$skill-in-goal` syntax — write a plain-text objective that names pilot behavior and `PILOT_VERDICT=` (`DEFERRED_TO_LAND` routes to land, not a pilot re-run). - Ship loop (after the build loop's draft PRs are open — babysitting waits on external CI/reviewer events, so use a cadence): `/loop 30m /flow-next:land` **The full pipeline** runs pilot and land **concurrently** — pilot builds spec N while land babysits spec N−1's PR. Same-session (two `/loop` jobs, ticks serialize) works for small backlogs; the real assembly line is **two instances** (Claude Code / Codex), each in **its own clone or git worktree** — both loops mutate the working tree (pilot checks out spec branches, land checks out PR branches for CI fixes), so two loops sharing one checkout would trip each other's dirty-tree guards. GitHub is the shared state: land pushes the spec close after merging, pilot pulls the base branch before planning, and the strike ledgers are per-clone by design. The loops never contend: land touches only specs with all tasks done (in-flight specs stay pilot's), and pilot skips specs that already have an open PR. diff --git a/plugins/flow-next/docs/teams.md b/plugins/flow-next/docs/teams.md index 9e9149bb..7fecf5e1 100644 --- a/plugins/flow-next/docs/teams.md +++ b/plugins/flow-next/docs/teams.md @@ -180,7 +180,7 @@ Opt-in flags for hardened review: `--validate` (validator pass on `NEEDS_WORK` t Configure as a required gate via `--require-completion-review` (in `flowctl next`). The work skill blocks spec-close until spec-completion-review returns SHIP. The fix loop happens internally — the skill keeps iterating until it passes or escalates. -*(+ optional tracker sync)* — `tracker.perEvent.completionReview` reconciles the linked issue (flip Done/verified + verdict / R-ID coverage) when the closing gate passes. On by default once the bridge is hooked up (opt-out per event). +*(+ optional tracker sync)* — `tracker.perEvent.completionReview` posts the verdict + R-ID coverage as a comment on the linked issue when the closing gate passes (and at most leaves it at `In Review`); it **never** flips the issue to `Done`/`verified` (fn-66 — `Done` is reserved for a merged PR and driven solely by `land.merged`). On by default once the bridge is hooked up (opt-out per event). ### [7.5] Live-app QA — optional, before the PR @@ -421,7 +421,7 @@ Ralph emits run logs to `scripts/ralph/runs//` — receipts, verbose logs, Teams that live in Linear or GitHub Issues don't have to leave their board. `/flow-next:tracker-sync` **projects** a `.flow/specs/.md` spec onto a tracker issue (Linear first, GitHub next) and reconciles body, status, and comments two-way. **Projection, not coordination** — the spec stays the source of truth and the quality layer; the tracker is a co-editable mirror that never drives flow state or spawns agents. Hook it up once via the discovery ceremony; from then on the lifecycle (capture → plan → work → completion-review) keeps the linked issue in sync — on by default per event, opt out with `flowctl config set tracker.perEvent. off`. -**Linear Diffs — review the PR inside the issue.** When `tracker.type == linear`, Flow-Next makes your PRs [Linear Diffs](https://linear.app/docs/diffs)-ready automatically: `/flow-next:make-pr` writes a **non-closing** `Ref WOR-N` line into the PR body (plus a rich PR attachment on the GraphQL transport), so Linear's GitHub integration auto-links the PR and renders its full diff, file changes, checks, and inline review threads **directly on the issue** — you approve / request changes / merge without leaving Linear. *Non-closing* (`Ref`, not `Fixes`) is deliberate: the PR renders as a diff but does **not** auto-complete the issue on merge — `/flow-next:spec-completion-review` owns the Done transition. The PR↔issue link is unconditional once the bridge is active (no `makePr` opt-in needed). One-time Linear-side setup is required (the GitHub integration with code access, your personal GitHub connection, and "Enable code reviews"). **GitHub-tracker** users get no Linear Diffs — the PR is cross-linked natively (`Refs #N`) and review happens on GitHub. +**Linear Diffs — review the PR inside the issue.** When `tracker.type == linear`, Flow-Next makes your PRs [Linear Diffs](https://linear.app/docs/diffs)-ready automatically: `/flow-next:make-pr` writes a **non-closing** `Ref WOR-N` line into the PR body (plus a rich PR attachment on the GraphQL transport), so Linear's GitHub integration auto-links the PR and renders its full diff, file changes, checks, and inline review threads **directly on the issue** — you approve / request changes / merge without leaving Linear. *Non-closing* (`Ref`, not `Fixes`) is deliberate: the PR renders as a diff but does **not** auto-complete the issue on merge — `/flow-next:land`'s `land.merged` touchpoint owns the `Done` transition (fn-66), gated on a GitHub-confirmed `MERGED` probe (completion review only posts a verdict comment + at most `In Review`). The PR↔issue link **and the move to `In Review`** are unconditional once the bridge is active (no `makePr` opt-in needed); `land.merged`'s move to `Done` is likewise active-by-default. One-time Linear-side setup is required (the GitHub integration with code access, your personal GitHub connection, and "Enable code reviews"). **GitHub-tracker** users get no Linear Diffs — the PR is cross-linked natively (`Refs #N`) and review happens on GitHub. Full reference — setup ceremony, hybrid ids, transport ladder, who-wins reconciliation: [`tracker-sync.md`](tracker-sync.md). A screenshot of a Flow-Next PR rendered as a Linear Diff is on [flow-next.dev](https://flow-next.dev/teams/tracker-sync/#linear-diffs--review-the-pr-inside-the-issue). diff --git a/plugins/flow-next/docs/tracker-sync.md b/plugins/flow-next/docs/tracker-sync.md index 28541871..2d3d9bf0 100644 --- a/plugins/flow-next/docs/tracker-sync.md +++ b/plugins/flow-next/docs/tracker-sync.md @@ -102,9 +102,10 @@ Sync is wired into seven lifecycle skills. **When you hook the bridge up via the | plan | `tracker.perEvent.plan` | `reconcile` | a spec is decomposed into tasks | | work (first claim) | `tracker.perEvent.work.firstClaim` | `push` | the first task of a spec is claimed | | work (done) | `tracker.perEvent.work.done` | `comment` | a task completes | -| make-pr | `tracker.perEvent.makePr` | `comment` | a PR is opened | +| make-pr | `tracker.perEvent.makePr` | `comment` | a PR is opened (→ issue **In Review** + PR link, unconditional when bridge active — fn-66) | | resolve-pr | `tracker.perEvent.resolvePr` | `comment` | PR threads are resolved | -| completion review | `tracker.perEvent.completionReview` | `reconcile` | a spec-completion review runs | +| completion review | `tracker.perEvent.completionReview` | `comment` | a spec-completion review runs (verdict + R-ID coverage; **never terminal Done** — fn-66) | +| land (merged) | `tracker.perEvent.land.merged` | `push` | a PR **merges** (→ issue **Done**, the SOLE Done driver, gated on the GitHub `MERGED` probe; **active-by-default** when bridge active — fn-66) | The lifecycle skills value-check `flowctl sync active` and the specific `perEvent` leaf, short-circuiting cleanly when the bridge is off or an event was opted out — so a no-tracker repo (or an excluded event) costs a single value-check, no transport. @@ -112,23 +113,28 @@ The lifecycle skills value-check `flowctl sync active` and the specific `perEven **Auto-link on first touch (create-if-unlinked).** When a lifecycle event fires for a spec that isn't yet linked — e.g. you went straight to `/flow-next:plan` instead of `/flow-next:capture` — the tracker-sync skill **flow-first-pushes (creates the issue + links it) *before* running the event's operation**, then later events reconcile the now-linked spec (skill `steps.md` §Phase 3 "create-if-unlinked"). That is the point of the opt-out model: an active bridge keeps **every** in-flow-authored spec in sync, not just the ones you remembered to link by hand. The spec ↔ one-issue grain is unchanged — tasks never become sub-issues. Only `unlink` no-ops on an unlinked spec; every other op creates-then-syncs (and still no-ops cleanly if no transport is reachable). -**Activation is ceremony-gated, not flag-gated.** The config *schema* default for every `perEvent` leaf stays `off`, so a bare `tracker.enabled=true` set by hand or a script — without running the discovery ceremony — fires **no lifecycle-event sync** (every `perEvent` event stays dormant). Only the ceremony's explicit per-event writes (or your own `config set`) turn events on. This keeps the accidental-enable guard while making the *intended* path (run the ceremony) sync everything. **The one thing that is *not* gated this way is make-pr's PR↔issue link** — it's unconditional whenever the bridge is active (the exception documented just below), so a bare `enabled=true` plus a linked spec will still add a `Ref` line on the next make-pr. That linkage is cheap, conflict-free, and the whole point (Linear Diffs); it does not mutate the spec or fire the lifecycle touchpoints. +**Activation is ceremony-gated, not flag-gated.** The config *schema* default for every `perEvent` leaf stays `off`, so a bare `tracker.enabled=true` set by hand or a script — without running the discovery ceremony — fires **no lifecycle-event sync** (every `perEvent` event stays dormant). Only the ceremony's explicit per-event writes (or your own `config set`) turn events on. This keeps the accidental-enable guard while making the *intended* path (run the ceremony) sync everything. **The two things that are *not* gated this way are make-pr's PR↔issue link (+ In Review) and `land.merged`'s Done-on-merge** — both unconditional whenever the bridge is active (the exceptions documented just below), so a bare `enabled=true` plus a linked spec will still add a `Ref` line + move the issue to In Review on the next make-pr, and move it to Done on a confirmed merge. The make-pr linkage is cheap, conflict-free, and the whole point (Linear Diffs); the land.merged Done is merge-evidence-gated so it only fires for genuinely shipped work. Neither mutates the spec beyond the linked issue's status. -**One exception — PR linkage is unconditional when the bridge is active.** make-pr always links the new PR to its tracker issue when `sync active` and the issue is linked — it does **not** require opting `makePr` in. Linking a PR to its issue is zero-/near-zero-cost hygiene and is the whole value (it powers Linear Diffs, below), so there's no reason to gate it. The `perEvent.makePr` leaf still governs any *extra* make-pr sync (e.g. a status comment). make-pr additionally **verifies the ref landed** post-create (§4.6b): it fetches the LIVE PR body via `gh pr view --json body` and, when the `Ref ` line is absent (e.g. an agent hand-rolled `gh pr create` and bypassed the deterministic append), repairs it append-only via `gh pr edit` — mechanical, idempotent, fully non-fatal. +**Two unconditional paths when the bridge is active (fn-66).** Some status transitions are too important to leave opt-in: + +1. **make-pr — PR link + In Review.** make-pr always links the new PR to its tracker issue *and* moves the issue to **In Review** when `sync active` and the issue is linked — it does **not** require opting `makePr` in. An open PR *is* the In Review lifecycle rung (`flowToNormalized(spec, open) → in-review`, non-terminal), and the link powers Linear Diffs — both ride the same unconditional path. The `perEvent.makePr` leaf still governs any *extra* make-pr sync (e.g. an optional breadcrumb comment). make-pr additionally **verifies the ref landed** post-create (§4.6b): it fetches the LIVE PR body via `gh pr view --json body` and, when the `Ref ` line is absent (e.g. an agent hand-rolled `gh pr create` and bypassed the deterministic append), repairs it append-only via `gh pr edit` — mechanical, idempotent, fully non-fatal. +2. **land — Done on merge.** `land.merged` is **active-by-default** when the bridge is active and is the **SOLE** driver of the terminal `Done` state. A real merge is the only event that legitimately projects "shipped", so leaving it opt-in would strand boards at In Review forever after a merge. The terminal write self-checks the GitHub `MERGED` probe (the merge-evidence invariant) — no path writes `Done` without it. The `perEvent.land.merged` leaf, if set, only tunes the optional verdict comment, never the (MERGED-gated) status. + +These are the only two unconditional touchpoints; everything else stays `perEvent`-gated. ### MISSING after retro-fire — recovery A `Tracker sync: MISSING: (retro-fire failed: )` summary line means the touchpoint didn't fire AND the one bounded retro-fire couldn't recover it — typically no reachable transport (MCP server down, no `LINEAR_API_KEY`, `gh` unauthenticated). The primary work is unaffected: tracker sync is best-effort and never blocks, so the task is done / the PR is open. To recover by hand: 1. **Read the failure reason** from the run's receipts: `ls -t .flow/sync-runs/sync--*.json | head -3` — the `status` (`noop` / `errored`) and `note` fields on the event-tagged receipt say why it failed. -2. **Once transport returns**, re-fire the missed touchpoint manually via the skill: `/flow-next:tracker-sync push ` for status events (`work.firstClaim`, `work.completionReview`), or the matching op for comment events (`comment ` for `work.done` / `makePr`). +2. **Once transport returns**, re-fire the missed touchpoint manually via the skill: `/flow-next:tracker-sync push ` for the status event (`work.firstClaim`), or the matching `comment ` op for comment events (`work.done`, `makePr`, and `work.completionReview` — the last posts only a verdict + R-ID coverage comment, never a terminal status per fn-66). 3. **Verify**: `flowctl sync check --events --since ` now prints `OK:`. ## Linear Diffs — review the PR inside the issue [Linear Diffs](https://linear.app/docs/diffs) (GA May 2026) renders a GitHub PR's diff, file changes, checks, and inline review threads directly on the Linear issue, and lets you approve / request changes / merge from Linear. flow-next makes your PRs **Diffs-ready automatically** when `tracker.type == linear`: -- **What flow-next does:** make-pr puts a **non-closing** `Ref WOR-N` line in the PR body (make-pr §4.6a) so Linear's GitHub integration auto-links the PR to the issue — which is exactly what makes the diff render. On the GraphQL transport it also creates the rich PR attachment (`attachmentLinkURL`) for status sync. *Non-closing* (`Ref`, not `Fixes`) is deliberate: the PR links + renders as a diff but does **not** auto-complete the Linear issue on merge — flow-next's spec-completion-review owns the Done transition. +- **What flow-next does:** make-pr puts a **non-closing** `Ref WOR-N` line in the PR body (make-pr §4.6a) so Linear's GitHub integration auto-links the PR to the issue — which is exactly what makes the diff render. On the GraphQL transport it also creates the rich PR attachment (`attachmentLinkURL`) for status sync. *Non-closing* (`Ref`, not `Fixes`) is deliberate: the PR links + renders as a diff but does **not** auto-complete the Linear issue on merge — flow-next's `land.merged` touchpoint owns the `Done` transition (fn-66), gated on a GitHub-confirmed `MERGED` probe. (Pre-fn-66 this said "spec-completion-review owns the Done transition" — that was the bug FLOW-15 caught: completion review is *local* completion, not merge evidence, so it could close the issue before the PR merged. Completion review now posts only a verdict comment + at most `In Review`; `Done` is reserved for a merged PR.) - **What you must enable (one-time, Linear-side — flow-next can't set these):** the Linear **GitHub integration with code access** to the repo, your **personal GitHub connection**, and **"Enable code reviews"** in Linear settings. Without them the PR still links and status still syncs; only the rendered diff view needs them. - **GitHub tracker:** no Linear Diffs — the PR is cross-linked natively (`Refs #N`) in the same repo; review happens on GitHub. diff --git a/plugins/flow-next/scripts/flowctl.py b/plugins/flow-next/scripts/flowctl.py index 7725015f..291092d0 100755 --- a/plugins/flow-next/scripts/flowctl.py +++ b/plugins/flow-next/scripts/flowctl.py @@ -1021,8 +1021,11 @@ def save_task_definition(task_id: str, definition: dict) -> None: # because `load_flow_config()` always merges this default block in, an absent / # null / unrelated write must NOT activate the bridge. The bridge is active iff # raw `tracker.enabled == true` OR raw `tracker.type ∈ {linear, github}` (see -# `tracker_sync_active`). All `perEvent` leaves default `off`, so even a stray -# `enabled=true` does nothing until a specific event is opted in. +# `tracker_sync_active`). All `perEvent` leaves default `off`, so a stray +# `enabled=true` opts in no PER-EVENT lifecycle sync until a specific event is +# enabled — EXCEPT the two fn-66 unconditional bridge-active paths: make-pr's +# PR↔issue link + In Review push, and `land.merged`'s Done-on-merge (both ride the +# bridge-active predicate alone, not a perEvent leaf — see SKILL.md / steps.md). TRACKER_TYPES = {"linear", "github"} TRACKER_PER_EVENT_LEAVES = {"off", "pull", "push", "reconcile", "comment"} TRACKER_TIEBREAKS = {"flow-wins", "tracker-wins", "always-ask"} @@ -1068,9 +1071,17 @@ def get_default_tracker_config() -> dict: # /flow-next:qa skill treats any non-`off` value as `comment`. # Default `off` keeps every existing repo silent until opted in. "qa": "off", - # fn-60 (R13) — opt-in post-merge touchpoint for /flow-next:land - # (flip linked issue terminal + release/verdict comment). Nested - # like work.*; default off keeps non-land repos at zero overhead. + # fn-60 (R13) / fn-66 (R10) — post-merge touchpoint for /flow-next:land. + # fn-66 makes land.merged the SOLE `Done` driver and ACTIVE-BY-DEFAULT + # whenever the bridge is active (like make-pr's unconditional PR-link + # path) — the merge→Done projection rides the bridge-active predicate + # alone, NOT this leaf. A real merge is the only event that legitimately + # projects terminal Done (gated on the GitHub MERGED probe), so leaving + # it opt-in would strand boards at In Review post-merge. The schema + # default stays `off` (a bare enabled=true activates no lifecycle sync + # — fn-52.1 invariant); when the bridge IS active the land skill fires + # this touchpoint regardless of the leaf, which then only tunes the + # optional verdict comment, never the (MERGED-gated) status write. "land": {"merged": "off"}, }, "perTracker": { diff --git a/plugins/flow-next/skills/flow-next-land/workflow.md b/plugins/flow-next/skills/flow-next-land/workflow.md index f237d251..0c3ce962 100644 --- a/plugins/flow-next/skills/flow-next-land/workflow.md +++ b/plugins/flow-next/skills/flow-next-land/workflow.md @@ -455,24 +455,47 @@ git log --oneline -1 # evidence echo: the squash commit referencing the PR - Clean non-`.flow/` tree required before starting and asserted after. - **Idempotency probe BEFORE acting**: check for an existing tag/GitHub release for the target version (`git tag -l `, `gh release view `); already present → resume past completed steps, never re-tag. - Release-step failure AFTER the successful merge → verdict `NEEDS_HUMAN` + durable label on the (merged) PR via 3.4 — the merge is NEVER retried, and later ticks never blindly re-run the failed step (re-entry only resumes via the idempotency probe). - - Release completed → verdict `RELEASED`.4. **Tracker touchpoint (opt-in, best-effort)** — deliberately AFTER release-follow so the verdict comment can carry the release outcome — — gated exactly like every fn-57 touchpoint (active AND leaf ≠ off/null; an unseeded leaf reads `null` = off): + - Release completed → verdict `RELEASED`. +4. **Tracker touchpoint — the SOLE `Done` driver (fn-66, R3/R10)** — deliberately AFTER release-follow so the verdict comment can carry the release outcome. **`land.merged` is active-by-default whenever the bridge is active** — it is NOT gated behind `tracker.perEvent.land.merged != off`. This is deliberate (fn-66, R10): a real merge is the ONLY event that legitimately projects `Done`, so leaving it opt-in would let boards stick at `In Review` forever after a merge. Like make-pr's unconditional PR-link path, the merge→Done projection rides the bridge-active predicate alone (the `land.merged` leaf, if a repo set it, only tunes the optional verdict comment — never gates the status): ```bash - LEAF="$("$FLOWCTL" config get tracker.perEvent.land.merged --json | jq -r '.value')" TRACKER_FIRE=0 - if [ "$("$FLOWCTL" sync active --json | jq -r '.active')" = "true" ] \ - && [ "$LEAF" != "off" ] && [ "$LEAF" != "null" ]; then - TRACKER_FIRE=1 + if [ "$("$FLOWCTL" sync active --json | jq -r '.active')" = "true" ]; then + TRACKER_FIRE=1 # active-by-default — no perEvent gate (fn-66, R10) fi ``` - `TRACKER_FIRE == 1` → you MUST dispatch the tracker-sync skill via the Skill tool — this is a real skill invocation, not narration, and it uses the fn-57 lifecycle dispatch grammar (operation token + event tag, same shape as work/make-pr touchpoints): + **Self-check the `MERGED` probe before dispatching the terminal push (fn-66, R3).** This touchpoint runs from the post-merge tail, but it must NOT trust the caller's claim of a merge — it re-confirms GitHub reports the linked PR `MERGED` for the spec branch. **Critically, do NOT reuse the Phase 3.5 `MERGED_PR_NUM`** — on the normal babysit path the PR was `OPEN` at discovery, so that variable is empty even though land just merged it. **Re-probe fresh, after the `gh pr merge` succeeded and before deciding `TRACKER_TERMINAL_OK`** (this also covers the re-entry path, where the pre-merge probe already saw `MERGED`). The merge-evidence invariant is an **invariant on the outbound terminal write**, enforced at the source — if the fresh probe is not a clean `MERGED`, do NOT dispatch a `Done` push (fall back to a non-terminal comment or `NEEDS_HUMAN`), so no path writes `Done` without merge evidence: - ```text - skill: flow-next-tracker-sync (operation: push , event: land.merged) + ```bash + # Fresh post-merge re-probe — NEVER the stale Phase 3.5 MERGED_PR_NUM (empty on the + # normal OPEN→merge path). This is the authoritative merge-evidence read for the gate. + MERGED_CONFIRMED="$(gh pr list --head "$BRANCH_NAME" --state all --json state \ + | jq -r '[.[] | select(.state == "MERGED")] | length')" + if [ "$TRACKER_FIRE" = "1" ] && [ "${MERGED_CONFIRMED:-0}" -gt 0 ]; then + TRACKER_TERMINAL_OK=1 # GitHub-confirmed MERGED → the gate is satisfied + else + TRACKER_TERMINAL_OK=0 # no clean MERGED → comment only, never terminal + fi ``` - The push projects the just-closed spec onto the linked issue — status who-wins flips it to the configured terminal state — and posts the merge/release verdict comment (include `merged PR: ` and, when the release step ran, the released version). The tracker-sync skill owns the transport, status who-wins, comment dedup, and emits its own receipt event-tagged `land.merged` (the fn-57 layer). Best-effort: a dispatch failure or tracker error surfaces as a stderr warning in the PR's evidence block and NEVER changes the PR's verdict (the close and any release already stand). Persist any tracked sync state the touchpoint updated with a best-effort follow-up commit (`git add ".flow/specs/${spec}.json" .flow/sync-runs && git commit -m "chore(flow): sync state for ${spec} land.merged touchpoint" && git push` — file-scoped so pre-existing .flow dirtiness never rides along; no rollback needed, it carries this spec's sync state + receipts only). + `TRACKER_FIRE == 1` → you MUST dispatch the tracker-sync skill via the Skill tool — this is a real skill invocation, not narration, and it uses the fn-57 lifecycle dispatch grammar (operation token + event tag, same shape as work/make-pr touchpoints). **The `TRACKER_TERMINAL_OK` self-check selects the operation** — land does NOT trust the caller's merge claim, it branches on its own GitHub-`MERGED` probe (fn-66, R3): + + - **`TRACKER_TERMINAL_OK == 1`** (clean GitHub `MERGED`) → dispatch the terminal `push`: + + ```text + skill: flow-next-tracker-sync (operation: push , event: land.merged) + ``` + + The `push` projects the just-closed spec; the tracker-sync skill's own `flowToNormalized(spec, merged)` gate (status-sync.md S-I) resolves the terminal rung — `verified` if completion review shipped, else `done` — so status who-wins flips the issue to the merge-confirmed terminal state, ALSO posting the merge/release verdict comment (include `merged PR: ` and, when the release step ran, the released version). The Done write is **double-gated** (this caller's probe AND the skill's own merge-evidence gate). + + - **`TRACKER_TERMINAL_OK == 0`** (no clean `MERGED` — e.g. the post-merge probe came back empty/ambiguous, a corruption signal) → do NOT dispatch a terminal `push`. Dispatch the **comment-only** path instead and surface `NEEDS_HUMAN`, so no path writes `Done` without merge evidence: + + ```text + skill: flow-next-tracker-sync (operation: comment , event: land.merged) # verdict comment only, no terminal status + ``` + + Best-effort either way: a dispatch failure or tracker error surfaces as a stderr warning in the PR's evidence block and NEVER changes the PR's verdict (the close and any release already stand). Persist any tracked sync state the touchpoint updated with a best-effort follow-up commit (`git add ".flow/specs/${spec}.json" .flow/sync-runs && git commit -m "chore(flow): sync state for ${spec} land.merged touchpoint" && git push` — file-scoped so pre-existing .flow dirtiness never rides along; no rollback needed, it carries this spec's sync state + receipts only). Verdict `MERGED` (or `RELEASED`). On success, drop the PR's ledger entry (atomic `jq 'del(.[$pr])'` + `mv`). End on the base branch with a clean tree (the original branch may have been the now-deleted PR branch — the base IS the restore target after a merge). diff --git a/plugins/flow-next/skills/flow-next-make-pr/workflow.md b/plugins/flow-next/skills/flow-next-make-pr/workflow.md index c65d6bd9..13bcc842 100644 --- a/plugins/flow-next/skills/flow-next-make-pr/workflow.md +++ b/plugins/flow-next/skills/flow-next-make-pr/workflow.md @@ -1664,10 +1664,12 @@ fi R24 invariant: under Ralph the PR URL is the **sole stdout artefact** in machine-parseable form (`PR_URL=`), so the harness can capture it via `eval "$(/flow-next:make-pr ...)"` or by grep / tail. Everything else (memory write notes, recovery hints, breadcrumbs, the §5.7 tracker-sync check + `Tracker sync:` summary line) routes through stderr where the harness logs it but doesn't parse it. -### 5.6 — Tracker sync (opt-in) — link the PR to the issue (Diffs-ready) +### 5.6 — Tracker sync (opt-in) — link the PR to the issue + move it to In Review (Diffs-ready) **Runs whenever the tracker bridge is active, after `gh pr create` returned a `$PR_URL` in §4.6 (never under `--dry-run` — Phase 4.0 short-circuits before Phase 5).** No separate `makePr` opt-in — linking a PR to its issue is zero-/near-zero-cost hygiene and is the whole value (Linear Diffs). Links the PR to the tracker issue (R10), append-only and conflict-free (R8). **Not Ralph-blocked** (attaching a link is a confident, conflict-free op). +**In Review status push rides this SAME unconditional bridge-active path (fn-66, R2).** Because an open PR for the branch is by definition the *In Review* lifecycle rung, moving the linked issue to `In Review` is part of the same PR↔issue linkage that powers Linear Diffs — it is **NOT gated behind `tracker.perEvent.makePr != off`** (that leaf gates only the optional breadcrumb comment, not the link/status that make the bridge useful). A just-created PR is `OPEN`, so the merge-evidence probe yields `open` and `reconcileStatus(spec, issue, open)` → `in-review` (status-sync.md row 4); the dispatch below reconciles the issue to that non-terminal rung (never terminal — a freshly-opened PR has no merge evidence). The dispatch uses the **`reconcile`** op (not `push`) precisely so this In Review nudge rides the body-preserving 3-way merge — a `push` would re-render and overwrite the issue body first (steps.md push() lines 134-136), clobbering human tracker-side edits. + The **primary linkage already happened in §4.6a** — the `Ref ` line in the PR body, which makes the host's tracker integration auto-link the PR. §5.6 is the **enhancement layer** and is **transport- and tracker-type-aware**: - **Linear (`tracker.type == linear`):** the §4.6a body ref is what makes **Linear Diffs** render the PR inside the issue (Linear's GitHub integration auto-links on the `WOR-N` identifier). On the **GraphQL rung**, additionally create the *rich* GitHub-PR attachment + status sync via `attachmentLinkURL(issueId, $PR_URL)` (Linear auto-detects the GitHub URL; do NOT use `attachmentCreate` — that yields a dumb attachment with no diff/status). On the **MCP rung** there is no URL-attach tool, so it relies entirely on the §4.6a auto-link (sufficient — the integration does the rest). Optionally also post a one-line breadcrumb comment. @@ -1677,20 +1679,35 @@ The **primary linkage already happened in §4.6a** — the `Ref ` li ```bash if [[ -n "$PR_URL" ]] \ && [ "$("$FLOWCTL" sync active --json | jq -r '.active')" = "true" ]; then - # Invoke the flow-next-tracker-sync skill: link $PR_URL to the issue. - # skill: flow-next-tracker-sync (operation: link $PR_URL, event: makePr) - # linear → rich attachment via attachmentLinkURL (GraphQL rung) + optional breadcrumb comment; - # the §4.6a body ref already enabled the auto-link + Diffs. - # github → native `Refs #N` (github.md). - # Unlinked spec → flow-first push (create + link) first, then link the PR / Diff - # (tracker-sync §Phase 3 create-if-unlinked). No-op only if no transport reachable. + # Invoke the flow-next-tracker-sync skill with the canonical lifecycle dispatch + # grammar — `operation: , event: ` (verbatim, no descriptors in + # the operation token): + # skill: flow-next-tracker-sync (operation: reconcile , event: makePr) + # The `reconcile` op (open-PR evidence) moves the issue to In Review AND links $PR_URL — + # BOTH ride this unconditional bridge-active path (NOT gated behind perEvent.makePr): + # the link powers Diffs and In Review is the honest lifecycle state for an open PR. + # WHY `reconcile`, NOT `push` (fn-66 regression fix): `push` renders the COMPLETE + # spec body and writeIssue's it BEFORE setStatus (steps.md push() lines 134-136), so + # opening a PR just to nudge In Review would CLOBBER any human tracker-side body edits + # made since the last sync. `reconcile` runs the 3-way body merge (steps.md reconcile() + # lines 177-185) that PRESERVES tracker-side edits, and sets In Review as part of the + # SAME op via reconcileStatus(spec, issue, open) → in-review (status-sync.md row 4 / R2). + # A genuine body conflict queues (sync defer) or asks — it NEVER blocks the open PR. + # linear → rich attachment via attachmentLinkURL (GraphQL rung) + setStatus(in-review) + # via reconcileStatus (open prEvidence → in-review, non-terminal, status-sync.md + # row 4); the §4.6a body ref already enabled the auto-link + Diffs. Optional breadcrumb comment. + # github → native `Refs #N` (github.md) + status:in-review label. + # (the PR URL itself rides as evidence in the comment/attachment, not the op token.) + # The open PR is the merge-evidence `open` bucket → In Review, NEVER terminal (no MERGED). + # Unlinked spec → flow-first link (create + base-snapshot) first, then reconcile the now-linked + # spec → link the PR / Diff + In Review (tracker-sync §Phase 3 create-if-unlinked). No-op only if no transport reachable. # Best-effort — the PR is already open; a tracker failure must NOT exit non-zero. # Under Ralph, framing routes to stderr (keeps the PR_URL= stdout invariant). : fi ``` -The PR is already open before this step; a tracker failure surfaces as a stderr warning and never changes the exit code (same non-fatal discipline as the `--memory` write in §5.1). The skill emits its own receipt, event-tagged `--event makePr` — the tag §5.7's end-of-run `sync check` audits. +The PR is already open before this step; a tracker failure surfaces as a stderr warning and never changes the exit code (same non-fatal discipline as the `--memory` write in §5.1). The skill emits its own receipt, event-tagged `--event makePr` — the tag §5.7's end-of-run `sync check` audits. The `In Review` status push is non-terminal (`reconcileStatus(spec, issue, open) → in-review`, status-sync.md row 4) — an open PR is never `Done`. **`reconcile` (not `push`) is deliberate (fn-66):** the body-preserving 3-way merge means moving the issue to In Review on PR open can never overwrite human tracker-side body edits — the prior conflict-free guarantee of the old `link $PR_URL` path, now extended to the status nudge. ### 5.7 — Tracker-sync end-of-run check — LAST action before exit (fn-57) @@ -1711,7 +1728,7 @@ SINCE=$(gh pr view "$PR_URL" --json createdAt --jq .createdAt 2>/dev/null || tru **Retro-fire on MISSING — exactly ONE cycle, never blocking:** 1. Record the retro-fire start anchor (the re-check needs it as `--since`): `date -u +%Y-%m-%dT%H:%M:%SZ` -2. Invoke the **flow-next-tracker-sync skill directly** — the same dispatch as §5.6, with its `event:` tag: `skill: flow-next-tracker-sync (operation: link $PR_URL, event: makePr)` — NEVER this check block as a wrapper (no recursion). +2. Invoke the **flow-next-tracker-sync skill directly** — the same dispatch as §5.6, with its `event:` tag, in the canonical `operation: , event: ` grammar: `skill: flow-next-tracker-sync (operation: reconcile , event: makePr)` (the `reconcile` op links $PR_URL + moves the issue to In Review via the body-preserving 3-way merge — never clobbers tracker-side edits, fn-66; the PR URL rides as evidence, not in the op token) — NEVER this check block as a wrapper (no recursion). 3. Re-check with `--since` = the step-1 anchor: `"$FLOWCTL" sync check "$SPEC_ID" --events makePr --since "" --json` 4. Record the final state in the summary slot. Still MISSING after the one cycle is a recorded, visible outcome — never a second retro-fire, never a block (the PR is already open; a tracker hiccup must not become a hard stop). Recovery guidance lives in the receipt note + `docs/tracker-sync.md`. diff --git a/plugins/flow-next/skills/flow-next-pilot/SKILL.md b/plugins/flow-next/skills/flow-next-pilot/SKILL.md index d70d6a55..9da49368 100644 --- a/plugins/flow-next/skills/flow-next-pilot/SKILL.md +++ b/plugins/flow-next/skills/flow-next-pilot/SKILL.md @@ -102,10 +102,12 @@ The `/goal` validator is transcript-blind: it reads conversation output only and Every tick ends with exactly one terminal line, the last line of the response, with nothing after it: ```text -PILOT_VERDICT= spec= stage= reason="" +PILOT_VERDICT= spec= stage= reason="" ``` -Use `spec=-` and `stage=-` when no spec was selected. Stage values are exactly `plan`, `plan-review`, `work`, `make-pr`, or `-`. +Use `spec=-` and `stage=-` when no spec was selected. Stage values are exactly `plan`, `plan-review`, `work`, `make-pr`, `land`, or `-`. + +`DEFERRED_TO_LAND` is a distinct *non-terminal-work* verdict (stage `land`): every remaining all-done candidate has an open PR that land — not pilot — owns. It is deliberately separated from `NO_WORK` so a driver can route it to `/flow-next:land` instead of stopping; an all-done spec with an open PR is real outstanding work, never absence of work. Driver condition examples: diff --git a/plugins/flow-next/skills/flow-next-pilot/workflow.md b/plugins/flow-next/skills/flow-next-pilot/workflow.md index f4e89f03..66b85352 100644 --- a/plugins/flow-next/skills/flow-next-pilot/workflow.md +++ b/plugins/flow-next/skills/flow-next-pilot/workflow.md @@ -115,7 +115,7 @@ Classify from `SPEC_JSON` plus `TASKS_JSON`; first match wins: | any task is `todo` or `blocked` (canonical task statuses are `todo`, `in_progress`, `blocked`, `done`) | `work` | | the only non-`done` tasks are `in_progress` own/unassigned (other-actor claims were already skipped at SELECT) | `NEEDS_HUMAN`, reason `stale in-progress claim — work's ready-driven loop cannot resume it` | | all tasks done and `completion_review_status != "ship"` and review backend is configured | `work` | -| all tasks done and completion is ship-or-ungated | PR probe, then `make-pr`, skip, or `NEEDS_HUMAN` | +| all tasks done and completion is ship-or-ungated | PR probe, then `make-pr` (no PR), defer-to-land (open PR), or `NEEDS_HUMAN` (closed-unmerged / missing-branch / merged-but-open-spec) | A spec whose only remaining tasks are `blocked` still classifies as `work`; if work cannot advance it, the healthy-no-advance strike path handles it. An in-progress-only spec is different: work's Phase 3a drives off `flowctl ready --spec`, which never returns an `in_progress` task, so dispatching would burn strikes or wrongly enter the completion-review path — the stale-claim `NEEDS_HUMAN` is crash-class (no dispatch, no strike). @@ -132,13 +132,13 @@ CLOSED_PR=$(printf '%s\n' "${PR_JSON:-[]}" | jq -r '.[] | select(.state == "CLOS MERGED_PR=$(printf '%s\n' "${PR_JSON:-[]}" | jq -r '.[] | select(.state == "MERGED") | .url' | head -1) ``` -Classification outcomes for the all-done branch: +Classification outcomes for the all-done branch (the all-done invariant: an all-done / completion-`ship` spec lacking a **merged** PR is *unfinished from the board's perspective* — pilot keeps driving it (`make-pr`), defers it to land (open PR), or surfaces it (`NEEDS_HUMAN`); it never collapses to terminal `NO_WORK`): - gh missing, unauthenticated, or API failure: `PILOT_VERDICT=NEEDS_HUMAN spec= stage=make-pr reason="gh probe failed at all-done branch"`. -- OPEN PR exists: this spec is finished from pilot's perspective; skip to the next SELECT candidate. +- OPEN PR exists (and no MERGED PR): this spec is **deferred to land** — land owns the open PR, not pilot — so record it as a *deferred candidate* and skip to the next SELECT candidate. This is an explicit defer, never a silent finish: if no later candidate is selectable, the tick terminates with the distinct, greppable `PILOT_VERDICT=DEFERRED_TO_LAND` line (Phase 6), never `NO_WORK`. Track the deferred spec id + open-PR url so the terminal line can name it. +- No PR exists: stage is `make-pr`. This is the FLOW-15 case (all-done, no PR — make-pr never ran or its PR was lost); it MUST classify `make-pr` and never fall through to `NO_WORK`. - CLOSED PR exists and no OPEN PR exists: `NEEDS_HUMAN`, because the PR was closed without merge and pilot never silently reopens human-rejected work. - MERGED PR exists while the spec is still open: `NEEDS_HUMAN`, because the state is inconsistent and pilot must not create a second PR. -- No PR exists: stage is `make-pr`. Dry-run stops after classification. It prints selected spec, stage, review backend, task counts, consulted status fields, PR probe result if any, skipped candidates, and any would-clear ledger entries. It writes no ledger (the ledger file is never created or modified on a dry-run tick), checks out no branch, and dispatches nothing: @@ -307,10 +307,22 @@ Crash-class outcomes are `NEEDS_HUMAN`: sub-skill crash, dirty non-`.flow/` tree PILOT_VERDICT=NEEDS_HUMAN spec= stage= reason="" ``` -No selectable candidate or all candidates finished with existing OPEN PRs yields: +An all-done spec with an **open** PR is *not* crash-class — it is the benign `DEFERRED_TO_LAND` terminal below (land owns the merge). Only the closed-unmerged, missing-branch, and merged-but-open-spec all-done states are `NEEDS_HUMAN`. An all-done spec with **no** PR is never terminal at all — it classifies `make-pr` and dispatches. -```text -PILOT_VERDICT=NO_WORK spec=- stage=- reason="no ready spec with satisfied deps" -``` +Terminal verdict when no spec was dispatched, split by why — the two cases are distinct and must never be conflated: + +- **No selectable candidate at all** (none open+ready, or all skipped for unsatisfied deps / other-actor claims) yields `NO_WORK`: + + ```text + PILOT_VERDICT=NO_WORK spec=- stage=- reason="no ready spec with satisfied deps" + ``` + +- **Every remaining candidate was deferred to land** (each all-done with an existing OPEN PR — the only reason they weren't dispatched) yields the distinct, greppable `DEFERRED_TO_LAND` verdict, naming the deferred spec so a transcript-only driver can hand it to `/flow-next:land`. This case MUST NOT collapse to `NO_WORK`: a `DONE`-but-open-PR spec is real outstanding work that land owns, not absence of work. + + ```text + PILOT_VERDICT=DEFERRED_TO_LAND spec= stage=land reason="all tasks done, open PR — land owns the merge" + ``` + + When more than one candidate was deferred, name the first deferred spec (stable id order) in the line; the reason still reads `defer to land`. The `PILOT_VERDICT` line is always the last line of the tick output. Print nothing after it. diff --git a/plugins/flow-next/skills/flow-next-tracker-sync/SKILL.md b/plugins/flow-next/skills/flow-next-tracker-sync/SKILL.md index ea97fe0e..7295722d 100644 --- a/plugins/flow-next/skills/flow-next-tracker-sync/SKILL.md +++ b/plugins/flow-next/skills/flow-next-tracker-sync/SKILL.md @@ -64,7 +64,7 @@ Never reimplement a flowctl helper inline; never push a merge/judgment decision ## Discovery ceremony (R2) — detect / surface / ask / never-assume -The bridge is **off until explicitly enabled**. The ceremony probes four signals, surfaces present AND absent, ASKS, and writes config **only on confirmation** — with provenance. No-signal ⇒ nothing written; `enabled` stays `false`. Never assume. But **once the user confirms, enabling is opt-OUT, not opt-in**: the ceremony activates the whole pipeline (every `perEvent` event) by default — hooking up the bridge means you want it to sync. The user excludes events at ceremony time or turns any off later (`flowctl config set tracker.perEvent. off`). The `get_default_config()` schema default stays `off`, so a bare `enabled=true` set WITHOUT the ceremony activates **no lifecycle-event sync** (every `perEvent` event stays dormant) — only the ceremony's explicit writes activate them. (The lone exception: make-pr's PR↔issue link is unconditional whenever the bridge is active — no per-event gate, by design.) +The bridge is **off until explicitly enabled**. The ceremony probes four signals, surfaces present AND absent, ASKS, and writes config **only on confirmation** — with provenance. No-signal ⇒ nothing written; `enabled` stays `false`. Never assume. But **once the user confirms, enabling is opt-OUT, not opt-in**: the ceremony activates the whole pipeline (every `perEvent` event) by default — hooking up the bridge means you want it to sync. The user excludes events at ceremony time or turns any off later (`flowctl config set tracker.perEvent. off`). The `get_default_config()` schema default stays `off`, so a bare `enabled=true` set WITHOUT the ceremony activates **no lifecycle-event sync** (every `perEvent` event stays dormant) — only the ceremony's explicit writes activate them. (Two exceptions are unconditional whenever the bridge is active — no per-event gate, by design: (1) make-pr's PR↔issue link **and its In Review status push** (fn-66, R2 — an open PR is the In Review rung, riding the same Diffs-powering link path); (2) **`land.merged`** (fn-66, R10 — a real merge is the SOLE event that projects terminal `Done`, gated on the GitHub `MERGED` probe; leaving it opt-in would strand boards at In Review post-merge).) Probe these four signals (detection lives in the skill, not flowctl — same shape as fn-51's driver-ladder detection): @@ -91,7 +91,7 @@ $FLOWCTL config set tracker.perEvent.work.firstClaim push $FLOWCTL config set tracker.perEvent.work.done comment $FLOWCTL config set tracker.perEvent.makePr comment $FLOWCTL config set tracker.perEvent.resolvePr comment -$FLOWCTL config set tracker.perEvent.completionReview reconcile +$FLOWCTL config set tracker.perEvent.completionReview comment # fn-66: comment-shaped (verdict + R-ID coverage) — NEVER terminal Done; land.merged is the sole Done driver (active-by-default, no perEvent seed needed) ``` Confirm the result with `flowctl sync active --json` (must report `active: true` once enabled/type are set). Negative path: user declines ⇒ write nothing; `sync active` stays `active: false`. diff --git a/plugins/flow-next/skills/flow-next-tracker-sync/references/adapter-interface.md b/plugins/flow-next/skills/flow-next-tracker-sync/references/adapter-interface.md index 60bc61c9..cebc6069 100644 --- a/plugins/flow-next/skills/flow-next-tracker-sync/references/adapter-interface.md +++ b/plugins/flow-next/skills/flow-next-tracker-sync/references/adapter-interface.md @@ -84,6 +84,8 @@ The wire-agnostic shapes the transports produce and reconcile consumes. These ar Who-wins (R7, implemented in fn-52.5 — [status-sync.md](status-sync.md)): tracker wins `done`/`verified`; flow wins `in-progress`; `priority` + `deferred`/`wontfix` surface to the user, never auto-changed. Comments/evidence two-way append + dedup (R8) is [comments-sync.md](comments-sync.md). +**Transport-blind terminal invariant (R1, R8).** A **terminal outbound write** (mapping a spec to normalized `done`/`verified`, i.e. closing/completing the tracker issue) **requires merge evidence**: the flow→normalized map is `flowToNormalized(spec, prEvidence)` ([status-sync.md](status-sync.md)) and emits a terminal status ONLY when a `MERGED` PR probe for the spec branch is present. Local completion (spec `done` + completion review shipped) is necessary, not sufficient. This invariant is **transport-blind** — every adapter (Linear fn-52.3, GitHub fn-52.7, any future tracker) receives a terminal normalized status only after the gate, and maps it DOWN to its native closed state without re-deciding. No adapter ever closes an issue from local spec state alone. + ### `relation` The wire-agnostic edge shape `listIssueRelations` produces. One entry per directed blocked-by edge the transport can see for the inspected issue. diff --git a/plugins/flow-next/skills/flow-next-tracker-sync/references/github.md b/plugins/flow-next/skills/flow-next-tracker-sync/references/github.md index ece77fbc..2b22b266 100644 --- a/plugins/flow-next/skills/flow-next-tracker-sync/references/github.md +++ b/plugins/flow-next/skills/flow-next-tracker-sync/references/github.md @@ -142,6 +142,19 @@ edited directly on GitHub by a human who didn't touch the label): | `done` / `verified` | `gh issue close --reason completed` | set `status:` (so `verified` vs `done` is recoverable) | | `deferred` / `wontfix` | **do NOT auto-apply** — these are R7 surface-only | — | +**Terminal close requires merge evidence (R1, R8 — same gate as Linear).** The +`done` / `verified` row above is a *transport* mapping: the adapter only ever +*receives* a terminal normalized status to write because the upstream +`flowToNormalized(spec, prEvidence)` map ([status-sync.md](status-sync.md)) gated it +on a `MERGED` merge-evidence probe for the spec branch. A locally-`done` spec with no +merged PR normalizes to **`in-review`** (→ open issue + `status:in-review`), so this +adapter **never closes the issue** for a spec that lacks a merged PR. The +merge-evidence gate is transport-blind — it applies identically on the GitHub adapter +and the Linear adapter (R8); this firewall just maps the already-gated normalized +status DOWN to GitHub's `OPEN`/`CLOSED`+reason. A `closed-unmerged` / missing-branch +probe never produces a terminal normalized status, so no `gh issue close --reason +completed` is ever driven from local completion alone. + **`deferred` / `wontfix` are surfaced, never auto-applied** (R7 semantics, same as Linear's `canceled`-type states): the adapter reports the desired transition to the user (or queues it in Ralph) rather than closing the issue as `not planned` @@ -435,7 +448,9 @@ attachment, no Linear Diffs (GitHub has its own PR review UI). make-pr §4.6a's Linear branch is GitHub-typed-skipped; instead the GitHub adapter ensures the PR body carries a **non-closing** reference to the issue — `Refs #` (NOT `Fixes #`, which would auto-close the issue on merge and bypass -spec-completion-review; flow-next owns the lifecycle, R7/R10). GitHub then renders +flow-next's `land.merged` Done projection — the merge-evidence-gated lifecycle +that owns the terminal `Done` transition post-fn-66; flow-next owns the lifecycle, +R7/R10). GitHub then renders the PR↔issue cross-link automatically. Gate is the same as Linear: bridge **active AND tracker.type == github** — no separate `makePr` opt-in. There is no rich-attach step (the cross-reference IS the link). diff --git a/plugins/flow-next/skills/flow-next-tracker-sync/references/linear-ladder.md b/plugins/flow-next/skills/flow-next-tracker-sync/references/linear-ladder.md index f4863fd6..d35f3c8d 100644 --- a/plugins/flow-next/skills/flow-next-tracker-sync/references/linear-ladder.md +++ b/plugins/flow-next/skills/flow-next-tracker-sync/references/linear-ladder.md @@ -106,10 +106,21 @@ taxonomy) plus an optional config name-override (`tracker.perTracker.statusMap`) |---|---|---| | `backlog` | `backlog` | — | | `unstarted` | `planned` | — | -| `started` | `in-progress` | flow wins | +| `started` | `in-progress` / `in-review` (the In Review rung — a `started`-type state named "In Review", resolved via `statusMap`) | flow wins (`in-progress`); In Review is the open-PR rung | | `completed` | `done` (or `verified` via name-map, e.g. a "Verified" state) | tracker wins | | `canceled` | `wontfix` (or `deferred` via name-map) | surface to user, never auto-change | +**Outbound terminal requires merge evidence (R1, lockstep with +[status-sync.md](status-sync.md) flow→normalized).** This table is the *inbound* +(tracker→normalized) read map. The *outbound* flow→normalized map gates terminal +`done`/`verified` on a `MERGED` merge-evidence probe: a locally-`done` spec maps to +**`in-review`** (Linear In Review — a `started`-type "In Review" state) until a +`MERGED` PR is observed for its branch; `setStatus(done|verified)` is driven ONLY +once `prEvidence == merged`. The In Review state resolves through the same +name/type → `stateId` map below (a `started`-type state whose name matches the +configured In Review rung). Keep this in lockstep with the +`flowToNormalized(spec, prEvidence)` table in status-sync.md — they must not drift. + Build the **name/type → `stateId` map once at config time** (MCP: `list_issue_statuses`; GraphQL: `workflowStates(first:100, filter:{team:…}){ id name type }`) so `setStatus` can resolve a normalized status back to the team's concrete diff --git a/plugins/flow-next/skills/flow-next-tracker-sync/references/status-sync.md b/plugins/flow-next/skills/flow-next-tracker-sync/references/status-sync.md index ada5f84e..c3e8c242 100644 --- a/plugins/flow-next/skills/flow-next-tracker-sync/references/status-sync.md +++ b/plugins/flow-next/skills/flow-next-tracker-sync/references/status-sync.md @@ -42,15 +42,99 @@ vocabulary in the adapter ([linear-ladder.md](linear-ladder.md) status table). T ### flow → normalized (what the spec's status *means* on the tracker) The spec's tracker-facing status is derived from the spec + its tasks (one spec ↔ -one issue, R3 — there are no per-task sub-issues): +one issue, R3 — there are no per-task sub-issues) **and a merge-evidence probe**. +The signature is **`flowToNormalized(spec, prEvidence)`** — it takes PR-merge +evidence as a second input, NOT spec state alone. **Local completion is necessary, +not sufficient, for a terminal status (R1).** A spec that is locally `done` with a +shipped completion review is still only `in-review` on the tracker until a +`MERGED`-state PR for its branch is observed; `done`/`verified` are reserved for +merge-confirmed work. -| flow condition | normalized | Rationale | -|---|---|---| -| spec `open`, **no** task `in_progress`/`done` yet (all `todo`) | `planned` (or `backlog` if no tasks exist) | authored, not started | -| spec `open`, **any** task `in_progress` or some `done` | `in-progress` | work underway | -| spec `done`, `completion_review_status != ship` | `in-review` | implementation complete, awaiting completion review | -| spec `done`, `completion_review_status == ship` | `verified` | completion review shipped | -| spec `done`, no completion-review configured | `done` | terminal, no review gate | +**`prEvidence`** is the result of the merge-evidence probe for the spec branch +(reuse verbatim from land `workflow.md:99-104` / pilot `:126-132`): + +```bash +BRANCH_NAME=$(.flow/bin/flowctl show "$SPEC_ID" --json | jq -r .branch_name) +# Bare `gh pr view` returns rc 0 even for CLOSED/MERGED — ALWAYS filter .state via jq. +PR_JSON=$(gh pr list --head "$BRANCH_NAME" --state all \ + --json url,state,number,isDraft 2>/dev/null) +MERGED=$(printf '%s' "$PR_JSON" | jq '[.[] | select(.state=="MERGED")] | length') +OPEN=$(printf '%s' "$PR_JSON" | jq '[.[] | select(.state=="OPEN")] | length') +# prEvidence ∈ { +# merged ≥1 MERGED +# open ≥1 OPEN, 0 MERGED +# closed-unmerged ≥1 CLOSED, 0 MERGED/OPEN +# none no PR for branch (probe succeeded, empty result) +# ambiguous a state the four buckets above don't cleanly cover — e.g. a +# branch with BOTH an open AND a closed-unmerged PR, or a draft- +# only result where no clear merge/open/closed signal dominates +# probe-error the gh probe itself failed (non-zero rc, no auth, network) — +# branch_name unknown counts here (cannot probe) +# } +``` + +**Probe failure / unknown branch is NOT merge evidence.** If `spec.branch_name` is +empty/unknown, or the `gh pr list` probe errors (rc≠0, no `gh`/auth, network), treat +`prEvidence` as `probe-error` — never as `none` and never as `merged`. Both +`probe-error` and `ambiguous` are non-terminal and route to NEEDS_HUMAN (below); +terminal is reachable ONLY from an unambiguous `merged`. + +> Use `-F` not `-f` for numeric `gh api` fields (a `-f number=…` stringifies the +> JSON value — memory `gh-api-f-stringifies`). The probe above uses `gh pr list`, +> not `gh api`, so it is unaffected; the note is for any follow-on `gh api` call. + +`flowToNormalized` maps to the normalized rung the spec *would* project. **It never +forces a rung downgrade by itself** — the reconcile loop (below) decides whether to +*write* it. In particular, `prEvidence == none` projects `in-review` but the loop's +**no-PR preserve rule** keeps an already-valid non-terminal tracker state (S-G); the +`in-review` projection only drives a `setStatus` when there is a real open-PR +(`open`) signal behind it. + +**Row-order discipline (load-bearing).** `prEvidence` is evaluated **FIRST** — a PR +signal (`merged` / `open`) decides the rung *before* any local task/completion-status +row gets a vote. The local-status rows (the `no PR evidence` block) apply **only** +when there is no PR signal (`none` / `closed-unmerged` / `ambiguous` / `probe-error`). +This ordering is what makes an all-tasks-done OPEN spec with an open PR project +`in-review` (not `in-progress`), and a merged ungated/`unknown`-completion spec reach +terminal Done (not stay `in-review`). The merge-evidence INVARIANT is intact: terminal +(`done`/`verified`) is reachable ONLY from `prEvidence == merged`. + +| # | flow condition | `prEvidence` | normalized | Rationale | +|---|---|---|---|---| +| 1 | spec `done`, no completion-review configured | `merged` | **`done`** | terminal, no review gate, **merge-confirmed** — a merge is a merge | +| 2 | spec `done`, `completion_review_status == ship` | `merged` | **`verified`** | completion review shipped **and** PR merged — terminal, verified | +| 3 | spec `done`, `completion_review_status != ship` (incl. `unknown`) | `merged` | `in-review` | PR merged but a configured completion review is not yet `ship` — stay in review until verified | +| 4 | spec at any local status (incl. all-tasks-done OPEN, or spec `done`) | `open` | `in-review` | open PR awaiting merge — the In Review rung, drives `setStatus(in-review)` (R2). The open-PR signal wins over the local task rows | +| 5 | spec `done` | `none` | **`in-review`** projection (NOT terminal); loop **preserves** an existing non-terminal state (S-G) | no PR exists — no merge evidence, no open-PR signal → never terminal, never a forced advance (R1) | +| 6 | spec `done` | `closed-unmerged` / `ambiguous` / `probe-error` | **`in-review`** (NOT terminal) **+ surface NEEDS_HUMAN** | locally shipped but the probe is not a clean MERGED — never terminal; the conflict goes to a human (R6) | +| 7 | spec `open`, **any** task `in_progress` or some `done` | `none` / `closed-unmerged` / `ambiguous` / `probe-error` | `in-progress` | work underway, no open/merged PR signal | +| 8 | spec `open`, **no** task `in_progress`/`done` yet (all `todo`) | `none` / `closed-unmerged` / `ambiguous` / `probe-error` | `planned` (or `backlog` if no tasks exist) | authored, not started | + +> **Why row 3 catches `unknown`.** flowctl normalizes a missing completion-review +> field to `unknown` (`flowctl.py:2296-2297`), and for repos without a +> completion-review backend pilot treats `completion_review_status != ship` as +> ungated. Row 3 is the `merged` + non-`ship` rung **only when a completion review is +> actually configured** — when no completion-review backend is configured at all, +> row 1 fires first (a spec with no review gate has nothing to wait on, so a merge is +> terminal `done`). Distinguish the two by `flowctl config`: "no completion-review +> configured" ⇒ row 1; "configured but the spec's review isn't `ship`/`unknown`-while- +> backend-present" ⇒ row 3. + +> **Why rows 4–6 sit above rows 7–8.** A PR signal — open or merged — is stronger +> evidence of where the work *is* than the local task ledger. In the normal make-pr +> path the spec is still `open` with all tasks `done` (flow-next-work/phases.md:488 +> says not to close the spec before the PR; land later discovers `status==open && +> tasks==done`). Evaluating the open-PR row (4) before the broad "some task done → +> in-progress" row (7) is what moves that issue to **In Review** on the make-pr push +> (flow-next-make-pr/workflow.md:1685-1690) instead of leaving it at In Progress. + +**Terminal (`done`/`verified`) is impossible without a `MERGED` probe result.** The +old map (pre-fn-66) mapped `spec done + completion ship → verified` and `spec done, +no review → done` with NO merge check, so a locally-completed spec auto-closed its +tracker issue before the PR merged. The merge-evidence gate fixes that at the root, +upstream of the who-wins ladder (which is unchanged). A `closed-unmerged` / +missing-branch / ambiguous probe NEVER yields terminal — it stays `in-review` (a +non-terminal rung) and surfaces NEEDS_HUMAN for the closed-unmerged case (R6). `deferred` / `wontfix` have no native flow status — they only ever arrive **from** the tracker side and are **surfaced, never auto-applied** (see the who-wins table). @@ -91,8 +175,9 @@ the two current normalized statuses, which are always available). ``` reconcileStatus(spec, issue): - flowNorm = flowToNormalized(spec) # table above - trackerNorm = issue.status.normalized # adapter already mapped it + prEvidence = mergeEvidenceProbe(spec.branch_name) # merged|open|closed-unmerged|none|ambiguous + flowNorm = flowToNormalized(spec, prEvidence) # table above — terminal needs MERGED + trackerNorm = issue.status.normalized # adapter already mapped it if flowNorm == trackerNorm: noop (status already agrees) — no setStatus, no spec change @@ -106,6 +191,17 @@ reconcileStatus(spec, issue): # genuine status deadlock (tracker=done × flow=in-progress, simultaneously) → # R1 conflictTiebreak fallback (next section). NOT a silent auto-close. + # ── CLOSED-UNMERGED / AMBIGUOUS / PROBE-ERROR — flow is locally done but the + # merge probe is NOT a clean MERGED. flowNorm is in-review (non-terminal — the + # gate forbade terminal), but the closed-without-merge / missing-branch / + # ambiguous / probe-error condition is a conflict a human must judge: surface + # NEEDS_HUMAN and do NOT write any status. Caught before the in-review + # advancement so it never silently pushes a rung. ── + elif spec.status == done and prEvidence ∈ {closed-unmerged, ambiguous, probe-error}: + # R6: locally shipped, but no merged PR and the probe is not clean → + # surface NEEDS_HUMAN (interactive ask / Ralph `sync defer --reason `). + # NO setStatus, NO terminal, NO spec change. Tracker keeps its current state. + elif trackerNorm ∈ {done, verified} (terminal, flow NOT in-progress): # tracker wins terminal — flow is at backlog/planned/done, so the tracker's # closure folds in cleanly (no live in-progress work to contradict it) @@ -116,12 +212,28 @@ reconcileStatus(spec, issue): # flow wins in-progress — push flow's progress to the tracker setStatus(trackerId, in-progress) [transport — linear-ladder.md] + # ── NO-PR PRESERVE RULE (S-G) — flow is locally done but prEvidence is `none` + # (no PR exists). flowNorm is in-review, but if the tracker is ALREADY at a + # valid non-terminal state (backlog/planned/in-progress/in-review) we do NOT + # force a rung change: a locally-shipped spec with no PR has no merge evidence + # and no open-PR signal, so we KEEP the current non-terminal state (no advance, + # no terminal). This is checked before the generic in-review push so `none` + # never drives an unconditional in-progress→in-review downgrade. ── + elif flowNorm == in-review and prEvidence == none + and trackerNorm ∈ {backlog, planned, in-progress, in-review}: + # S-G: preserve the existing valid non-terminal state — no setStatus, no advance. + + elif flowNorm == in-review and trackerNorm ∈ {backlog, planned, in-progress}: + # flow is in review (open PR — prEvidence=open), tracker behind → push the + # In Review rung (R2). Non-terminal advance; issue stays OPEN. + setStatus(trackerId, in-review) [transport — linear-ladder.md / github.md] + elif trackerNorm ∈ {deferred, wontfix} OR priority differs: # surface, never auto-change (interactive ask / Ralph queue) — see below - elif flowNorm ∈ {done, verified} and trackerNorm ∈ {backlog, planned}: - # flow reached terminal, tracker still pre-active (not in-progress → not a - # deadlock) — push flow's closure out + elif flowNorm ∈ {done, verified} and trackerNorm ∈ {backlog, planned, in-review}: + # flow reached terminal (prEvidence=merged — the gate passed), tracker still + # pre-terminal (not in-progress → not a deadlock) — push flow's closure out setStatus(trackerId, flowNorm) [transport] else: @@ -202,7 +314,7 @@ detail lives in [linear-ladder.md](linear-ladder.md)): | `triage` | `backlog` | spec stays `open`/unstarted | — | | `backlog` | `backlog` | spec stays `open`/unstarted | — | | `unstarted` | `planned` | spec stays `open` (planned) | — | -| `started` | `in-progress` | spec `open`, work underway | **flow wins** (flow drives the loop) | +| `started` | `in-progress` (or `in-review` for a `started`-type state named "In Review" — the open-PR rung, resolved via `statusMap`) | spec `open`, work underway / in review | **flow wins** (flow drives the loop) | | `completed` | `done` (or `verified` via a "Verified" name-override) | spec → `done` | **tracker wins terminal** | | `canceled` | `wontfix` (or `deferred` via a name-override) | **surface — do NOT auto-change** | surface to user | @@ -432,6 +544,119 @@ reconciles** — the run does **not** crash or abort. comments reconcile proceeds, and no `setStatus` is driven from the unmapped status. PASS iff no crash and the rest of the sync completes. +### Fixture S-G — no-PR all-done → stays In Progress, NO terminal advance (R1) + +**Flow:** spec `done`, all tasks `done`, `completion_review_status == ship`. +**`prEvidence`:** `none` (no PR exists for the spec branch). +**Tracker:** `status.normalized = "in-progress"` (board shows work underway — a valid +non-terminal state). + +**Expected:** `flowToNormalized(spec, none)` → **`in-review`** (terminal is gated on +`MERGED`; local ship is necessary, not sufficient). Tracker is already `in-progress`, +a valid non-terminal state. The reconcile **keeps the current non-terminal state / +does NOT advance to terminal** — it does NOT downgrade `in-progress`→`in-review` +unconditionally, and it does NOT close the issue. (`in-review` is "ahead of" +`in-progress`; with no merge evidence the bridge leaves the live non-terminal state +as-is rather than forcing a rung change. The point of the fixture: a locally-shipped +spec with no merged PR NEVER advances the tracker to `Done`.) + +**Oracle:** the spec/issue stay **non-terminal** — the issue stays **In Progress**; +**no** `setStatus(done|verified)` and **no** `gh issue close` is driven; no terminal +advance. PASS iff the locally-`done`+shipped spec does NOT close the tracker issue +absent a merged PR, and the existing valid non-terminal state is preserved. + +### Fixture S-H — open (unmerged) PR → In Review (R2) + +**Flow:** spec `done`, `completion_review_status == ship`. +**`prEvidence`:** `open` (one `OPEN` PR for the spec branch, 0 `MERGED`). +**Tracker:** `status.normalized = "in-progress"`. + +**Expected:** `flowToNormalized(spec, open)` → **`in-review`**. The open PR is the In +Review rung (R2): `setStatus(trackerId, in-review)` → Linear `In Review` (`state.type: +started`-family rung) / GitHub `status:in-review` label, issue stays **OPEN**. NOT +terminal — the PR has not merged. + +**Oracle:** exactly one `setStatus(in-review)`; the issue is **In Review** and stays +open; no close. PASS iff the open-PR spec projects to In Review, never to Done. + +### Fixture S-I — merged PR → Done (terminal, merge-confirmed) (R1) + +**Flow:** spec `done`, no completion-review configured. +**`prEvidence`:** `merged` (≥1 `MERGED` PR for the spec branch). +**Tracker:** `status.normalized = "in-review"`. + +**Expected:** `flowToNormalized(spec, merged)` → **`done`** (terminal — the `MERGED` +probe is present, so the gate is satisfied). `setStatus(trackerId, done)` → Linear +`completed`-type Done / GitHub `gh issue close --reason completed` + `status:done`. +(Had `completion_review_status == ship`, it would be **`verified`** instead.) + +**Oracle:** exactly one terminal `setStatus(done)` (issue closed/Done). PASS iff a +merge-confirmed spec — and ONLY a merge-confirmed spec — reaches terminal Done. + +### Fixture S-J — closed-unmerged PR → non-terminal + NEEDS_HUMAN (R6) + +**Flow:** spec `done`, `completion_review_status == ship`. +**`prEvidence`:** `closed-unmerged` (a PR for the branch is `CLOSED` with 0 `MERGED` +and 0 `OPEN` — closed without merging). +**Tracker:** `status.normalized = "in-progress"`. + +**Expected:** `flowToNormalized(spec, closed-unmerged)` → **`in-review`** (NON-terminal +— a closed-without-merge PR is NOT merge evidence, so terminal is forbidden). The +ambiguity (locally shipped, but the PR was closed unmerged) **surfaces NEEDS_HUMAN** +(interactive ask / Ralph `sync defer --reason closed-unmerged`). The issue stays +**non-terminal** (In Progress preserved); no terminal write. + +**Oracle:** **no** `setStatus(done|verified)` / no close; exactly one NEEDS_HUMAN +surfaced/queued entry naming the closed-unmerged branch; the issue stays +non-terminal. PASS iff a closed-unmerged spec never auto-closes the tracker issue and +the conflict reaches a human. + +> **`ambiguous` and `probe-error` share S-J's path.** A `prEvidence` of `ambiguous` +> (e.g. both an open AND a closed-unmerged PR on the branch) or `probe-error` +> (`gh` failed / no auth / unknown `branch_name`) is handled by the **same** +> reconcile branch as `closed-unmerged`: `flowToNormalized` → `in-review` +> (non-terminal), and the loop surfaces NEEDS_HUMAN (`sync defer --reason ambiguous` +> / `--reason probe-error`) with **no** status write. Terminal is reachable ONLY +> from an unambiguous `merged` (S-I) — a failed or ambiguous probe never closes the +> issue. + +### Fixture S-K — all-tasks-done OPEN spec + open PR → In Review (row-order, Thread A) + +**Flow:** spec **`open`** (NOT yet `done` — the normal make-pr path leaves the spec +`open` after all tasks finish; flow-next-work/phases.md:488), **all tasks `done`**. +**`prEvidence`:** `open` (one `OPEN` PR for the spec branch, 0 `MERGED`). +**Tracker:** `status.normalized = "in-progress"`. + +**Expected:** `flowToNormalized(spec, open)` → **`in-review`** (row 4 — the open-PR +signal is evaluated **before** the "some task done → in-progress" local row, so it +wins). The make-pr push (flow-next-make-pr/workflow.md:1685-1690) drives +`setStatus(trackerId, in-review)` → the issue moves to **In Review**, stays OPEN, NOT +terminal. + +**Oracle:** exactly one `setStatus(in-review)`; the issue is **In Review** (NOT left +at In Progress). PASS iff an all-tasks-done OPEN spec with an open PR projects to In +Review — the pre-fn-66 row order returned `in-progress` here and the make-pr push +never advanced the issue. Regression guard for Thread A. + +### Fixture S-L — merged ungated / `unknown`-completion spec → terminal Done (row-order, Thread B) + +**Flow:** spec `done`, **`completion_review_status == unknown`** (no completion-review +backend configured — flowctl normalizes the missing field to `unknown`, +flowctl.py:2296-2297; pilot treats `!= ship` as ungated, flow-next-pilot/workflow.md:117-122). +**`prEvidence`:** `merged` (≥1 `MERGED` PR for the spec branch). +**Tracker:** `status.normalized = "in-review"`. + +**Expected:** `flowToNormalized(spec, merged)` → **`done`** (terminal — row 1 fires +because no completion-review backend is configured; a merge is a merge for an ungated +repo). `setStatus(trackerId, done)` → Linear `completed`-type Done / GitHub +`gh issue close --reason completed`. The `unknown` completion status does **not** +trap the spec in `in-review`. + +**Oracle:** exactly one terminal `setStatus(done)` (issue closed/Done). PASS iff a +merged ungated/`unknown`-completion spec reaches terminal Done — the pre-fn-66 row +order let row 3 (`!= ship → in-review`) catch `unknown` first, so `land.merged` never +wrote Done for ungated projects. Regression guard for Thread B. + ## Boundaries - **This is the status/metadata layer, not the body merge or the transport.** The diff --git a/plugins/flow-next/skills/flow-next-tracker-sync/steps.md b/plugins/flow-next/skills/flow-next-tracker-sync/steps.md index 36eec36b..cb8131e4 100644 --- a/plugins/flow-next/skills/flow-next-tracker-sync/steps.md +++ b/plugins/flow-next/skills/flow-next-tracker-sync/steps.md @@ -53,11 +53,11 @@ Only when the bridge is not yet active (`flowctl sync active --json` → `active $FLOWCTL config set tracker.perEvent.work.done comment # status comment + evidence $FLOWCTL config set tracker.perEvent.makePr comment # PR link is unconditional; extra status comment $FLOWCTL config set tracker.perEvent.resolvePr comment # resolution comment - $FLOWCTL config set tracker.perEvent.completionReview reconcile # flip Done/verified + verdict / R-ID coverage + $FLOWCTL config set tracker.perEvent.completionReview comment # fn-66: verdict + R-ID coverage comment; NEVER terminal Done (land.merged is the sole Done driver) $FLOWCTL config set tracker.perTracker.teamId "" # if the user named one $FLOWCTL sync active --json # confirm active: true ``` - **Never assume — but default-on is not assuming.** No signal / user declines the bridge ⇒ write nothing; `enabled` stays `false`; `sync active` stays `active: false`. Confirming the bridge IS the consent to sync the pipeline. The **config schema default stays `off`** (in `get_default_config()`), so a bare `tracker.enabled=true` set by hand or a script — WITHOUT this ceremony — activates **no lifecycle-event sync** (every `perEvent` event stays dormant). The **one exception** is make-pr's PR↔issue link, which is unconditional whenever the bridge is active (no per-event gate, by design — it powers Linear Diffs); only the ceremony's explicit per-event writes activate the events themselves. Users opt out per event afterward via `flowctl config set tracker.perEvent. off`. + **Never assume — but default-on is not assuming.** No signal / user declines the bridge ⇒ write nothing; `enabled` stays `false`; `sync active` stays `active: false`. Confirming the bridge IS the consent to sync the pipeline. The **config schema default stays `off`** (in `get_default_config()`), so a bare `tracker.enabled=true` set by hand or a script — WITHOUT this ceremony — activates **no lifecycle-event sync** (every `perEvent` event stays dormant). The **two exceptions** are unconditional whenever the bridge is active (no per-event gate, by design): (1) make-pr's PR↔issue link **and its In Review status push** (fn-66, R2 — an open PR is the In Review rung, riding the same Diffs-powering link path); (2) **`land.merged`** (fn-66, R10 — a real merge is the SOLE event that projects terminal `Done`, gated on the GitHub `MERGED` probe; leaving it opt-in would strand boards at In Review post-merge). Only the ceremony's explicit per-event writes activate the other events themselves. Users opt out per event afterward via `flowctl config set tracker.perEvent. off`. 5. **Readiness state (fn-58, R4 — optional, skippable).** After the config writes, ask one more question via `AskUserQuestion`: *which tracker workflow state means "ready for work"?* When set, every pull-side sync projects that state onto the local spec `ready` flag ([→ ref: status-sync.md] § Readiness projection) — **the tracker becomes authoritative for readiness** (a local `flowctl spec ready` is overwritten on the next sync). Readiness is optional: always offer **skip** (and lead with it when no candidate state exists); skipping writes nothing and leaves `tracker.readyState: null` — the readiness gate stays dormant (R7). - **Linear** — discover the team's states first: MCP `list_issue_statuses(team:)` → `{id, name, type}` per state (GraphQL rung: `workflowStates(first:100, filter:{team:{name:{eq:$team}}}){ nodes { id name type } }` — explicit `first:` on every connection). Lead with a recommendation: a state whose **name** looks like "Ready" / "Next" / "Ready for Dev" (case-insensitive); if none, lead with skip. Validate the chosen state resolves (`get_issue_status()`) before writing. Store the state **name** (names are what humans see; the projection matches case-insensitive/trimmed). - **GitHub** — issues have no workflow states; readiness resolves to a **label** name (suggest `ready`). Pre-create it so the projection never trips on a missing label — tolerate **only** already-exists (the create fails with a 422 when the label exists; that is fine, idempotent). Any other failure (auth / permissions / wrong repo / API) must surface — never write `tracker.readyState` for a label that isn't confirmed to exist, or every later pull hits the stale-config warn/noop (github.md § Readiness label) and the flag never moves: @@ -130,9 +130,16 @@ Route the operation; each layer calls hooks that operate on the normalized struc ``` push(spec): + prEvidence = mergeEvidenceProbe(spec.branch_name) → status-sync.md (merged|open|closed-unmerged|none|ambiguous|probe-error) body = renderFlowToTracker(spec) → body-merge.md Step 3 (flow→tracker) — COMPLETE spec, ALL sections; never summarize/truncate writeIssue(issue{... body ...}) [→ ref: transport] # GitHub UPDATE preserves the flow-owned region (github.md § writeIssue) — body=renderFlowToTracker output never contains it, so a raw full-body replace would wipe it and make projectDepRelations misread the ledgered edge as a remote removal → false collision. Write retains; merge strips (body-merge.md Step 0.5). - setStatus(map flow status → tracker status) → status-sync.md (who-wins) + setStatus(flowToNormalized(spec, prEvidence) → tracker status) → status-sync.md (who-wins) + # MERGE-EVIDENCE GATE (fn-66): the terminal rung (done/verified) is reachable + # ONLY when prEvidence == merged. flowToNormalized refuses terminal for + # none/open/closed-unmerged/ambiguous/probe-error — so NO push (automatic land.merged + # OR a manual reconcile) ever writes Done without a GitHub MERGED probe. The gate is a + # per-WRITE invariant (status-sync.md): a manual merge-evidenced push MAY terminal-write; + # a local-completion-only push never does. postComment(lifecycle event marker) → comments-sync.md (append + dedup) projectDepRelations(spec, issue) → § projectDepRelations below — depends_on_epics → blocked-by relations (additive, ledger-tracked, never advances lastSyncedAt; warns on unlinked dep; skipped when no transport) sync set-merge-base (BOTH halves) + set-last-synced # snapshot the pushed pair (body-merge.md Step 5) @@ -164,6 +171,7 @@ conflict) lives in that reference: reconcile(spec): base = sync get-state → merge-base snapshot (BOTH forms: mergeBaseFlow + mergeBaseTracker) issue = fetchIssue(trackerId) [→ ref: transport] + prEvidence = mergeEvidenceProbe(spec.branch_name) → status-sync.md (feeds reconcileStatus; gates terminal) projectReadiness(spec, issue) → status-sync.md § Readiness projection (rides every issue read; independent of the body merge — runs even when the body diverges) projectDepRelations(spec, issue) → § projectDepRelations below (rides every issue read like projectReadiness; independent of the body merge — runs even when the body diverges or conflicts) merged = threeWayMergeBody(base, flowBody, issue.body) → body-merge.md @@ -174,7 +182,12 @@ reconcile(spec): Ralph → sync defer (queue the scoped conflict, never block) [R9/R11] receipt: diverged else: - writeIssue(merged) + setStatus(who-wins) [transport → fn-52.3/.7] + status-sync.md (who-wins) + writeIssue(merged) + setStatus(reconcileStatus(spec, issue, prEvidence)) [transport → fn-52.3/.7] + status-sync.md (who-wins) + # MERGE-EVIDENCE GATE (fn-66): reconcileStatus runs flowToNormalized(spec, prEvidence) + # first — a manual reconcile MAY terminal-write Done/verified IFF prEvidence == merged + # (status-sync.md S-I); closed-unmerged/ambiguous/probe-error → in-review + NEEDS_HUMAN + # (S-J), none → preserve non-terminal (S-G). The terminal-write merge-evidence invariant + # holds on this manual path exactly as on the automatic land.merged touchpoint. sync set-merge-base (BOTH halves) + sync set-last-synced # body-merge.md Step 5 — ONLY on success receipt: merged | updated # no-base bootstrap (first link): body-merge.md "First-sync / no-base bootstrap" — diff --git a/plugins/flow-next/skills/flow-next-work/SKILL.md b/plugins/flow-next/skills/flow-next-work/SKILL.md index ce035b25..7b31dffb 100644 --- a/plugins/flow-next/skills/flow-next-work/SKILL.md +++ b/plugins/flow-next/skills/flow-next-work/SKILL.md @@ -168,7 +168,7 @@ If user chose review, pass the review mode to the worker. The worker invokes `/f |---|---|---| | first task claimed (phases.md 3b.1) | `tracker.perEvent.work.firstClaim` | move the linked issue In-Progress | | task done (phases.md 3d.1) | `tracker.perEvent.work.done` | post a status comment + evidence (tests / commits / PR) | -| spec-completion-review SHIP (phases.md 3g) | `tracker.perEvent.completionReview` | flip the issue Done/verified + post verdict / R-ID coverage | +| spec-completion-review SHIP (phases.md 3g) | `tracker.perEvent.completionReview` | post verdict / R-ID coverage as a comment; NEVER terminal Done (fn-66 — Done is reserved for a MERGED PR, driven by land.merged) — at most leaves the issue at In Review | (capture / interview / plan / make-pr / resolve-pr carry their own touchpoints in those skills, gated identically on `tracker.perEvent.{capture,interview,plan,makePr,resolvePr}`.) diff --git a/plugins/flow-next/skills/flow-next-work/phases.md b/plugins/flow-next/skills/flow-next-work/phases.md index 28f620c1..f4c34482 100644 --- a/plugins/flow-next/skills/flow-next-work/phases.md +++ b/plugins/flow-next/skills/flow-next-work/phases.md @@ -410,25 +410,29 @@ $FLOWCTL show --json | jq -r '.completion_review_status' 2. After skill returns with SHIP: - Set status: `$FLOWCTL spec set-completion-review-status --status ship --json` - - **Tracker sync (opt-in) — SHIP → issue Done/verified + verdict:** runs only when the tracker bridge is active AND `completionReview` is opted in. With no tracker configured this is a no-op. Hooked **here at the caller** (not inside the review skill) because this is where `completion_review_status=ship` lands: + - **Tracker sync (opt-in) — SHIP → verdict comment, NEVER terminal Done (fn-66):** runs only when the tracker bridge is active AND `completionReview` is opted in. With no tracker configured this is a no-op. Hooked **here at the caller** (not inside the review skill) because this is where `completion_review_status=ship` lands. **Local completion review is NOT merge evidence** — `Done` is reserved for a `MERGED` PR (fn-66 status-sync `flowToNormalized`), so this touchpoint is **comment-shaped only**: it posts the verdict + R-ID coverage and at most leaves the issue at `In Review` (if an open PR exists). It NEVER pushes `Done`/`verified`: ```bash if [ "$($FLOWCTL sync active --json | jq -r '.active')" = "true" ] \ && [ "$($FLOWCTL config get tracker.perEvent.completionReview --json | jq -r '.value')" != "off" ] \ && [ "$($FLOWCTL config get tracker.perEvent.completionReview --json | jq -r '.value')" != "null" ]; then - # Invoke the flow-next-tracker-sync skill: flip the linked issue to Done/verified - # (status who-wins — tracker wins `done`/`verified`, R7) and post the verdict + - # R-ID coverage as a comment. - # skill: flow-next-tracker-sync (operation: push , status + verdict comment, event: work.completionReview) - # Unlinked spec → flow-first push (create + link) first, then status + verdict - # comment (tracker-sync §Phase 3 create-if-unlinked). No-op only if no transport; Ralph queues. + # Invoke the flow-next-tracker-sync skill: post the completion-review verdict + + # R-ID coverage as a comment (comment-shaped — NEVER a terminal status push). + # The skill's reconcileStatus gate (status-sync.md flowToNormalized) refuses + # terminal `Done`/`verified` without a MERGED probe, so even if a stale config + # set this leaf to `reconcile` the gate keeps it non-terminal: at most it leaves + # the issue at `In Review` (open-PR evidence). land.merged is the SOLE Done driver. + # skill: flow-next-tracker-sync (operation: comment , event: work.completionReview) + # (the comment carries the verdict + R-ID coverage as evidence — never a status push) + # Unlinked spec → flow-first push (create + link) first, then the verdict comment + # (tracker-sync §Phase 3 create-if-unlinked). No-op only if no transport; Ralph queues. # The skill's receipts carry --event work.completionReview — audited by Phase 5's sync check. : fi ``` - Go to Phase 4 (Quality) -**Note:** The spec-completion-review skill gets SHIP from the reviewer but does NOT set the status itself. The caller (work skill or Ralph) sets `completion_review_status=ship` after successful review — and (when opted in) flips the linked tracker issue Done/verified here. The review skill (`flow-next-spec-completion-review`) is NOT edited; the touchpoint lives at this caller. +**Note:** The spec-completion-review skill gets SHIP from the reviewer but does NOT set the status itself. The caller (work skill or Ralph) sets `completion_review_status=ship` after successful review — and (when opted in) posts the verdict / R-ID-coverage comment to the linked tracker issue here. It does **NOT** flip the issue to `Done`/`verified` (fn-66: that is gated on a `MERGED` PR and driven solely by `land.merged`). The review skill (`flow-next-spec-completion-review`) is NOT edited; the touchpoint lives at this caller. **Fix loop behavior**: Same as impl-review. If reviewer returns NEEDS_WORK: 1. Skill parses issues @@ -523,7 +527,7 @@ EVENTS="work.firstClaim,work.done" # ← substitute the actual triggered set 2. For each MISSING event, invoke the **flow-next-tracker-sync skill directly** — the same dispatch as the touchpoint that missed, with its `event:` tag — NEVER this check block as a wrapper (no recursion): - `work.firstClaim` → `skill: flow-next-tracker-sync (operation: push , status-only, event: work.firstClaim)` - `work.done` → `skill: flow-next-tracker-sync (operation: comment , event: work.done)` - - `work.completionReview` → `skill: flow-next-tracker-sync (operation: push , status + verdict comment, event: work.completionReview)` + - `work.completionReview` → `skill: flow-next-tracker-sync (operation: comment , event: work.completionReview)` — comment-shaped (verdict + R-ID coverage as evidence), NEVER terminal (fn-66) 3. Re-check the missed events only, `--since` = the step-1 anchor: `"$FLOWCTL" sync check "$SPEC_ID" --events "" --since "" --json` 4. Record the final state in the summary slot. Still MISSING after the one cycle is a recorded, visible outcome — never a second retro-fire, never a block (the work is already done; a tracker hiccup must not become a hard stop). Recovery guidance lives in the receipt note + `docs/tracker-sync.md`. diff --git a/plugins/flow-next/tests/test_land_tracker_event.py b/plugins/flow-next/tests/test_land_tracker_event.py new file mode 100644 index 00000000..32542bd1 --- /dev/null +++ b/plugins/flow-next/tests/test_land_tracker_event.py @@ -0,0 +1,180 @@ +"""`tracker.perEvent.land.merged` Done-on-merge touchpoint leaf (fn-66, R3/R10). + +fn-66 makes `land.merged` the SOLE driver of the terminal `Done` tracker state +and **active-by-default when the bridge is active** (like make-pr's unconditional +PR-link path) — the merge→Done projection rides the bridge-active predicate alone, +NOT this leaf. The config-schema default for the nested `land.merged` leaf stays +`off` (the fn-52.1 accidental-enable invariant: a bare `enabled=true` activates no +lifecycle-event sync), and when set the leaf only tunes the optional verdict +comment — never the (MERGED-gated) status write. + +This test proves the leaf round-trips through the **production CLI** — the real +argparse dispatch (`init` → `config set` → `config get`) invoked via a subprocess +against `scripts/flowctl.py`, NOT an in-process import (the dual-copy invariant: +the bundled CLI must run this code). It mirrors `test_qa_tracker_event.py` style. + +Asserts: + 1. fresh repo, no `config set` → `tracker.perEvent.land.merged` merges to `"off"` + (NOT null) via the merge-defaults path; + 2. `set tracker.perEvent.land.merged push` → `get` round-trips to `"push"`; + 3. setting the land.merged leaf does NOT clobber a sibling perEvent leaf + (`tracker.perEvent.completionReview` stays `"off"`, and the nested + `tracker.perEvent.work.firstClaim` stays `"off"`); + 4. setting the land.merged leaf does NOT flip the value-checked activation + predicate (no type / not enabled → bridge stays inactive); + 5. the leaf lives under the SAME nested `perEvent` object as the fn-52/.53/.60 + leaves (`get_default_tracker_config()` shape), defaulting `off`, alongside an + unchanged `completionReview` default of `off`; + 6. `push` (the land.merged op verb) is a recognised `TRACKER_PER_EVENT_LEAVES` + verb (no enum change was needed — only the semantics of the leaf changed). + +Hermetic: each test runs in its own `tempfile.TemporaryDirectory`, inits a +throwaway `.flow/`, and shells `sys.executable scripts/flowctl.py` (no network, no +LLM). Windows-portable: `pathlib` everywhere, `sys.executable`, no shell string. + +Run: + python3 -m unittest plugins.flow-next.tests.test_land_tracker_event -v +""" + +from __future__ import annotations + +import importlib.util +import json +import os +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path +from typing import Any + + +HERE = Path(__file__).resolve() +FLOWCTL_PY = HERE.parent.parent / "scripts" / "flowctl.py" + + +def _load_flowctl() -> Any: + """In-process load — used ONLY to assert the defaults-dict shape (1 test). + + The round-trip tests below go through the subprocess CLI, not this import. + """ + spec = importlib.util.spec_from_file_location( + "flowctl_land_tracker_event_under_test", FLOWCTL_PY + ) + mod = importlib.util.module_from_spec(spec) + assert spec.loader is not None + sys.modules[spec.name] = mod + spec.loader.exec_module(mod) + return mod + + +def _flowctl(cwd: Path, *args: str, expect_rc: int = 0) -> dict[str, Any]: + """Run the production CLI as a subprocess; parse the `--json` payload.""" + cmd = [sys.executable, str(FLOWCTL_PY), *args, "--json"] + proc = subprocess.run( + cmd, + cwd=str(cwd), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env={**os.environ, "FLOW_NO_DEPRECATION": "1"}, + ) + if proc.returncode != expect_rc: + raise AssertionError( + f"rc={proc.returncode} (expected {expect_rc}): args={args} " + f"stdout={proc.stdout!r} stderr={proc.stderr!r}" + ) + return json.loads(proc.stdout.decode("utf-8")) + + +def _init_repo(tmp: Path) -> None: + subprocess.check_call( + [sys.executable, str(FLOWCTL_PY), "init", "--json"], + cwd=str(tmp), + stdout=subprocess.DEVNULL, + ) + + +class LandTrackerEventLeafTestCase(unittest.TestCase): + def setUp(self) -> None: + self._tmp = tempfile.TemporaryDirectory() + self.repo = Path(self._tmp.name) + _init_repo(self.repo) + + def tearDown(self) -> None: + self._tmp.cleanup() + + # ── (1) fresh default merges to "off", NOT null ────────────────────── + + def test_fresh_repo_land_merged_leaf_defaults_off(self) -> None: + out = _flowctl( + self.repo, "config", "get", "tracker.perEvent.land.merged" + ) + self.assertEqual(out["value"], "off") + + # ── (2) set push → get round-trips ─────────────────────────────────── + + def test_set_push_round_trips_via_cli(self) -> None: + set_out = _flowctl( + self.repo, "config", "set", "tracker.perEvent.land.merged", "push" + ) + self.assertEqual(set_out["value"], "push") + get_out = _flowctl( + self.repo, "config", "get", "tracker.perEvent.land.merged" + ) + self.assertEqual(get_out["value"], "push") + + # ── (3) setting land.merged does NOT clobber sibling perEvent leaves ── + + def test_set_land_merged_preserves_sibling_perevent_leaves(self) -> None: + _flowctl( + self.repo, "config", "set", "tracker.perEvent.land.merged", "push" + ) + # completionReview default unchanged (the fn-66 re-scope keeps the + # SCHEMA default `off` — the discovery ceremony seeds `comment`). + cr = _flowctl( + self.repo, "config", "get", "tracker.perEvent.completionReview" + ) + self.assertEqual(cr["value"], "off") + sibling = _flowctl( + self.repo, "config", "get", "tracker.perEvent.work.firstClaim" + ) + self.assertEqual(sibling["value"], "off") + # And the land.merged leaf itself is intact. + lm = _flowctl( + self.repo, "config", "get", "tracker.perEvent.land.merged" + ) + self.assertEqual(lm["value"], "push") + + # ── (4) setting land.merged does NOT activate the bridge ───────────── + + def test_set_land_merged_does_not_activate_the_bridge(self) -> None: + _flowctl( + self.repo, "config", "set", "tracker.perEvent.land.merged", "push" + ) + active = _flowctl(self.repo, "sync", "active") + self.assertFalse(active["active"]) + + # ── (5) shape assertion — nested perEvent, defaults off ────────────── + + def test_land_merged_leaf_in_default_tracker_config_nested(self) -> None: + flowctl = _load_flowctl() + pe = flowctl.get_default_tracker_config()["perEvent"] + self.assertIn("land", pe) + self.assertIn("merged", pe["land"]) + self.assertEqual(pe["land"]["merged"], "off") + # completionReview SCHEMA default stays off (ceremony seeds comment). + self.assertEqual(pe["completionReview"], "off") + # Same nested perEvent object as the fn-52 leaves. + self.assertEqual(pe["work"]["firstClaim"], "off") + self.assertEqual(pe["makePr"], "off") + + # ── (6) `push` is a recognised perEvent verb ───────────────────────── + + def test_push_is_a_recognised_per_event_verb(self) -> None: + flowctl = _load_flowctl() + self.assertIn("push", flowctl.TRACKER_PER_EVENT_LEAVES) + self.assertIn("off", flowctl.TRACKER_PER_EVENT_LEAVES) + + +if __name__ == "__main__": + unittest.main()