From 1760471974a802aaab2a5a74bc4b22ecaa8e8410 Mon Sep 17 00:00:00 2001
From: Ian White <ianwhitedeveloper@gmail.com>
Date: Wed, 18 Feb 2026 08:50:10 -0600
Subject: [PATCH 1/6] =?UTF-8?q?feat(ai):=20PR=203=20=E2=80=94=20parsers=20?=
 =?UTF-8?q?+=20execute-agent=20module?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add agent-parser, extraction-parser, aggregation, and execute-agent
modules with full unit test coverage.

- agent-parser: parseStringResult, parseOpenCodeNDJSON, unwrapEnvelope
  (new shared export), unwrapAgentResult. Shared unwrapEnvelope breaks
  duplication between agent-parser and execute-agent (WIP fix #9).
- extraction-parser: parseExtractionResult with multi-strategy JSON
  parsing (direct, markdown fence, pre-parsed object), and
  resolveImportPaths for prompt file resolution.
- aggregation: normalizeJudgment, calculateRequiredPasses,
  aggregatePerAssertionResults with Zod validation.
- execute-agent: extracted from ai-runner.js to break the circular
  dependency (ai-runner ↔ test-extractor). Logger injected at
  executeAgent call site rather than created inside spawnProcess
  (WIP fix #8). Uses shared unwrapEnvelope from agent-parser.
- Test files use test.each for all table-driven cases per convention.

164 tests pass, 0 lint errors, TypeScript checks pass.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 source/agent-parser.js           | 108 +++++++
 source/agent-parser.test.js      | 408 ++++++++++++++++++++++++++
 source/aggregation.js            | 110 +++++++
 source/aggregation.test.js       | 488 +++++++++++++++++++++++++++++++
 source/execute-agent.js          | 182 ++++++++++++
 source/execute-agent.test.js     | 277 ++++++++++++++++++
 source/extraction-parser.js      | 127 ++++++++
 source/extraction-parser.test.js | 226 ++++++++++++++
 8 files changed, 1926 insertions(+)
 create mode 100644 source/agent-parser.js
 create mode 100644 source/agent-parser.test.js
 create mode 100644 source/aggregation.js
 create mode 100644 source/aggregation.test.js
 create mode 100644 source/execute-agent.js
 create mode 100644 source/execute-agent.test.js
 create mode 100644 source/extraction-parser.js
 create mode 100644 source/extraction-parser.test.js

diff --git a/source/agent-parser.js b/source/agent-parser.js
new file mode 100644
index 00000000..38f41aaa
--- /dev/null
+++ b/source/agent-parser.js
@@ -0,0 +1,108 @@
+import { createError } from 'error-causes';
+import { ParseError } from './ai-errors.js';
+
+/**
+ * Parse a string result from an agent, attempting multiple strategies:
+ * 1. Direct JSON parse if string starts with { or [
+ * 2. Extract and parse markdown-wrapped JSON (```json\n...\n```)
+ * 3. Keep as plain text if neither works
+ */
+export const parseStringResult = (result, logger) => {
+  const trimmed = result.trim();
+
+  if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
+    try {
+      const parsed = JSON.parse(trimmed);
+      logger.log('Successfully parsed string as JSON');
+      return parsed;
+    } catch {
+      logger.log('Direct JSON parse failed, trying markdown extraction');
+    }
+  }
+
+  const markdownMatch = result.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
+  if (markdownMatch) {
+    logger.log('Found markdown-wrapped JSON, extracting...');
+    try {
+      const parsed = JSON.parse(markdownMatch[1]);
+      logger.log('Successfully parsed markdown-wrapped JSON');
+      return parsed;
+    } catch {
+      logger.log('Failed to parse markdown content, keeping original string');
+    }
+  }
+
+  logger.log('String is not valid JSON, keeping as plain text');
+  return result;
+};
+
+/**
+ * Parse OpenCode's NDJSON output, extracting and concatenating all "text" events.
+ */
+export const parseOpenCodeNDJSON = (ndjson, logger) => {
+  logger.log('Parsing OpenCode NDJSON output...');
+
+  const lines = ndjson.trim().split('\n').filter(line => line.trim());
+
+  const textEvents = lines.reduce((acc, line) => {
+    try {
+      const event = JSON.parse(line);
+      if (event.type === 'text' && event.part?.text) {
+        logger.log(`Found text event with ${event.part.text.length} characters`);
+        acc.push(event.part.text);
+      }
+    } catch (err) {
+      logger.log(`Warning: Failed to parse NDJSON line: ${err.message}`);
+    }
+    return acc;
+  }, []);
+
+  if (textEvents.length === 0) {
+    throw createError({
+      ...ParseError,
+      message: 'No text events found in OpenCode output',
+      code: 'NO_TEXT_EVENTS',
+      ndjsonLength: ndjson.length,
+      linesProcessed: lines.length
+    });
+  }
+
+  const combinedText = textEvents.join('');
+  logger.log(`Combined ${textEvents.length} text event(s) into ${combinedText.length} characters`);
+  return combinedText;
+};
+
+/**
+ * Unwrap a JSON envelope object { result: ... }, returning the inner value.
+ * If no envelope is present, returns the object as-is.
+ * Shared helper used by unwrapAgentResult and execute-agent's raw output handling.
+ */
+export const unwrapEnvelope = (parsed) =>
+  parsed?.result !== undefined ? parsed.result : parsed;
+
+/**
+ * Unwrap agent result from potential JSON envelope and parse nested JSON.
+ * Handles Claude CLI's envelope format { result: "..." } and nested JSON strings.
+ * @throws {Error} If output is not valid JSON after all parsing attempts
+ */
+export const unwrapAgentResult = (processedOutput, logger) => {
+  const parsed = parseStringResult(processedOutput, logger);
+
+  if (typeof parsed === 'string') {
+    throw createError({
+      ...ParseError,
+      message: `Agent output is not valid JSON: ${parsed.slice(0, 100)}`,
+      outputPreview: parsed.slice(0, 100)
+    });
+  }
+
+  const unwrapped = unwrapEnvelope(parsed);
+
+  logger.log(`Parsed result type: ${typeof unwrapped}`);
+  if (typeof unwrapped === 'string') {
+    logger.log('Result is string, attempting to parse as JSON');
+    return parseStringResult(unwrapped, logger);
+  }
+
+  return unwrapped;
+};
diff --git a/source/agent-parser.test.js b/source/agent-parser.test.js
new file mode 100644
index 00000000..1eee4b18
--- /dev/null
+++ b/source/agent-parser.test.js
@@ -0,0 +1,408 @@
+import { describe, test } from 'vitest';
+import { assert } from './vitest.js';
+import { Try } from './riteway.js';
+import {
+  parseStringResult,
+  parseOpenCodeNDJSON,
+  unwrapEnvelope,
+  unwrapAgentResult
+} from './agent-parser.js';
+
+const createMockLogger = () => {
+  const logs = [];
+  return {
+    log: (...args) => logs.push(args.join(' ')),
+    logs
+  };
+};
+
+describe('parseStringResult()', () => {
+  test('parses direct JSON when string starts with {', () => {
+    const logger = createMockLogger();
+    const input = '{"passed": true, "output": "test"}';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'JSON string starting with {',
+      should: 'parse as JSON object',
+      actual: JSON.stringify(result),
+      expected: '{"passed":true,"output":"test"}'
+    });
+
+    assert({
+      given: 'successful JSON parse',
+      should: 'log success message',
+      actual: logger.logs.some(log => log.includes('Successfully parsed string as JSON')),
+      expected: true
+    });
+  });
+
+  test('parses direct JSON when string starts with [', () => {
+    const logger = createMockLogger();
+    const input = '[{"id": 1}, {"id": 2}]';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'JSON string starting with [',
+      should: 'parse as JSON array',
+      actual: result.length,
+      expected: 2
+    });
+  });
+
+  test('extracts markdown-wrapped JSON when direct parse fails', () => {
+    const logger = createMockLogger();
+    const input = '```json\n{"passed": true, "output": "test"}\n```';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'markdown-wrapped JSON',
+      should: 'extract and parse JSON',
+      actual: JSON.stringify(result),
+      expected: '{"passed":true,"output":"test"}'
+    });
+
+    assert({
+      given: 'markdown extraction',
+      should: 'log markdown extraction',
+      actual: logger.logs.some(log => log.includes('markdown-wrapped JSON')),
+      expected: true
+    });
+  });
+
+  test('extracts markdown-wrapped JSON without json language tag', () => {
+    const logger = createMockLogger();
+    const input = '```\n{"passed": true}\n```';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'markdown without json tag',
+      should: 'extract and parse JSON',
+      actual: result.passed,
+      expected: true
+    });
+  });
+
+  test('tries markdown extraction even if string starts with {', () => {
+    const logger = createMockLogger();
+    const input = '{ broken json ```json\n{"passed": true}\n```';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'malformed JSON with markdown fallback',
+      should: 'extract from markdown block',
+      actual: result.passed,
+      expected: true
+    });
+
+    assert({
+      given: 'fallback scenario',
+      should: 'log failed parse and markdown extraction',
+      actual: logger.logs.some(log => log.includes('trying markdown extraction')),
+      expected: true
+    });
+  });
+
+  test('returns plain text when no parsing succeeds', () => {
+    const logger = createMockLogger();
+    const input = 'This is just plain text with no JSON';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'plain text string',
+      should: 'return original string',
+      actual: result,
+      expected: input
+    });
+
+    assert({
+      given: 'no valid JSON',
+      should: 'log keeping as plain text',
+      actual: logger.logs.some(log => log.includes('keeping as plain text')),
+      expected: true
+    });
+  });
+
+  test('handles malformed markdown gracefully', () => {
+    const logger = createMockLogger();
+    const input = '```json\n{ broken: json }\n```';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'markdown with invalid JSON',
+      should: 'return original string',
+      actual: result,
+      expected: input
+    });
+
+    assert({
+      given: 'failed markdown parse',
+      should: 'log failure',
+      actual: logger.logs.some(log => log.includes('Failed to parse markdown content')),
+      expected: true
+    });
+  });
+
+  test('trims whitespace before parsing', () => {
+    const logger = createMockLogger();
+    const input = '  \n  {"passed": true}  \n  ';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'JSON with surrounding whitespace',
+      should: 'parse successfully',
+      actual: result.passed,
+      expected: true
+    });
+
+    assert({
+      given: 'JSON with surrounding whitespace',
+      should: 'return parsed object matching trimmed input',
+      actual: result,
+      expected: { passed: true }
+    });
+  });
+});
+
+describe('parseOpenCodeNDJSON()', () => {
+  test('extracts text from single text event', () => {
+    const logger = createMockLogger();
+    const ndjson = '{"type":"step_start","timestamp":1770245956364}\n' +
+        '{"type":"text","part":{"text":"```json\\n{\\"status\\": \\"ok\\"}\\n```"}}\n' +
+        '{"type":"step_finish","timestamp":1770245956211}';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'NDJSON with single text event',
+      should: 'extract text content',
+      actual: result,
+      expected: '```json\n{"status": "ok"}\n```'
+    });
+
+    assert({
+      given: 'successful text extraction',
+      should: 'log found text event',
+      actual: logger.logs.some(log => log.includes('Found text event')),
+      expected: true
+    });
+  });
+
+  test('concatenates multiple text events', () => {
+    const logger = createMockLogger();
+    const ndjson = '{"type":"text","part":{"text":"Part 1"}}\n' +
+        '{"type":"text","part":{"text":" Part 2"}}\n' +
+        '{"type":"text","part":{"text":" Part 3"}}';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'NDJSON with multiple text events',
+      should: 'concatenate all text content',
+      actual: result,
+      expected: 'Part 1 Part 2 Part 3'
+    });
+  });
+
+  test('filters out non-text events', () => {
+    const logger = createMockLogger();
+    const ndjson = '{"type":"step_start","data":"ignored"}\n' +
+        '{"type":"text","part":{"text":"Hello"}}\n' +
+        '{"type":"step_finish","data":"ignored"}\n' +
+        '{"type":"text","part":{"text":" World"}}';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'NDJSON with mixed event types',
+      should: 'extract only text events',
+      actual: result,
+      expected: 'Hello World'
+    });
+  });
+
+  test('skips malformed JSON lines', () => {
+    const logger = createMockLogger();
+    const ndjson = '{invalid json}\n' +
+        '{"type":"text","part":{"text":"Valid text"}}\n' +
+        'not json at all';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'NDJSON with malformed lines',
+      should: 'skip invalid lines and process valid ones',
+      actual: result,
+      expected: 'Valid text'
+    });
+
+    assert({
+      given: 'malformed JSON',
+      should: 'log warning for failed parse',
+      actual: logger.logs.some(log => log.includes('Failed to parse NDJSON line')),
+      expected: true
+    });
+  });
+
+  test('throws error when no text events found', () => {
+    const logger = createMockLogger();
+    const ndjson = '{"type":"step_start","data":"no text here"}\n' +
+        '{"type":"step_finish","data":"still no text"}';
+
+    const error = Try(parseOpenCodeNDJSON, ndjson, logger);
+
+    assert({
+      given: 'NDJSON with no text events',
+      should: 'throw Error with cause',
+      actual: error instanceof Error && error.cause !== undefined,
+      expected: true
+    });
+
+    assert({
+      given: 'NDJSON with no text events',
+      should: 'have ParseError name in cause',
+      actual: error?.cause?.name,
+      expected: 'ParseError'
+    });
+
+    assert({
+      given: 'NDJSON with no text events',
+      should: 'have NO_TEXT_EVENTS code in cause',
+      actual: error?.cause?.code,
+      expected: 'NO_TEXT_EVENTS'
+    });
+
+    assert({
+      given: 'NDJSON with no text events',
+      should: 'include ndjsonLength in cause',
+      actual: typeof error?.cause?.ndjsonLength === 'number',
+      expected: true
+    });
+
+    assert({
+      given: 'NDJSON with no text events',
+      should: 'include linesProcessed in cause',
+      actual: error?.cause?.linesProcessed,
+      expected: 2
+    });
+  });
+
+  test('handles empty lines in NDJSON', () => {
+    const logger = createMockLogger();
+    const ndjson = '\n\n{"type":"text","part":{"text":"Hello"}}\n\n\n{"type":"text","part":{"text":" World"}}\n\n';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'NDJSON with empty lines',
+      should: 'filter empty lines and process valid events',
+      actual: result,
+      expected: 'Hello World'
+    });
+  });
+
+  test('preserves markdown-wrapped JSON in text', () => {
+    const logger = createMockLogger();
+    const ndjson = '{"type":"text","part":{"text":"```json\\n{\\"passed\\":true}\\n```"}}';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'text event with markdown-wrapped JSON',
+      should: 'preserve markdown formatting',
+      actual: result,
+      expected: '```json\n{"passed":true}\n```'
+    });
+  });
+});
+
+describe('unwrapEnvelope()', () => {
+  test.each([
+    ['object with result field', { result: { passed: true } }, { passed: true }],
+    ['object with result as string', { result: 'raw string' }, 'raw string'],
+    ['object with result as null', { result: null }, null],
+    ['object without result field', { passed: true, score: 80 }, { passed: true, score: 80 }],
+  ])('%s', (_, input, expected) => {
+    assert({
+      given: `parsed object: ${_}`,
+      should: 'return the unwrapped value',
+      actual: unwrapEnvelope(input),
+      expected
+    });
+  });
+
+  test('returns object as-is when result key is absent', () => {
+    const input = { status: 'ok', data: [1, 2, 3] };
+    assert({
+      given: 'object with no result key',
+      should: 'return the original object',
+      actual: unwrapEnvelope(input),
+      expected: input
+    });
+  });
+});
+
+describe('unwrapAgentResult()', () => {
+  test('unwraps Claude envelope and returns parsed inner object', () => {
+    const logger = createMockLogger();
+    const envelope = JSON.stringify({ result: JSON.stringify({ passed: true, score: 90 }) });
+
+    const result = unwrapAgentResult(envelope, logger);
+
+    assert({
+      given: 'Claude CLI envelope wrapping a JSON string',
+      should: 'return the fully parsed inner object',
+      actual: result,
+      expected: { passed: true, score: 90 }
+    });
+  });
+
+  test('returns parsed object when no envelope present', () => {
+    const logger = createMockLogger();
+    const direct = JSON.stringify({ passed: false, score: 40 });
+
+    const result = unwrapAgentResult(direct, logger);
+
+    assert({
+      given: 'direct JSON object (no envelope)',
+      should: 'return the parsed object',
+      actual: result,
+      expected: { passed: false, score: 40 }
+    });
+  });
+
+  test('throws ParseError when output is not valid JSON', () => {
+    const logger = createMockLogger();
+
+    const error = Try(unwrapAgentResult, 'plain text response', logger);
+
+    assert({
+      given: 'plain text that is not valid JSON',
+      should: 'throw Error with ParseError cause',
+      actual: error?.cause?.name,
+      expected: 'ParseError'
+    });
+  });
+
+  test('handles markdown-wrapped envelope', () => {
+    const logger = createMockLogger();
+    const markdownEnvelope = '```json\n{"result": {"passed": true}}\n```';
+
+    const result = unwrapAgentResult(markdownEnvelope, logger);
+
+    assert({
+      given: 'markdown-wrapped JSON with result envelope',
+      should: 'extract, unwrap and return the inner object',
+      actual: result,
+      expected: { passed: true }
+    });
+  });
+});
diff --git a/source/aggregation.js b/source/aggregation.js
new file mode 100644
index 00000000..bb128d45
--- /dev/null
+++ b/source/aggregation.js
@@ -0,0 +1,110 @@
+import { createError } from 'error-causes';
+import { ValidationError, ParseError } from './ai-errors.js';
+import {
+  defaults,
+  calculateRequiredPassesSchema
+} from './constants.js';
+
+/**
+ * Normalize a judge response (already parsed from TAP YAML) to ensure consistent
+ * structure with safe defaults for missing fields.
+ * @throws {Error} If raw is not an object (null, string, undefined, etc.)
+ */
+export const normalizeJudgment = (raw, { requirement, runIndex, logger }) => {
+  if (typeof raw !== 'object' || raw === null) {
+    throw createError({
+      ...ParseError,
+      message: 'Judge returned non-object response',
+      code: 'JUDGE_INVALID_RESPONSE',
+      requirement,
+      runIndex,
+      rawResponse: raw
+    });
+  }
+
+  if (raw?.actual === undefined || raw?.expected === undefined) {
+    logger.log(`Warning: Judge response missing fields for "${requirement}" run ${runIndex + 1}`);
+  }
+
+  return {
+    passed: raw?.passed === true,
+    actual: raw?.actual ?? 'No actual provided',
+    expected: raw?.expected ?? 'No expected provided',
+    score: Number.isFinite(raw?.score) ? Math.max(0, Math.min(100, raw.score)) : 0
+  };
+};
+
+/**
+ * Calculate the number of passes required to meet the threshold.
+ * Uses ceiling to ensure threshold is met or exceeded.
+ * @throws {Error} If validation fails (non-integer runs, invalid threshold, etc.)
+ */
+export const calculateRequiredPasses = ({ runs = defaults.runs, threshold = defaults.threshold } = {}) => {
+  try {
+    const validated = calculateRequiredPassesSchema.parse({ runs, threshold });
+    return Math.ceil((validated.runs * validated.threshold) / 100);
+  } catch (zodError) {
+    const issues = zodError.issues || [];
+    const messages = issues.map(issue =>
+      `${issue.path.join('.')}: ${issue.message}`
+    ).join('; ');
+
+    throw createError({
+      ...ValidationError,
+      message: `Invalid parameters for calculateRequiredPasses: ${messages}`,
+      code: 'INVALID_CALCULATION_PARAMS',
+      runs,
+      threshold,
+      cause: zodError
+    });
+  }
+};
+
+/**
+ * Aggregate results from per-assertion test runs.
+ * Each assertion is independently evaluated against the threshold.
+ * Overall pass requires all assertions to meet the threshold.
+ */
+export const aggregatePerAssertionResults = ({ perAssertionResults, threshold, runs }) => {
+  try {
+    calculateRequiredPassesSchema.parse({ runs, threshold });
+  } catch (zodError) {
+    const issues = zodError.issues || [];
+    const messages = issues.map(issue =>
+      `${issue.path.join('.')}: ${issue.message}`
+    ).join('; ');
+
+    throw createError({
+      ...ValidationError,
+      message: `Invalid parameters for aggregatePerAssertionResults: ${messages}`,
+      code: 'INVALID_AGGREGATION_PARAMS',
+      runs,
+      threshold,
+      cause: zodError
+    });
+  }
+
+  const requiredPasses = calculateRequiredPasses({ runs, threshold });
+
+  const assertions = perAssertionResults.map(({ requirement, runResults }) => {
+    const passCount = runResults.filter(r => r.passed).length;
+    const totalScore = runResults.reduce((sum, r) => sum + (r.score ?? 0), 0);
+    const averageScore = runResults.length > 0
+      ? Math.round((totalScore / runResults.length) * 100) / 100
+      : 0;
+
+    return {
+      requirement,
+      passed: passCount >= requiredPasses,
+      passCount,
+      totalRuns: runs,
+      averageScore,
+      runResults
+    };
+  });
+
+  return {
+    passed: assertions.every(a => a.passed),
+    assertions
+  };
+};
diff --git a/source/aggregation.test.js b/source/aggregation.test.js
new file mode 100644
index 00000000..3e45f979
--- /dev/null
+++ b/source/aggregation.test.js
@@ -0,0 +1,488 @@
+import { describe, test, vi } from 'vitest';
+import { assert } from './vitest.js';
+import { Try } from './riteway.js';
+import {
+  normalizeJudgment,
+  calculateRequiredPasses,
+  aggregatePerAssertionResults
+} from './aggregation.js';
+
+describe('calculateRequiredPasses()', () => {
+  test.each([
+    ['4 runs, 75% threshold', { runs: 4, threshold: 75 }, 3],
+    ['5 runs, 75% threshold', { runs: 5, threshold: 75 }, 4],
+    ['10 runs, 80% threshold', { runs: 10, threshold: 80 }, 8],
+    ['4 runs, 80% threshold', { runs: 4, threshold: 80 }, 4],
+  ])('%s requires correct pass count', (_, options, expected) => {
+    assert({
+      given: _,
+      should: `require ${expected} passes`,
+      actual: calculateRequiredPasses(options),
+      expected
+    });
+  });
+
+  test('uses default values when called with no arguments', () => {
+    assert({
+      given: 'no arguments',
+      should: 'use defaults (4 runs, 75% threshold) requiring 3 passes',
+      actual: calculateRequiredPasses(),
+      expected: 3
+    });
+  });
+
+  test.each([
+    ['zero runs', { runs: 0, threshold: 75 }, 'runs must be at least 1'],
+    ['negative runs', { runs: -1, threshold: 75 }, 'runs must be at least 1'],
+    ['non-integer runs', { runs: 1.5, threshold: 75 }, 'runs must be an integer'],
+    ['NaN runs', { runs: NaN, threshold: 75 }, 'expected number, received NaN'],
+  ])('throws ValidationError for %s', (_, options, expectedMessage) => {
+    const error = Try(calculateRequiredPasses, options);
+
+    assert({
+      given: _,
+      should: 'have ValidationError in cause',
+      actual: error?.cause?.name,
+      expected: 'ValidationError'
+    });
+
+    assert({
+      given: _,
+      should: 'have INVALID_CALCULATION_PARAMS code',
+      actual: error?.cause?.code,
+      expected: 'INVALID_CALCULATION_PARAMS'
+    });
+
+    assert({
+      given: _,
+      should: 'include descriptive message',
+      actual: error?.message?.includes(expectedMessage),
+      expected: true
+    });
+  });
+
+  test.each([
+    ['threshold above 100', { runs: 4, threshold: 150 }, 'threshold must be at most 100'],
+    ['negative threshold', { runs: 4, threshold: -10 }, 'threshold must be at least 0'],
+    ['NaN threshold', { runs: 4, threshold: NaN }, 'expected number, received NaN'],
+  ])('throws ValidationError for %s', (_, options, expectedMessage) => {
+    const error = Try(calculateRequiredPasses, options);
+
+    assert({
+      given: _,
+      should: 'have ValidationError in cause',
+      actual: error?.cause?.name,
+      expected: 'ValidationError'
+    });
+
+    assert({
+      given: _,
+      should: 'have INVALID_CALCULATION_PARAMS code',
+      actual: error?.cause?.code,
+      expected: 'INVALID_CALCULATION_PARAMS'
+    });
+
+    assert({
+      given: _,
+      should: 'include descriptive message',
+      actual: error?.message?.includes(expectedMessage),
+      expected: true
+    });
+  });
+});
+
+describe('aggregatePerAssertionResults()', () => {
+  test('aggregates per-assertion results when all assertions pass', () => {
+    const perAssertionResults = [
+      {
+        requirement: 'Given simple addition, should add correctly',
+        runResults: [
+          { passed: true, output: 'ok' },
+          { passed: true, output: 'ok' }
+        ]
+      },
+      {
+        requirement: 'Given format, should output JSON',
+        runResults: [
+          { passed: true, output: 'ok' },
+          { passed: true, output: 'ok' }
+        ]
+      }
+    ];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults,
+      threshold: 75,
+      runs: 2
+    });
+
+    assert({
+      given: 'all assertions meeting threshold',
+      should: 'return passed: true',
+      actual: result.passed,
+      expected: true
+    });
+
+    assert({
+      given: 'two assertions',
+      should: 'return assertions array of length 2',
+      actual: result.assertions.length,
+      expected: 2
+    });
+
+    assert({
+      given: 'first assertion with all passes',
+      should: 'mark the assertion as passed',
+      actual: result.assertions[0].passed,
+      expected: true
+    });
+
+    assert({
+      given: 'first assertion with 2 passes',
+      should: 'report passCount 2',
+      actual: result.assertions[0].passCount,
+      expected: 2
+    });
+
+    assert({
+      given: 'first assertion requirement',
+      should: 'preserve the requirement',
+      actual: result.assertions[0].requirement,
+      expected: 'Given simple addition, should add correctly'
+    });
+  });
+
+  test('fails when any assertion does not meet threshold', () => {
+    const perAssertionResults = [
+      {
+        requirement: 'Given addition, should add correctly',
+        runResults: [
+          { passed: true, output: 'ok' },
+          { passed: true, output: 'ok' }
+        ]
+      },
+      {
+        requirement: 'Given format, should output JSON',
+        runResults: [
+          { passed: false, output: 'fail' },
+          { passed: false, output: 'fail' }
+        ]
+      }
+    ];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults,
+      threshold: 75,
+      runs: 2
+    });
+
+    assert({
+      given: 'one assertion failing threshold',
+      should: 'return passed: false',
+      actual: result.passed,
+      expected: false
+    });
+
+    assert({
+      given: 'the passing assertion',
+      should: 'mark it as passed',
+      actual: result.assertions[0].passed,
+      expected: true
+    });
+
+    assert({
+      given: 'the failing assertion',
+      should: 'mark it as failed',
+      actual: result.assertions[1].passed,
+      expected: false
+    });
+
+    assert({
+      given: 'the failing assertion',
+      should: 'have passCount 0',
+      actual: result.assertions[1].passCount,
+      expected: 0
+    });
+  });
+
+  test('includes per-assertion run results and totalRuns', () => {
+    const runResults = [
+      { passed: true, output: 'run 1' },
+      { passed: false, output: 'run 2' }
+    ];
+    const perAssertionResults = [{ requirement: 'test assertion', runResults }];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults,
+      threshold: 50,
+      runs: 2
+    });
+
+    assert({
+      given: 'per-assertion run results',
+      should: 'include run results in the assertion',
+      actual: result.assertions[0].runResults,
+      expected: runResults
+    });
+
+    assert({
+      given: 'per-assertion run results',
+      should: 'include totalRuns per assertion',
+      actual: result.assertions[0].totalRuns,
+      expected: 2
+    });
+  });
+
+  test('calculates averageScore from run results', () => {
+    const perAssertionResults = [
+      {
+        requirement: 'test with scores',
+        runResults: [
+          { passed: true, score: 85 },
+          { passed: true, score: 95 },
+          { passed: true, score: 90 }
+        ]
+      }
+    ];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults,
+      threshold: 75,
+      runs: 3
+    });
+
+    assert({
+      given: 'three runs with scores 85, 95, 90',
+      should: 'calculate average score of 90',
+      actual: result.assertions[0].averageScore,
+      expected: 90
+    });
+  });
+
+  test('defaults missing scores to 0 when calculating average', () => {
+    const perAssertionResults = [
+      {
+        requirement: 'test without scores',
+        runResults: [
+          { passed: true },
+          { passed: true }
+        ]
+      }
+    ];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults,
+      threshold: 75,
+      runs: 2
+    });
+
+    assert({
+      given: 'run results with no score fields',
+      should: 'report averageScore of 0',
+      actual: result.assertions[0].averageScore,
+      expected: 0
+    });
+  });
+
+  test.each([
+    ['runs above maximum', { runs: 1001, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['threshold above maximum', { runs: 4, threshold: 150 }, 'INVALID_AGGREGATION_PARAMS'],
+  ])('throws ValidationError for %s', (_, { runs, threshold }, expectedCode) => {
+    const perAssertionResults = [
+      { requirement: 'test', runResults: [{ passed: true }] }
+    ];
+
+    const error = Try(() =>
+      aggregatePerAssertionResults({ perAssertionResults, threshold, runs })
+    );
+
+    assert({
+      given: _,
+      should: 'throw validation error',
+      actual: error instanceof Error,
+      expected: true
+    });
+
+    assert({
+      given: _,
+      should: 'have correct error code',
+      actual: error?.cause?.code,
+      expected: expectedCode
+    });
+  });
+});
+
+describe('normalizeJudgment()', () => {
+  const createMockLogger = () => ({ log: vi.fn() });
+
+  test('is exported as a function', () => {
+    assert({
+      given: 'aggregation module',
+      should: 'export normalizeJudgment',
+      actual: typeof normalizeJudgment,
+      expected: 'function'
+    });
+  });
+
+  test('passes through complete valid input unchanged', () => {
+    const logger = createMockLogger();
+    const raw = { passed: true, actual: 'Result from agent', expected: 'Expected output', score: 85 };
+
+    const result = normalizeJudgment(raw, { requirement: 'test assertion', runIndex: 0, logger });
+
+    assert({
+      given: 'complete valid judgment with passed: true',
+      should: 'preserve passed as true',
+      actual: result.passed,
+      expected: true
+    });
+
+    assert({
+      given: 'complete valid judgment',
+      should: 'preserve actual value',
+      actual: result.actual,
+      expected: 'Result from agent'
+    });
+
+    assert({
+      given: 'complete valid judgment',
+      should: 'preserve expected value',
+      actual: result.expected,
+      expected: 'Expected output'
+    });
+
+    assert({
+      given: 'complete valid judgment with score 85',
+      should: 'preserve score value',
+      actual: result.score,
+      expected: 85
+    });
+  });
+
+  test('defaults passed to false when missing', () => {
+    const logger = createMockLogger();
+    const result = normalizeJudgment(
+      { actual: 'Result', expected: 'Expected', score: 50 },
+      { requirement: 'test', runIndex: 0, logger }
+    );
+
+    assert({
+      given: 'judgment missing passed field',
+      should: 'default passed to false',
+      actual: result.passed,
+      expected: false
+    });
+  });
+
+  test('defaults missing actual and expected with warning log', () => {
+    const logger = createMockLogger();
+    const result = normalizeJudgment(
+      { passed: true, score: 100 },
+      { requirement: 'test assertion', runIndex: 2, logger }
+    );
+
+    assert({
+      given: 'judgment missing actual',
+      should: 'default actual to "No actual provided"',
+      actual: result.actual,
+      expected: 'No actual provided'
+    });
+
+    assert({
+      given: 'judgment missing expected',
+      should: 'default expected to "No expected provided"',
+      actual: result.expected,
+      expected: 'No expected provided'
+    });
+
+    assert({
+      given: 'judgment missing actual and expected',
+      should: 'log warning with requirement and run number',
+      actual: logger.log.mock.calls[0][0],
+      expected: 'Warning: Judge response missing fields for "test assertion" run 3'
+    });
+  });
+
+  test.each([
+    ['score 150', 150, 100],
+    ['score -50', -50, 0],
+    ['NaN score', NaN, 0],
+  ])('normalizes %s correctly', (_, score, expected) => {
+    const logger = createMockLogger();
+    const result = normalizeJudgment(
+      { passed: true, actual: 'Result', expected: 'Expected', score },
+      { requirement: 'test', runIndex: 0, logger }
+    );
+
+    assert({
+      given: `judgment with ${_}`,
+      should: `normalize score to ${expected}`,
+      actual: result.score,
+      expected
+    });
+  });
+
+  test('defaults missing score to 0', () => {
+    const logger = createMockLogger();
+    const result = normalizeJudgment(
+      { passed: true, actual: 'Result', expected: 'Expected' },
+      { requirement: 'test', runIndex: 0, logger }
+    );
+
+    assert({
+      given: 'judgment missing score',
+      should: 'default to 0',
+      actual: result.score,
+      expected: 0
+    });
+  });
+
+  test.each([
+    ['null input', null],
+    ['string input', 'not an object'],
+    ['undefined input', undefined],
+  ])('throws ParseError for %s', (_, input) => {
+    const logger = createMockLogger();
+    const error = Try(normalizeJudgment, input, { requirement: 'test assertion', runIndex: 1, logger });
+
+    assert({
+      given: _,
+      should: 'have ParseError name in cause',
+      actual: error?.cause?.name,
+      expected: 'ParseError'
+    });
+
+    assert({
+      given: _,
+      should: 'have JUDGE_INVALID_RESPONSE code in cause',
+      actual: error?.cause?.code,
+      expected: 'JUDGE_INVALID_RESPONSE'
+    });
+  });
+
+  test('includes requirement and runIndex in ParseError cause', () => {
+    const logger = createMockLogger();
+    const error = Try(normalizeJudgment, null, { requirement: 'test assertion', runIndex: 1, logger });
+
+    assert({
+      given: 'null input',
+      should: 'include requirement in cause',
+      actual: error?.cause?.requirement,
+      expected: 'test assertion'
+    });
+
+    assert({
+      given: 'null input',
+      should: 'include runIndex in cause',
+      actual: error?.cause?.runIndex,
+      expected: 1
+    });
+
+    assert({
+      given: 'null input',
+      should: 'include rawResponse in cause',
+      actual: error?.cause?.rawResponse,
+      expected: null
+    });
+  });
+});
diff --git a/source/execute-agent.js b/source/execute-agent.js
new file mode 100644
index 00000000..62ec2949
--- /dev/null
+++ b/source/execute-agent.js
@@ -0,0 +1,182 @@
+import { spawn } from 'child_process';
+import { createError } from 'error-causes';
+import { ParseError, TimeoutError, AgentProcessError } from './ai-errors.js';
+import { createDebugLogger } from './debug-logger.js';
+import { unwrapEnvelope, unwrapAgentResult } from './agent-parser.js';
+
+const withTimeout = (promise, ms, errorFactory) =>
+  Promise.race([
+    promise,
+    new Promise((_, reject) =>
+      setTimeout(() => reject(createError(errorFactory())), ms)
+    )
+  ]);
+
+const collectProcessOutput = (proc) =>
+  new Promise((resolve, reject) => {
+    let stdout = '';
+    let stderr = '';
+
+    proc.stdout.on('data', (data) => { stdout += data.toString(); });
+    proc.stderr.on('data', (data) => { stderr += data.toString(); });
+    proc.on('close', (code) => { resolve({ stdout, stderr, code }); });
+    proc.on('error', reject);
+  });
+
+/**
+ * Spawn an agent CLI subprocess and collect output.
+ * Logger is injected to avoid coupling: it is created once at executeAgent level.
+ */
+const spawnProcess = async ({ agentConfig, prompt, logger }) => {
+  const { command, args = [] } = agentConfig;
+  const allArgs = [...args, prompt];
+
+  logger.log('\nExecuting agent command:');
+  logger.command(command, args);
+  logger.log(`Prompt length: ${prompt.length} characters`);
+
+  try {
+    const proc = spawn(command, allArgs);
+    proc.stdin.end();
+    return await collectProcessOutput(proc);
+  } catch (err) {
+    throw createError({
+      ...AgentProcessError,
+      message: `Failed to spawn agent process: ${err.message}`,
+      command,
+      args: args.join(' '),
+      cause: err
+    });
+  }
+};
+
+/**
+ * Try to unwrap a JSON envelope { result: ... } from a raw string, returning the
+ * inner value as a string. Falls back to the original string if not JSON or no envelope.
+ */
+const unwrapRawOutput = (output) => {
+  if (!output.trim().startsWith('{')) return output;
+  try {
+    return unwrapEnvelope(JSON.parse(output));
+  } catch {
+    return output;
+  }
+};
+
+/**
+ * Process agent stdout: apply optional parseOutput preprocessing, then either
+ * return raw unwrapped string (rawOutput=true) or parse full JSON result.
+ */
+const processAgentOutput = ({ agentConfig, rawOutput, logger }) => ({ stdout }) => {
+  const { command, args = [], parseOutput } = agentConfig;
+
+  try {
+    const processedOutput = parseOutput ? parseOutput(stdout, logger) : stdout;
+
+    if (rawOutput) {
+      logger.log('Raw output requested - unwrapping JSON envelope');
+      const result = unwrapRawOutput(processedOutput);
+
+      if (typeof result !== 'string') {
+        throw createError({
+          ...ParseError,
+          message: `Raw output requested but result is not a string: ${typeof result}`,
+          resultType: typeof result
+        });
+      }
+
+      logger.log(`Returning raw output (${result.length} characters)`);
+      logger.flush();
+      return result;
+    }
+
+    const result = unwrapAgentResult(processedOutput, logger);
+    logger.result(result);
+    logger.flush();
+    return result;
+  } catch (err) {
+    const truncatedStdout = stdout.length > 500 ? `${stdout.slice(0, 500)}...` : stdout;
+    logger.log('JSON parsing failed:', err.message);
+    logger.flush();
+
+    throw createError({
+      ...ParseError,
+      message: `Failed to parse agent output as JSON: ${err.message}`,
+      code: 'AGENT_OUTPUT_PARSE_ERROR',
+      command,
+      args: args.join(' '),
+      stdoutPreview: truncatedStdout,
+      cause: err
+    });
+  }
+};
+
+const runAgentProcess = async ({ agentConfig, prompt, timeout, logger }) => {
+  const { command, args = [] } = agentConfig;
+
+  const { stdout, stderr, code } = await withTimeout(
+    spawnProcess({ agentConfig, prompt, logger }),
+    timeout,
+    () => ({
+      ...TimeoutError,
+      message: `Agent process timed out after ${timeout}ms. Command: ${command} ${args.join(' ')}`,
+      command,
+      args: args.join(' '),
+      timeout
+    })
+  );
+
+  logger.log(`Process exited with code: ${code}`);
+  logger.log(`Stdout length: ${stdout.length} characters`);
+  logger.log(`Stderr length: ${stderr.length} characters`);
+
+  if (code !== 0) {
+    const truncatedStdout = stdout.length > 500 ? `${stdout.slice(0, 500)}...` : stdout;
+    const truncatedStderr = stderr.length > 500 ? `${stderr.slice(0, 500)}...` : stderr;
+
+    logger.log('Process failed with non-zero exit code');
+    logger.flush();
+
+    throw createError({
+      ...AgentProcessError,
+      message: `Agent process exited with code ${code}\n` +
+               `Command: ${command} ${args.join(' ')}\n` +
+               `Stderr: ${truncatedStderr}\n` +
+               `Stdout preview: ${truncatedStdout}`,
+      command,
+      args: args.join(' '),
+      exitCode: code,
+      stderr: truncatedStderr,
+      stdoutPreview: truncatedStdout
+    });
+  }
+
+  return { stdout };
+};
+
+/**
+ * Execute an agent CLI subprocess and return parsed JSON output or raw string.
+ * @param {Object} options
+ * @param {Object} options.agentConfig - Agent configuration
+ * @param {string} options.agentConfig.command - Command to execute
+ * @param {Array<string>} [options.agentConfig.args=[]] - Command arguments
+ * @param {Function} [options.agentConfig.parseOutput] - Optional stdout preprocessor
+ * @param {string} options.prompt - Prompt to send to the agent
+ * @param {number} [options.timeout=300000] - Timeout in ms (default: 5 minutes)
+ * @param {boolean} [options.debug=false] - Enable debug logging
+ * @param {string} [options.logFile] - Optional log file path for debug output
+ * @param {boolean} [options.rawOutput=false] - Return raw stdout string without JSON parsing
+ * @returns {Promise<Object|string>} Parsed JSON response or raw string if rawOutput=true
+ */
+export const executeAgent = async ({
+  agentConfig,
+  prompt,
+  timeout = 300000,
+  debug = false,
+  logFile,
+  rawOutput = false
+}) => {
+  const logger = createDebugLogger({ debug, logFile });
+  const processResult = await runAgentProcess({ agentConfig, prompt, timeout, logger });
+  return processAgentOutput({ agentConfig, rawOutput, logger })(processResult);
+};
diff --git a/source/execute-agent.test.js b/source/execute-agent.test.js
new file mode 100644
index 00000000..90a5fb23
--- /dev/null
+++ b/source/execute-agent.test.js
@@ -0,0 +1,277 @@
+import { describe, test, vi, beforeEach } from 'vitest';
+import { assert } from './vitest.js';
+
+vi.mock('child_process', () => ({
+  spawn: vi.fn()
+}));
+
+// Import after mock is registered
+const { spawn } = await import('child_process');
+const { executeAgent } = await import('./execute-agent.js');
+
+/**
+ * Build a mock child process that emits stdout/stderr data then closes.
+ * @param {Object} options
+ * @param {string} [options.stdout=''] - Data to emit on stdout
+ * @param {string} [options.stderr=''] - Data to emit on stderr
+ * @param {number} [options.exitCode=0] - Exit code for the close event
+ */
+const createMockProcess = ({ stdout = '', stderr = '', exitCode = 0 } = {}) => {
+  const listeners = { stdout: {}, stderr: {}, proc: {} };
+
+  const proc = {
+    stdout: {
+      on: (event, cb) => {
+        listeners.stdout[event] = cb;
+      }
+    },
+    stderr: {
+      on: (event, cb) => {
+        listeners.stderr[event] = cb;
+      }
+    },
+    stdin: { end: vi.fn() },
+    on: (event, cb) => {
+      listeners.proc[event] = cb;
+    }
+  };
+
+  // Emit events asynchronously after the next tick so all listeners are registered
+  setTimeout(() => {
+    if (stdout && listeners.stdout.data) listeners.stdout.data(stdout);
+    if (stderr && listeners.stderr.data) listeners.stderr.data(stderr);
+    if (listeners.proc.close) listeners.proc.close(exitCode);
+  }, 0);
+
+  return proc;
+};
+
+const agentConfig = {
+  command: 'claude',
+  args: ['-p', '--output-format', 'json', '--no-session-persistence']
+};
+
+describe('executeAgent()', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  test('returns parsed JSON from agent stdout', async () => {
+    const agentResponse = JSON.stringify({ passed: true, score: 90 });
+    spawn.mockReturnValue(createMockProcess({ stdout: agentResponse }));
+
+    const result = await executeAgent({
+      agentConfig,
+      prompt: 'test prompt'
+    });
+
+    assert({
+      given: 'agent stdout with valid JSON',
+      should: 'return parsed result object',
+      actual: result,
+      expected: { passed: true, score: 90 }
+    });
+  });
+
+  test('unwraps Claude CLI envelope and returns parsed inner result', async () => {
+    const innerResult = { passed: true, score: 85 };
+    const envelope = JSON.stringify({ result: JSON.stringify(innerResult) });
+    spawn.mockReturnValue(createMockProcess({ stdout: envelope }));
+
+    const result = await executeAgent({
+      agentConfig,
+      prompt: 'test prompt'
+    });
+
+    assert({
+      given: 'Claude CLI JSON envelope wrapping a stringified result',
+      should: 'unwrap and parse the inner result',
+      actual: result,
+      expected: innerResult
+    });
+  });
+
+  test('returns raw string when rawOutput is true', async () => {
+    const rawText = 'This is raw agent output';
+    spawn.mockReturnValue(createMockProcess({ stdout: rawText }));
+
+    const result = await executeAgent({
+      agentConfig,
+      prompt: 'test prompt',
+      rawOutput: true
+    });
+
+    assert({
+      given: 'rawOutput: true and plain text stdout',
+      should: 'return the raw string as-is',
+      actual: result,
+      expected: rawText
+    });
+  });
+
+  test('unwraps envelope when rawOutput is true', async () => {
+    const innerText = 'raw output from agent';
+    const envelope = JSON.stringify({ result: innerText });
+    spawn.mockReturnValue(createMockProcess({ stdout: envelope }));
+
+    const result = await executeAgent({
+      agentConfig,
+      prompt: 'test prompt',
+      rawOutput: true
+    });
+
+    assert({
+      given: 'rawOutput: true and JSON envelope wrapping a string',
+      should: 'unwrap and return the inner string',
+      actual: result,
+      expected: innerText
+    });
+  });
+
+  test('applies parseOutput preprocessor before parsing result', async () => {
+    const ndjsonOutput = '{"type":"text","part":{"text":"{\\"passed\\":true}"}}';
+    const parseOutput = vi.fn(() => '{"passed":true}');
+    spawn.mockReturnValue(createMockProcess({ stdout: ndjsonOutput }));
+
+    const result = await executeAgent({
+      agentConfig: { ...agentConfig, parseOutput },
+      prompt: 'test prompt'
+    });
+
+    assert({
+      given: 'agentConfig with parseOutput function',
+      should: 'call parseOutput with stdout',
+      actual: parseOutput.mock.calls.length,
+      expected: 1
+    });
+
+    assert({
+      given: 'parseOutput returns valid JSON',
+      should: 'return the parsed result',
+      actual: result,
+      expected: { passed: true }
+    });
+  });
+
+  test('throws AgentProcessError when exit code is non-zero', async () => {
+    spawn.mockReturnValue(createMockProcess({
+      stdout: '',
+      stderr: 'Permission denied',
+      exitCode: 1
+    }));
+
+    try {
+      await executeAgent({ agentConfig, prompt: 'test prompt' });
+      assert({
+        given: 'non-zero exit code',
+        should: 'have thrown an error',
+        actual: false,
+        expected: true
+      });
+    } catch (err) {
+      assert({
+        given: 'non-zero exit code from agent process',
+        should: 'throw Error with AgentProcessError cause',
+        actual: err?.cause?.name,
+        expected: 'AgentProcessError'
+      });
+
+      assert({
+        given: 'non-zero exit code',
+        should: 'have AGENT_PROCESS_FAILURE code',
+        actual: err?.cause?.code,
+        expected: 'AGENT_PROCESS_FAILURE'
+      });
+
+      assert({
+        given: 'non-zero exit code',
+        should: 'include exit code in cause',
+        actual: err?.cause?.exitCode,
+        expected: 1
+      });
+    }
+  });
+
+  test('throws TimeoutError when timeout is exceeded', async () => {
+    // Process that never closes
+    const proc = {
+      stdout: { on: vi.fn() },
+      stderr: { on: vi.fn() },
+      stdin: { end: vi.fn() },
+      on: vi.fn()
+    };
+    spawn.mockReturnValue(proc);
+
+    try {
+      await executeAgent({
+        agentConfig,
+        prompt: 'test prompt',
+        timeout: 1
+      });
+      assert({
+        given: 'timeout exceeded',
+        should: 'have thrown an error',
+        actual: false,
+        expected: true
+      });
+    } catch (err) {
+      assert({
+        given: 'agent process that exceeds timeout',
+        should: 'throw Error with TimeoutError cause',
+        actual: err?.cause?.name,
+        expected: 'TimeoutError'
+      });
+
+      assert({
+        given: 'timeout exceeded',
+        should: 'have AGENT_TIMEOUT code',
+        actual: err?.cause?.code,
+        expected: 'AGENT_TIMEOUT'
+      });
+    }
+  });
+
+  test('throws ParseError when stdout is not valid JSON (rawOutput: false)', async () => {
+    spawn.mockReturnValue(createMockProcess({ stdout: 'not valid json output' }));
+
+    try {
+      await executeAgent({ agentConfig, prompt: 'test prompt' });
+      assert({
+        given: 'invalid JSON stdout',
+        should: 'have thrown an error',
+        actual: false,
+        expected: true
+      });
+    } catch (err) {
+      assert({
+        given: 'stdout that is not valid JSON',
+        should: 'throw Error with ParseError cause',
+        actual: err?.cause?.name,
+        expected: 'ParseError'
+      });
+    }
+  });
+
+  test('spawns the agent with command, args, and prompt appended', async () => {
+    spawn.mockReturnValue(createMockProcess({ stdout: '{"ok":true}' }));
+
+    await executeAgent({
+      agentConfig,
+      prompt: 'my prompt'
+    });
+
+    assert({
+      given: 'valid agentConfig with command and args',
+      should: 'spawn with the command',
+      actual: spawn.mock.calls[0][0],
+      expected: 'claude'
+    });
+
+    assert({
+      given: 'valid agentConfig with command and args',
+      should: 'spawn with args including the prompt at the end',
+      actual: spawn.mock.calls[0][1],
+      expected: ['-p', '--output-format', 'json', '--no-session-persistence', 'my prompt']
+    });
+  });
+});
diff --git a/source/extraction-parser.js b/source/extraction-parser.js
new file mode 100644
index 00000000..799f37e2
--- /dev/null
+++ b/source/extraction-parser.js
@@ -0,0 +1,127 @@
+import { createError } from 'error-causes';
+import { ExtractionParseError, ExtractionValidationError } from './ai-errors.js';
+import { readFile } from 'fs/promises';
+import { resolve } from 'path';
+
+const assertionRequiredFields = ['id', 'requirement'];
+
+/**
+ * Resolve and read import files, concatenating their contents.
+ *
+ * SECURITY NOTE: Import paths are NOT validated for path traversal.
+ * This allows legitimate cross-project imports (e.g., shared prompt libraries).
+ * Test authors are responsible for not importing sensitive files (.env, credentials).
+ * See PR #394 remediation epic (Wave 1, Task 2) for design rationale.
+ */
+export const resolveImportPaths = async (importPaths, projectRoot, debug) => {
+  if (debug) {
+    console.error(`[DEBUG] Found ${importPaths.length} imports to resolve`);
+  }
+  const importedContents = await Promise.all(
+    importPaths.map(async importPath => {
+      const resolvedPath = resolve(projectRoot, importPath);
+      if (debug) {
+        console.error(`[DEBUG] Reading import: ${importPath} -> ${resolvedPath}`);
+      }
+      try {
+        return await readFile(resolvedPath, 'utf-8');
+      } catch (originalError) {
+        throw createError({
+          name: 'ValidationError',
+          message: `Failed to read imported prompt file: ${importPath}`,
+          code: 'PROMPT_READ_FAILED',
+          path: importPath,
+          resolvedPath,
+          cause: originalError
+        });
+      }
+    })
+  );
+  const result = importedContents.join('\n\n');
+  if (debug) {
+    console.error(`[DEBUG] Imported content length: ${result.length} characters`);
+  }
+  return result;
+};
+
+/** Extract JSON from markdown code fences if present. */
+export const extractJSONFromMarkdown = (str) => {
+  const match = str.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
+  return match ? match[1] : str;
+};
+
+/**
+ * Try to parse a string as JSON, extracting from markdown code fences if needed.
+ * @throws {Error} If parsing fails
+ */
+export const tryParseJSON = (str) => {
+  try {
+    return JSON.parse(extractJSONFromMarkdown(str));
+  } catch (originalError) {
+    throw createError({
+      ...ExtractionParseError,
+      rawInput: str,
+      cause: originalError
+    });
+  }
+};
+
+/**
+ * Parse and validate extraction output from the agent.
+ * Accepts either a raw JSON string or an already-parsed object.
+ * Handles markdown code fences if present.
+ */
+export const parseExtractionResult = (rawOutput) => {
+  const parsed = typeof rawOutput === 'string'
+    ? tryParseJSON(rawOutput)
+    : rawOutput;
+
+  if (typeof parsed !== 'object' || parsed === null) {
+    throw createError({
+      ...ExtractionValidationError,
+      message: 'Extraction result must be a JSON object',
+      rawOutput
+    });
+  }
+
+  if (parsed.userPrompt === undefined || parsed.userPrompt === null) {
+    throw createError({
+      ...ExtractionValidationError,
+      message: 'Extraction result is missing required field: userPrompt',
+      rawOutput
+    });
+  }
+
+  if (!Array.isArray(parsed.importPaths)) {
+    throw createError({
+      ...ExtractionValidationError,
+      message: 'Extraction result is missing required field: importPaths (must be an array)',
+      rawOutput
+    });
+  }
+
+  if (!Array.isArray(parsed.assertions)) {
+    throw createError({
+      ...ExtractionValidationError,
+      message: 'Extraction result is missing required field: assertions (must be an array)',
+      rawOutput
+    });
+  }
+
+  // for loop: early throw on first invalid item; index needed for error context
+  for (let i = 0; i < parsed.assertions.length; i++) {
+    for (const field of assertionRequiredFields) {
+      if (parsed.assertions[i][field] === undefined || parsed.assertions[i][field] === null) {
+        throw createError({
+          ...ExtractionValidationError,
+          message: `Assertion at index ${i} is missing required field: ${field}`,
+          assertionIndex: i,
+          missingField: field,
+          rawOutput
+        });
+      }
+    }
+  }
+
+  return parsed;
+};
diff --git a/source/extraction-parser.test.js b/source/extraction-parser.test.js
new file mode 100644
index 00000000..1d2f69c2
--- /dev/null
+++ b/source/extraction-parser.test.js
@@ -0,0 +1,226 @@
+import { describe, test } from 'vitest';
+import { assert } from './vitest.js';
+import { Try } from './riteway.js';
+import { parseExtractionResult } from './extraction-parser.js';
+
+describe('parseExtractionResult()', () => {
+  test('parses valid extraction result with required fields', () => {
+    const validOutput = JSON.stringify({
+      userPrompt: 'What is 2 + 2?',
+      importPaths: ['test.mdc'],
+      assertions: [
+        { id: 1, requirement: 'Given simple addition, should add correctly' },
+        { id: 2, requirement: 'Given format, should output JSON' }
+      ]
+    });
+
+    const result = parseExtractionResult(validOutput);
+
+    assert({
+      given: 'valid extraction result',
+      should: 'preserve the userPrompt field',
+      actual: result.userPrompt,
+      expected: 'What is 2 + 2?'
+    });
+
+    assert({
+      given: 'valid extraction result',
+      should: 'preserve the importPaths array',
+      actual: Array.isArray(result.importPaths),
+      expected: true
+    });
+
+    assert({
+      given: 'valid extraction result',
+      should: 'preserve importPaths values',
+      actual: result.importPaths[0],
+      expected: 'test.mdc'
+    });
+
+    assert({
+      given: 'valid extraction result',
+      should: 'preserve assertions array length',
+      actual: result.assertions.length,
+      expected: 2
+    });
+
+    assert({
+      given: 'valid extraction result',
+      should: 'preserve assertion requirement field',
+      actual: result.assertions[0].requirement,
+      expected: 'Given simple addition, should add correctly'
+    });
+  });
+
+  test('parses JSON wrapped in markdown code fences', () => {
+    const markdownWrapped = '```json\n{\n  "userPrompt": "test prompt",\n  "importPaths": [],\n  "assertions": [\n    {\n      "id": 1,\n      "requirement": "Given test, should pass"\n    }\n  ]\n}\n```';
+
+    const result = parseExtractionResult(markdownWrapped);
+
+    assert({
+      given: 'JSON wrapped in markdown code fences',
+      should: 'extract and parse the JSON object',
+      actual: typeof result === 'object' && result !== null,
+      expected: true
+    });
+
+    assert({
+      given: 'JSON wrapped in markdown code fences',
+      should: 'preserve the userPrompt field',
+      actual: result.userPrompt,
+      expected: 'test prompt'
+    });
+
+    assert({
+      given: 'JSON wrapped in markdown code fences',
+      should: 'preserve assertions array',
+      actual: result.assertions[0].requirement,
+      expected: 'Given test, should pass'
+    });
+  });
+
+  test('parses JSON with surrounding explanation text and markdown fences', () => {
+    const withExplanation = 'Here is the extraction result you requested:\n\n```json\n{\n  "userPrompt": "test prompt",\n  "importPaths": [],\n  "assertions": [\n    {\n      "id": 1,\n      "requirement": "Given test, should pass"\n    }\n  ]\n}\n```\n\nLet me know if you need more help.';
+
+    const result = parseExtractionResult(withExplanation);
+
+    assert({
+      given: 'JSON with explanation text and markdown fences',
+      should: 'extract and parse the JSON object',
+      actual: typeof result === 'object' && result !== null,
+      expected: true
+    });
+
+    assert({
+      given: 'JSON with explanation text and markdown fences',
+      should: 'return the parsed content',
+      actual: result.assertions[0].requirement,
+      expected: 'Given test, should pass'
+    });
+  });
+
+  test('accepts an already-parsed object', () => {
+    const parsed = {
+      userPrompt: 'test prompt',
+      importPaths: [],
+      assertions: [{ id: 1, requirement: 'Given a test, should pass' }]
+    };
+
+    const result = parseExtractionResult(parsed);
+
+    assert({
+      given: 'an already-parsed object instead of a JSON string',
+      should: 'validate and return the object directly',
+      actual: result.userPrompt,
+      expected: 'test prompt'
+    });
+
+    assert({
+      given: 'an already-parsed object',
+      should: 'preserve the assertions',
+      actual: result.assertions[0].requirement,
+      expected: 'Given a test, should pass'
+    });
+  });
+
+  test('throws ExtractionParseError on malformed non-JSON input', () => {
+    const error = Try(parseExtractionResult, 'This is not JSON at all');
+
+    assert({
+      given: 'non-JSON input',
+      should: 'throw ExtractionParseError cause',
+      actual: error?.cause?.name,
+      expected: 'ExtractionParseError'
+    });
+
+    assert({
+      given: 'non-JSON input',
+      should: 'have EXTRACTION_PARSE_FAILURE code',
+      actual: error?.cause?.code,
+      expected: 'EXTRACTION_PARSE_FAILURE'
+    });
+
+    assert({
+      given: 'non-JSON input',
+      should: 'preserve original parse error as cause',
+      actual: error?.cause?.cause !== undefined,
+      expected: true
+    });
+  });
+
+  test('throws ExtractionValidationError when result has wrong structure', () => {
+    const error = Try(parseExtractionResult, JSON.stringify({ id: 1, description: 'test', prompt: 'test' }));
+
+    assert({
+      given: 'extraction result with invalid structure',
+      should: 'throw ExtractionValidationError cause',
+      actual: error?.cause?.name,
+      expected: 'ExtractionValidationError'
+    });
+
+    assert({
+      given: 'extraction result with invalid structure',
+      should: 'have EXTRACTION_VALIDATION_FAILURE code',
+      actual: error?.cause?.code,
+      expected: 'EXTRACTION_VALIDATION_FAILURE'
+    });
+  });
+
+  test.each([
+    [
+      'missing importPaths',
+      { userPrompt: 'test', assertions: [] },
+      'importPaths'
+    ],
+    [
+      'missing userPrompt',
+      { importPaths: [], assertions: [] },
+      'userPrompt'
+    ],
+    [
+      'missing assertions',
+      { userPrompt: 'test', importPaths: [] },
+      'assertions'
+    ],
+  ])('throws when %s is missing', (_, input, missingField) => {
+    const error = Try(parseExtractionResult, JSON.stringify(input));
+
+    assert({
+      given: `extraction result missing ${missingField}`,
+      should: 'throw ExtractionValidationError',
+      actual: error?.cause?.name,
+      expected: 'ExtractionValidationError'
+    });
+
+    assert({
+      given: `extraction result missing ${missingField}`,
+      should: 'have descriptive error message',
+      actual: error?.message?.includes(missingField),
+      expected: true
+    });
+  });
+
+  test('throws when assertion is missing required field', () => {
+    const missingAssertionFields = JSON.stringify({
+      userPrompt: 'test',
+      importPaths: [],
+      assertions: [{ id: 1 }]
+    });
+
+    const error = Try(parseExtractionResult, missingAssertionFields);
+
+    assert({
+      given: 'assertion missing the requirement field',
+      should: 'throw ExtractionValidationError',
+      actual: error?.cause?.name,
+      expected: 'ExtractionValidationError'
+    });
+
+    assert({
+      given: 'assertion missing the requirement field',
+      should: 'have error message indicating missing field',
+      actual: error?.message?.includes('requirement'),
+      expected: true
+    });
+  });
+});

From 41172a3abae4eb9412f7b0c78673fe45b816b6f1 Mon Sep 17 00:00:00 2001
From: Ian White <ianwhitedeveloper@gmail.com>
Date: Wed, 18 Feb 2026 09:06:05 -0600
Subject: [PATCH 2/6] fix(ai): address PR 3 code review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- aggregation.js: validate once in aggregatePerAssertionResults — capture
  the Zod-validated result and compute Math.ceil inline, eliminating the
  redundant second schema parse inside calculateRequiredPasses
- aggregation.js: remove misleading optional chaining (raw?.passed etc.)
  after the null-guard throw; use plain property access
- agent-parser.js: replace acc.push() with [...acc, text] in reduce
  accumulator to prefer immutability per JS style guide
- agent-parser.test.js: drop redundant "parsed object:" prefix from
  unwrapEnvelope test.each given fields; remove duplicate standalone
  "no result key" test that overlapped with test.each row
- aggregation.test.js: remove redundant export-existence assertion for
  normalizeJudgment; add empty perAssertionResults edge case (vacuous
  truth — every() on [] returns true)
- execute-agent.test.js: strengthen parseOutput test to verify stdout
  and logger are threaded through as expected (documents WIP fix #8)

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 source/agent-parser.js       |  2 +-
 source/agent-parser.test.js  | 12 +-----------
 source/aggregation.js        | 15 ++++++++-------
 source/aggregation.test.js   | 31 ++++++++++++++++++++++---------
 source/execute-agent.test.js | 16 +++++++++++++++-
 5 files changed, 47 insertions(+), 29 deletions(-)

diff --git a/source/agent-parser.js b/source/agent-parser.js
index 38f41aaa..3960224d 100644
--- a/source/agent-parser.js
+++ b/source/agent-parser.js
@@ -49,7 +49,7 @@ export const parseOpenCodeNDJSON = (ndjson, logger) => {
       const event = JSON.parse(line);
       if (event.type === 'text' && event.part?.text) {
         logger.log(`Found text event with ${event.part.text.length} characters`);
-        acc.push(event.part.text);
+        return [...acc, event.part.text];
       }
     } catch (err) {
       logger.log(`Warning: Failed to parse NDJSON line: ${err.message}`);
diff --git a/source/agent-parser.test.js b/source/agent-parser.test.js
index 1eee4b18..f222c4ed 100644
--- a/source/agent-parser.test.js
+++ b/source/agent-parser.test.js
@@ -332,22 +332,12 @@ describe('unwrapEnvelope()', () => {
     ['object without result field', { passed: true, score: 80 }, { passed: true, score: 80 }],
   ])('%s', (_, input, expected) => {
     assert({
-      given: `parsed object: ${_}`,
+      given: _,
       should: 'return the unwrapped value',
       actual: unwrapEnvelope(input),
       expected
     });
   });
-
-  test('returns object as-is when result key is absent', () => {
-    const input = { status: 'ok', data: [1, 2, 3] };
-    assert({
-      given: 'object with no result key',
-      should: 'return the original object',
-      actual: unwrapEnvelope(input),
-      expected: input
-    });
-  });
 });
 
 describe('unwrapAgentResult()', () => {
diff --git a/source/aggregation.js b/source/aggregation.js
index bb128d45..d4928d8a 100644
--- a/source/aggregation.js
+++ b/source/aggregation.js
@@ -22,15 +22,15 @@ export const normalizeJudgment = (raw, { requirement, runIndex, logger }) => {
     });
   }
 
-  if (raw?.actual === undefined || raw?.expected === undefined) {
+  if (raw.actual === undefined || raw.expected === undefined) {
     logger.log(`Warning: Judge response missing fields for "${requirement}" run ${runIndex + 1}`);
   }
 
   return {
-    passed: raw?.passed === true,
-    actual: raw?.actual ?? 'No actual provided',
-    expected: raw?.expected ?? 'No expected provided',
-    score: Number.isFinite(raw?.score) ? Math.max(0, Math.min(100, raw.score)) : 0
+    passed: raw.passed === true,
+    actual: raw.actual ?? 'No actual provided',
+    expected: raw.expected ?? 'No expected provided',
+    score: Number.isFinite(raw.score) ? Math.max(0, Math.min(100, raw.score)) : 0
   };
 };
 
@@ -66,8 +66,9 @@ export const calculateRequiredPasses = ({ runs = defaults.runs, threshold = defa
  * Overall pass requires all assertions to meet the threshold.
  */
 export const aggregatePerAssertionResults = ({ perAssertionResults, threshold, runs }) => {
+  let validated;
   try {
-    calculateRequiredPassesSchema.parse({ runs, threshold });
+    validated = calculateRequiredPassesSchema.parse({ runs, threshold });
   } catch (zodError) {
     const issues = zodError.issues || [];
     const messages = issues.map(issue =>
@@ -84,7 +85,7 @@ export const aggregatePerAssertionResults = ({ perAssertionResults, threshold, r
     });
   }
 
-  const requiredPasses = calculateRequiredPasses({ runs, threshold });
+  const requiredPasses = Math.ceil((validated.runs * validated.threshold) / 100);
 
   const assertions = perAssertionResults.map(({ requirement, runResults }) => {
     const passCount = runResults.filter(r => r.passed).length;
diff --git a/source/aggregation.test.js b/source/aggregation.test.js
index 3e45f979..961a6d87 100644
--- a/source/aggregation.test.js
+++ b/source/aggregation.test.js
@@ -284,6 +284,28 @@ describe('aggregatePerAssertionResults()', () => {
     });
   });
 
+  test('passes with empty assertions array (vacuous truth)', () => {
+    const result = aggregatePerAssertionResults({
+      perAssertionResults: [],
+      threshold: 75,
+      runs: 4
+    });
+
+    assert({
+      given: 'empty perAssertionResults array',
+      should: 'return passed: true (all zero assertions meet threshold)',
+      actual: result.passed,
+      expected: true
+    });
+
+    assert({
+      given: 'empty perAssertionResults array',
+      should: 'return empty assertions array',
+      actual: result.assertions,
+      expected: []
+    });
+  });
+
   test.each([
     ['runs above maximum', { runs: 1001, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
     ['threshold above maximum', { runs: 4, threshold: 150 }, 'INVALID_AGGREGATION_PARAMS'],
@@ -315,15 +337,6 @@ describe('aggregatePerAssertionResults()', () => {
 describe('normalizeJudgment()', () => {
   const createMockLogger = () => ({ log: vi.fn() });
 
-  test('is exported as a function', () => {
-    assert({
-      given: 'aggregation module',
-      should: 'export normalizeJudgment',
-      actual: typeof normalizeJudgment,
-      expected: 'function'
-    });
-  });
-
   test('passes through complete valid input unchanged', () => {
     const logger = createMockLogger();
     const raw = { passed: true, actual: 'Result from agent', expected: 'Expected output', score: 85 };
diff --git a/source/execute-agent.test.js b/source/execute-agent.test.js
index 90a5fb23..92e72e00 100644
--- a/source/execute-agent.test.js
+++ b/source/execute-agent.test.js
@@ -140,11 +140,25 @@ describe('executeAgent()', () => {
 
     assert({
       given: 'agentConfig with parseOutput function',
-      should: 'call parseOutput with stdout',
+      should: 'call parseOutput exactly once',
       actual: parseOutput.mock.calls.length,
       expected: 1
     });
 
+    assert({
+      given: 'agentConfig with parseOutput function',
+      should: 'pass the raw stdout as first argument',
+      actual: parseOutput.mock.calls[0][0],
+      expected: ndjsonOutput
+    });
+
+    assert({
+      given: 'agentConfig with parseOutput function',
+      should: 'pass the logger as second argument (WIP fix #8 — logger threading)',
+      actual: typeof parseOutput.mock.calls[0][1]?.log,
+      expected: 'function'
+    });
+
     assert({
       given: 'parseOutput returns valid JSON',
       should: 'return the parsed result',

From 401aa921caec7485258442e6559a6b2ebba78424 Mon Sep 17 00:00:00 2001
From: Ian White <ianwhitedeveloper@gmail.com>
Date: Wed, 18 Feb 2026 11:33:47 -0600
Subject: [PATCH 3/6] fix(ai): address PR 3 author review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- aggregation.js: rename `raw` param to `judgeResponse` and
  fold into single options object for normalizeJudgment; removes
  the two-argument signature (breaking change, callers updated)
- aggregation.js: remove calculateRequiredPasses — math is inlined
  in aggregatePerAssertionResults, eliminating double schema parse
- aggregation.test.js: remove calculateRequiredPasses describe block;
  fix Try() usage (direct fn ref, not arrow wrapper); update all
  normalizeJudgment call sites to new single-options signature
- execute-agent.js: extract magic number 500 to maxOutputPreviewLength
  constant (camelCase per javascript.mdc); applied to all 3 truncation sites
- execute-agent.test.js: replace try/catch antipatterns with await Try();
  add Try import from riteway.js
- extraction-parser.test.js: strengthen weak typeof assertions to check
  specific fields; strengthen cause !== undefined to cause.name === SyntaxError

151 tests pass, 0 lint errors, TypeScript clean.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 source/aggregation.js            |  49 +++--------
 source/aggregation.test.js       | 137 +++++++------------------------
 source/execute-agent.js          |   8 +-
 source/execute-agent.test.js     | 119 +++++++++++----------------
 source/extraction-parser.test.js |  14 ++--
 5 files changed, 97 insertions(+), 230 deletions(-)

diff --git a/source/aggregation.js b/source/aggregation.js
index d4928d8a..a3b5bd33 100644
--- a/source/aggregation.js
+++ b/source/aggregation.js
@@ -1,65 +1,36 @@
 import { createError } from 'error-causes';
 import { ValidationError, ParseError } from './ai-errors.js';
-import {
-  defaults,
-  calculateRequiredPassesSchema
-} from './constants.js';
+import { calculateRequiredPassesSchema } from './constants.js';
 
 /**
  * Normalize a judge response (already parsed from TAP YAML) to ensure consistent
  * structure with safe defaults for missing fields.
- * @throws {Error} If raw is not an object (null, string, undefined, etc.)
+ * @throws {Error} If judgeResponse is not an object (null, string, undefined, etc.)
  */
-export const normalizeJudgment = (raw, { requirement, runIndex, logger }) => {
-  if (typeof raw !== 'object' || raw === null) {
+export const normalizeJudgment = ({ judgeResponse, requirement, runIndex, logger }) => {
+  if (typeof judgeResponse !== 'object' || judgeResponse === null) {
     throw createError({
       ...ParseError,
       message: 'Judge returned non-object response',
       code: 'JUDGE_INVALID_RESPONSE',
       requirement,
       runIndex,
-      rawResponse: raw
+      rawResponse: judgeResponse
     });
   }
 
-  if (raw.actual === undefined || raw.expected === undefined) {
+  if (judgeResponse.actual === undefined || judgeResponse.expected === undefined) {
     logger.log(`Warning: Judge response missing fields for "${requirement}" run ${runIndex + 1}`);
   }
 
   return {
-    passed: raw.passed === true,
-    actual: raw.actual ?? 'No actual provided',
-    expected: raw.expected ?? 'No expected provided',
-    score: Number.isFinite(raw.score) ? Math.max(0, Math.min(100, raw.score)) : 0
+    passed: judgeResponse.passed === true,
+    actual: judgeResponse.actual ?? 'No actual provided',
+    expected: judgeResponse.expected ?? 'No expected provided',
+    score: Number.isFinite(judgeResponse.score) ? Math.max(0, Math.min(100, judgeResponse.score)) : 0
   };
 };
 
-/**
- * Calculate the number of passes required to meet the threshold.
- * Uses ceiling to ensure threshold is met or exceeded.
- * @throws {Error} If validation fails (non-integer runs, invalid threshold, etc.)
- */
-export const calculateRequiredPasses = ({ runs = defaults.runs, threshold = defaults.threshold } = {}) => {
-  try {
-    const validated = calculateRequiredPassesSchema.parse({ runs, threshold });
-    return Math.ceil((validated.runs * validated.threshold) / 100);
-  } catch (zodError) {
-    const issues = zodError.issues || [];
-    const messages = issues.map(issue =>
-      `${issue.path.join('.')}: ${issue.message}`
-    ).join('; ');
-
-    throw createError({
-      ...ValidationError,
-      message: `Invalid parameters for calculateRequiredPasses: ${messages}`,
-      code: 'INVALID_CALCULATION_PARAMS',
-      runs,
-      threshold,
-      cause: zodError
-    });
-  }
-};
-
 /**
  * Aggregate results from per-assertion test runs.
  * Each assertion is independently evaluated against the threshold.
diff --git a/source/aggregation.test.js b/source/aggregation.test.js
index 961a6d87..2e7a3afc 100644
--- a/source/aggregation.test.js
+++ b/source/aggregation.test.js
@@ -3,94 +3,9 @@ import { assert } from './vitest.js';
 import { Try } from './riteway.js';
 import {
   normalizeJudgment,
-  calculateRequiredPasses,
   aggregatePerAssertionResults
 } from './aggregation.js';
 
-describe('calculateRequiredPasses()', () => {
-  test.each([
-    ['4 runs, 75% threshold', { runs: 4, threshold: 75 }, 3],
-    ['5 runs, 75% threshold', { runs: 5, threshold: 75 }, 4],
-    ['10 runs, 80% threshold', { runs: 10, threshold: 80 }, 8],
-    ['4 runs, 80% threshold', { runs: 4, threshold: 80 }, 4],
-  ])('%s requires correct pass count', (_, options, expected) => {
-    assert({
-      given: _,
-      should: `require ${expected} passes`,
-      actual: calculateRequiredPasses(options),
-      expected
-    });
-  });
-
-  test('uses default values when called with no arguments', () => {
-    assert({
-      given: 'no arguments',
-      should: 'use defaults (4 runs, 75% threshold) requiring 3 passes',
-      actual: calculateRequiredPasses(),
-      expected: 3
-    });
-  });
-
-  test.each([
-    ['zero runs', { runs: 0, threshold: 75 }, 'runs must be at least 1'],
-    ['negative runs', { runs: -1, threshold: 75 }, 'runs must be at least 1'],
-    ['non-integer runs', { runs: 1.5, threshold: 75 }, 'runs must be an integer'],
-    ['NaN runs', { runs: NaN, threshold: 75 }, 'expected number, received NaN'],
-  ])('throws ValidationError for %s', (_, options, expectedMessage) => {
-    const error = Try(calculateRequiredPasses, options);
-
-    assert({
-      given: _,
-      should: 'have ValidationError in cause',
-      actual: error?.cause?.name,
-      expected: 'ValidationError'
-    });
-
-    assert({
-      given: _,
-      should: 'have INVALID_CALCULATION_PARAMS code',
-      actual: error?.cause?.code,
-      expected: 'INVALID_CALCULATION_PARAMS'
-    });
-
-    assert({
-      given: _,
-      should: 'include descriptive message',
-      actual: error?.message?.includes(expectedMessage),
-      expected: true
-    });
-  });
-
-  test.each([
-    ['threshold above 100', { runs: 4, threshold: 150 }, 'threshold must be at most 100'],
-    ['negative threshold', { runs: 4, threshold: -10 }, 'threshold must be at least 0'],
-    ['NaN threshold', { runs: 4, threshold: NaN }, 'expected number, received NaN'],
-  ])('throws ValidationError for %s', (_, options, expectedMessage) => {
-    const error = Try(calculateRequiredPasses, options);
-
-    assert({
-      given: _,
-      should: 'have ValidationError in cause',
-      actual: error?.cause?.name,
-      expected: 'ValidationError'
-    });
-
-    assert({
-      given: _,
-      should: 'have INVALID_CALCULATION_PARAMS code',
-      actual: error?.cause?.code,
-      expected: 'INVALID_CALCULATION_PARAMS'
-    });
-
-    assert({
-      given: _,
-      should: 'include descriptive message',
-      actual: error?.message?.includes(expectedMessage),
-      expected: true
-    });
-  });
-});
-
 describe('aggregatePerAssertionResults()', () => {
   test('aggregates per-assertion results when all assertions pass', () => {
     const perAssertionResults = [
@@ -314,9 +229,7 @@ describe('aggregatePerAssertionResults()', () => {
       { requirement: 'test', runResults: [{ passed: true }] }
     ];
 
-    const error = Try(() =>
-      aggregatePerAssertionResults({ perAssertionResults, threshold, runs })
-    );
+    const error = Try(aggregatePerAssertionResults, { perAssertionResults, threshold, runs });
 
     assert({
       given: _,
@@ -339,9 +252,9 @@ describe('normalizeJudgment()', () => {
 
   test('passes through complete valid input unchanged', () => {
     const logger = createMockLogger();
-    const raw = { passed: true, actual: 'Result from agent', expected: 'Expected output', score: 85 };
+    const judgeResponse = { passed: true, actual: 'Result from agent', expected: 'Expected output', score: 85 };
 
-    const result = normalizeJudgment(raw, { requirement: 'test assertion', runIndex: 0, logger });
+    const result = normalizeJudgment({ judgeResponse, requirement: 'test assertion', runIndex: 0, logger });
 
     assert({
       given: 'complete valid judgment with passed: true',
@@ -374,10 +287,12 @@ describe('normalizeJudgment()', () => {
 
   test('defaults passed to false when missing', () => {
     const logger = createMockLogger();
-    const result = normalizeJudgment(
-      { actual: 'Result', expected: 'Expected', score: 50 },
-      { requirement: 'test', runIndex: 0, logger }
-    );
+    const result = normalizeJudgment({
+      judgeResponse: { actual: 'Result', expected: 'Expected', score: 50 },
+      requirement: 'test',
+      runIndex: 0,
+      logger
+    });
 
     assert({
       given: 'judgment missing passed field',
@@ -389,10 +304,12 @@ describe('normalizeJudgment()', () => {
 
   test('defaults missing actual and expected with warning log', () => {
     const logger = createMockLogger();
-    const result = normalizeJudgment(
-      { passed: true, score: 100 },
-      { requirement: 'test assertion', runIndex: 2, logger }
-    );
+    const result = normalizeJudgment({
+      judgeResponse: { passed: true, score: 100 },
+      requirement: 'test assertion',
+      runIndex: 2,
+      logger
+    });
 
     assert({
       given: 'judgment missing actual',
@@ -422,10 +339,12 @@ describe('normalizeJudgment()', () => {
     ['NaN score', NaN, 0],
   ])('normalizes %s correctly', (_, score, expected) => {
     const logger = createMockLogger();
-    const result = normalizeJudgment(
-      { passed: true, actual: 'Result', expected: 'Expected', score },
-      { requirement: 'test', runIndex: 0, logger }
-    );
+    const result = normalizeJudgment({
+      judgeResponse: { passed: true, actual: 'Result', expected: 'Expected', score },
+      requirement: 'test',
+      runIndex: 0,
+      logger
+    });
 
     assert({
       given: `judgment with ${_}`,
@@ -437,10 +356,12 @@ describe('normalizeJudgment()', () => {
 
   test('defaults missing score to 0', () => {
     const logger = createMockLogger();
-    const result = normalizeJudgment(
-      { passed: true, actual: 'Result', expected: 'Expected' },
-      { requirement: 'test', runIndex: 0, logger }
-    );
+    const result = normalizeJudgment({
+      judgeResponse: { passed: true, actual: 'Result', expected: 'Expected' },
+      requirement: 'test',
+      runIndex: 0,
+      logger
+    });
 
     assert({
       given: 'judgment missing score',
@@ -456,7 +377,7 @@ describe('normalizeJudgment()', () => {
     ['undefined input', undefined],
   ])('throws ParseError for %s', (_, input) => {
     const logger = createMockLogger();
-    const error = Try(normalizeJudgment, input, { requirement: 'test assertion', runIndex: 1, logger });
+    const error = Try(normalizeJudgment, { judgeResponse: input, requirement: 'test assertion', runIndex: 1, logger });
 
     assert({
       given: _,
@@ -475,7 +396,7 @@ describe('normalizeJudgment()', () => {
 
   test('includes requirement and runIndex in ParseError cause', () => {
     const logger = createMockLogger();
-    const error = Try(normalizeJudgment, null, { requirement: 'test assertion', runIndex: 1, logger });
+    const error = Try(normalizeJudgment, { judgeResponse: null, requirement: 'test assertion', runIndex: 1, logger });
 
     assert({
       given: 'null input',
diff --git a/source/execute-agent.js b/source/execute-agent.js
index 62ec2949..a22d9c11 100644
--- a/source/execute-agent.js
+++ b/source/execute-agent.js
@@ -4,6 +4,8 @@ import { ParseError, TimeoutError, AgentProcessError } from './ai-errors.js';
 import { createDebugLogger } from './debug-logger.js';
 import { unwrapEnvelope, unwrapAgentResult } from './agent-parser.js';
 
+const maxOutputPreviewLength = 500;
+
 const withTimeout = (promise, ms, errorFactory) =>
   Promise.race([
     promise,
@@ -95,7 +97,7 @@ const processAgentOutput = ({ agentConfig, rawOutput, logger }) => ({ stdout })
     logger.flush();
     return result;
   } catch (err) {
-    const truncatedStdout = stdout.length > 500 ? `${stdout.slice(0, 500)}...` : stdout;
+    const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout;
     logger.log('JSON parsing failed:', err.message);
     logger.flush();
 
@@ -131,8 +133,8 @@ const runAgentProcess = async ({ agentConfig, prompt, timeout, logger }) => {
   logger.log(`Stderr length: ${stderr.length} characters`);
 
   if (code !== 0) {
-    const truncatedStdout = stdout.length > 500 ? `${stdout.slice(0, 500)}...` : stdout;
-    const truncatedStderr = stderr.length > 500 ? `${stderr.slice(0, 500)}...` : stderr;
+    const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout;
+    const truncatedStderr = stderr.length > maxOutputPreviewLength ? `${stderr.slice(0, maxOutputPreviewLength)}...` : stderr;
 
     logger.log('Process failed with non-zero exit code');
     logger.flush();
diff --git a/source/execute-agent.test.js b/source/execute-agent.test.js
index 92e72e00..d42e21bf 100644
--- a/source/execute-agent.test.js
+++ b/source/execute-agent.test.js
@@ -1,5 +1,6 @@
 import { describe, test, vi, beforeEach } from 'vitest';
 import { assert } from './vitest.js';
+import { Try } from './riteway.js';
 
 vi.mock('child_process', () => ({
   spawn: vi.fn()
@@ -174,36 +175,28 @@ describe('executeAgent()', () => {
       exitCode: 1
     }));
 
-    try {
-      await executeAgent({ agentConfig, prompt: 'test prompt' });
-      assert({
-        given: 'non-zero exit code',
-        should: 'have thrown an error',
-        actual: false,
-        expected: true
-      });
-    } catch (err) {
-      assert({
-        given: 'non-zero exit code from agent process',
-        should: 'throw Error with AgentProcessError cause',
-        actual: err?.cause?.name,
-        expected: 'AgentProcessError'
-      });
-
-      assert({
-        given: 'non-zero exit code',
-        should: 'have AGENT_PROCESS_FAILURE code',
-        actual: err?.cause?.code,
-        expected: 'AGENT_PROCESS_FAILURE'
-      });
-
-      assert({
-        given: 'non-zero exit code',
-        should: 'include exit code in cause',
-        actual: err?.cause?.exitCode,
-        expected: 1
-      });
-    }
+    const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt' });
+
+    assert({
+      given: 'non-zero exit code from agent process',
+      should: 'throw Error with AgentProcessError cause',
+      actual: err?.cause?.name,
+      expected: 'AgentProcessError'
+    });
+
+    assert({
+      given: 'non-zero exit code',
+      should: 'have AGENT_PROCESS_FAILURE code',
+      actual: err?.cause?.code,
+      expected: 'AGENT_PROCESS_FAILURE'
+    });
+
+    assert({
+      given: 'non-zero exit code',
+      should: 'include exit code in cause',
+      actual: err?.cause?.exitCode,
+      expected: 1
+    });
   });
 
   test('throws TimeoutError when timeout is exceeded', async () => {
@@ -216,54 +209,34 @@ describe('executeAgent()', () => {
     };
     spawn.mockReturnValue(proc);
 
-    try {
-      await executeAgent({
-        agentConfig,
-        prompt: 'test prompt',
-        timeout: 1
-      });
-      assert({
-        given: 'timeout exceeded',
-        should: 'have thrown an error',
-        actual: false,
-        expected: true
-      });
-    } catch (err) {
-      assert({
-        given: 'agent process that exceeds timeout',
-        should: 'throw Error with TimeoutError cause',
-        actual: err?.cause?.name,
-        expected: 'TimeoutError'
-      });
-
-      assert({
-        given: 'timeout exceeded',
-        should: 'have AGENT_TIMEOUT code',
-        actual: err?.cause?.code,
-        expected: 'AGENT_TIMEOUT'
-      });
-    }
+    const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt', timeout: 1 });
+
+    assert({
+      given: 'agent process that exceeds timeout',
+      should: 'throw Error with TimeoutError cause',
+      actual: err?.cause?.name,
+      expected: 'TimeoutError'
+    });
+
+    assert({
+      given: 'timeout exceeded',
+      should: 'have AGENT_TIMEOUT code',
+      actual: err?.cause?.code,
+      expected: 'AGENT_TIMEOUT'
+    });
   });
 
   test('throws ParseError when stdout is not valid JSON (rawOutput: false)', async () => {
     spawn.mockReturnValue(createMockProcess({ stdout: 'not valid json output' }));
 
-    try {
-      await executeAgent({ agentConfig, prompt: 'test prompt' });
-      assert({
-        given: 'invalid JSON stdout',
-        should: 'have thrown an error',
-        actual: false,
-        expected: true
-      });
-    } catch (err) {
-      assert({
-        given: 'stdout that is not valid JSON',
-        should: 'throw Error with ParseError cause',
-        actual: err?.cause?.name,
-        expected: 'ParseError'
-      });
-    }
+    const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt' });
+
+    assert({
+      given: 'stdout that is not valid JSON',
+      should: 'throw Error with ParseError cause',
+      actual: err?.cause?.name,
+      expected: 'ParseError'
+    });
   });
 
   test('spawns the agent with command, args, and prompt appended', async () => {
diff --git a/source/extraction-parser.test.js b/source/extraction-parser.test.js
index 1d2f69c2..ba2dfb99 100644
--- a/source/extraction-parser.test.js
+++ b/source/extraction-parser.test.js
@@ -59,8 +59,8 @@ describe('parseExtractionResult()', () => {
 
     assert({
       given: 'JSON wrapped in markdown code fences',
-      should: 'extract and parse the JSON object',
-      actual: typeof result === 'object' && result !== null,
+      should: 'parse importPaths as an array',
+      actual: Array.isArray(result.importPaths),
       expected: true
     });
 
@@ -87,8 +87,8 @@ describe('parseExtractionResult()', () => {
     assert({
       given: 'JSON with explanation text and markdown fences',
       should: 'extract and parse the JSON object',
-      actual: typeof result === 'object' && result !== null,
-      expected: true
+      actual: result.userPrompt,
+      expected: 'test prompt'
     });
 
     assert({
@@ -142,9 +142,9 @@ describe('parseExtractionResult()', () => {
 
     assert({
       given: 'non-JSON input',
-      should: 'preserve original parse error as cause',
-      actual: error?.cause?.cause !== undefined,
-      expected: true
+      should: 'preserve original JSON SyntaxError as cause',
+      actual: error?.cause?.cause?.name,
+      expected: 'SyntaxError'
     });
   });
 

From 6c5406c05fbc1f4b984b99d42fd5893e06be7b66 Mon Sep 17 00:00:00 2001
From: Ian White <ianwhitedeveloper@gmail.com>
Date: Wed, 18 Feb 2026 11:47:26 -0600
Subject: [PATCH 4/6] fix(ai): address PR 3 follow-up review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- constants.js: rename calculateRequiredPassesSchema to
  aggregationParamsSchema — name now reflects what the schema
  validates (aggregation input params) rather than the deleted
  calculateRequiredPasses function; update all import sites
- aggregation.test.js: add 6 missing Zod validation edge cases
  for aggregatePerAssertionResults (zero runs, negative runs,
  non-integer runs, NaN runs, negative threshold, NaN threshold)
  — coverage gap introduced when calculateRequiredPasses and its
  tests were removed; all cases now exercised via
  aggregatePerAssertionResults test.each

157 tests pass, 0 lint errors, TypeScript clean.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 source/aggregation.js      | 4 ++--
 source/aggregation.test.js | 6 ++++++
 source/constants.js        | 2 +-
 source/constants.test.js   | 8 ++++----
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/source/aggregation.js b/source/aggregation.js
index a3b5bd33..6f8c0cb9 100644
--- a/source/aggregation.js
+++ b/source/aggregation.js
@@ -1,6 +1,6 @@
 import { createError } from 'error-causes';
 import { ValidationError, ParseError } from './ai-errors.js';
-import { calculateRequiredPassesSchema } from './constants.js';
+import { aggregationParamsSchema } from './constants.js';
 
 /**
  * Normalize a judge response (already parsed from TAP YAML) to ensure consistent
@@ -39,7 +39,7 @@ export const normalizeJudgment = ({ judgeResponse, requirement, runIndex, logger
 export const aggregatePerAssertionResults = ({ perAssertionResults, threshold, runs }) => {
   let validated;
   try {
-    validated = calculateRequiredPassesSchema.parse({ runs, threshold });
+    validated = aggregationParamsSchema.parse({ runs, threshold });
   } catch (zodError) {
     const issues = zodError.issues || [];
     const messages = issues.map(issue =>
diff --git a/source/aggregation.test.js b/source/aggregation.test.js
index 2e7a3afc..a3074cde 100644
--- a/source/aggregation.test.js
+++ b/source/aggregation.test.js
@@ -223,7 +223,13 @@ describe('aggregatePerAssertionResults()', () => {
 
   test.each([
     ['runs above maximum', { runs: 1001, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['zero runs', { runs: 0, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['negative runs', { runs: -1, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['non-integer runs', { runs: 1.5, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['NaN runs', { runs: NaN, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
     ['threshold above maximum', { runs: 4, threshold: 150 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['negative threshold', { runs: 4, threshold: -10 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['NaN threshold', { runs: 4, threshold: NaN }, 'INVALID_AGGREGATION_PARAMS'],
   ])('throws ValidationError for %s', (_, { runs, threshold }, expectedCode) => {
     const perAssertionResults = [
       { requirement: 'test', runResults: [{ passed: true }] }
diff --git a/source/constants.js b/source/constants.js
index 71c17b6d..2624ee0c 100644
--- a/source/constants.js
+++ b/source/constants.js
@@ -47,7 +47,7 @@ export const agentSchema = z.enum(constraints.supportedAgents, {
   message: `agent must be one of: ${constraints.supportedAgents.join(', ')}`
 });
 
-export const calculateRequiredPassesSchema = z.object({
+export const aggregationParamsSchema = z.object({
   runs: runsSchema,
   threshold: thresholdSchema
 });
diff --git a/source/constants.test.js b/source/constants.test.js
index b68a15a9..ce80e926 100644
--- a/source/constants.test.js
+++ b/source/constants.test.js
@@ -8,7 +8,7 @@ import {
   concurrencySchema,
   timeoutSchema,
   agentSchema,
-  calculateRequiredPassesSchema,
+  aggregationParamsSchema,
   aiTestOptionsSchema
 } from './constants.js';
 
@@ -346,9 +346,9 @@ describe('constants module', () => {
     });
   });
 
-  describe('calculateRequiredPassesSchema', () => {
+  describe('aggregationParamsSchema', () => {
     test('validates complete object with defaults', () => {
-      const result = calculateRequiredPassesSchema.parse({
+      const result = aggregationParamsSchema.parse({
         runs: 5,
         threshold: 80
       });
@@ -362,7 +362,7 @@ describe('constants module', () => {
     });
 
     test('reports multiple validation errors', () => {
-      const result = calculateRequiredPassesSchema.safeParse({
+      const result = aggregationParamsSchema.safeParse({
         runs: -1,
         threshold: 150
       });

From a80bb8ed50fbd3760b8a891253e1cd579485fcc5 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Wed, 18 Feb 2026 20:59:55 +0000
Subject: [PATCH 5/6] refactor(test): complete PR review remediation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🐛 - Remove weak instanceof Error assertions

🔄 - Add threshold calculation verification tests

Tests now verify threshold-based pass/fail logic directly

164 tests passing, 0 lint errors, TypeScript clean

Co-authored-by: Ian White <ian.white.developer@gmail.com>
---
 source/agent-parser.test.js |  7 ------
 source/aggregation.test.js  | 43 +++++++++++++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/source/agent-parser.test.js b/source/agent-parser.test.js
index f222c4ed..2aa41da6 100644
--- a/source/agent-parser.test.js
+++ b/source/agent-parser.test.js
@@ -259,13 +259,6 @@ describe('parseOpenCodeNDJSON()', () => {
 
     const error = Try(parseOpenCodeNDJSON, ndjson, logger);
 
-    assert({
-      given: 'NDJSON with no text events',
-      should: 'throw Error with cause',
-      actual: error instanceof Error && error.cause !== undefined,
-      expected: true
-    });
-
     assert({
       given: 'NDJSON with no text events',
       should: 'have ParseError name in cause',
diff --git a/source/aggregation.test.js b/source/aggregation.test.js
index a3074cde..616eb033 100644
--- a/source/aggregation.test.js
+++ b/source/aggregation.test.js
@@ -221,6 +221,41 @@ describe('aggregatePerAssertionResults()', () => {
     });
   });
 
+  test.each([
+    ['4 runs, 75% threshold', 4, 75, 3, 3, true],
+    ['4 runs, 75% threshold', 4, 75, 2, 2, false],
+    ['5 runs, 75% threshold', 5, 75, 4, 4, true],
+    ['5 runs, 75% threshold', 5, 75, 3, 3, false],
+    ['10 runs, 80% threshold', 10, 80, 8, 8, true],
+    ['4 runs, 80% threshold', 4, 80, 4, 4, true],
+    ['4 runs, 80% threshold', 4, 80, 3, 3, false],
+  ])('applies threshold correctly: %s with %i passes', (_, runs, threshold, passCount, totalPasses, expectedPass) => {
+    const runResults = [
+      ...Array(passCount).fill({ passed: true, score: 100 }),
+      ...Array(runs - passCount).fill({ passed: false, score: 0 })
+    ];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults: [{ requirement: 'test assertion', runResults }],
+      threshold,
+      runs
+    });
+
+    assert({
+      given: `${_} with ${passCount} of ${runs} passes`,
+      should: expectedPass ? 'pass the assertion' : 'fail the assertion',
+      actual: result.assertions[0].passed,
+      expected: expectedPass
+    });
+
+    assert({
+      given: `${_} with ${passCount} passes`,
+      should: `report passCount of ${totalPasses}`,
+      actual: result.assertions[0].passCount,
+      expected: totalPasses
+    });
+  });
+
   test.each([
     ['runs above maximum', { runs: 1001, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
     ['zero runs', { runs: 0, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
@@ -239,14 +274,14 @@ describe('aggregatePerAssertionResults()', () => {
 
     assert({
       given: _,
-      should: 'throw validation error',
-      actual: error instanceof Error,
-      expected: true
+      should: 'have ValidationError name in cause',
+      actual: error?.cause?.name,
+      expected: 'ValidationError'
     });
 
     assert({
       given: _,
-      should: 'have correct error code',
+      should: 'have correct error code in cause',
       actual: error?.cause?.code,
       expected: expectedCode
     });

From 5d1cc7fbac2bb856df4100db9197db508f7ea4f5 Mon Sep 17 00:00:00 2001
From: Ian White <ianwhitedeveloper@gmail.com>
Date: Wed, 18 Feb 2026 15:16:28 -0600
Subject: [PATCH 6/6] fix(ai): remove implementation detail from test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- execute-agent.test.js: remove logger type assertion from
  parseOutput test — typeof checks violate tdd.mdc:64 and
  logger threading is an implementation detail; the three
  remaining assertions (call count, stdout arg, parsed result)
  collectively verify correct integration

164 tests pass, 0 lint errors, TypeScript clean.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 source/execute-agent.test.js | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/source/execute-agent.test.js b/source/execute-agent.test.js
index d42e21bf..01b934a0 100644
--- a/source/execute-agent.test.js
+++ b/source/execute-agent.test.js
@@ -153,13 +153,6 @@ describe('executeAgent()', () => {
       expected: ndjsonOutput
     });
 
-    assert({
-      given: 'agentConfig with parseOutput function',
-      should: 'pass the logger as second argument (WIP fix #8 — logger threading)',
-      actual: typeof parseOutput.mock.calls[0][1]?.log,
-      expected: 'function'
-    });
-
     assert({
       given: 'parseOutput returns valid JSON',
       should: 'return the parsed result',