diff --git a/source/agent-parser.js b/source/agent-parser.js
new file mode 100644
index 00000000..3960224d
--- /dev/null
+++ b/source/agent-parser.js
@@ -0,0 +1,108 @@
+import { createError } from 'error-causes';
+import { ParseError } from './ai-errors.js';
+
+/**
+ * Parse a string result from an agent, attempting multiple strategies:
+ * 1. Direct JSON parse if string starts with { or [
+ * 2. Extract and parse markdown-wrapped JSON (```json\n...\n```)
+ * 3. Keep as plain text if neither works
+ */
+export const parseStringResult = (result, logger) => {
+  const trimmed = result.trim();
+
+  if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
+    try {
+      const parsed = JSON.parse(trimmed);
+      logger.log('Successfully parsed string as JSON');
+      return parsed;
+    } catch {
+      logger.log('Direct JSON parse failed, trying markdown extraction');
+    }
+  }
+
+  const markdownMatch = result.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
+  if (markdownMatch) {
+    logger.log('Found markdown-wrapped JSON, extracting...');
+    try {
+      const parsed = JSON.parse(markdownMatch[1]);
+      logger.log('Successfully parsed markdown-wrapped JSON');
+      return parsed;
+    } catch {
+      logger.log('Failed to parse markdown content, keeping original string');
+    }
+  }
+
+  logger.log('String is not valid JSON, keeping as plain text');
+  return result;
+};
+
+/**
+ * Parse OpenCode's NDJSON output, extracting and concatenating all "text" events.
+ */
+export const parseOpenCodeNDJSON = (ndjson, logger) => {
+  logger.log('Parsing OpenCode NDJSON output...');
+
+  const lines = ndjson.trim().split('\n').filter(line => line.trim());
+
+  const textEvents = lines.reduce((acc, line) => {
+    try {
+      const event = JSON.parse(line);
+      if (event.type === 'text' && event.part?.text) {
+        logger.log(`Found text event with ${event.part.text.length} characters`);
+        return [...acc, event.part.text];
+      }
+    } catch (err) {
+      logger.log(`Warning: Failed to parse NDJSON line: ${err.message}`);
+    }
+    return acc;
+  }, []);
+
+  if (textEvents.length === 0) {
+    throw createError({
+      ...ParseError,
+      message: 'No text events found in OpenCode output',
+      code: 'NO_TEXT_EVENTS',
+      ndjsonLength: ndjson.length,
+      linesProcessed: lines.length
+    });
+  }
+
+  const combinedText = textEvents.join('');
+  logger.log(`Combined ${textEvents.length} text event(s) into ${combinedText.length} characters`);
+  return combinedText;
+};
+
+/**
+ * Unwrap a JSON envelope object { result: ... }, returning the inner value.
+ * If no envelope is present, returns the object as-is.
+ * Shared helper used by unwrapAgentResult and execute-agent's raw output handling.
+ */
+export const unwrapEnvelope = (parsed) =>
+  parsed?.result !== undefined ? parsed.result : parsed;
+
+/**
+ * Unwrap agent result from potential JSON envelope and parse nested JSON.
+ * Handles Claude CLI's envelope format { result: "..." } and nested JSON strings.
+ * @throws {Error} If output is not valid JSON after all parsing attempts
+ */
+export const unwrapAgentResult = (processedOutput, logger) => {
+  const parsed = parseStringResult(processedOutput, logger);
+
+  if (typeof parsed === 'string') {
+    throw createError({
+      ...ParseError,
+      message: `Agent output is not valid JSON: ${parsed.slice(0, 100)}`,
+      outputPreview: parsed.slice(0, 100)
+    });
+  }
+
+  const unwrapped = unwrapEnvelope(parsed);
+
+  logger.log(`Parsed result type: ${typeof unwrapped}`);
+  if (typeof unwrapped === 'string') {
+    logger.log('Result is string, attempting to parse as JSON');
+    return parseStringResult(unwrapped, logger);
+  }
+
+  return unwrapped;
+};
diff --git a/source/agent-parser.test.js b/source/agent-parser.test.js
new file mode 100644
index 00000000..2aa41da6
--- /dev/null
+++ b/source/agent-parser.test.js
@@ -0,0 +1,391 @@
+import { describe, test } from 'vitest';
+import { assert } from './vitest.js';
+import { Try } from './riteway.js';
+import {
+  parseStringResult,
+  parseOpenCodeNDJSON,
+  unwrapEnvelope,
+  unwrapAgentResult
+} from './agent-parser.js';
+
+const createMockLogger = () => {
+  const logs = [];
+  return {
+    log: (...args) => logs.push(args.join(' ')),
+    logs
+  };
+};
+
+describe('parseStringResult()', () => {
+  test('parses direct JSON when string starts with {', () => {
+    const logger = createMockLogger();
+    const input = '{"passed": true, "output": "test"}';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'JSON string starting with {',
+      should: 'parse as JSON object',
+      actual: JSON.stringify(result),
+      expected: '{"passed":true,"output":"test"}'
+    });
+
+    assert({
+      given: 'successful JSON parse',
+      should: 'log success message',
+      actual: logger.logs.some(log => log.includes('Successfully parsed string as JSON')),
+      expected: true
+    });
+  });
+
+  test('parses direct JSON when string starts with [', () => {
+    const logger = createMockLogger();
+    const input = '[{"id": 1}, {"id": 2}]';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'JSON string starting with [',
+      should: 'parse as JSON array',
+      actual: result.length,
+      expected: 2
+    });
+  });
+
+  test('extracts markdown-wrapped JSON when direct parse fails', () => {
+    const logger = createMockLogger();
+    const input = '```json\n{"passed": true, "output": "test"}\n```';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'markdown-wrapped JSON',
+      should: 'extract and parse JSON',
+      actual: JSON.stringify(result),
+      expected: '{"passed":true,"output":"test"}'
+    });
+
+    assert({
+      given: 'markdown extraction',
+      should: 'log markdown extraction',
+      actual: logger.logs.some(log => log.includes('markdown-wrapped JSON')),
+      expected: true
+    });
+  });
+
+  test('extracts markdown-wrapped JSON without json language tag', () => {
+    const logger = createMockLogger();
+    const input = '```\n{"passed": true}\n```';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'markdown without json tag',
+      should: 'extract and parse JSON',
+      actual: result.passed,
+      expected: true
+    });
+  });
+
+  test('tries markdown extraction even if string starts with {', () => {
+    const logger = createMockLogger();
+    const input = '{ broken json ```json\n{"passed": true}\n```';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'malformed JSON with markdown fallback',
+      should: 'extract from markdown block',
+      actual: result.passed,
+      expected: true
+    });
+
+    assert({
+      given: 'fallback scenario',
+      should: 'log failed parse and markdown extraction',
+      actual: logger.logs.some(log => log.includes('trying markdown extraction')),
+      expected: true
+    });
+  });
+
+  test('returns plain text when no parsing succeeds', () => {
+    const logger = createMockLogger();
+    const input = 'This is just plain text with no JSON';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'plain text string',
+      should: 'return original string',
+      actual: result,
+      expected: input
+    });
+
+    assert({
+      given: 'no valid JSON',
+      should: 'log keeping as plain text',
+      actual: logger.logs.some(log => log.includes('keeping as plain text')),
+      expected: true
+    });
+  });
+
+  test('handles malformed markdown gracefully', () => {
+    const logger = createMockLogger();
+    const input = '```json\n{ broken: json }\n```';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'markdown with invalid JSON',
+      should: 'return original string',
+      actual: result,
+      expected: input
+    });
+
+    assert({
+      given: 'failed markdown parse',
+      should: 'log failure',
+      actual: logger.logs.some(log => log.includes('Failed to parse markdown content')),
+      expected: true
+    });
+  });
+
+  test('trims whitespace before parsing', () => {
+    const logger = createMockLogger();
+    const input = '  \n  {"passed": true}  \n  ';
+
+    const result = parseStringResult(input, logger);
+
+    assert({
+      given: 'JSON with surrounding whitespace',
+      should: 'parse successfully',
+      actual: result.passed,
+      expected: true
+    });
+
+    assert({
+      given: 'JSON with surrounding whitespace',
+      should: 'return parsed object matching trimmed input',
+      actual: result,
+      expected: { passed: true }
+    });
+  });
+});
+
+describe('parseOpenCodeNDJSON()', () => {
+  test('extracts text from single text event', () => {
+    const logger = createMockLogger();
+    const ndjson = '{"type":"step_start","timestamp":1770245956364}\n' +
+        '{"type":"text","part":{"text":"```json\\n{\\"status\\": \\"ok\\"}\\n```"}}\n' +
+        '{"type":"step_finish","timestamp":1770245956211}';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'NDJSON with single text event',
+      should: 'extract text content',
+      actual: result,
+      expected: '```json\n{"status": "ok"}\n```'
+    });
+
+    assert({
+      given: 'successful text extraction',
+      should: 'log found text event',
+      actual: logger.logs.some(log => log.includes('Found text event')),
+      expected: true
+    });
+  });
+
+  test('concatenates multiple text events', () => {
+    const logger = createMockLogger();
+    const ndjson = '{"type":"text","part":{"text":"Part 1"}}\n' +
+        '{"type":"text","part":{"text":" Part 2"}}\n' +
+        '{"type":"text","part":{"text":" Part 3"}}';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'NDJSON with multiple text events',
+      should: 'concatenate all text content',
+      actual: result,
+      expected: 'Part 1 Part 2 Part 3'
+    });
+  });
+
+  test('filters out non-text events', () => {
+    const logger = createMockLogger();
+    const ndjson = '{"type":"step_start","data":"ignored"}\n' +
+        '{"type":"text","part":{"text":"Hello"}}\n' +
+        '{"type":"step_finish","data":"ignored"}\n' +
+        '{"type":"text","part":{"text":" World"}}';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'NDJSON with mixed event types',
+      should: 'extract only text events',
+      actual: result,
+      expected: 'Hello World'
+    });
+  });
+
+  test('skips malformed JSON lines', () => {
+    const logger = createMockLogger();
+    const ndjson = '{invalid json}\n' +
+        '{"type":"text","part":{"text":"Valid text"}}\n' +
+        'not json at all';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'NDJSON with malformed lines',
+      should: 'skip invalid lines and process valid ones',
+      actual: result,
+      expected: 'Valid text'
+    });
+
+    assert({
+      given: 'malformed JSON',
+      should: 'log warning for failed parse',
+      actual: logger.logs.some(log => log.includes('Failed to parse NDJSON line')),
+      expected: true
+    });
+  });
+
+  test('throws error when no text events found', () => {
+    const logger = createMockLogger();
+    const ndjson = '{"type":"step_start","data":"no text here"}\n' +
+        '{"type":"step_finish","data":"still no text"}';
+
+    const error = Try(parseOpenCodeNDJSON, ndjson, logger);
+
+    assert({
+      given: 'NDJSON with no text events',
+      should: 'have ParseError name in cause',
+      actual: error?.cause?.name,
+      expected: 'ParseError'
+    });
+
+    assert({
+      given: 'NDJSON with no text events',
+      should: 'have NO_TEXT_EVENTS code in cause',
+      actual: error?.cause?.code,
+      expected: 'NO_TEXT_EVENTS'
+    });
+
+    assert({
+      given: 'NDJSON with no text events',
+      should: 'include ndjsonLength in cause',
+      actual: typeof error?.cause?.ndjsonLength === 'number',
+      expected: true
+    });
+
+    assert({
+      given: 'NDJSON with no text events',
+      should: 'include linesProcessed in cause',
+      actual: error?.cause?.linesProcessed,
+      expected: 2
+    });
+  });
+
+  test('handles empty lines in NDJSON', () => {
+    const logger = createMockLogger();
+    const ndjson = '\n\n{"type":"text","part":{"text":"Hello"}}\n\n\n{"type":"text","part":{"text":" World"}}\n\n';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'NDJSON with empty lines',
+      should: 'filter empty lines and process valid events',
+      actual: result,
+      expected: 'Hello World'
+    });
+  });
+
+  test('preserves markdown-wrapped JSON in text', () => {
+    const logger = createMockLogger();
+    const ndjson = '{"type":"text","part":{"text":"```json\\n{\\"passed\\":true}\\n```"}}';
+
+    const result = parseOpenCodeNDJSON(ndjson, logger);
+
+    assert({
+      given: 'text event with markdown-wrapped JSON',
+      should: 'preserve markdown formatting',
+      actual: result,
+      expected: '```json\n{"passed":true}\n```'
+    });
+  });
+});
+
+describe('unwrapEnvelope()', () => {
+  test.each([
+    ['object with result field', { result: { passed: true } }, { passed: true }],
+    ['object with result as string', { result: 'raw string' }, 'raw string'],
+    ['object with result as null', { result: null }, null],
+    ['object without result field', { passed: true, score: 80 }, { passed: true, score: 80 }],
+  ])('%s', (_, input, expected) => {
+    assert({
+      given: _,
+      should: 'return the unwrapped value',
+      actual: unwrapEnvelope(input),
+      expected
+    });
+  });
+});
+
+describe('unwrapAgentResult()', () => {
+  test('unwraps Claude envelope and returns parsed inner object', () => {
+    const logger = createMockLogger();
+    const envelope = JSON.stringify({ result: JSON.stringify({ passed: true, score: 90 }) });
+
+    const result = unwrapAgentResult(envelope, logger);
+
+    assert({
+      given: 'Claude CLI envelope wrapping a JSON string',
+      should: 'return the fully parsed inner object',
+      actual: result,
+      expected: { passed: true, score: 90 }
+    });
+  });
+
+  test('returns parsed object when no envelope present', () => {
+    const logger = createMockLogger();
+    const direct = JSON.stringify({ passed: false, score: 40 });
+
+    const result = unwrapAgentResult(direct, logger);
+
+    assert({
+      given: 'direct JSON object (no envelope)',
+      should: 'return the parsed object',
+      actual: result,
+      expected: { passed: false, score: 40 }
+    });
+  });
+
+  test('throws ParseError when output is not valid JSON', () => {
+    const logger = createMockLogger();
+
+    const error = Try(unwrapAgentResult, 'plain text response', logger);
+
+    assert({
+      given: 'plain text that is not valid JSON',
+      should: 'throw Error with ParseError cause',
+      actual: error?.cause?.name,
+      expected: 'ParseError'
+    });
+  });
+
+  test('handles markdown-wrapped envelope', () => {
+    const logger = createMockLogger();
+    const markdownEnvelope = '```json\n{"result": {"passed": true}}\n```';
+
+    const result = unwrapAgentResult(markdownEnvelope, logger);
+
+    assert({
+      given: 'markdown-wrapped JSON with result envelope',
+      should: 'extract, unwrap and return the inner object',
+      actual: result,
+      expected: { passed: true }
+    });
+  });
+});
diff --git a/source/aggregation.js b/source/aggregation.js
new file mode 100644
index 00000000..6f8c0cb9
--- /dev/null
+++ b/source/aggregation.js
@@ -0,0 +1,82 @@
+import { createError } from 'error-causes';
+import { ValidationError, ParseError } from './ai-errors.js';
+import { aggregationParamsSchema } from './constants.js';
+
+/**
+ * Normalize a judge response (already parsed from TAP YAML) to ensure consistent
+ * structure with safe defaults for missing fields.
+ * @throws {Error} If judgeResponse is not an object (null, string, undefined, etc.)
+ */
+export const normalizeJudgment = ({ judgeResponse, requirement, runIndex, logger }) => {
+  if (typeof judgeResponse !== 'object' || judgeResponse === null) {
+    throw createError({
+      ...ParseError,
+      message: 'Judge returned non-object response',
+      code: 'JUDGE_INVALID_RESPONSE',
+      requirement,
+      runIndex,
+      rawResponse: judgeResponse
+    });
+  }
+
+  if (judgeResponse.actual === undefined || judgeResponse.expected === undefined) {
+    logger.log(`Warning: Judge response missing fields for "${requirement}" run ${runIndex + 1}`);
+  }
+
+  return {
+    passed: judgeResponse.passed === true,
+    actual: judgeResponse.actual ?? 'No actual provided',
+    expected: judgeResponse.expected ?? 'No expected provided',
+    score: Number.isFinite(judgeResponse.score) ? Math.max(0, Math.min(100, judgeResponse.score)) : 0
+  };
+};
+
+/**
+ * Aggregate results from per-assertion test runs.
+ * Each assertion is independently evaluated against the threshold.
+ * Overall pass requires all assertions to meet the threshold.
+ */
+export const aggregatePerAssertionResults = ({ perAssertionResults, threshold, runs }) => {
+  let validated;
+  try {
+    validated = aggregationParamsSchema.parse({ runs, threshold });
+  } catch (zodError) {
+    const issues = zodError.issues || [];
+    const messages = issues.map(issue =>
+      `${issue.path.join('.')}: ${issue.message}`
+    ).join('; ');
+
+    throw createError({
+      ...ValidationError,
+      message: `Invalid parameters for aggregatePerAssertionResults: ${messages}`,
+      code: 'INVALID_AGGREGATION_PARAMS',
+      runs,
+      threshold,
+      cause: zodError
+    });
+  }
+
+  const requiredPasses = Math.ceil((validated.runs * validated.threshold) / 100);
+
+  const assertions = perAssertionResults.map(({ requirement, runResults }) => {
+    const passCount = runResults.filter(r => r.passed).length;
+    const totalScore = runResults.reduce((sum, r) => sum + (r.score ?? 0), 0);
+    const averageScore = runResults.length > 0
+      ? Math.round((totalScore / runResults.length) * 100) / 100
+      : 0;
+
+    return {
+      requirement,
+      passed: passCount >= requiredPasses,
+      passCount,
+      totalRuns: runs,
+      averageScore,
+      runResults
+    };
+  });
+
+  return {
+    passed: assertions.every(a => a.passed),
+    assertions
+  };
+};
diff --git a/source/aggregation.test.js b/source/aggregation.test.js
new file mode 100644
index 00000000..616eb033
--- /dev/null
+++ b/source/aggregation.test.js
@@ -0,0 +1,463 @@
+import { describe, test, vi } from 'vitest';
+import { assert } from './vitest.js';
+import { Try } from './riteway.js';
+import {
+  normalizeJudgment,
+  aggregatePerAssertionResults
+} from './aggregation.js';
+
+describe('aggregatePerAssertionResults()', () => {
+  test('aggregates per-assertion results when all assertions pass', () => {
+    const perAssertionResults = [
+      {
+        requirement: 'Given simple addition, should add correctly',
+        runResults: [
+          { passed: true, output: 'ok' },
+          { passed: true, output: 'ok' }
+        ]
+      },
+      {
+        requirement: 'Given format, should output JSON',
+        runResults: [
+          { passed: true, output: 'ok' },
+          { passed: true, output: 'ok' }
+        ]
+      }
+    ];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults,
+      threshold: 75,
+      runs: 2
+    });
+
+    assert({
+      given: 'all assertions meeting threshold',
+      should: 'return passed: true',
+      actual: result.passed,
+      expected: true
+    });
+
+    assert({
+      given: 'two assertions',
+      should: 'return assertions array of length 2',
+      actual: result.assertions.length,
+      expected: 2
+    });
+
+    assert({
+      given: 'first assertion with all passes',
+      should: 'mark the assertion as passed',
+      actual: result.assertions[0].passed,
+      expected: true
+    });
+
+    assert({
+      given: 'first assertion with 2 passes',
+      should: 'report passCount 2',
+      actual: result.assertions[0].passCount,
+      expected: 2
+    });
+
+    assert({
+      given: 'first assertion requirement',
+      should: 'preserve the requirement',
+      actual: result.assertions[0].requirement,
+      expected: 'Given simple addition, should add correctly'
+    });
+  });
+
+  test('fails when any assertion does not meet threshold', () => {
+    const perAssertionResults = [
+      {
+        requirement: 'Given addition, should add correctly',
+        runResults: [
+          { passed: true, output: 'ok' },
+          { passed: true, output: 'ok' }
+        ]
+      },
+      {
+        requirement: 'Given format, should output JSON',
+        runResults: [
+          { passed: false, output: 'fail' },
+          { passed: false, output: 'fail' }
+        ]
+      }
+    ];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults,
+      threshold: 75,
+      runs: 2
+    });
+
+    assert({
+      given: 'one assertion failing threshold',
+      should: 'return passed: false',
+      actual: result.passed,
+      expected: false
+    });
+
+    assert({
+      given: 'the passing assertion',
+      should: 'mark it as passed',
+      actual: result.assertions[0].passed,
+      expected: true
+    });
+
+    assert({
+      given: 'the failing assertion',
+      should: 'mark it as failed',
+      actual: result.assertions[1].passed,
+      expected: false
+    });
+
+    assert({
+      given: 'the failing assertion',
+      should: 'have passCount 0',
+      actual: result.assertions[1].passCount,
+      expected: 0
+    });
+  });
+
+  test('includes per-assertion run results and totalRuns', () => {
+    const runResults = [
+      { passed: true, output: 'run 1' },
+      { passed: false, output: 'run 2' }
+    ];
+    const perAssertionResults = [{ requirement: 'test assertion', runResults }];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults,
+      threshold: 50,
+      runs: 2
+    });
+
+    assert({
+      given: 'per-assertion run results',
+      should: 'include run results in the assertion',
+      actual: result.assertions[0].runResults,
+      expected: runResults
+    });
+
+    assert({
+      given: 'per-assertion run results',
+      should: 'include totalRuns per assertion',
+      actual: result.assertions[0].totalRuns,
+      expected: 2
+    });
+  });
+
+  test('calculates averageScore from run results', () => {
+    const perAssertionResults = [
+      {
+        requirement: 'test with scores',
+        runResults: [
+          { passed: true, score: 85 },
+          { passed: true, score: 95 },
+          { passed: true, score: 90 }
+        ]
+      }
+    ];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults,
+      threshold: 75,
+      runs: 3
+    });
+
+    assert({
+      given: 'three runs with scores 85, 95, 90',
+      should: 'calculate average score of 90',
+      actual: result.assertions[0].averageScore,
+      expected: 90
+    });
+  });
+
+  test('defaults missing scores to 0 when calculating average', () => {
+    const perAssertionResults = [
+      {
+        requirement: 'test without scores',
+        runResults: [
+          { passed: true },
+          { passed: true }
+        ]
+      }
+    ];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults,
+      threshold: 75,
+      runs: 2
+    });
+
+    assert({
+      given: 'run results with no score fields',
+      should: 'report averageScore of 0',
+      actual: result.assertions[0].averageScore,
+      expected: 0
+    });
+  });
+
+  test('passes with empty assertions array (vacuous truth)', () => {
+    const result = aggregatePerAssertionResults({
+      perAssertionResults: [],
+      threshold: 75,
+      runs: 4
+    });
+
+    assert({
+      given: 'empty perAssertionResults array',
+      should: 'return passed: true (all zero assertions meet threshold)',
+      actual: result.passed,
+      expected: true
+    });
+
+    assert({
+      given: 'empty perAssertionResults array',
+      should: 'return empty assertions array',
+      actual: result.assertions,
+      expected: []
+    });
+  });
+
+  test.each([
+    ['4 runs, 75% threshold', 4, 75, 3, 3, true],
+    ['4 runs, 75% threshold', 4, 75, 2, 2, false],
+    ['5 runs, 75% threshold', 5, 75, 4, 4, true],
+    ['5 runs, 75% threshold', 5, 75, 3, 3, false],
+    ['10 runs, 80% threshold', 10, 80, 8, 8, true],
+    ['4 runs, 80% threshold', 4, 80, 4, 4, true],
+    ['4 runs, 80% threshold', 4, 80, 3, 3, false],
+  ])('applies threshold correctly: %s with %i passes', (_, runs, threshold, passCount, totalPasses, expectedPass) => {
+    const runResults = [
+      ...Array(passCount).fill({ passed: true, score: 100 }),
+      ...Array(runs - passCount).fill({ passed: false, score: 0 })
+    ];
+
+    const result = aggregatePerAssertionResults({
+      perAssertionResults: [{ requirement: 'test assertion', runResults }],
+      threshold,
+      runs
+    });
+
+    assert({
+      given: `${_} with ${passCount} of ${runs} passes`,
+      should: expectedPass ? 'pass the assertion' : 'fail the assertion',
+      actual: result.assertions[0].passed,
+      expected: expectedPass
+    });
+
+    assert({
+      given: `${_} with ${passCount} passes`,
+      should: `report passCount of ${totalPasses}`,
+      actual: result.assertions[0].passCount,
+      expected: totalPasses
+    });
+  });
+
+  test.each([
+    ['runs above maximum', { runs: 1001, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['zero runs', { runs: 0, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['negative runs', { runs: -1, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['non-integer runs', { runs: 1.5, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['NaN runs', { runs: NaN, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['threshold above maximum', { runs: 4, threshold: 150 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['negative threshold', { runs: 4, threshold: -10 }, 'INVALID_AGGREGATION_PARAMS'],
+    ['NaN threshold', { runs: 4, threshold: NaN }, 'INVALID_AGGREGATION_PARAMS'],
+  ])('throws ValidationError for %s', (_, { runs, threshold }, expectedCode) => {
+    const perAssertionResults = [
+      { requirement: 'test', runResults: [{ passed: true }] }
+    ];
+
+    const error = Try(aggregatePerAssertionResults, { perAssertionResults, threshold, runs });
+
+    assert({
+      given: _,
+      should: 'have ValidationError name in cause',
+      actual: error?.cause?.name,
+      expected: 'ValidationError'
+    });
+
+    assert({
+      given: _,
+      should: 'have correct error code in cause',
+      actual: error?.cause?.code,
+      expected: expectedCode
+    });
+  });
+});
+
+describe('normalizeJudgment()', () => {
+  const createMockLogger = () => ({ log: vi.fn() });
+
+  test('passes through complete valid input unchanged', () => {
+    const logger = createMockLogger();
+    const judgeResponse = { passed: true, actual: 'Result from agent', expected: 'Expected output', score: 85 };
+
+    const result = normalizeJudgment({ judgeResponse, requirement: 'test assertion', runIndex: 0, logger });
+
+    assert({
+      given: 'complete valid judgment with passed: true',
+      should: 'preserve passed as true',
+      actual: result.passed,
+      expected: true
+    });
+
+    assert({
+      given: 'complete valid judgment',
+      should: 'preserve actual value',
+      actual: result.actual,
+      expected: 'Result from agent'
+    });
+
+    assert({
+      given: 'complete valid judgment',
+      should: 'preserve expected value',
+      actual: result.expected,
+      expected: 'Expected output'
+    });
+
+    assert({
+      given: 'complete valid judgment with score 85',
+      should: 'preserve score value',
+      actual: result.score,
+      expected: 85
+    });
+  });
+
+  test('defaults passed to false when missing', () => {
+    const logger = createMockLogger();
+    const result = normalizeJudgment({
+      judgeResponse: { actual: 'Result', expected: 'Expected', score: 50 },
+      requirement: 'test',
+      runIndex: 0,
+      logger
+    });
+
+    assert({
+      given: 'judgment missing passed field',
+      should: 'default passed to false',
+      actual: result.passed,
+      expected: false
+    });
+  });
+
+  test('defaults missing actual and expected with warning log', () => {
+    const logger = createMockLogger();
+    const result = normalizeJudgment({
+      judgeResponse: { passed: true, score: 100 },
+      requirement: 'test assertion',
+      runIndex: 2,
+      logger
+    });
+
+    assert({
+      given: 'judgment missing actual',
+      should: 'default actual to "No actual provided"',
+      actual: result.actual,
+      expected: 'No actual provided'
+    });
+
+    assert({
+      given: 'judgment missing expected',
+      should: 'default expected to "No expected provided"',
+      actual: result.expected,
+      expected: 'No expected provided'
+    });
+
+    assert({
+      given: 'judgment missing actual and expected',
+      should: 'log warning with requirement and run number',
+      actual: logger.log.mock.calls[0][0],
+      expected: 'Warning: Judge response missing fields for "test assertion" run 3'
+    });
+  });
+
+  test.each([
+    ['score 150', 150, 100],
+    ['score -50', -50, 0],
+    ['NaN score', NaN, 0],
+  ])('normalizes %s correctly', (_, score, expected) => {
+    const logger = createMockLogger();
+    const result = normalizeJudgment({
+      judgeResponse: { passed: true, actual: 'Result', expected: 'Expected', score },
+      requirement: 'test',
+      runIndex: 0,
+      logger
+    });
+
+    assert({
+      given: `judgment with ${_}`,
+      should: `normalize score to ${expected}`,
+      actual: result.score,
+      expected
+    });
+  });
+
+  test('defaults missing score to 0', () => {
+    const logger = createMockLogger();
+    const result = normalizeJudgment({
+      judgeResponse: { passed: true, actual: 'Result', expected: 'Expected' },
+      requirement: 'test',
+      runIndex: 0,
+      logger
+    });
+
+    assert({
+      given: 'judgment missing score',
+      should: 'default to 0',
+      actual: result.score,
+      expected: 0
+    });
+  });
+
+  test.each([
+    ['null input', null],
+    ['string input', 'not an object'],
+    ['undefined input', undefined],
+  ])('throws ParseError for %s', (_, input) => {
+    const logger = createMockLogger();
+    const error = Try(normalizeJudgment, { judgeResponse: input, requirement: 'test assertion', runIndex: 1, logger });
+
+    assert({
+      given: _,
+      should: 'have ParseError name in cause',
+      actual: error?.cause?.name,
+      expected: 'ParseError'
+    });
+
+    assert({
+      given: _,
+      should: 'have JUDGE_INVALID_RESPONSE code in cause',
+      actual: error?.cause?.code,
+      expected: 'JUDGE_INVALID_RESPONSE'
+    });
+  });
+
+  test('includes requirement and runIndex in ParseError cause', () => {
+    const logger = createMockLogger();
+    const error = Try(normalizeJudgment, { judgeResponse: null, requirement: 'test assertion', runIndex: 1, logger });
+
+    assert({
+      given: 'null input',
+      should: 'include requirement in cause',
+      actual: error?.cause?.requirement,
+      expected: 'test assertion'
+    });
+
+    assert({
+      given: 'null input',
+      should: 'include runIndex in cause',
+      actual: error?.cause?.runIndex,
+      expected: 1
+    });
+
+    assert({
+      given: 'null input',
+      should: 'include rawResponse in cause',
+      actual: error?.cause?.rawResponse,
+      expected: null
+    });
+  });
+});
diff --git a/source/constants.js b/source/constants.js
index 71c17b6d..2624ee0c 100644
--- a/source/constants.js
+++ b/source/constants.js
@@ -47,7 +47,7 @@ export const agentSchema = z.enum(constraints.supportedAgents, {
   message: `agent must be one of: ${constraints.supportedAgents.join(', ')}`
 });
 
-export const calculateRequiredPassesSchema = z.object({
+export const aggregationParamsSchema = z.object({
   runs: runsSchema,
   threshold: thresholdSchema
 });
diff --git a/source/constants.test.js b/source/constants.test.js
index b68a15a9..ce80e926 100644
--- a/source/constants.test.js
+++ b/source/constants.test.js
@@ -8,7 +8,7 @@ import {
   concurrencySchema,
   timeoutSchema,
   agentSchema,
-  calculateRequiredPassesSchema,
+  aggregationParamsSchema,
   aiTestOptionsSchema
 } from './constants.js';
 
@@ -346,9 +346,9 @@ describe('constants module', () => {
     });
   });
 
-  describe('calculateRequiredPassesSchema', () => {
+  describe('aggregationParamsSchema', () => {
     test('validates complete object with defaults', () => {
-      const result = calculateRequiredPassesSchema.parse({
+      const result = aggregationParamsSchema.parse({
         runs: 5,
         threshold: 80
       });
@@ -362,7 +362,7 @@ describe('constants module', () => {
     });
 
     test('reports multiple validation errors', () => {
-      const result = calculateRequiredPassesSchema.safeParse({
+      const result = aggregationParamsSchema.safeParse({
         runs: -1,
         threshold: 150
       });
diff --git a/source/execute-agent.js b/source/execute-agent.js
new file mode 100644
index 00000000..a22d9c11
--- /dev/null
+++ b/source/execute-agent.js
@@ -0,0 +1,184 @@
+import { spawn } from 'child_process';
+import { createError } from 'error-causes';
+import { ParseError, TimeoutError, AgentProcessError } from './ai-errors.js';
+import { createDebugLogger } from './debug-logger.js';
+import { unwrapEnvelope, unwrapAgentResult } from './agent-parser.js';
+
+const maxOutputPreviewLength = 500;
+
+const withTimeout = (promise, ms, errorFactory) =>
+  Promise.race([
+    promise,
+    new Promise((_, reject) =>
+      setTimeout(() => reject(createError(errorFactory())), ms)
+    )
+  ]);
+
+const collectProcessOutput = (proc) =>
+  new Promise((resolve, reject) => {
+    let stdout = '';
+    let stderr = '';
+
+    proc.stdout.on('data', (data) => { stdout += data.toString(); });
+    proc.stderr.on('data', (data) => { stderr += data.toString(); });
+    proc.on('close', (code) => { resolve({ stdout, stderr, code }); });
+    proc.on('error', reject);
+  });
+
+/**
+ * Spawn an agent CLI subprocess and collect output.
+ * Logger is injected to avoid coupling: it is created once at executeAgent level.
+ */
+const spawnProcess = async ({ agentConfig, prompt, logger }) => {
+  const { command, args = [] } = agentConfig;
+  const allArgs = [...args, prompt];
+
+  logger.log('\nExecuting agent command:');
+  logger.command(command, args);
+  logger.log(`Prompt length: ${prompt.length} characters`);
+
+  try {
+    const proc = spawn(command, allArgs);
+    proc.stdin.end();
+    return await collectProcessOutput(proc);
+  } catch (err) {
+    throw createError({
+      ...AgentProcessError,
+      message: `Failed to spawn agent process: ${err.message}`,
+      command,
+      args: args.join(' '),
+      cause: err
+    });
+  }
+};
+
+/**
+ * Try to unwrap a JSON envelope { result: ... } from a raw string, returning the
+ * inner value as a string. Falls back to the original string if not JSON or no envelope.
+ */
+const unwrapRawOutput = (output) => {
+  if (!output.trim().startsWith('{')) return output;
+  try {
+    return unwrapEnvelope(JSON.parse(output));
+  } catch {
+    return output;
+  }
+};
+
+/**
+ * Process agent stdout: apply optional parseOutput preprocessing, then either
+ * return raw unwrapped string (rawOutput=true) or parse full JSON result.
+ */
+const processAgentOutput = ({ agentConfig, rawOutput, logger }) => ({ stdout }) => {
+  const { command, args = [], parseOutput } = agentConfig;
+
+  try {
+    const processedOutput = parseOutput ? parseOutput(stdout, logger) : stdout;
+
+    if (rawOutput) {
+      logger.log('Raw output requested - unwrapping JSON envelope');
+      const result = unwrapRawOutput(processedOutput);
+
+      if (typeof result !== 'string') {
+        throw createError({
+          ...ParseError,
+          message: `Raw output requested but result is not a string: ${typeof result}`,
+          resultType: typeof result
+        });
+      }
+
+      logger.log(`Returning raw output (${result.length} characters)`);
+      logger.flush();
+      return result;
+    }
+
+    const result = unwrapAgentResult(processedOutput, logger);
+    logger.result(result);
+    logger.flush();
+    return result;
+  } catch (err) {
+    const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout;
+    logger.log('JSON parsing failed:', err.message);
+    logger.flush();
+
+    throw createError({
+      ...ParseError,
+      message: `Failed to parse agent output as JSON: ${err.message}`,
+      code: 'AGENT_OUTPUT_PARSE_ERROR',
+      command,
+      args: args.join(' '),
+      stdoutPreview: truncatedStdout,
+      cause: err
+    });
+  }
+};
+
+const runAgentProcess = async ({ agentConfig, prompt, timeout, logger }) => {
+  const { command, args = [] } = agentConfig;
+
+  const { stdout, stderr, code } = await withTimeout(
+    spawnProcess({ agentConfig, prompt, logger }),
+    timeout,
+    () => ({
+      ...TimeoutError,
+      message: `Agent process timed out after ${timeout}ms. Command: ${command} ${args.join(' ')}`,
+      command,
+      args: args.join(' '),
+      timeout
+    })
+  );
+
+  logger.log(`Process exited with code: ${code}`);
+  logger.log(`Stdout length: ${stdout.length} characters`);
+  logger.log(`Stderr length: ${stderr.length} characters`);
+
+  if (code !== 0) {
+    const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout;
+    const truncatedStderr = stderr.length > maxOutputPreviewLength ? `${stderr.slice(0, maxOutputPreviewLength)}...` : stderr;
+
+    logger.log('Process failed with non-zero exit code');
+    logger.flush();
+
+    throw createError({
+      ...AgentProcessError,
+      message: `Agent process exited with code ${code}\n` +
+               `Command: ${command} ${args.join(' ')}\n` +
+               `Stderr: ${truncatedStderr}\n` +
+               `Stdout preview: ${truncatedStdout}`,
+      command,
+      args: args.join(' '),
+      exitCode: code,
+      stderr: truncatedStderr,
+      stdoutPreview: truncatedStdout
+    });
+  }
+
+  return { stdout };
+};
+
+/**
+ * Execute an agent CLI subprocess and return parsed JSON output or raw string.
+ * @param {Object} options
+ * @param {Object} options.agentConfig - Agent configuration
+ * @param {string} options.agentConfig.command - Command to execute
+ * @param {Array<string>} [options.agentConfig.args=[]] - Command arguments
+ * @param {Function} [options.agentConfig.parseOutput] - Optional stdout preprocessor
+ * @param {string} options.prompt - Prompt to send to the agent
+ * @param {number} [options.timeout=300000] - Timeout in ms (default: 5 minutes)
+ * @param {boolean} [options.debug=false] - Enable debug logging
+ * @param {string} [options.logFile] - Optional log file path for debug output
+ * @param {boolean} [options.rawOutput=false] - Return raw stdout string without JSON parsing
+ * @returns {Promise<Object|string>} Parsed JSON response or raw string if rawOutput=true
+ */
+export const executeAgent = async ({
+  agentConfig,
+  prompt,
+  timeout = 300000,
+  debug = false,
+  logFile,
+  rawOutput = false
+}) => {
+  const logger = createDebugLogger({ debug, logFile });
+  const processResult = await runAgentProcess({ agentConfig, prompt, timeout, logger });
+  return processAgentOutput({ agentConfig, rawOutput, logger })(processResult);
+};
diff --git a/source/execute-agent.test.js b/source/execute-agent.test.js
new file mode 100644
index 00000000..01b934a0
--- /dev/null
+++ b/source/execute-agent.test.js
@@ -0,0 +1,257 @@
+import { describe, test, vi, beforeEach } from 'vitest';
+import { assert } from './vitest.js';
+import { Try } from './riteway.js';
+
+vi.mock('child_process', () => ({
+  spawn: vi.fn()
+}));
+
+// Import after mock is registered
+const { spawn } = await import('child_process');
+const { executeAgent } = await import('./execute-agent.js');
+
+/**
+ * Build a mock child process that emits stdout/stderr data then closes.
+ * @param {Object} options
+ * @param {string} [options.stdout=''] - Data to emit on stdout
+ * @param {string} [options.stderr=''] - Data to emit on stderr
+ * @param {number} [options.exitCode=0] - Exit code for the close event
+ */
+const createMockProcess = ({ stdout = '', stderr = '', exitCode = 0 } = {}) => {
+  const listeners = { stdout: {}, stderr: {}, proc: {} };
+
+  const proc = {
+    stdout: {
+      on: (event, cb) => {
+        listeners.stdout[event] = cb;
+      }
+    },
+    stderr: {
+      on: (event, cb) => {
+        listeners.stderr[event] = cb;
+      }
+    },
+    stdin: { end: vi.fn() },
+    on: (event, cb) => {
+      listeners.proc[event] = cb;
+    }
+  };
+
+  // Emit events asynchronously after the next tick so all listeners are registered
+  setTimeout(() => {
+    if (stdout && listeners.stdout.data) listeners.stdout.data(stdout);
+    if (stderr && listeners.stderr.data) listeners.stderr.data(stderr);
+    if (listeners.proc.close) listeners.proc.close(exitCode);
+  }, 0);
+
+  return proc;
+};
+
+const agentConfig = {
+  command: 'claude',
+  args: ['-p', '--output-format', 'json', '--no-session-persistence']
+};
+
+describe('executeAgent()', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  test('returns parsed JSON from agent stdout', async () => {
+    const agentResponse = JSON.stringify({ passed: true, score: 90 });
+    spawn.mockReturnValue(createMockProcess({ stdout: agentResponse }));
+
+    const result = await executeAgent({
+      agentConfig,
+      prompt: 'test prompt'
+    });
+
+    assert({
+      given: 'agent stdout with valid JSON',
+      should: 'return parsed result object',
+      actual: result,
+      expected: { passed: true, score: 90 }
+    });
+  });
+
+  test('unwraps Claude CLI envelope and returns parsed inner result', async () => {
+    const innerResult = { passed: true, score: 85 };
+    const envelope = JSON.stringify({ result: JSON.stringify(innerResult) });
+    spawn.mockReturnValue(createMockProcess({ stdout: envelope }));
+
+    const result = await executeAgent({
+      agentConfig,
+      prompt: 'test prompt'
+    });
+
+    assert({
+      given: 'Claude CLI JSON envelope wrapping a stringified result',
+      should: 'unwrap and parse the inner result',
+      actual: result,
+      expected: innerResult
+    });
+  });
+
+  test('returns raw string when rawOutput is true', async () => {
+    const rawText = 'This is raw agent output';
+    spawn.mockReturnValue(createMockProcess({ stdout: rawText }));
+
+    const result = await executeAgent({
+      agentConfig,
+      prompt: 'test prompt',
+      rawOutput: true
+    });
+
+    assert({
+      given: 'rawOutput: true and plain text stdout',
+      should: 'return the raw string as-is',
+      actual: result,
+      expected: rawText
+    });
+  });
+
+  test('unwraps envelope when rawOutput is true', async () => {
+    const innerText = 'raw output from agent';
+    const envelope = JSON.stringify({ result: innerText });
+    spawn.mockReturnValue(createMockProcess({ stdout: envelope }));
+
+    const result = await executeAgent({
+      agentConfig,
+      prompt: 'test prompt',
+      rawOutput: true
+    });
+
+    assert({
+      given: 'rawOutput: true and JSON envelope wrapping a string',
+      should: 'unwrap and return the inner string',
+      actual: result,
+      expected: innerText
+    });
+  });
+
+  test('applies parseOutput preprocessor before parsing result', async () => {
+    const ndjsonOutput = '{"type":"text","part":{"text":"{\\"passed\\":true}"}}';
+    const parseOutput = vi.fn(() => '{"passed":true}');
+    spawn.mockReturnValue(createMockProcess({ stdout: ndjsonOutput }));
+
+    const result = await executeAgent({
+      agentConfig: { ...agentConfig, parseOutput },
+      prompt: 'test prompt'
+    });
+
+    assert({
+      given: 'agentConfig with parseOutput function',
+      should: 'call parseOutput exactly once',
+      actual: parseOutput.mock.calls.length,
+      expected: 1
+    });
+
+    assert({
+      given: 'agentConfig with parseOutput function',
+      should: 'pass the raw stdout as first argument',
+      actual: parseOutput.mock.calls[0][0],
+      expected: ndjsonOutput
+    });
+
+    assert({
+      given: 'parseOutput returns valid JSON',
+      should: 'return the parsed result',
+      actual: result,
+      expected: { passed: true }
+    });
+  });
+
+  test('throws AgentProcessError when exit code is non-zero', async () => {
+    spawn.mockReturnValue(createMockProcess({
+      stdout: '',
+      stderr: 'Permission denied',
+      exitCode: 1
+    }));
+
+    const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt' });
+
+    assert({
+      given: 'non-zero exit code from agent process',
+      should: 'throw Error with AgentProcessError cause',
+      actual: err?.cause?.name,
+      expected: 'AgentProcessError'
+    });
+
+    assert({
+      given: 'non-zero exit code',
+      should: 'have AGENT_PROCESS_FAILURE code',
+      actual: err?.cause?.code,
+      expected: 'AGENT_PROCESS_FAILURE'
+    });
+
+    assert({
+      given: 'non-zero exit code',
+      should: 'include exit code in cause',
+      actual: err?.cause?.exitCode,
+      expected: 1
+    });
+  });
+
+  test('throws TimeoutError when timeout is exceeded', async () => {
+    // Process that never closes
+    const proc = {
+      stdout: { on: vi.fn() },
+      stderr: { on: vi.fn() },
+      stdin: { end: vi.fn() },
+      on: vi.fn()
+    };
+    spawn.mockReturnValue(proc);
+
+    const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt', timeout: 1 });
+
+    assert({
+      given: 'agent process that exceeds timeout',
+      should: 'throw Error with TimeoutError cause',
+      actual: err?.cause?.name,
+      expected: 'TimeoutError'
+    });
+
+    assert({
+      given: 'timeout exceeded',
+      should: 'have AGENT_TIMEOUT code',
+      actual: err?.cause?.code,
+      expected: 'AGENT_TIMEOUT'
+    });
+  });
+
+  test('throws ParseError when stdout is not valid JSON (rawOutput: false)', async () => {
+    spawn.mockReturnValue(createMockProcess({ stdout: 'not valid json output' }));
+
+    const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt' });
+
+    assert({
+      given: 'stdout that is not valid JSON',
+      should: 'throw Error with ParseError cause',
+      actual: err?.cause?.name,
+      expected: 'ParseError'
+    });
+  });
+
+  test('spawns the agent with command, args, and prompt appended', async () => {
+    spawn.mockReturnValue(createMockProcess({ stdout: '{"ok":true}' }));
+
+    await executeAgent({
+      agentConfig,
+      prompt: 'my prompt'
+    });
+
+    assert({
+      given: 'valid agentConfig with command and args',
+      should: 'spawn with the command',
+      actual: spawn.mock.calls[0][0],
+      expected: 'claude'
+    });
+
+    assert({
+      given: 'valid agentConfig with command and args',
+      should: 'spawn with args including the prompt at the end',
+      actual: spawn.mock.calls[0][1],
+      expected: ['-p', '--output-format', 'json', '--no-session-persistence', 'my prompt']
+    });
+  });
+});
diff --git a/source/extraction-parser.js b/source/extraction-parser.js
new file mode 100644
index 00000000..799f37e2
--- /dev/null
+++ b/source/extraction-parser.js
@@ -0,0 +1,127 @@
+import { createError } from 'error-causes';
+import { ExtractionParseError, ExtractionValidationError } from './ai-errors.js';
+import { readFile } from 'fs/promises';
+import { resolve } from 'path';
+
+const assertionRequiredFields = ['id', 'requirement'];
+
+/**
+ * Resolve and read import files, concatenating their contents.
+ *
+ * SECURITY NOTE: Import paths are NOT validated for path traversal.
+ * This allows legitimate cross-project imports (e.g., shared prompt libraries).
+ * Test authors are responsible for not importing sensitive files (.env, credentials).
+ * See PR #394 remediation epic (Wave 1, Task 2) for design rationale.
+ */
+export const resolveImportPaths = async (importPaths, projectRoot, debug) => {
+  if (debug) {
+    console.error(`[DEBUG] Found ${importPaths.length} imports to resolve`);
+  }
+  const importedContents = await Promise.all(
+    importPaths.map(async importPath => {
+      const resolvedPath = resolve(projectRoot, importPath);
+      if (debug) {
+        console.error(`[DEBUG] Reading import: ${importPath} -> ${resolvedPath}`);
+      }
+      try {
+        return await readFile(resolvedPath, 'utf-8');
+      } catch (originalError) {
+        throw createError({
+          name: 'ValidationError',
+          message: `Failed to read imported prompt file: ${importPath}`,
+          code: 'PROMPT_READ_FAILED',
+          path: importPath,
+          resolvedPath,
+          cause: originalError
+        });
+      }
+    })
+  );
+  const result = importedContents.join('\n\n');
+  if (debug) {
+    console.error(`[DEBUG] Imported content length: ${result.length} characters`);
+  }
+  return result;
+};
+
+/** Extract JSON from markdown code fences if present. */
+export const extractJSONFromMarkdown = (str) => {
+  const match = str.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
+  return match ? match[1] : str;
+};
+
+/**
+ * Try to parse a string as JSON, extracting from markdown code fences if needed.
+ * @throws {Error} If parsing fails
+ */
+export const tryParseJSON = (str) => {
+  try {
+    return JSON.parse(extractJSONFromMarkdown(str));
+  } catch (originalError) {
+    throw createError({
+      ...ExtractionParseError,
+      rawInput: str,
+      cause: originalError
+    });
+  }
+};
+
+/**
+ * Parse and validate extraction output from the agent.
+ * Accepts either a raw JSON string or an already-parsed object.
+ * Handles markdown code fences if present.
+ */
+export const parseExtractionResult = (rawOutput) => {
+  const parsed = typeof rawOutput === 'string'
+    ? tryParseJSON(rawOutput)
+    : rawOutput;
+
+  if (typeof parsed !== 'object' || parsed === null) {
+    throw createError({
+      ...ExtractionValidationError,
+      message: 'Extraction result must be a JSON object',
+      rawOutput
+    });
+  }
+
+  if (parsed.userPrompt === undefined || parsed.userPrompt === null) {
+    throw createError({
+      ...ExtractionValidationError,
+      message: 'Extraction result is missing required field: userPrompt',
+      rawOutput
+    });
+  }
+
+  if (!Array.isArray(parsed.importPaths)) {
+    throw createError({
+      ...ExtractionValidationError,
+      message: 'Extraction result is missing required field: importPaths (must be an array)',
+      rawOutput
+    });
+  }
+
+  if (!Array.isArray(parsed.assertions)) {
+    throw createError({
+      ...ExtractionValidationError,
+      message: 'Extraction result is missing required field: assertions (must be an array)',
+      rawOutput
+    });
+  }
+
+  // for loop: early throw on first invalid item; index needed for error context
+  for (let i = 0; i < parsed.assertions.length; i++) {
+    for (const field of assertionRequiredFields) {
+      if (parsed.assertions[i][field] === undefined || parsed.assertions[i][field] === null) {
+        throw createError({
+          ...ExtractionValidationError,
+          message: `Assertion at index ${i} is missing required field: ${field}`,
+          assertionIndex: i,
+          missingField: field,
+          rawOutput
+        });
+      }
+    }
+  }
+
+  return parsed;
+};
diff --git a/source/extraction-parser.test.js b/source/extraction-parser.test.js
new file mode 100644
index 00000000..ba2dfb99
--- /dev/null
+++ b/source/extraction-parser.test.js
@@ -0,0 +1,226 @@
+import { describe, test } from 'vitest';
+import { assert } from './vitest.js';
+import { Try } from './riteway.js';
+import { parseExtractionResult } from './extraction-parser.js';
+
+describe('parseExtractionResult()', () => {
+  test('parses valid extraction result with required fields', () => {
+    const validOutput = JSON.stringify({
+      userPrompt: 'What is 2 + 2?',
+      importPaths: ['test.mdc'],
+      assertions: [
+        { id: 1, requirement: 'Given simple addition, should add correctly' },
+        { id: 2, requirement: 'Given format, should output JSON' }
+      ]
+    });
+
+    const result = parseExtractionResult(validOutput);
+
+    assert({
+      given: 'valid extraction result',
+      should: 'preserve the userPrompt field',
+      actual: result.userPrompt,
+      expected: 'What is 2 + 2?'
+    });
+
+    assert({
+      given: 'valid extraction result',
+      should: 'preserve the importPaths array',
+      actual: Array.isArray(result.importPaths),
+      expected: true
+    });
+
+    assert({
+      given: 'valid extraction result',
+      should: 'preserve importPaths values',
+      actual: result.importPaths[0],
+      expected: 'test.mdc'
+    });
+
+    assert({
+      given: 'valid extraction result',
+      should: 'preserve assertions array length',
+      actual: result.assertions.length,
+      expected: 2
+    });
+
+    assert({
+      given: 'valid extraction result',
+      should: 'preserve assertion requirement field',
+      actual: result.assertions[0].requirement,
+      expected: 'Given simple addition, should add correctly'
+    });
+  });
+
+  test('parses JSON wrapped in markdown code fences', () => {
+    const markdownWrapped = '```json\n{\n  "userPrompt": "test prompt",\n  "importPaths": [],\n  "assertions": [\n    {\n      "id": 1,\n      "requirement": "Given test, should pass"\n    }\n  ]\n}\n```';
+
+    const result = parseExtractionResult(markdownWrapped);
+
+    assert({
+      given: 'JSON wrapped in markdown code fences',
+      should: 'parse importPaths as an array',
+      actual: Array.isArray(result.importPaths),
+      expected: true
+    });
+
+    assert({
+      given: 'JSON wrapped in markdown code fences',
+      should: 'preserve the userPrompt field',
+      actual: result.userPrompt,
+      expected: 'test prompt'
+    });
+
+    assert({
+      given: 'JSON wrapped in markdown code fences',
+      should: 'preserve assertions array',
+      actual: result.assertions[0].requirement,
+      expected: 'Given test, should pass'
+    });
+  });
+
+  test('parses JSON with surrounding explanation text and markdown fences', () => {
+    const withExplanation = 'Here is the extraction result you requested:\n\n```json\n{\n  "userPrompt": "test prompt",\n  "importPaths": [],\n  "assertions": [\n    {\n      "id": 1,\n      "requirement": "Given test, should pass"\n    }\n  ]\n}\n```\n\nLet me know if you need more help.';
+
+    const result = parseExtractionResult(withExplanation);
+
+    assert({
+      given: 'JSON with explanation text and markdown fences',
+      should: 'extract and parse the JSON object',
+      actual: result.userPrompt,
+      expected: 'test prompt'
+    });
+
+    assert({
+      given: 'JSON with explanation text and markdown fences',
+      should: 'return the parsed content',
+      actual: result.assertions[0].requirement,
+      expected: 'Given test, should pass'
+    });
+  });
+
+  test('accepts an already-parsed object', () => {
+    const parsed = {
+      userPrompt: 'test prompt',
+      importPaths: [],
+      assertions: [{ id: 1, requirement: 'Given a test, should pass' }]
+    };
+
+    const result = parseExtractionResult(parsed);
+
+    assert({
+      given: 'an already-parsed object instead of a JSON string',
+      should: 'validate and return the object directly',
+      actual: result.userPrompt,
+      expected: 'test prompt'
+    });
+
+    assert({
+      given: 'an already-parsed object',
+      should: 'preserve the assertions',
+      actual: result.assertions[0].requirement,
+      expected: 'Given a test, should pass'
+    });
+  });
+
+  test('throws ExtractionParseError on malformed non-JSON input', () => {
+    const error = Try(parseExtractionResult, 'This is not JSON at all');
+
+    assert({
+      given: 'non-JSON input',
+      should: 'throw ExtractionParseError cause',
+      actual: error?.cause?.name,
+      expected: 'ExtractionParseError'
+    });
+
+    assert({
+      given: 'non-JSON input',
+      should: 'have EXTRACTION_PARSE_FAILURE code',
+      actual: error?.cause?.code,
+      expected: 'EXTRACTION_PARSE_FAILURE'
+    });
+
+    assert({
+      given: 'non-JSON input',
+      should: 'preserve original JSON SyntaxError as cause',
+      actual: error?.cause?.cause?.name,
+      expected: 'SyntaxError'
+    });
+  });
+
+  test('throws ExtractionValidationError when result has wrong structure', () => {
+    const error = Try(parseExtractionResult, JSON.stringify({ id: 1, description: 'test', prompt: 'test' }));
+
+    assert({
+      given: 'extraction result with invalid structure',
+      should: 'throw ExtractionValidationError cause',
+      actual: error?.cause?.name,
+      expected: 'ExtractionValidationError'
+    });
+
+    assert({
+      given: 'extraction result with invalid structure',
+      should: 'have EXTRACTION_VALIDATION_FAILURE code',
+      actual: error?.cause?.code,
+      expected: 'EXTRACTION_VALIDATION_FAILURE'
+    });
+  });
+
+  test.each([
+    [
+      'missing importPaths',
+      { userPrompt: 'test', assertions: [] },
+      'importPaths'
+    ],
+    [
+      'missing userPrompt',
+      { importPaths: [], assertions: [] },
+      'userPrompt'
+    ],
+    [
+      'missing assertions',
+      { userPrompt: 'test', importPaths: [] },
+      'assertions'
+    ],
+  ])('throws when %s is missing', (_, input, missingField) => {
+    const error = Try(parseExtractionResult, JSON.stringify(input));
+
+    assert({
+      given: `extraction result missing ${missingField}`,
+      should: 'throw ExtractionValidationError',
+      actual: error?.cause?.name,
+      expected: 'ExtractionValidationError'
+    });
+
+    assert({
+      given: `extraction result missing ${missingField}`,
+      should: 'have descriptive error message',
+      actual: error?.message?.includes(missingField),
+      expected: true
+    });
+  });
+
+  test('throws when assertion is missing required field', () => {
+    const missingAssertionFields = JSON.stringify({
+      userPrompt: 'test',
+      importPaths: [],
+      assertions: [{ id: 1 }]
+    });
+
+    const error = Try(parseExtractionResult, missingAssertionFields);
+
+    assert({
+      given: 'assertion missing the requirement field',
+      should: 'throw ExtractionValidationError',
+      actual: error?.cause?.name,
+      expected: 'ExtractionValidationError'
+    });
+
+    assert({
+      given: 'assertion missing the requirement field',
+      should: 'have error message indicating missing field',
+      actual: error?.message?.includes('requirement'),
+      expected: true
+    });
+  });
+});