From 1760471974a802aaab2a5a74bc4b22ecaa8e8410 Mon Sep 17 00:00:00 2001 From: Ian White Date: Wed, 18 Feb 2026 08:50:10 -0600 Subject: [PATCH 1/6] =?UTF-8?q?feat(ai):=20PR=203=20=E2=80=94=20parsers=20?= =?UTF-8?q?+=20execute-agent=20module?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add agent-parser, extraction-parser, aggregation, and execute-agent modules with full unit test coverage. - agent-parser: parseStringResult, parseOpenCodeNDJSON, unwrapEnvelope (new shared export), unwrapAgentResult. Shared unwrapEnvelope breaks duplication between agent-parser and execute-agent (WIP fix #9). - extraction-parser: parseExtractionResult with multi-strategy JSON parsing (direct, markdown fence, pre-parsed object), and resolveImportPaths for prompt file resolution. - aggregation: normalizeJudgment, calculateRequiredPasses, aggregatePerAssertionResults with Zod validation. - execute-agent: extracted from ai-runner.js to break the circular dependency (ai-runner ↔ test-extractor). Logger injected at executeAgent call site rather than created inside spawnProcess (WIP fix #8). Uses shared unwrapEnvelope from agent-parser. - Test files use test.each for all table-driven cases per convention. 164 tests pass, 0 lint errors, TypeScript checks pass. Co-authored-by: Cursor --- source/agent-parser.js | 108 +++++++ source/agent-parser.test.js | 408 ++++++++++++++++++++++++++ source/aggregation.js | 110 +++++++ source/aggregation.test.js | 488 +++++++++++++++++++++++++++++++ source/execute-agent.js | 182 ++++++++++++ source/execute-agent.test.js | 277 ++++++++++++++++++ source/extraction-parser.js | 127 ++++++++ source/extraction-parser.test.js | 226 ++++++++++++++ 8 files changed, 1926 insertions(+) create mode 100644 source/agent-parser.js create mode 100644 source/agent-parser.test.js create mode 100644 source/aggregation.js create mode 100644 source/aggregation.test.js create mode 100644 source/execute-agent.js create mode 100644 source/execute-agent.test.js create mode 100644 source/extraction-parser.js create mode 100644 source/extraction-parser.test.js diff --git a/source/agent-parser.js b/source/agent-parser.js new file mode 100644 index 00000000..38f41aaa --- /dev/null +++ b/source/agent-parser.js @@ -0,0 +1,108 @@ +import { createError } from 'error-causes'; +import { ParseError } from './ai-errors.js'; + +/** + * Parse a string result from an agent, attempting multiple strategies: + * 1. Direct JSON parse if string starts with { or [ + * 2. Extract and parse markdown-wrapped JSON (```json\n...\n```) + * 3. Keep as plain text if neither works + */ +export const parseStringResult = (result, logger) => { + const trimmed = result.trim(); + + if (trimmed.startsWith('{') || trimmed.startsWith('[')) { + try { + const parsed = JSON.parse(trimmed); + logger.log('Successfully parsed string as JSON'); + return parsed; + } catch { + logger.log('Direct JSON parse failed, trying markdown extraction'); + } + } + + const markdownMatch = result.match(/```(?:json)?\s*\n([\s\S]*?)\n```/); + if (markdownMatch) { + logger.log('Found markdown-wrapped JSON, extracting...'); + try { + const parsed = JSON.parse(markdownMatch[1]); + logger.log('Successfully parsed markdown-wrapped JSON'); + return parsed; + } catch { + logger.log('Failed to parse markdown content, keeping original string'); + } + } + + logger.log('String is not valid JSON, keeping as plain text'); + return result; +}; + +/** + * Parse OpenCode's NDJSON output, extracting and concatenating all "text" events. + */ +export const parseOpenCodeNDJSON = (ndjson, logger) => { + logger.log('Parsing OpenCode NDJSON output...'); + + const lines = ndjson.trim().split('\n').filter(line => line.trim()); + + const textEvents = lines.reduce((acc, line) => { + try { + const event = JSON.parse(line); + if (event.type === 'text' && event.part?.text) { + logger.log(`Found text event with ${event.part.text.length} characters`); + acc.push(event.part.text); + } + } catch (err) { + logger.log(`Warning: Failed to parse NDJSON line: ${err.message}`); + } + return acc; + }, []); + + if (textEvents.length === 0) { + throw createError({ + ...ParseError, + message: 'No text events found in OpenCode output', + code: 'NO_TEXT_EVENTS', + ndjsonLength: ndjson.length, + linesProcessed: lines.length + }); + } + + const combinedText = textEvents.join(''); + logger.log(`Combined ${textEvents.length} text event(s) into ${combinedText.length} characters`); + return combinedText; +}; + +/** + * Unwrap a JSON envelope object { result: ... }, returning the inner value. + * If no envelope is present, returns the object as-is. + * Shared helper used by unwrapAgentResult and execute-agent's raw output handling. + */ +export const unwrapEnvelope = (parsed) => + parsed?.result !== undefined ? parsed.result : parsed; + +/** + * Unwrap agent result from potential JSON envelope and parse nested JSON. + * Handles Claude CLI's envelope format { result: "..." } and nested JSON strings. + * @throws {Error} If output is not valid JSON after all parsing attempts + */ +export const unwrapAgentResult = (processedOutput, logger) => { + const parsed = parseStringResult(processedOutput, logger); + + if (typeof parsed === 'string') { + throw createError({ + ...ParseError, + message: `Agent output is not valid JSON: ${parsed.slice(0, 100)}`, + outputPreview: parsed.slice(0, 100) + }); + } + + const unwrapped = unwrapEnvelope(parsed); + + logger.log(`Parsed result type: ${typeof unwrapped}`); + if (typeof unwrapped === 'string') { + logger.log('Result is string, attempting to parse as JSON'); + return parseStringResult(unwrapped, logger); + } + + return unwrapped; +}; diff --git a/source/agent-parser.test.js b/source/agent-parser.test.js new file mode 100644 index 00000000..1eee4b18 --- /dev/null +++ b/source/agent-parser.test.js @@ -0,0 +1,408 @@ +import { describe, test } from 'vitest'; +import { assert } from './vitest.js'; +import { Try } from './riteway.js'; +import { + parseStringResult, + parseOpenCodeNDJSON, + unwrapEnvelope, + unwrapAgentResult +} from './agent-parser.js'; + +const createMockLogger = () => { + const logs = []; + return { + log: (...args) => logs.push(args.join(' ')), + logs + }; +}; + +describe('parseStringResult()', () => { + test('parses direct JSON when string starts with {', () => { + const logger = createMockLogger(); + const input = '{"passed": true, "output": "test"}'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'JSON string starting with {', + should: 'parse as JSON object', + actual: JSON.stringify(result), + expected: '{"passed":true,"output":"test"}' + }); + + assert({ + given: 'successful JSON parse', + should: 'log success message', + actual: logger.logs.some(log => log.includes('Successfully parsed string as JSON')), + expected: true + }); + }); + + test('parses direct JSON when string starts with [', () => { + const logger = createMockLogger(); + const input = '[{"id": 1}, {"id": 2}]'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'JSON string starting with [', + should: 'parse as JSON array', + actual: result.length, + expected: 2 + }); + }); + + test('extracts markdown-wrapped JSON when direct parse fails', () => { + const logger = createMockLogger(); + const input = '```json\n{"passed": true, "output": "test"}\n```'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'markdown-wrapped JSON', + should: 'extract and parse JSON', + actual: JSON.stringify(result), + expected: '{"passed":true,"output":"test"}' + }); + + assert({ + given: 'markdown extraction', + should: 'log markdown extraction', + actual: logger.logs.some(log => log.includes('markdown-wrapped JSON')), + expected: true + }); + }); + + test('extracts markdown-wrapped JSON without json language tag', () => { + const logger = createMockLogger(); + const input = '```\n{"passed": true}\n```'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'markdown without json tag', + should: 'extract and parse JSON', + actual: result.passed, + expected: true + }); + }); + + test('tries markdown extraction even if string starts with {', () => { + const logger = createMockLogger(); + const input = '{ broken json ```json\n{"passed": true}\n```'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'malformed JSON with markdown fallback', + should: 'extract from markdown block', + actual: result.passed, + expected: true + }); + + assert({ + given: 'fallback scenario', + should: 'log failed parse and markdown extraction', + actual: logger.logs.some(log => log.includes('trying markdown extraction')), + expected: true + }); + }); + + test('returns plain text when no parsing succeeds', () => { + const logger = createMockLogger(); + const input = 'This is just plain text with no JSON'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'plain text string', + should: 'return original string', + actual: result, + expected: input + }); + + assert({ + given: 'no valid JSON', + should: 'log keeping as plain text', + actual: logger.logs.some(log => log.includes('keeping as plain text')), + expected: true + }); + }); + + test('handles malformed markdown gracefully', () => { + const logger = createMockLogger(); + const input = '```json\n{ broken: json }\n```'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'markdown with invalid JSON', + should: 'return original string', + actual: result, + expected: input + }); + + assert({ + given: 'failed markdown parse', + should: 'log failure', + actual: logger.logs.some(log => log.includes('Failed to parse markdown content')), + expected: true + }); + }); + + test('trims whitespace before parsing', () => { + const logger = createMockLogger(); + const input = ' \n {"passed": true} \n '; + + const result = parseStringResult(input, logger); + + assert({ + given: 'JSON with surrounding whitespace', + should: 'parse successfully', + actual: result.passed, + expected: true + }); + + assert({ + given: 'JSON with surrounding whitespace', + should: 'return parsed object matching trimmed input', + actual: result, + expected: { passed: true } + }); + }); +}); + +describe('parseOpenCodeNDJSON()', () => { + test('extracts text from single text event', () => { + const logger = createMockLogger(); + const ndjson = '{"type":"step_start","timestamp":1770245956364}\n' + + '{"type":"text","part":{"text":"```json\\n{\\"status\\": \\"ok\\"}\\n```"}}\n' + + '{"type":"step_finish","timestamp":1770245956211}'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'NDJSON with single text event', + should: 'extract text content', + actual: result, + expected: '```json\n{"status": "ok"}\n```' + }); + + assert({ + given: 'successful text extraction', + should: 'log found text event', + actual: logger.logs.some(log => log.includes('Found text event')), + expected: true + }); + }); + + test('concatenates multiple text events', () => { + const logger = createMockLogger(); + const ndjson = '{"type":"text","part":{"text":"Part 1"}}\n' + + '{"type":"text","part":{"text":" Part 2"}}\n' + + '{"type":"text","part":{"text":" Part 3"}}'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'NDJSON with multiple text events', + should: 'concatenate all text content', + actual: result, + expected: 'Part 1 Part 2 Part 3' + }); + }); + + test('filters out non-text events', () => { + const logger = createMockLogger(); + const ndjson = '{"type":"step_start","data":"ignored"}\n' + + '{"type":"text","part":{"text":"Hello"}}\n' + + '{"type":"step_finish","data":"ignored"}\n' + + '{"type":"text","part":{"text":" World"}}'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'NDJSON with mixed event types', + should: 'extract only text events', + actual: result, + expected: 'Hello World' + }); + }); + + test('skips malformed JSON lines', () => { + const logger = createMockLogger(); + const ndjson = '{invalid json}\n' + + '{"type":"text","part":{"text":"Valid text"}}\n' + + 'not json at all'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'NDJSON with malformed lines', + should: 'skip invalid lines and process valid ones', + actual: result, + expected: 'Valid text' + }); + + assert({ + given: 'malformed JSON', + should: 'log warning for failed parse', + actual: logger.logs.some(log => log.includes('Failed to parse NDJSON line')), + expected: true + }); + }); + + test('throws error when no text events found', () => { + const logger = createMockLogger(); + const ndjson = '{"type":"step_start","data":"no text here"}\n' + + '{"type":"step_finish","data":"still no text"}'; + + const error = Try(parseOpenCodeNDJSON, ndjson, logger); + + assert({ + given: 'NDJSON with no text events', + should: 'throw Error with cause', + actual: error instanceof Error && error.cause !== undefined, + expected: true + }); + + assert({ + given: 'NDJSON with no text events', + should: 'have ParseError name in cause', + actual: error?.cause?.name, + expected: 'ParseError' + }); + + assert({ + given: 'NDJSON with no text events', + should: 'have NO_TEXT_EVENTS code in cause', + actual: error?.cause?.code, + expected: 'NO_TEXT_EVENTS' + }); + + assert({ + given: 'NDJSON with no text events', + should: 'include ndjsonLength in cause', + actual: typeof error?.cause?.ndjsonLength === 'number', + expected: true + }); + + assert({ + given: 'NDJSON with no text events', + should: 'include linesProcessed in cause', + actual: error?.cause?.linesProcessed, + expected: 2 + }); + }); + + test('handles empty lines in NDJSON', () => { + const logger = createMockLogger(); + const ndjson = '\n\n{"type":"text","part":{"text":"Hello"}}\n\n\n{"type":"text","part":{"text":" World"}}\n\n'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'NDJSON with empty lines', + should: 'filter empty lines and process valid events', + actual: result, + expected: 'Hello World' + }); + }); + + test('preserves markdown-wrapped JSON in text', () => { + const logger = createMockLogger(); + const ndjson = '{"type":"text","part":{"text":"```json\\n{\\"passed\\":true}\\n```"}}'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'text event with markdown-wrapped JSON', + should: 'preserve markdown formatting', + actual: result, + expected: '```json\n{"passed":true}\n```' + }); + }); +}); + +describe('unwrapEnvelope()', () => { + test.each([ + ['object with result field', { result: { passed: true } }, { passed: true }], + ['object with result as string', { result: 'raw string' }, 'raw string'], + ['object with result as null', { result: null }, null], + ['object without result field', { passed: true, score: 80 }, { passed: true, score: 80 }], + ])('%s', (_, input, expected) => { + assert({ + given: `parsed object: ${_}`, + should: 'return the unwrapped value', + actual: unwrapEnvelope(input), + expected + }); + }); + + test('returns object as-is when result key is absent', () => { + const input = { status: 'ok', data: [1, 2, 3] }; + assert({ + given: 'object with no result key', + should: 'return the original object', + actual: unwrapEnvelope(input), + expected: input + }); + }); +}); + +describe('unwrapAgentResult()', () => { + test('unwraps Claude envelope and returns parsed inner object', () => { + const logger = createMockLogger(); + const envelope = JSON.stringify({ result: JSON.stringify({ passed: true, score: 90 }) }); + + const result = unwrapAgentResult(envelope, logger); + + assert({ + given: 'Claude CLI envelope wrapping a JSON string', + should: 'return the fully parsed inner object', + actual: result, + expected: { passed: true, score: 90 } + }); + }); + + test('returns parsed object when no envelope present', () => { + const logger = createMockLogger(); + const direct = JSON.stringify({ passed: false, score: 40 }); + + const result = unwrapAgentResult(direct, logger); + + assert({ + given: 'direct JSON object (no envelope)', + should: 'return the parsed object', + actual: result, + expected: { passed: false, score: 40 } + }); + }); + + test('throws ParseError when output is not valid JSON', () => { + const logger = createMockLogger(); + + const error = Try(unwrapAgentResult, 'plain text response', logger); + + assert({ + given: 'plain text that is not valid JSON', + should: 'throw Error with ParseError cause', + actual: error?.cause?.name, + expected: 'ParseError' + }); + }); + + test('handles markdown-wrapped envelope', () => { + const logger = createMockLogger(); + const markdownEnvelope = '```json\n{"result": {"passed": true}}\n```'; + + const result = unwrapAgentResult(markdownEnvelope, logger); + + assert({ + given: 'markdown-wrapped JSON with result envelope', + should: 'extract, unwrap and return the inner object', + actual: result, + expected: { passed: true } + }); + }); +}); diff --git a/source/aggregation.js b/source/aggregation.js new file mode 100644 index 00000000..bb128d45 --- /dev/null +++ b/source/aggregation.js @@ -0,0 +1,110 @@ +import { createError } from 'error-causes'; +import { ValidationError, ParseError } from './ai-errors.js'; +import { + defaults, + calculateRequiredPassesSchema +} from './constants.js'; + +/** + * Normalize a judge response (already parsed from TAP YAML) to ensure consistent + * structure with safe defaults for missing fields. + * @throws {Error} If raw is not an object (null, string, undefined, etc.) + */ +export const normalizeJudgment = (raw, { requirement, runIndex, logger }) => { + if (typeof raw !== 'object' || raw === null) { + throw createError({ + ...ParseError, + message: 'Judge returned non-object response', + code: 'JUDGE_INVALID_RESPONSE', + requirement, + runIndex, + rawResponse: raw + }); + } + + if (raw?.actual === undefined || raw?.expected === undefined) { + logger.log(`Warning: Judge response missing fields for "${requirement}" run ${runIndex + 1}`); + } + + return { + passed: raw?.passed === true, + actual: raw?.actual ?? 'No actual provided', + expected: raw?.expected ?? 'No expected provided', + score: Number.isFinite(raw?.score) ? Math.max(0, Math.min(100, raw.score)) : 0 + }; +}; + +/** + * Calculate the number of passes required to meet the threshold. + * Uses ceiling to ensure threshold is met or exceeded. + * @throws {Error} If validation fails (non-integer runs, invalid threshold, etc.) + */ +export const calculateRequiredPasses = ({ runs = defaults.runs, threshold = defaults.threshold } = {}) => { + try { + const validated = calculateRequiredPassesSchema.parse({ runs, threshold }); + return Math.ceil((validated.runs * validated.threshold) / 100); + } catch (zodError) { + const issues = zodError.issues || []; + const messages = issues.map(issue => + `${issue.path.join('.')}: ${issue.message}` + ).join('; '); + + throw createError({ + ...ValidationError, + message: `Invalid parameters for calculateRequiredPasses: ${messages}`, + code: 'INVALID_CALCULATION_PARAMS', + runs, + threshold, + cause: zodError + }); + } +}; + +/** + * Aggregate results from per-assertion test runs. + * Each assertion is independently evaluated against the threshold. + * Overall pass requires all assertions to meet the threshold. + */ +export const aggregatePerAssertionResults = ({ perAssertionResults, threshold, runs }) => { + try { + calculateRequiredPassesSchema.parse({ runs, threshold }); + } catch (zodError) { + const issues = zodError.issues || []; + const messages = issues.map(issue => + `${issue.path.join('.')}: ${issue.message}` + ).join('; '); + + throw createError({ + ...ValidationError, + message: `Invalid parameters for aggregatePerAssertionResults: ${messages}`, + code: 'INVALID_AGGREGATION_PARAMS', + runs, + threshold, + cause: zodError + }); + } + + const requiredPasses = calculateRequiredPasses({ runs, threshold }); + + const assertions = perAssertionResults.map(({ requirement, runResults }) => { + const passCount = runResults.filter(r => r.passed).length; + const totalScore = runResults.reduce((sum, r) => sum + (r.score ?? 0), 0); + const averageScore = runResults.length > 0 + ? Math.round((totalScore / runResults.length) * 100) / 100 + : 0; + + return { + requirement, + passed: passCount >= requiredPasses, + passCount, + totalRuns: runs, + averageScore, + runResults + }; + }); + + return { + passed: assertions.every(a => a.passed), + assertions + }; +}; diff --git a/source/aggregation.test.js b/source/aggregation.test.js new file mode 100644 index 00000000..3e45f979 --- /dev/null +++ b/source/aggregation.test.js @@ -0,0 +1,488 @@ +import { describe, test, vi } from 'vitest'; +import { assert } from './vitest.js'; +import { Try } from './riteway.js'; +import { + normalizeJudgment, + calculateRequiredPasses, + aggregatePerAssertionResults +} from './aggregation.js'; + +describe('calculateRequiredPasses()', () => { + test.each([ + ['4 runs, 75% threshold', { runs: 4, threshold: 75 }, 3], + ['5 runs, 75% threshold', { runs: 5, threshold: 75 }, 4], + ['10 runs, 80% threshold', { runs: 10, threshold: 80 }, 8], + ['4 runs, 80% threshold', { runs: 4, threshold: 80 }, 4], + ])('%s requires correct pass count', (_, options, expected) => { + assert({ + given: _, + should: `require ${expected} passes`, + actual: calculateRequiredPasses(options), + expected + }); + }); + + test('uses default values when called with no arguments', () => { + assert({ + given: 'no arguments', + should: 'use defaults (4 runs, 75% threshold) requiring 3 passes', + actual: calculateRequiredPasses(), + expected: 3 + }); + }); + + test.each([ + ['zero runs', { runs: 0, threshold: 75 }, 'runs must be at least 1'], + ['negative runs', { runs: -1, threshold: 75 }, 'runs must be at least 1'], + ['non-integer runs', { runs: 1.5, threshold: 75 }, 'runs must be an integer'], + ['NaN runs', { runs: NaN, threshold: 75 }, 'expected number, received NaN'], + ])('throws ValidationError for %s', (_, options, expectedMessage) => { + const error = Try(calculateRequiredPasses, options); + + assert({ + given: _, + should: 'have ValidationError in cause', + actual: error?.cause?.name, + expected: 'ValidationError' + }); + + assert({ + given: _, + should: 'have INVALID_CALCULATION_PARAMS code', + actual: error?.cause?.code, + expected: 'INVALID_CALCULATION_PARAMS' + }); + + assert({ + given: _, + should: 'include descriptive message', + actual: error?.message?.includes(expectedMessage), + expected: true + }); + }); + + test.each([ + ['threshold above 100', { runs: 4, threshold: 150 }, 'threshold must be at most 100'], + ['negative threshold', { runs: 4, threshold: -10 }, 'threshold must be at least 0'], + ['NaN threshold', { runs: 4, threshold: NaN }, 'expected number, received NaN'], + ])('throws ValidationError for %s', (_, options, expectedMessage) => { + const error = Try(calculateRequiredPasses, options); + + assert({ + given: _, + should: 'have ValidationError in cause', + actual: error?.cause?.name, + expected: 'ValidationError' + }); + + assert({ + given: _, + should: 'have INVALID_CALCULATION_PARAMS code', + actual: error?.cause?.code, + expected: 'INVALID_CALCULATION_PARAMS' + }); + + assert({ + given: _, + should: 'include descriptive message', + actual: error?.message?.includes(expectedMessage), + expected: true + }); + }); +}); + +describe('aggregatePerAssertionResults()', () => { + test('aggregates per-assertion results when all assertions pass', () => { + const perAssertionResults = [ + { + requirement: 'Given simple addition, should add correctly', + runResults: [ + { passed: true, output: 'ok' }, + { passed: true, output: 'ok' } + ] + }, + { + requirement: 'Given format, should output JSON', + runResults: [ + { passed: true, output: 'ok' }, + { passed: true, output: 'ok' } + ] + } + ]; + + const result = aggregatePerAssertionResults({ + perAssertionResults, + threshold: 75, + runs: 2 + }); + + assert({ + given: 'all assertions meeting threshold', + should: 'return passed: true', + actual: result.passed, + expected: true + }); + + assert({ + given: 'two assertions', + should: 'return assertions array of length 2', + actual: result.assertions.length, + expected: 2 + }); + + assert({ + given: 'first assertion with all passes', + should: 'mark the assertion as passed', + actual: result.assertions[0].passed, + expected: true + }); + + assert({ + given: 'first assertion with 2 passes', + should: 'report passCount 2', + actual: result.assertions[0].passCount, + expected: 2 + }); + + assert({ + given: 'first assertion requirement', + should: 'preserve the requirement', + actual: result.assertions[0].requirement, + expected: 'Given simple addition, should add correctly' + }); + }); + + test('fails when any assertion does not meet threshold', () => { + const perAssertionResults = [ + { + requirement: 'Given addition, should add correctly', + runResults: [ + { passed: true, output: 'ok' }, + { passed: true, output: 'ok' } + ] + }, + { + requirement: 'Given format, should output JSON', + runResults: [ + { passed: false, output: 'fail' }, + { passed: false, output: 'fail' } + ] + } + ]; + + const result = aggregatePerAssertionResults({ + perAssertionResults, + threshold: 75, + runs: 2 + }); + + assert({ + given: 'one assertion failing threshold', + should: 'return passed: false', + actual: result.passed, + expected: false + }); + + assert({ + given: 'the passing assertion', + should: 'mark it as passed', + actual: result.assertions[0].passed, + expected: true + }); + + assert({ + given: 'the failing assertion', + should: 'mark it as failed', + actual: result.assertions[1].passed, + expected: false + }); + + assert({ + given: 'the failing assertion', + should: 'have passCount 0', + actual: result.assertions[1].passCount, + expected: 0 + }); + }); + + test('includes per-assertion run results and totalRuns', () => { + const runResults = [ + { passed: true, output: 'run 1' }, + { passed: false, output: 'run 2' } + ]; + const perAssertionResults = [{ requirement: 'test assertion', runResults }]; + + const result = aggregatePerAssertionResults({ + perAssertionResults, + threshold: 50, + runs: 2 + }); + + assert({ + given: 'per-assertion run results', + should: 'include run results in the assertion', + actual: result.assertions[0].runResults, + expected: runResults + }); + + assert({ + given: 'per-assertion run results', + should: 'include totalRuns per assertion', + actual: result.assertions[0].totalRuns, + expected: 2 + }); + }); + + test('calculates averageScore from run results', () => { + const perAssertionResults = [ + { + requirement: 'test with scores', + runResults: [ + { passed: true, score: 85 }, + { passed: true, score: 95 }, + { passed: true, score: 90 } + ] + } + ]; + + const result = aggregatePerAssertionResults({ + perAssertionResults, + threshold: 75, + runs: 3 + }); + + assert({ + given: 'three runs with scores 85, 95, 90', + should: 'calculate average score of 90', + actual: result.assertions[0].averageScore, + expected: 90 + }); + }); + + test('defaults missing scores to 0 when calculating average', () => { + const perAssertionResults = [ + { + requirement: 'test without scores', + runResults: [ + { passed: true }, + { passed: true } + ] + } + ]; + + const result = aggregatePerAssertionResults({ + perAssertionResults, + threshold: 75, + runs: 2 + }); + + assert({ + given: 'run results with no score fields', + should: 'report averageScore of 0', + actual: result.assertions[0].averageScore, + expected: 0 + }); + }); + + test.each([ + ['runs above maximum', { runs: 1001, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], + ['threshold above maximum', { runs: 4, threshold: 150 }, 'INVALID_AGGREGATION_PARAMS'], + ])('throws ValidationError for %s', (_, { runs, threshold }, expectedCode) => { + const perAssertionResults = [ + { requirement: 'test', runResults: [{ passed: true }] } + ]; + + const error = Try(() => + aggregatePerAssertionResults({ perAssertionResults, threshold, runs }) + ); + + assert({ + given: _, + should: 'throw validation error', + actual: error instanceof Error, + expected: true + }); + + assert({ + given: _, + should: 'have correct error code', + actual: error?.cause?.code, + expected: expectedCode + }); + }); +}); + +describe('normalizeJudgment()', () => { + const createMockLogger = () => ({ log: vi.fn() }); + + test('is exported as a function', () => { + assert({ + given: 'aggregation module', + should: 'export normalizeJudgment', + actual: typeof normalizeJudgment, + expected: 'function' + }); + }); + + test('passes through complete valid input unchanged', () => { + const logger = createMockLogger(); + const raw = { passed: true, actual: 'Result from agent', expected: 'Expected output', score: 85 }; + + const result = normalizeJudgment(raw, { requirement: 'test assertion', runIndex: 0, logger }); + + assert({ + given: 'complete valid judgment with passed: true', + should: 'preserve passed as true', + actual: result.passed, + expected: true + }); + + assert({ + given: 'complete valid judgment', + should: 'preserve actual value', + actual: result.actual, + expected: 'Result from agent' + }); + + assert({ + given: 'complete valid judgment', + should: 'preserve expected value', + actual: result.expected, + expected: 'Expected output' + }); + + assert({ + given: 'complete valid judgment with score 85', + should: 'preserve score value', + actual: result.score, + expected: 85 + }); + }); + + test('defaults passed to false when missing', () => { + const logger = createMockLogger(); + const result = normalizeJudgment( + { actual: 'Result', expected: 'Expected', score: 50 }, + { requirement: 'test', runIndex: 0, logger } + ); + + assert({ + given: 'judgment missing passed field', + should: 'default passed to false', + actual: result.passed, + expected: false + }); + }); + + test('defaults missing actual and expected with warning log', () => { + const logger = createMockLogger(); + const result = normalizeJudgment( + { passed: true, score: 100 }, + { requirement: 'test assertion', runIndex: 2, logger } + ); + + assert({ + given: 'judgment missing actual', + should: 'default actual to "No actual provided"', + actual: result.actual, + expected: 'No actual provided' + }); + + assert({ + given: 'judgment missing expected', + should: 'default expected to "No expected provided"', + actual: result.expected, + expected: 'No expected provided' + }); + + assert({ + given: 'judgment missing actual and expected', + should: 'log warning with requirement and run number', + actual: logger.log.mock.calls[0][0], + expected: 'Warning: Judge response missing fields for "test assertion" run 3' + }); + }); + + test.each([ + ['score 150', 150, 100], + ['score -50', -50, 0], + ['NaN score', NaN, 0], + ])('normalizes %s correctly', (_, score, expected) => { + const logger = createMockLogger(); + const result = normalizeJudgment( + { passed: true, actual: 'Result', expected: 'Expected', score }, + { requirement: 'test', runIndex: 0, logger } + ); + + assert({ + given: `judgment with ${_}`, + should: `normalize score to ${expected}`, + actual: result.score, + expected + }); + }); + + test('defaults missing score to 0', () => { + const logger = createMockLogger(); + const result = normalizeJudgment( + { passed: true, actual: 'Result', expected: 'Expected' }, + { requirement: 'test', runIndex: 0, logger } + ); + + assert({ + given: 'judgment missing score', + should: 'default to 0', + actual: result.score, + expected: 0 + }); + }); + + test.each([ + ['null input', null], + ['string input', 'not an object'], + ['undefined input', undefined], + ])('throws ParseError for %s', (_, input) => { + const logger = createMockLogger(); + const error = Try(normalizeJudgment, input, { requirement: 'test assertion', runIndex: 1, logger }); + + assert({ + given: _, + should: 'have ParseError name in cause', + actual: error?.cause?.name, + expected: 'ParseError' + }); + + assert({ + given: _, + should: 'have JUDGE_INVALID_RESPONSE code in cause', + actual: error?.cause?.code, + expected: 'JUDGE_INVALID_RESPONSE' + }); + }); + + test('includes requirement and runIndex in ParseError cause', () => { + const logger = createMockLogger(); + const error = Try(normalizeJudgment, null, { requirement: 'test assertion', runIndex: 1, logger }); + + assert({ + given: 'null input', + should: 'include requirement in cause', + actual: error?.cause?.requirement, + expected: 'test assertion' + }); + + assert({ + given: 'null input', + should: 'include runIndex in cause', + actual: error?.cause?.runIndex, + expected: 1 + }); + + assert({ + given: 'null input', + should: 'include rawResponse in cause', + actual: error?.cause?.rawResponse, + expected: null + }); + }); +}); diff --git a/source/execute-agent.js b/source/execute-agent.js new file mode 100644 index 00000000..62ec2949 --- /dev/null +++ b/source/execute-agent.js @@ -0,0 +1,182 @@ +import { spawn } from 'child_process'; +import { createError } from 'error-causes'; +import { ParseError, TimeoutError, AgentProcessError } from './ai-errors.js'; +import { createDebugLogger } from './debug-logger.js'; +import { unwrapEnvelope, unwrapAgentResult } from './agent-parser.js'; + +const withTimeout = (promise, ms, errorFactory) => + Promise.race([ + promise, + new Promise((_, reject) => + setTimeout(() => reject(createError(errorFactory())), ms) + ) + ]); + +const collectProcessOutput = (proc) => + new Promise((resolve, reject) => { + let stdout = ''; + let stderr = ''; + + proc.stdout.on('data', (data) => { stdout += data.toString(); }); + proc.stderr.on('data', (data) => { stderr += data.toString(); }); + proc.on('close', (code) => { resolve({ stdout, stderr, code }); }); + proc.on('error', reject); + }); + +/** + * Spawn an agent CLI subprocess and collect output. + * Logger is injected to avoid coupling: it is created once at executeAgent level. + */ +const spawnProcess = async ({ agentConfig, prompt, logger }) => { + const { command, args = [] } = agentConfig; + const allArgs = [...args, prompt]; + + logger.log('\nExecuting agent command:'); + logger.command(command, args); + logger.log(`Prompt length: ${prompt.length} characters`); + + try { + const proc = spawn(command, allArgs); + proc.stdin.end(); + return await collectProcessOutput(proc); + } catch (err) { + throw createError({ + ...AgentProcessError, + message: `Failed to spawn agent process: ${err.message}`, + command, + args: args.join(' '), + cause: err + }); + } +}; + +/** + * Try to unwrap a JSON envelope { result: ... } from a raw string, returning the + * inner value as a string. Falls back to the original string if not JSON or no envelope. + */ +const unwrapRawOutput = (output) => { + if (!output.trim().startsWith('{')) return output; + try { + return unwrapEnvelope(JSON.parse(output)); + } catch { + return output; + } +}; + +/** + * Process agent stdout: apply optional parseOutput preprocessing, then either + * return raw unwrapped string (rawOutput=true) or parse full JSON result. + */ +const processAgentOutput = ({ agentConfig, rawOutput, logger }) => ({ stdout }) => { + const { command, args = [], parseOutput } = agentConfig; + + try { + const processedOutput = parseOutput ? parseOutput(stdout, logger) : stdout; + + if (rawOutput) { + logger.log('Raw output requested - unwrapping JSON envelope'); + const result = unwrapRawOutput(processedOutput); + + if (typeof result !== 'string') { + throw createError({ + ...ParseError, + message: `Raw output requested but result is not a string: ${typeof result}`, + resultType: typeof result + }); + } + + logger.log(`Returning raw output (${result.length} characters)`); + logger.flush(); + return result; + } + + const result = unwrapAgentResult(processedOutput, logger); + logger.result(result); + logger.flush(); + return result; + } catch (err) { + const truncatedStdout = stdout.length > 500 ? `${stdout.slice(0, 500)}...` : stdout; + logger.log('JSON parsing failed:', err.message); + logger.flush(); + + throw createError({ + ...ParseError, + message: `Failed to parse agent output as JSON: ${err.message}`, + code: 'AGENT_OUTPUT_PARSE_ERROR', + command, + args: args.join(' '), + stdoutPreview: truncatedStdout, + cause: err + }); + } +}; + +const runAgentProcess = async ({ agentConfig, prompt, timeout, logger }) => { + const { command, args = [] } = agentConfig; + + const { stdout, stderr, code } = await withTimeout( + spawnProcess({ agentConfig, prompt, logger }), + timeout, + () => ({ + ...TimeoutError, + message: `Agent process timed out after ${timeout}ms. Command: ${command} ${args.join(' ')}`, + command, + args: args.join(' '), + timeout + }) + ); + + logger.log(`Process exited with code: ${code}`); + logger.log(`Stdout length: ${stdout.length} characters`); + logger.log(`Stderr length: ${stderr.length} characters`); + + if (code !== 0) { + const truncatedStdout = stdout.length > 500 ? `${stdout.slice(0, 500)}...` : stdout; + const truncatedStderr = stderr.length > 500 ? `${stderr.slice(0, 500)}...` : stderr; + + logger.log('Process failed with non-zero exit code'); + logger.flush(); + + throw createError({ + ...AgentProcessError, + message: `Agent process exited with code ${code}\n` + + `Command: ${command} ${args.join(' ')}\n` + + `Stderr: ${truncatedStderr}\n` + + `Stdout preview: ${truncatedStdout}`, + command, + args: args.join(' '), + exitCode: code, + stderr: truncatedStderr, + stdoutPreview: truncatedStdout + }); + } + + return { stdout }; +}; + +/** + * Execute an agent CLI subprocess and return parsed JSON output or raw string. + * @param {Object} options + * @param {Object} options.agentConfig - Agent configuration + * @param {string} options.agentConfig.command - Command to execute + * @param {Array} [options.agentConfig.args=[]] - Command arguments + * @param {Function} [options.agentConfig.parseOutput] - Optional stdout preprocessor + * @param {string} options.prompt - Prompt to send to the agent + * @param {number} [options.timeout=300000] - Timeout in ms (default: 5 minutes) + * @param {boolean} [options.debug=false] - Enable debug logging + * @param {string} [options.logFile] - Optional log file path for debug output + * @param {boolean} [options.rawOutput=false] - Return raw stdout string without JSON parsing + * @returns {Promise} Parsed JSON response or raw string if rawOutput=true + */ +export const executeAgent = async ({ + agentConfig, + prompt, + timeout = 300000, + debug = false, + logFile, + rawOutput = false +}) => { + const logger = createDebugLogger({ debug, logFile }); + const processResult = await runAgentProcess({ agentConfig, prompt, timeout, logger }); + return processAgentOutput({ agentConfig, rawOutput, logger })(processResult); +}; diff --git a/source/execute-agent.test.js b/source/execute-agent.test.js new file mode 100644 index 00000000..90a5fb23 --- /dev/null +++ b/source/execute-agent.test.js @@ -0,0 +1,277 @@ +import { describe, test, vi, beforeEach } from 'vitest'; +import { assert } from './vitest.js'; + +vi.mock('child_process', () => ({ + spawn: vi.fn() +})); + +// Import after mock is registered +const { spawn } = await import('child_process'); +const { executeAgent } = await import('./execute-agent.js'); + +/** + * Build a mock child process that emits stdout/stderr data then closes. + * @param {Object} options + * @param {string} [options.stdout=''] - Data to emit on stdout + * @param {string} [options.stderr=''] - Data to emit on stderr + * @param {number} [options.exitCode=0] - Exit code for the close event + */ +const createMockProcess = ({ stdout = '', stderr = '', exitCode = 0 } = {}) => { + const listeners = { stdout: {}, stderr: {}, proc: {} }; + + const proc = { + stdout: { + on: (event, cb) => { + listeners.stdout[event] = cb; + } + }, + stderr: { + on: (event, cb) => { + listeners.stderr[event] = cb; + } + }, + stdin: { end: vi.fn() }, + on: (event, cb) => { + listeners.proc[event] = cb; + } + }; + + // Emit events asynchronously after the next tick so all listeners are registered + setTimeout(() => { + if (stdout && listeners.stdout.data) listeners.stdout.data(stdout); + if (stderr && listeners.stderr.data) listeners.stderr.data(stderr); + if (listeners.proc.close) listeners.proc.close(exitCode); + }, 0); + + return proc; +}; + +const agentConfig = { + command: 'claude', + args: ['-p', '--output-format', 'json', '--no-session-persistence'] +}; + +describe('executeAgent()', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + test('returns parsed JSON from agent stdout', async () => { + const agentResponse = JSON.stringify({ passed: true, score: 90 }); + spawn.mockReturnValue(createMockProcess({ stdout: agentResponse })); + + const result = await executeAgent({ + agentConfig, + prompt: 'test prompt' + }); + + assert({ + given: 'agent stdout with valid JSON', + should: 'return parsed result object', + actual: result, + expected: { passed: true, score: 90 } + }); + }); + + test('unwraps Claude CLI envelope and returns parsed inner result', async () => { + const innerResult = { passed: true, score: 85 }; + const envelope = JSON.stringify({ result: JSON.stringify(innerResult) }); + spawn.mockReturnValue(createMockProcess({ stdout: envelope })); + + const result = await executeAgent({ + agentConfig, + prompt: 'test prompt' + }); + + assert({ + given: 'Claude CLI JSON envelope wrapping a stringified result', + should: 'unwrap and parse the inner result', + actual: result, + expected: innerResult + }); + }); + + test('returns raw string when rawOutput is true', async () => { + const rawText = 'This is raw agent output'; + spawn.mockReturnValue(createMockProcess({ stdout: rawText })); + + const result = await executeAgent({ + agentConfig, + prompt: 'test prompt', + rawOutput: true + }); + + assert({ + given: 'rawOutput: true and plain text stdout', + should: 'return the raw string as-is', + actual: result, + expected: rawText + }); + }); + + test('unwraps envelope when rawOutput is true', async () => { + const innerText = 'raw output from agent'; + const envelope = JSON.stringify({ result: innerText }); + spawn.mockReturnValue(createMockProcess({ stdout: envelope })); + + const result = await executeAgent({ + agentConfig, + prompt: 'test prompt', + rawOutput: true + }); + + assert({ + given: 'rawOutput: true and JSON envelope wrapping a string', + should: 'unwrap and return the inner string', + actual: result, + expected: innerText + }); + }); + + test('applies parseOutput preprocessor before parsing result', async () => { + const ndjsonOutput = '{"type":"text","part":{"text":"{\\"passed\\":true}"}}'; + const parseOutput = vi.fn(() => '{"passed":true}'); + spawn.mockReturnValue(createMockProcess({ stdout: ndjsonOutput })); + + const result = await executeAgent({ + agentConfig: { ...agentConfig, parseOutput }, + prompt: 'test prompt' + }); + + assert({ + given: 'agentConfig with parseOutput function', + should: 'call parseOutput with stdout', + actual: parseOutput.mock.calls.length, + expected: 1 + }); + + assert({ + given: 'parseOutput returns valid JSON', + should: 'return the parsed result', + actual: result, + expected: { passed: true } + }); + }); + + test('throws AgentProcessError when exit code is non-zero', async () => { + spawn.mockReturnValue(createMockProcess({ + stdout: '', + stderr: 'Permission denied', + exitCode: 1 + })); + + try { + await executeAgent({ agentConfig, prompt: 'test prompt' }); + assert({ + given: 'non-zero exit code', + should: 'have thrown an error', + actual: false, + expected: true + }); + } catch (err) { + assert({ + given: 'non-zero exit code from agent process', + should: 'throw Error with AgentProcessError cause', + actual: err?.cause?.name, + expected: 'AgentProcessError' + }); + + assert({ + given: 'non-zero exit code', + should: 'have AGENT_PROCESS_FAILURE code', + actual: err?.cause?.code, + expected: 'AGENT_PROCESS_FAILURE' + }); + + assert({ + given: 'non-zero exit code', + should: 'include exit code in cause', + actual: err?.cause?.exitCode, + expected: 1 + }); + } + }); + + test('throws TimeoutError when timeout is exceeded', async () => { + // Process that never closes + const proc = { + stdout: { on: vi.fn() }, + stderr: { on: vi.fn() }, + stdin: { end: vi.fn() }, + on: vi.fn() + }; + spawn.mockReturnValue(proc); + + try { + await executeAgent({ + agentConfig, + prompt: 'test prompt', + timeout: 1 + }); + assert({ + given: 'timeout exceeded', + should: 'have thrown an error', + actual: false, + expected: true + }); + } catch (err) { + assert({ + given: 'agent process that exceeds timeout', + should: 'throw Error with TimeoutError cause', + actual: err?.cause?.name, + expected: 'TimeoutError' + }); + + assert({ + given: 'timeout exceeded', + should: 'have AGENT_TIMEOUT code', + actual: err?.cause?.code, + expected: 'AGENT_TIMEOUT' + }); + } + }); + + test('throws ParseError when stdout is not valid JSON (rawOutput: false)', async () => { + spawn.mockReturnValue(createMockProcess({ stdout: 'not valid json output' })); + + try { + await executeAgent({ agentConfig, prompt: 'test prompt' }); + assert({ + given: 'invalid JSON stdout', + should: 'have thrown an error', + actual: false, + expected: true + }); + } catch (err) { + assert({ + given: 'stdout that is not valid JSON', + should: 'throw Error with ParseError cause', + actual: err?.cause?.name, + expected: 'ParseError' + }); + } + }); + + test('spawns the agent with command, args, and prompt appended', async () => { + spawn.mockReturnValue(createMockProcess({ stdout: '{"ok":true}' })); + + await executeAgent({ + agentConfig, + prompt: 'my prompt' + }); + + assert({ + given: 'valid agentConfig with command and args', + should: 'spawn with the command', + actual: spawn.mock.calls[0][0], + expected: 'claude' + }); + + assert({ + given: 'valid agentConfig with command and args', + should: 'spawn with args including the prompt at the end', + actual: spawn.mock.calls[0][1], + expected: ['-p', '--output-format', 'json', '--no-session-persistence', 'my prompt'] + }); + }); +}); diff --git a/source/extraction-parser.js b/source/extraction-parser.js new file mode 100644 index 00000000..799f37e2 --- /dev/null +++ b/source/extraction-parser.js @@ -0,0 +1,127 @@ +import { createError } from 'error-causes'; +import { ExtractionParseError, ExtractionValidationError } from './ai-errors.js'; +import { readFile } from 'fs/promises'; +import { resolve } from 'path'; + +const assertionRequiredFields = ['id', 'requirement']; + +/** + * Resolve and read import files, concatenating their contents. + * + * SECURITY NOTE: Import paths are NOT validated for path traversal. + * This allows legitimate cross-project imports (e.g., shared prompt libraries). + * Test authors are responsible for not importing sensitive files (.env, credentials). + * See PR #394 remediation epic (Wave 1, Task 2) for design rationale. + */ +export const resolveImportPaths = async (importPaths, projectRoot, debug) => { + if (debug) { + console.error(`[DEBUG] Found ${importPaths.length} imports to resolve`); + } + const importedContents = await Promise.all( + importPaths.map(async importPath => { + const resolvedPath = resolve(projectRoot, importPath); + if (debug) { + console.error(`[DEBUG] Reading import: ${importPath} -> ${resolvedPath}`); + } + try { + return await readFile(resolvedPath, 'utf-8'); + } catch (originalError) { + throw createError({ + name: 'ValidationError', + message: `Failed to read imported prompt file: ${importPath}`, + code: 'PROMPT_READ_FAILED', + path: importPath, + resolvedPath, + cause: originalError + }); + } + }) + ); + const result = importedContents.join('\n\n'); + if (debug) { + console.error(`[DEBUG] Imported content length: ${result.length} characters`); + } + return result; +}; + +/** Extract JSON from markdown code fences if present. */ +export const extractJSONFromMarkdown = (str) => { + const match = str.match(/```(?:json)?\s*\n([\s\S]*?)\n```/); + return match ? match[1] : str; +}; + +/** + * Try to parse a string as JSON, extracting from markdown code fences if needed. + * @throws {Error} If parsing fails + */ +export const tryParseJSON = (str) => { + try { + return JSON.parse(extractJSONFromMarkdown(str)); + } catch (originalError) { + throw createError({ + ...ExtractionParseError, + rawInput: str, + cause: originalError + }); + } +}; + +/** + * Parse and validate extraction output from the agent. + * Accepts either a raw JSON string or an already-parsed object. + * Handles markdown code fences if present. + */ +export const parseExtractionResult = (rawOutput) => { + const parsed = typeof rawOutput === 'string' + ? tryParseJSON(rawOutput) + : rawOutput; + + if (typeof parsed !== 'object' || parsed === null) { + throw createError({ + ...ExtractionValidationError, + message: 'Extraction result must be a JSON object', + rawOutput + }); + } + + if (parsed.userPrompt === undefined || parsed.userPrompt === null) { + throw createError({ + ...ExtractionValidationError, + message: 'Extraction result is missing required field: userPrompt', + rawOutput + }); + } + + if (!Array.isArray(parsed.importPaths)) { + throw createError({ + ...ExtractionValidationError, + message: 'Extraction result is missing required field: importPaths (must be an array)', + rawOutput + }); + } + + if (!Array.isArray(parsed.assertions)) { + throw createError({ + ...ExtractionValidationError, + message: 'Extraction result is missing required field: assertions (must be an array)', + rawOutput + }); + } + + // for loop: early throw on first invalid item; index needed for error context + for (let i = 0; i < parsed.assertions.length; i++) { + for (const field of assertionRequiredFields) { + if (parsed.assertions[i][field] === undefined || parsed.assertions[i][field] === null) { + throw createError({ + ...ExtractionValidationError, + message: `Assertion at index ${i} is missing required field: ${field}`, + assertionIndex: i, + missingField: field, + rawOutput + }); + } + } + } + + return parsed; +}; diff --git a/source/extraction-parser.test.js b/source/extraction-parser.test.js new file mode 100644 index 00000000..1d2f69c2 --- /dev/null +++ b/source/extraction-parser.test.js @@ -0,0 +1,226 @@ +import { describe, test } from 'vitest'; +import { assert } from './vitest.js'; +import { Try } from './riteway.js'; +import { parseExtractionResult } from './extraction-parser.js'; + +describe('parseExtractionResult()', () => { + test('parses valid extraction result with required fields', () => { + const validOutput = JSON.stringify({ + userPrompt: 'What is 2 + 2?', + importPaths: ['test.mdc'], + assertions: [ + { id: 1, requirement: 'Given simple addition, should add correctly' }, + { id: 2, requirement: 'Given format, should output JSON' } + ] + }); + + const result = parseExtractionResult(validOutput); + + assert({ + given: 'valid extraction result', + should: 'preserve the userPrompt field', + actual: result.userPrompt, + expected: 'What is 2 + 2?' + }); + + assert({ + given: 'valid extraction result', + should: 'preserve the importPaths array', + actual: Array.isArray(result.importPaths), + expected: true + }); + + assert({ + given: 'valid extraction result', + should: 'preserve importPaths values', + actual: result.importPaths[0], + expected: 'test.mdc' + }); + + assert({ + given: 'valid extraction result', + should: 'preserve assertions array length', + actual: result.assertions.length, + expected: 2 + }); + + assert({ + given: 'valid extraction result', + should: 'preserve assertion requirement field', + actual: result.assertions[0].requirement, + expected: 'Given simple addition, should add correctly' + }); + }); + + test('parses JSON wrapped in markdown code fences', () => { + const markdownWrapped = '```json\n{\n "userPrompt": "test prompt",\n "importPaths": [],\n "assertions": [\n {\n "id": 1,\n "requirement": "Given test, should pass"\n }\n ]\n}\n```'; + + const result = parseExtractionResult(markdownWrapped); + + assert({ + given: 'JSON wrapped in markdown code fences', + should: 'extract and parse the JSON object', + actual: typeof result === 'object' && result !== null, + expected: true + }); + + assert({ + given: 'JSON wrapped in markdown code fences', + should: 'preserve the userPrompt field', + actual: result.userPrompt, + expected: 'test prompt' + }); + + assert({ + given: 'JSON wrapped in markdown code fences', + should: 'preserve assertions array', + actual: result.assertions[0].requirement, + expected: 'Given test, should pass' + }); + }); + + test('parses JSON with surrounding explanation text and markdown fences', () => { + const withExplanation = 'Here is the extraction result you requested:\n\n```json\n{\n "userPrompt": "test prompt",\n "importPaths": [],\n "assertions": [\n {\n "id": 1,\n "requirement": "Given test, should pass"\n }\n ]\n}\n```\n\nLet me know if you need more help.'; + + const result = parseExtractionResult(withExplanation); + + assert({ + given: 'JSON with explanation text and markdown fences', + should: 'extract and parse the JSON object', + actual: typeof result === 'object' && result !== null, + expected: true + }); + + assert({ + given: 'JSON with explanation text and markdown fences', + should: 'return the parsed content', + actual: result.assertions[0].requirement, + expected: 'Given test, should pass' + }); + }); + + test('accepts an already-parsed object', () => { + const parsed = { + userPrompt: 'test prompt', + importPaths: [], + assertions: [{ id: 1, requirement: 'Given a test, should pass' }] + }; + + const result = parseExtractionResult(parsed); + + assert({ + given: 'an already-parsed object instead of a JSON string', + should: 'validate and return the object directly', + actual: result.userPrompt, + expected: 'test prompt' + }); + + assert({ + given: 'an already-parsed object', + should: 'preserve the assertions', + actual: result.assertions[0].requirement, + expected: 'Given a test, should pass' + }); + }); + + test('throws ExtractionParseError on malformed non-JSON input', () => { + const error = Try(parseExtractionResult, 'This is not JSON at all'); + + assert({ + given: 'non-JSON input', + should: 'throw ExtractionParseError cause', + actual: error?.cause?.name, + expected: 'ExtractionParseError' + }); + + assert({ + given: 'non-JSON input', + should: 'have EXTRACTION_PARSE_FAILURE code', + actual: error?.cause?.code, + expected: 'EXTRACTION_PARSE_FAILURE' + }); + + assert({ + given: 'non-JSON input', + should: 'preserve original parse error as cause', + actual: error?.cause?.cause !== undefined, + expected: true + }); + }); + + test('throws ExtractionValidationError when result has wrong structure', () => { + const error = Try(parseExtractionResult, JSON.stringify({ id: 1, description: 'test', prompt: 'test' })); + + assert({ + given: 'extraction result with invalid structure', + should: 'throw ExtractionValidationError cause', + actual: error?.cause?.name, + expected: 'ExtractionValidationError' + }); + + assert({ + given: 'extraction result with invalid structure', + should: 'have EXTRACTION_VALIDATION_FAILURE code', + actual: error?.cause?.code, + expected: 'EXTRACTION_VALIDATION_FAILURE' + }); + }); + + test.each([ + [ + 'missing importPaths', + { userPrompt: 'test', assertions: [] }, + 'importPaths' + ], + [ + 'missing userPrompt', + { importPaths: [], assertions: [] }, + 'userPrompt' + ], + [ + 'missing assertions', + { userPrompt: 'test', importPaths: [] }, + 'assertions' + ], + ])('throws when %s is missing', (_, input, missingField) => { + const error = Try(parseExtractionResult, JSON.stringify(input)); + + assert({ + given: `extraction result missing ${missingField}`, + should: 'throw ExtractionValidationError', + actual: error?.cause?.name, + expected: 'ExtractionValidationError' + }); + + assert({ + given: `extraction result missing ${missingField}`, + should: 'have descriptive error message', + actual: error?.message?.includes(missingField), + expected: true + }); + }); + + test('throws when assertion is missing required field', () => { + const missingAssertionFields = JSON.stringify({ + userPrompt: 'test', + importPaths: [], + assertions: [{ id: 1 }] + }); + + const error = Try(parseExtractionResult, missingAssertionFields); + + assert({ + given: 'assertion missing the requirement field', + should: 'throw ExtractionValidationError', + actual: error?.cause?.name, + expected: 'ExtractionValidationError' + }); + + assert({ + given: 'assertion missing the requirement field', + should: 'have error message indicating missing field', + actual: error?.message?.includes('requirement'), + expected: true + }); + }); +}); From 41172a3abae4eb9412f7b0c78673fe45b816b6f1 Mon Sep 17 00:00:00 2001 From: Ian White Date: Wed, 18 Feb 2026 09:06:05 -0600 Subject: [PATCH 2/6] fix(ai): address PR 3 code review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - aggregation.js: validate once in aggregatePerAssertionResults — capture the Zod-validated result and compute Math.ceil inline, eliminating the redundant second schema parse inside calculateRequiredPasses - aggregation.js: remove misleading optional chaining (raw?.passed etc.) after the null-guard throw; use plain property access - agent-parser.js: replace acc.push() with [...acc, text] in reduce accumulator to prefer immutability per JS style guide - agent-parser.test.js: drop redundant "parsed object:" prefix from unwrapEnvelope test.each given fields; remove duplicate standalone "no result key" test that overlapped with test.each row - aggregation.test.js: remove redundant export-existence assertion for normalizeJudgment; add empty perAssertionResults edge case (vacuous truth — every() on [] returns true) - execute-agent.test.js: strengthen parseOutput test to verify stdout and logger are threaded through as expected (documents WIP fix #8) Co-authored-by: Cursor --- source/agent-parser.js | 2 +- source/agent-parser.test.js | 12 +----------- source/aggregation.js | 15 ++++++++------- source/aggregation.test.js | 31 ++++++++++++++++++++++--------- source/execute-agent.test.js | 16 +++++++++++++++- 5 files changed, 47 insertions(+), 29 deletions(-) diff --git a/source/agent-parser.js b/source/agent-parser.js index 38f41aaa..3960224d 100644 --- a/source/agent-parser.js +++ b/source/agent-parser.js @@ -49,7 +49,7 @@ export const parseOpenCodeNDJSON = (ndjson, logger) => { const event = JSON.parse(line); if (event.type === 'text' && event.part?.text) { logger.log(`Found text event with ${event.part.text.length} characters`); - acc.push(event.part.text); + return [...acc, event.part.text]; } } catch (err) { logger.log(`Warning: Failed to parse NDJSON line: ${err.message}`); diff --git a/source/agent-parser.test.js b/source/agent-parser.test.js index 1eee4b18..f222c4ed 100644 --- a/source/agent-parser.test.js +++ b/source/agent-parser.test.js @@ -332,22 +332,12 @@ describe('unwrapEnvelope()', () => { ['object without result field', { passed: true, score: 80 }, { passed: true, score: 80 }], ])('%s', (_, input, expected) => { assert({ - given: `parsed object: ${_}`, + given: _, should: 'return the unwrapped value', actual: unwrapEnvelope(input), expected }); }); - - test('returns object as-is when result key is absent', () => { - const input = { status: 'ok', data: [1, 2, 3] }; - assert({ - given: 'object with no result key', - should: 'return the original object', - actual: unwrapEnvelope(input), - expected: input - }); - }); }); describe('unwrapAgentResult()', () => { diff --git a/source/aggregation.js b/source/aggregation.js index bb128d45..d4928d8a 100644 --- a/source/aggregation.js +++ b/source/aggregation.js @@ -22,15 +22,15 @@ export const normalizeJudgment = (raw, { requirement, runIndex, logger }) => { }); } - if (raw?.actual === undefined || raw?.expected === undefined) { + if (raw.actual === undefined || raw.expected === undefined) { logger.log(`Warning: Judge response missing fields for "${requirement}" run ${runIndex + 1}`); } return { - passed: raw?.passed === true, - actual: raw?.actual ?? 'No actual provided', - expected: raw?.expected ?? 'No expected provided', - score: Number.isFinite(raw?.score) ? Math.max(0, Math.min(100, raw.score)) : 0 + passed: raw.passed === true, + actual: raw.actual ?? 'No actual provided', + expected: raw.expected ?? 'No expected provided', + score: Number.isFinite(raw.score) ? Math.max(0, Math.min(100, raw.score)) : 0 }; }; @@ -66,8 +66,9 @@ export const calculateRequiredPasses = ({ runs = defaults.runs, threshold = defa * Overall pass requires all assertions to meet the threshold. */ export const aggregatePerAssertionResults = ({ perAssertionResults, threshold, runs }) => { + let validated; try { - calculateRequiredPassesSchema.parse({ runs, threshold }); + validated = calculateRequiredPassesSchema.parse({ runs, threshold }); } catch (zodError) { const issues = zodError.issues || []; const messages = issues.map(issue => @@ -84,7 +85,7 @@ export const aggregatePerAssertionResults = ({ perAssertionResults, threshold, r }); } - const requiredPasses = calculateRequiredPasses({ runs, threshold }); + const requiredPasses = Math.ceil((validated.runs * validated.threshold) / 100); const assertions = perAssertionResults.map(({ requirement, runResults }) => { const passCount = runResults.filter(r => r.passed).length; diff --git a/source/aggregation.test.js b/source/aggregation.test.js index 3e45f979..961a6d87 100644 --- a/source/aggregation.test.js +++ b/source/aggregation.test.js @@ -284,6 +284,28 @@ describe('aggregatePerAssertionResults()', () => { }); }); + test('passes with empty assertions array (vacuous truth)', () => { + const result = aggregatePerAssertionResults({ + perAssertionResults: [], + threshold: 75, + runs: 4 + }); + + assert({ + given: 'empty perAssertionResults array', + should: 'return passed: true (all zero assertions meet threshold)', + actual: result.passed, + expected: true + }); + + assert({ + given: 'empty perAssertionResults array', + should: 'return empty assertions array', + actual: result.assertions, + expected: [] + }); + }); + test.each([ ['runs above maximum', { runs: 1001, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], ['threshold above maximum', { runs: 4, threshold: 150 }, 'INVALID_AGGREGATION_PARAMS'], @@ -315,15 +337,6 @@ describe('aggregatePerAssertionResults()', () => { describe('normalizeJudgment()', () => { const createMockLogger = () => ({ log: vi.fn() }); - test('is exported as a function', () => { - assert({ - given: 'aggregation module', - should: 'export normalizeJudgment', - actual: typeof normalizeJudgment, - expected: 'function' - }); - }); - test('passes through complete valid input unchanged', () => { const logger = createMockLogger(); const raw = { passed: true, actual: 'Result from agent', expected: 'Expected output', score: 85 }; diff --git a/source/execute-agent.test.js b/source/execute-agent.test.js index 90a5fb23..92e72e00 100644 --- a/source/execute-agent.test.js +++ b/source/execute-agent.test.js @@ -140,11 +140,25 @@ describe('executeAgent()', () => { assert({ given: 'agentConfig with parseOutput function', - should: 'call parseOutput with stdout', + should: 'call parseOutput exactly once', actual: parseOutput.mock.calls.length, expected: 1 }); + assert({ + given: 'agentConfig with parseOutput function', + should: 'pass the raw stdout as first argument', + actual: parseOutput.mock.calls[0][0], + expected: ndjsonOutput + }); + + assert({ + given: 'agentConfig with parseOutput function', + should: 'pass the logger as second argument (WIP fix #8 — logger threading)', + actual: typeof parseOutput.mock.calls[0][1]?.log, + expected: 'function' + }); + assert({ given: 'parseOutput returns valid JSON', should: 'return the parsed result', From 401aa921caec7485258442e6559a6b2ebba78424 Mon Sep 17 00:00:00 2001 From: Ian White Date: Wed, 18 Feb 2026 11:33:47 -0600 Subject: [PATCH 3/6] fix(ai): address PR 3 author review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - aggregation.js: rename `raw` param to `judgeResponse` and fold into single options object for normalizeJudgment; removes the two-argument signature (breaking change, callers updated) - aggregation.js: remove calculateRequiredPasses — math is inlined in aggregatePerAssertionResults, eliminating double schema parse - aggregation.test.js: remove calculateRequiredPasses describe block; fix Try() usage (direct fn ref, not arrow wrapper); update all normalizeJudgment call sites to new single-options signature - execute-agent.js: extract magic number 500 to maxOutputPreviewLength constant (camelCase per javascript.mdc); applied to all 3 truncation sites - execute-agent.test.js: replace try/catch antipatterns with await Try(); add Try import from riteway.js - extraction-parser.test.js: strengthen weak typeof assertions to check specific fields; strengthen cause !== undefined to cause.name === SyntaxError 151 tests pass, 0 lint errors, TypeScript clean. Co-authored-by: Cursor --- source/aggregation.js | 49 +++-------- source/aggregation.test.js | 137 +++++++------------------------ source/execute-agent.js | 8 +- source/execute-agent.test.js | 119 +++++++++++---------------- source/extraction-parser.test.js | 14 ++-- 5 files changed, 97 insertions(+), 230 deletions(-) diff --git a/source/aggregation.js b/source/aggregation.js index d4928d8a..a3b5bd33 100644 --- a/source/aggregation.js +++ b/source/aggregation.js @@ -1,65 +1,36 @@ import { createError } from 'error-causes'; import { ValidationError, ParseError } from './ai-errors.js'; -import { - defaults, - calculateRequiredPassesSchema -} from './constants.js'; +import { calculateRequiredPassesSchema } from './constants.js'; /** * Normalize a judge response (already parsed from TAP YAML) to ensure consistent * structure with safe defaults for missing fields. - * @throws {Error} If raw is not an object (null, string, undefined, etc.) + * @throws {Error} If judgeResponse is not an object (null, string, undefined, etc.) */ -export const normalizeJudgment = (raw, { requirement, runIndex, logger }) => { - if (typeof raw !== 'object' || raw === null) { +export const normalizeJudgment = ({ judgeResponse, requirement, runIndex, logger }) => { + if (typeof judgeResponse !== 'object' || judgeResponse === null) { throw createError({ ...ParseError, message: 'Judge returned non-object response', code: 'JUDGE_INVALID_RESPONSE', requirement, runIndex, - rawResponse: raw + rawResponse: judgeResponse }); } - if (raw.actual === undefined || raw.expected === undefined) { + if (judgeResponse.actual === undefined || judgeResponse.expected === undefined) { logger.log(`Warning: Judge response missing fields for "${requirement}" run ${runIndex + 1}`); } return { - passed: raw.passed === true, - actual: raw.actual ?? 'No actual provided', - expected: raw.expected ?? 'No expected provided', - score: Number.isFinite(raw.score) ? Math.max(0, Math.min(100, raw.score)) : 0 + passed: judgeResponse.passed === true, + actual: judgeResponse.actual ?? 'No actual provided', + expected: judgeResponse.expected ?? 'No expected provided', + score: Number.isFinite(judgeResponse.score) ? Math.max(0, Math.min(100, judgeResponse.score)) : 0 }; }; -/** - * Calculate the number of passes required to meet the threshold. - * Uses ceiling to ensure threshold is met or exceeded. - * @throws {Error} If validation fails (non-integer runs, invalid threshold, etc.) - */ -export const calculateRequiredPasses = ({ runs = defaults.runs, threshold = defaults.threshold } = {}) => { - try { - const validated = calculateRequiredPassesSchema.parse({ runs, threshold }); - return Math.ceil((validated.runs * validated.threshold) / 100); - } catch (zodError) { - const issues = zodError.issues || []; - const messages = issues.map(issue => - `${issue.path.join('.')}: ${issue.message}` - ).join('; '); - - throw createError({ - ...ValidationError, - message: `Invalid parameters for calculateRequiredPasses: ${messages}`, - code: 'INVALID_CALCULATION_PARAMS', - runs, - threshold, - cause: zodError - }); - } -}; - /** * Aggregate results from per-assertion test runs. * Each assertion is independently evaluated against the threshold. diff --git a/source/aggregation.test.js b/source/aggregation.test.js index 961a6d87..2e7a3afc 100644 --- a/source/aggregation.test.js +++ b/source/aggregation.test.js @@ -3,94 +3,9 @@ import { assert } from './vitest.js'; import { Try } from './riteway.js'; import { normalizeJudgment, - calculateRequiredPasses, aggregatePerAssertionResults } from './aggregation.js'; -describe('calculateRequiredPasses()', () => { - test.each([ - ['4 runs, 75% threshold', { runs: 4, threshold: 75 }, 3], - ['5 runs, 75% threshold', { runs: 5, threshold: 75 }, 4], - ['10 runs, 80% threshold', { runs: 10, threshold: 80 }, 8], - ['4 runs, 80% threshold', { runs: 4, threshold: 80 }, 4], - ])('%s requires correct pass count', (_, options, expected) => { - assert({ - given: _, - should: `require ${expected} passes`, - actual: calculateRequiredPasses(options), - expected - }); - }); - - test('uses default values when called with no arguments', () => { - assert({ - given: 'no arguments', - should: 'use defaults (4 runs, 75% threshold) requiring 3 passes', - actual: calculateRequiredPasses(), - expected: 3 - }); - }); - - test.each([ - ['zero runs', { runs: 0, threshold: 75 }, 'runs must be at least 1'], - ['negative runs', { runs: -1, threshold: 75 }, 'runs must be at least 1'], - ['non-integer runs', { runs: 1.5, threshold: 75 }, 'runs must be an integer'], - ['NaN runs', { runs: NaN, threshold: 75 }, 'expected number, received NaN'], - ])('throws ValidationError for %s', (_, options, expectedMessage) => { - const error = Try(calculateRequiredPasses, options); - - assert({ - given: _, - should: 'have ValidationError in cause', - actual: error?.cause?.name, - expected: 'ValidationError' - }); - - assert({ - given: _, - should: 'have INVALID_CALCULATION_PARAMS code', - actual: error?.cause?.code, - expected: 'INVALID_CALCULATION_PARAMS' - }); - - assert({ - given: _, - should: 'include descriptive message', - actual: error?.message?.includes(expectedMessage), - expected: true - }); - }); - - test.each([ - ['threshold above 100', { runs: 4, threshold: 150 }, 'threshold must be at most 100'], - ['negative threshold', { runs: 4, threshold: -10 }, 'threshold must be at least 0'], - ['NaN threshold', { runs: 4, threshold: NaN }, 'expected number, received NaN'], - ])('throws ValidationError for %s', (_, options, expectedMessage) => { - const error = Try(calculateRequiredPasses, options); - - assert({ - given: _, - should: 'have ValidationError in cause', - actual: error?.cause?.name, - expected: 'ValidationError' - }); - - assert({ - given: _, - should: 'have INVALID_CALCULATION_PARAMS code', - actual: error?.cause?.code, - expected: 'INVALID_CALCULATION_PARAMS' - }); - - assert({ - given: _, - should: 'include descriptive message', - actual: error?.message?.includes(expectedMessage), - expected: true - }); - }); -}); - describe('aggregatePerAssertionResults()', () => { test('aggregates per-assertion results when all assertions pass', () => { const perAssertionResults = [ @@ -314,9 +229,7 @@ describe('aggregatePerAssertionResults()', () => { { requirement: 'test', runResults: [{ passed: true }] } ]; - const error = Try(() => - aggregatePerAssertionResults({ perAssertionResults, threshold, runs }) - ); + const error = Try(aggregatePerAssertionResults, { perAssertionResults, threshold, runs }); assert({ given: _, @@ -339,9 +252,9 @@ describe('normalizeJudgment()', () => { test('passes through complete valid input unchanged', () => { const logger = createMockLogger(); - const raw = { passed: true, actual: 'Result from agent', expected: 'Expected output', score: 85 }; + const judgeResponse = { passed: true, actual: 'Result from agent', expected: 'Expected output', score: 85 }; - const result = normalizeJudgment(raw, { requirement: 'test assertion', runIndex: 0, logger }); + const result = normalizeJudgment({ judgeResponse, requirement: 'test assertion', runIndex: 0, logger }); assert({ given: 'complete valid judgment with passed: true', @@ -374,10 +287,12 @@ describe('normalizeJudgment()', () => { test('defaults passed to false when missing', () => { const logger = createMockLogger(); - const result = normalizeJudgment( - { actual: 'Result', expected: 'Expected', score: 50 }, - { requirement: 'test', runIndex: 0, logger } - ); + const result = normalizeJudgment({ + judgeResponse: { actual: 'Result', expected: 'Expected', score: 50 }, + requirement: 'test', + runIndex: 0, + logger + }); assert({ given: 'judgment missing passed field', @@ -389,10 +304,12 @@ describe('normalizeJudgment()', () => { test('defaults missing actual and expected with warning log', () => { const logger = createMockLogger(); - const result = normalizeJudgment( - { passed: true, score: 100 }, - { requirement: 'test assertion', runIndex: 2, logger } - ); + const result = normalizeJudgment({ + judgeResponse: { passed: true, score: 100 }, + requirement: 'test assertion', + runIndex: 2, + logger + }); assert({ given: 'judgment missing actual', @@ -422,10 +339,12 @@ describe('normalizeJudgment()', () => { ['NaN score', NaN, 0], ])('normalizes %s correctly', (_, score, expected) => { const logger = createMockLogger(); - const result = normalizeJudgment( - { passed: true, actual: 'Result', expected: 'Expected', score }, - { requirement: 'test', runIndex: 0, logger } - ); + const result = normalizeJudgment({ + judgeResponse: { passed: true, actual: 'Result', expected: 'Expected', score }, + requirement: 'test', + runIndex: 0, + logger + }); assert({ given: `judgment with ${_}`, @@ -437,10 +356,12 @@ describe('normalizeJudgment()', () => { test('defaults missing score to 0', () => { const logger = createMockLogger(); - const result = normalizeJudgment( - { passed: true, actual: 'Result', expected: 'Expected' }, - { requirement: 'test', runIndex: 0, logger } - ); + const result = normalizeJudgment({ + judgeResponse: { passed: true, actual: 'Result', expected: 'Expected' }, + requirement: 'test', + runIndex: 0, + logger + }); assert({ given: 'judgment missing score', @@ -456,7 +377,7 @@ describe('normalizeJudgment()', () => { ['undefined input', undefined], ])('throws ParseError for %s', (_, input) => { const logger = createMockLogger(); - const error = Try(normalizeJudgment, input, { requirement: 'test assertion', runIndex: 1, logger }); + const error = Try(normalizeJudgment, { judgeResponse: input, requirement: 'test assertion', runIndex: 1, logger }); assert({ given: _, @@ -475,7 +396,7 @@ describe('normalizeJudgment()', () => { test('includes requirement and runIndex in ParseError cause', () => { const logger = createMockLogger(); - const error = Try(normalizeJudgment, null, { requirement: 'test assertion', runIndex: 1, logger }); + const error = Try(normalizeJudgment, { judgeResponse: null, requirement: 'test assertion', runIndex: 1, logger }); assert({ given: 'null input', diff --git a/source/execute-agent.js b/source/execute-agent.js index 62ec2949..a22d9c11 100644 --- a/source/execute-agent.js +++ b/source/execute-agent.js @@ -4,6 +4,8 @@ import { ParseError, TimeoutError, AgentProcessError } from './ai-errors.js'; import { createDebugLogger } from './debug-logger.js'; import { unwrapEnvelope, unwrapAgentResult } from './agent-parser.js'; +const maxOutputPreviewLength = 500; + const withTimeout = (promise, ms, errorFactory) => Promise.race([ promise, @@ -95,7 +97,7 @@ const processAgentOutput = ({ agentConfig, rawOutput, logger }) => ({ stdout }) logger.flush(); return result; } catch (err) { - const truncatedStdout = stdout.length > 500 ? `${stdout.slice(0, 500)}...` : stdout; + const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout; logger.log('JSON parsing failed:', err.message); logger.flush(); @@ -131,8 +133,8 @@ const runAgentProcess = async ({ agentConfig, prompt, timeout, logger }) => { logger.log(`Stderr length: ${stderr.length} characters`); if (code !== 0) { - const truncatedStdout = stdout.length > 500 ? `${stdout.slice(0, 500)}...` : stdout; - const truncatedStderr = stderr.length > 500 ? `${stderr.slice(0, 500)}...` : stderr; + const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout; + const truncatedStderr = stderr.length > maxOutputPreviewLength ? `${stderr.slice(0, maxOutputPreviewLength)}...` : stderr; logger.log('Process failed with non-zero exit code'); logger.flush(); diff --git a/source/execute-agent.test.js b/source/execute-agent.test.js index 92e72e00..d42e21bf 100644 --- a/source/execute-agent.test.js +++ b/source/execute-agent.test.js @@ -1,5 +1,6 @@ import { describe, test, vi, beforeEach } from 'vitest'; import { assert } from './vitest.js'; +import { Try } from './riteway.js'; vi.mock('child_process', () => ({ spawn: vi.fn() @@ -174,36 +175,28 @@ describe('executeAgent()', () => { exitCode: 1 })); - try { - await executeAgent({ agentConfig, prompt: 'test prompt' }); - assert({ - given: 'non-zero exit code', - should: 'have thrown an error', - actual: false, - expected: true - }); - } catch (err) { - assert({ - given: 'non-zero exit code from agent process', - should: 'throw Error with AgentProcessError cause', - actual: err?.cause?.name, - expected: 'AgentProcessError' - }); - - assert({ - given: 'non-zero exit code', - should: 'have AGENT_PROCESS_FAILURE code', - actual: err?.cause?.code, - expected: 'AGENT_PROCESS_FAILURE' - }); - - assert({ - given: 'non-zero exit code', - should: 'include exit code in cause', - actual: err?.cause?.exitCode, - expected: 1 - }); - } + const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt' }); + + assert({ + given: 'non-zero exit code from agent process', + should: 'throw Error with AgentProcessError cause', + actual: err?.cause?.name, + expected: 'AgentProcessError' + }); + + assert({ + given: 'non-zero exit code', + should: 'have AGENT_PROCESS_FAILURE code', + actual: err?.cause?.code, + expected: 'AGENT_PROCESS_FAILURE' + }); + + assert({ + given: 'non-zero exit code', + should: 'include exit code in cause', + actual: err?.cause?.exitCode, + expected: 1 + }); }); test('throws TimeoutError when timeout is exceeded', async () => { @@ -216,54 +209,34 @@ describe('executeAgent()', () => { }; spawn.mockReturnValue(proc); - try { - await executeAgent({ - agentConfig, - prompt: 'test prompt', - timeout: 1 - }); - assert({ - given: 'timeout exceeded', - should: 'have thrown an error', - actual: false, - expected: true - }); - } catch (err) { - assert({ - given: 'agent process that exceeds timeout', - should: 'throw Error with TimeoutError cause', - actual: err?.cause?.name, - expected: 'TimeoutError' - }); - - assert({ - given: 'timeout exceeded', - should: 'have AGENT_TIMEOUT code', - actual: err?.cause?.code, - expected: 'AGENT_TIMEOUT' - }); - } + const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt', timeout: 1 }); + + assert({ + given: 'agent process that exceeds timeout', + should: 'throw Error with TimeoutError cause', + actual: err?.cause?.name, + expected: 'TimeoutError' + }); + + assert({ + given: 'timeout exceeded', + should: 'have AGENT_TIMEOUT code', + actual: err?.cause?.code, + expected: 'AGENT_TIMEOUT' + }); }); test('throws ParseError when stdout is not valid JSON (rawOutput: false)', async () => { spawn.mockReturnValue(createMockProcess({ stdout: 'not valid json output' })); - try { - await executeAgent({ agentConfig, prompt: 'test prompt' }); - assert({ - given: 'invalid JSON stdout', - should: 'have thrown an error', - actual: false, - expected: true - }); - } catch (err) { - assert({ - given: 'stdout that is not valid JSON', - should: 'throw Error with ParseError cause', - actual: err?.cause?.name, - expected: 'ParseError' - }); - } + const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt' }); + + assert({ + given: 'stdout that is not valid JSON', + should: 'throw Error with ParseError cause', + actual: err?.cause?.name, + expected: 'ParseError' + }); }); test('spawns the agent with command, args, and prompt appended', async () => { diff --git a/source/extraction-parser.test.js b/source/extraction-parser.test.js index 1d2f69c2..ba2dfb99 100644 --- a/source/extraction-parser.test.js +++ b/source/extraction-parser.test.js @@ -59,8 +59,8 @@ describe('parseExtractionResult()', () => { assert({ given: 'JSON wrapped in markdown code fences', - should: 'extract and parse the JSON object', - actual: typeof result === 'object' && result !== null, + should: 'parse importPaths as an array', + actual: Array.isArray(result.importPaths), expected: true }); @@ -87,8 +87,8 @@ describe('parseExtractionResult()', () => { assert({ given: 'JSON with explanation text and markdown fences', should: 'extract and parse the JSON object', - actual: typeof result === 'object' && result !== null, - expected: true + actual: result.userPrompt, + expected: 'test prompt' }); assert({ @@ -142,9 +142,9 @@ describe('parseExtractionResult()', () => { assert({ given: 'non-JSON input', - should: 'preserve original parse error as cause', - actual: error?.cause?.cause !== undefined, - expected: true + should: 'preserve original JSON SyntaxError as cause', + actual: error?.cause?.cause?.name, + expected: 'SyntaxError' }); }); From 6c5406c05fbc1f4b984b99d42fd5893e06be7b66 Mon Sep 17 00:00:00 2001 From: Ian White Date: Wed, 18 Feb 2026 11:47:26 -0600 Subject: [PATCH 4/6] fix(ai): address PR 3 follow-up review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - constants.js: rename calculateRequiredPassesSchema to aggregationParamsSchema — name now reflects what the schema validates (aggregation input params) rather than the deleted calculateRequiredPasses function; update all import sites - aggregation.test.js: add 6 missing Zod validation edge cases for aggregatePerAssertionResults (zero runs, negative runs, non-integer runs, NaN runs, negative threshold, NaN threshold) — coverage gap introduced when calculateRequiredPasses and its tests were removed; all cases now exercised via aggregatePerAssertionResults test.each 157 tests pass, 0 lint errors, TypeScript clean. Co-authored-by: Cursor --- source/aggregation.js | 4 ++-- source/aggregation.test.js | 6 ++++++ source/constants.js | 2 +- source/constants.test.js | 8 ++++---- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/source/aggregation.js b/source/aggregation.js index a3b5bd33..6f8c0cb9 100644 --- a/source/aggregation.js +++ b/source/aggregation.js @@ -1,6 +1,6 @@ import { createError } from 'error-causes'; import { ValidationError, ParseError } from './ai-errors.js'; -import { calculateRequiredPassesSchema } from './constants.js'; +import { aggregationParamsSchema } from './constants.js'; /** * Normalize a judge response (already parsed from TAP YAML) to ensure consistent @@ -39,7 +39,7 @@ export const normalizeJudgment = ({ judgeResponse, requirement, runIndex, logger export const aggregatePerAssertionResults = ({ perAssertionResults, threshold, runs }) => { let validated; try { - validated = calculateRequiredPassesSchema.parse({ runs, threshold }); + validated = aggregationParamsSchema.parse({ runs, threshold }); } catch (zodError) { const issues = zodError.issues || []; const messages = issues.map(issue => diff --git a/source/aggregation.test.js b/source/aggregation.test.js index 2e7a3afc..a3074cde 100644 --- a/source/aggregation.test.js +++ b/source/aggregation.test.js @@ -223,7 +223,13 @@ describe('aggregatePerAssertionResults()', () => { test.each([ ['runs above maximum', { runs: 1001, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], + ['zero runs', { runs: 0, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], + ['negative runs', { runs: -1, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], + ['non-integer runs', { runs: 1.5, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], + ['NaN runs', { runs: NaN, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], ['threshold above maximum', { runs: 4, threshold: 150 }, 'INVALID_AGGREGATION_PARAMS'], + ['negative threshold', { runs: 4, threshold: -10 }, 'INVALID_AGGREGATION_PARAMS'], + ['NaN threshold', { runs: 4, threshold: NaN }, 'INVALID_AGGREGATION_PARAMS'], ])('throws ValidationError for %s', (_, { runs, threshold }, expectedCode) => { const perAssertionResults = [ { requirement: 'test', runResults: [{ passed: true }] } diff --git a/source/constants.js b/source/constants.js index 71c17b6d..2624ee0c 100644 --- a/source/constants.js +++ b/source/constants.js @@ -47,7 +47,7 @@ export const agentSchema = z.enum(constraints.supportedAgents, { message: `agent must be one of: ${constraints.supportedAgents.join(', ')}` }); -export const calculateRequiredPassesSchema = z.object({ +export const aggregationParamsSchema = z.object({ runs: runsSchema, threshold: thresholdSchema }); diff --git a/source/constants.test.js b/source/constants.test.js index b68a15a9..ce80e926 100644 --- a/source/constants.test.js +++ b/source/constants.test.js @@ -8,7 +8,7 @@ import { concurrencySchema, timeoutSchema, agentSchema, - calculateRequiredPassesSchema, + aggregationParamsSchema, aiTestOptionsSchema } from './constants.js'; @@ -346,9 +346,9 @@ describe('constants module', () => { }); }); - describe('calculateRequiredPassesSchema', () => { + describe('aggregationParamsSchema', () => { test('validates complete object with defaults', () => { - const result = calculateRequiredPassesSchema.parse({ + const result = aggregationParamsSchema.parse({ runs: 5, threshold: 80 }); @@ -362,7 +362,7 @@ describe('constants module', () => { }); test('reports multiple validation errors', () => { - const result = calculateRequiredPassesSchema.safeParse({ + const result = aggregationParamsSchema.safeParse({ runs: -1, threshold: 150 }); From a80bb8ed50fbd3760b8a891253e1cd579485fcc5 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 18 Feb 2026 20:59:55 +0000 Subject: [PATCH 5/6] refactor(test): complete PR review remediation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🐛 - Remove weak instanceof Error assertions 🔄 - Add threshold calculation verification tests Tests now verify threshold-based pass/fail logic directly 164 tests passing, 0 lint errors, TypeScript clean Co-authored-by: Ian White --- source/agent-parser.test.js | 7 ------ source/aggregation.test.js | 43 +++++++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/source/agent-parser.test.js b/source/agent-parser.test.js index f222c4ed..2aa41da6 100644 --- a/source/agent-parser.test.js +++ b/source/agent-parser.test.js @@ -259,13 +259,6 @@ describe('parseOpenCodeNDJSON()', () => { const error = Try(parseOpenCodeNDJSON, ndjson, logger); - assert({ - given: 'NDJSON with no text events', - should: 'throw Error with cause', - actual: error instanceof Error && error.cause !== undefined, - expected: true - }); - assert({ given: 'NDJSON with no text events', should: 'have ParseError name in cause', diff --git a/source/aggregation.test.js b/source/aggregation.test.js index a3074cde..616eb033 100644 --- a/source/aggregation.test.js +++ b/source/aggregation.test.js @@ -221,6 +221,41 @@ describe('aggregatePerAssertionResults()', () => { }); }); + test.each([ + ['4 runs, 75% threshold', 4, 75, 3, 3, true], + ['4 runs, 75% threshold', 4, 75, 2, 2, false], + ['5 runs, 75% threshold', 5, 75, 4, 4, true], + ['5 runs, 75% threshold', 5, 75, 3, 3, false], + ['10 runs, 80% threshold', 10, 80, 8, 8, true], + ['4 runs, 80% threshold', 4, 80, 4, 4, true], + ['4 runs, 80% threshold', 4, 80, 3, 3, false], + ])('applies threshold correctly: %s with %i passes', (_, runs, threshold, passCount, totalPasses, expectedPass) => { + const runResults = [ + ...Array(passCount).fill({ passed: true, score: 100 }), + ...Array(runs - passCount).fill({ passed: false, score: 0 }) + ]; + + const result = aggregatePerAssertionResults({ + perAssertionResults: [{ requirement: 'test assertion', runResults }], + threshold, + runs + }); + + assert({ + given: `${_} with ${passCount} of ${runs} passes`, + should: expectedPass ? 'pass the assertion' : 'fail the assertion', + actual: result.assertions[0].passed, + expected: expectedPass + }); + + assert({ + given: `${_} with ${passCount} passes`, + should: `report passCount of ${totalPasses}`, + actual: result.assertions[0].passCount, + expected: totalPasses + }); + }); + test.each([ ['runs above maximum', { runs: 1001, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], ['zero runs', { runs: 0, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], @@ -239,14 +274,14 @@ describe('aggregatePerAssertionResults()', () => { assert({ given: _, - should: 'throw validation error', - actual: error instanceof Error, - expected: true + should: 'have ValidationError name in cause', + actual: error?.cause?.name, + expected: 'ValidationError' }); assert({ given: _, - should: 'have correct error code', + should: 'have correct error code in cause', actual: error?.cause?.code, expected: expectedCode }); From 5d1cc7fbac2bb856df4100db9197db508f7ea4f5 Mon Sep 17 00:00:00 2001 From: Ian White Date: Wed, 18 Feb 2026 15:16:28 -0600 Subject: [PATCH 6/6] fix(ai): remove implementation detail from test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - execute-agent.test.js: remove logger type assertion from parseOutput test — typeof checks violate tdd.mdc:64 and logger threading is an implementation detail; the three remaining assertions (call count, stdout arg, parsed result) collectively verify correct integration 164 tests pass, 0 lint errors, TypeScript clean. Co-authored-by: Cursor --- source/execute-agent.test.js | 7 ------- 1 file changed, 7 deletions(-) diff --git a/source/execute-agent.test.js b/source/execute-agent.test.js index d42e21bf..01b934a0 100644 --- a/source/execute-agent.test.js +++ b/source/execute-agent.test.js @@ -153,13 +153,6 @@ describe('executeAgent()', () => { expected: ndjsonOutput }); - assert({ - given: 'agentConfig with parseOutput function', - should: 'pass the logger as second argument (WIP fix #8 — logger threading)', - actual: typeof parseOutput.mock.calls[0][1]?.log, - expected: 'function' - }); - assert({ given: 'parseOutput returns valid JSON', should: 'return the parsed result',