/**
 * Turn an agent's string reply into structured data when possible.
 *
 * Strategies, tried in order:
 *   1. If the trimmed text opens with `{` or `[`, parse the trimmed text as JSON.
 *   2. Otherwise (or when step 1 fails), take the first fenced markdown block
 *      (``` or ```json) found in the untrimmed text and parse its contents.
 *   3. If nothing parses, hand the original string back untouched.
 *
 * @param {string} result - Raw text produced by the agent.
 * @param {{ log: Function }} logger - Receives a progress message for each step.
 * @returns {*} The parsed JSON value, or `result` itself when no strategy succeeds.
 */
export const parseStringResult = (result, logger) => {
  // Runs JSON.parse and reports the outcome. The success message is logged
  // inside the try block, so a logger that throws is handled like a parse failure.
  const attemptParse = (text, successMessage, failureMessage) => {
    try {
      const value = JSON.parse(text);
      logger.log(successMessage);
      return { ok: true, value };
    } catch {
      logger.log(failureMessage);
      return { ok: false, value: undefined };
    }
  };

  const candidate = result.trim();
  const opensLikeJson = ['{', '['].some((opener) => candidate.startsWith(opener));

  if (opensLikeJson) {
    const direct = attemptParse(
      candidate,
      'Successfully parsed string as JSON',
      'Direct JSON parse failed, trying markdown extraction'
    );
    if (direct.ok) {
      return direct.value;
    }
  }

  // The fence search deliberately looks at the untrimmed input.
  const fenced = /```(?:json)?\s*\n([\s\S]*?)\n```/.exec(result);
  if (fenced !== null) {
    logger.log('Found markdown-wrapped JSON, extracting...');
    const [, fencedBody] = fenced;
    const extracted = attemptParse(
      fencedBody,
      'Successfully parsed markdown-wrapped JSON',
      'Failed to parse markdown content, keeping original string'
    );
    if (extracted.ok) {
      return extracted.value;
    }
  }

  logger.log('String is not valid JSON, keeping as plain text');
  return result;
};
/**
 * Parse OpenCode's NDJSON output, extracting and concatenating all "text" events.
 *
 * Each non-blank line is parsed as JSON on its own. Lines that fail to parse are
 * logged and skipped; events whose `type` is not "text", or whose `part.text`
 * is missing or empty, are ignored.
 *
 * @param {string} ndjson - Newline-delimited JSON emitted by the agent.
 * @param {{ log: Function }} logger - Receives progress and warning messages.
 * @returns {string} The text of every "text" event, joined without a separator.
 * @throws {Error} Created via createError with ParseError fields and code
 *   'NO_TEXT_EVENTS' when no usable text event is found.
 */
export const parseOpenCodeNDJSON = (ndjson, logger) => {
  logger.log('Parsing OpenCode NDJSON output...');

  const lines = ndjson.trim().split('\n').filter((line) => line.trim());

  // Append in place. Copying the accumulator for every text event
  // ([...acc, text] inside a reduce) is quadratic in the number of events.
  const textEvents = [];
  for (const line of lines) {
    try {
      const event = JSON.parse(line);
      if (event.type === 'text' && event.part?.text) {
        logger.log(`Found text event with ${event.part.text.length} characters`);
        textEvents.push(event.part.text);
      }
    } catch (err) {
      // Best effort: one bad line must not discard the rest of the stream.
      logger.log(`Warning: Failed to parse NDJSON line: ${err.message}`);
    }
  }

  if (textEvents.length === 0) {
    throw createError({
      ...ParseError,
      message: 'No text events found in OpenCode output',
      code: 'NO_TEXT_EVENTS',
      ndjsonLength: ndjson.length,
      linesProcessed: lines.length
    });
  }

  const combinedText = textEvents.join('');
  logger.log(`Combined ${textEvents.length} text event(s) into ${combinedText.length} characters`);
  return combinedText;
};

/**
 * Unwrap a JSON envelope object { result: ... }, returning the inner value.
 * If no envelope is present (including null/undefined input, or a `result`
 * that is undefined), the argument is returned as-is. A `result` of null
 * counts as present and is returned.
 *
 * @param {*} parsed - A parsed JSON value that may be wrapped in an envelope.
 * @returns {*} `parsed.result` when it is defined, otherwise `parsed`.
 */
export const unwrapEnvelope = (parsed) =>
  parsed?.result !== undefined ? parsed.result : parsed;
/**
 * Parse an agent's processed output and strip an optional { result: ... } envelope.
 *
 * The output is first run through parseStringResult. If that yields a string
 * (nothing could be parsed), the call fails. Otherwise the value is passed
 * through unwrapEnvelope; when the unwrapped value is itself a string, it is
 * given one more pass through parseStringResult, whose return value is
 * returned as-is.
 *
 * @param {string} processedOutput - Agent output after any preprocessing.
 * @param {{ log: Function }} logger - Receives progress messages.
 * @returns {*} The unwrapped result.
 * @throws {Error} Created via createError with ParseError fields when the
 *   outer output cannot be parsed as JSON.
 */
export const unwrapAgentResult = (processedOutput, logger) => {
  const firstPass = parseStringResult(processedOutput, logger);

  if (typeof firstPass === 'string') {
    // Only the first 100 characters are echoed back in the error.
    const outputPreview = firstPass.slice(0, 100);
    throw createError({
      ...ParseError,
      message: `Agent output is not valid JSON: ${outputPreview}`,
      outputPreview
    });
  }

  const payload = unwrapEnvelope(firstPass);
  const payloadType = typeof payload;
  logger.log(`Parsed result type: ${payloadType}`);

  if (payloadType !== 'string') {
    return payload;
  }

  logger.log('Result is string, attempting to parse as JSON');
  return parseStringResult(payload, logger);
};
+ + const result = parseStringResult(input, logger); + + assert({ + given: 'JSON string starting with [', + should: 'parse as JSON array', + actual: result.length, + expected: 2 + }); + }); + + test('extracts markdown-wrapped JSON when direct parse fails', () => { + const logger = createMockLogger(); + const input = '```json\n{"passed": true, "output": "test"}\n```'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'markdown-wrapped JSON', + should: 'extract and parse JSON', + actual: JSON.stringify(result), + expected: '{"passed":true,"output":"test"}' + }); + + assert({ + given: 'markdown extraction', + should: 'log markdown extraction', + actual: logger.logs.some(log => log.includes('markdown-wrapped JSON')), + expected: true + }); + }); + + test('extracts markdown-wrapped JSON without json language tag', () => { + const logger = createMockLogger(); + const input = '```\n{"passed": true}\n```'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'markdown without json tag', + should: 'extract and parse JSON', + actual: result.passed, + expected: true + }); + }); + + test('tries markdown extraction even if string starts with {', () => { + const logger = createMockLogger(); + const input = '{ broken json ```json\n{"passed": true}\n```'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'malformed JSON with markdown fallback', + should: 'extract from markdown block', + actual: result.passed, + expected: true + }); + + assert({ + given: 'fallback scenario', + should: 'log failed parse and markdown extraction', + actual: logger.logs.some(log => log.includes('trying markdown extraction')), + expected: true + }); + }); + + test('returns plain text when no parsing succeeds', () => { + const logger = createMockLogger(); + const input = 'This is just plain text with no JSON'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'plain text string', + should: 'return original 
string', + actual: result, + expected: input + }); + + assert({ + given: 'no valid JSON', + should: 'log keeping as plain text', + actual: logger.logs.some(log => log.includes('keeping as plain text')), + expected: true + }); + }); + + test('handles malformed markdown gracefully', () => { + const logger = createMockLogger(); + const input = '```json\n{ broken: json }\n```'; + + const result = parseStringResult(input, logger); + + assert({ + given: 'markdown with invalid JSON', + should: 'return original string', + actual: result, + expected: input + }); + + assert({ + given: 'failed markdown parse', + should: 'log failure', + actual: logger.logs.some(log => log.includes('Failed to parse markdown content')), + expected: true + }); + }); + + test('trims whitespace before parsing', () => { + const logger = createMockLogger(); + const input = ' \n {"passed": true} \n '; + + const result = parseStringResult(input, logger); + + assert({ + given: 'JSON with surrounding whitespace', + should: 'parse successfully', + actual: result.passed, + expected: true + }); + + assert({ + given: 'JSON with surrounding whitespace', + should: 'return parsed object matching trimmed input', + actual: result, + expected: { passed: true } + }); + }); +}); + +describe('parseOpenCodeNDJSON()', () => { + test('extracts text from single text event', () => { + const logger = createMockLogger(); + const ndjson = '{"type":"step_start","timestamp":1770245956364}\n' + + '{"type":"text","part":{"text":"```json\\n{\\"status\\": \\"ok\\"}\\n```"}}\n' + + '{"type":"step_finish","timestamp":1770245956211}'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'NDJSON with single text event', + should: 'extract text content', + actual: result, + expected: '```json\n{"status": "ok"}\n```' + }); + + assert({ + given: 'successful text extraction', + should: 'log found text event', + actual: logger.logs.some(log => log.includes('Found text event')), + expected: true + }); + }); + + 
test('concatenates multiple text events', () => { + const logger = createMockLogger(); + const ndjson = '{"type":"text","part":{"text":"Part 1"}}\n' + + '{"type":"text","part":{"text":" Part 2"}}\n' + + '{"type":"text","part":{"text":" Part 3"}}'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'NDJSON with multiple text events', + should: 'concatenate all text content', + actual: result, + expected: 'Part 1 Part 2 Part 3' + }); + }); + + test('filters out non-text events', () => { + const logger = createMockLogger(); + const ndjson = '{"type":"step_start","data":"ignored"}\n' + + '{"type":"text","part":{"text":"Hello"}}\n' + + '{"type":"step_finish","data":"ignored"}\n' + + '{"type":"text","part":{"text":" World"}}'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'NDJSON with mixed event types', + should: 'extract only text events', + actual: result, + expected: 'Hello World' + }); + }); + + test('skips malformed JSON lines', () => { + const logger = createMockLogger(); + const ndjson = '{invalid json}\n' + + '{"type":"text","part":{"text":"Valid text"}}\n' + + 'not json at all'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'NDJSON with malformed lines', + should: 'skip invalid lines and process valid ones', + actual: result, + expected: 'Valid text' + }); + + assert({ + given: 'malformed JSON', + should: 'log warning for failed parse', + actual: logger.logs.some(log => log.includes('Failed to parse NDJSON line')), + expected: true + }); + }); + + test('throws error when no text events found', () => { + const logger = createMockLogger(); + const ndjson = '{"type":"step_start","data":"no text here"}\n' + + '{"type":"step_finish","data":"still no text"}'; + + const error = Try(parseOpenCodeNDJSON, ndjson, logger); + + assert({ + given: 'NDJSON with no text events', + should: 'have ParseError name in cause', + actual: error?.cause?.name, + expected: 'ParseError' + }); + + 
assert({ + given: 'NDJSON with no text events', + should: 'have NO_TEXT_EVENTS code in cause', + actual: error?.cause?.code, + expected: 'NO_TEXT_EVENTS' + }); + + assert({ + given: 'NDJSON with no text events', + should: 'include ndjsonLength in cause', + actual: typeof error?.cause?.ndjsonLength === 'number', + expected: true + }); + + assert({ + given: 'NDJSON with no text events', + should: 'include linesProcessed in cause', + actual: error?.cause?.linesProcessed, + expected: 2 + }); + }); + + test('handles empty lines in NDJSON', () => { + const logger = createMockLogger(); + const ndjson = '\n\n{"type":"text","part":{"text":"Hello"}}\n\n\n{"type":"text","part":{"text":" World"}}\n\n'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'NDJSON with empty lines', + should: 'filter empty lines and process valid events', + actual: result, + expected: 'Hello World' + }); + }); + + test('preserves markdown-wrapped JSON in text', () => { + const logger = createMockLogger(); + const ndjson = '{"type":"text","part":{"text":"```json\\n{\\"passed\\":true}\\n```"}}'; + + const result = parseOpenCodeNDJSON(ndjson, logger); + + assert({ + given: 'text event with markdown-wrapped JSON', + should: 'preserve markdown formatting', + actual: result, + expected: '```json\n{"passed":true}\n```' + }); + }); +}); + +describe('unwrapEnvelope()', () => { + test.each([ + ['object with result field', { result: { passed: true } }, { passed: true }], + ['object with result as string', { result: 'raw string' }, 'raw string'], + ['object with result as null', { result: null }, null], + ['object without result field', { passed: true, score: 80 }, { passed: true, score: 80 }], + ])('%s', (_, input, expected) => { + assert({ + given: _, + should: 'return the unwrapped value', + actual: unwrapEnvelope(input), + expected + }); + }); +}); + +describe('unwrapAgentResult()', () => { + test('unwraps Claude envelope and returns parsed inner object', () => { + const logger 
= createMockLogger(); + const envelope = JSON.stringify({ result: JSON.stringify({ passed: true, score: 90 }) }); + + const result = unwrapAgentResult(envelope, logger); + + assert({ + given: 'Claude CLI envelope wrapping a JSON string', + should: 'return the fully parsed inner object', + actual: result, + expected: { passed: true, score: 90 } + }); + }); + + test('returns parsed object when no envelope present', () => { + const logger = createMockLogger(); + const direct = JSON.stringify({ passed: false, score: 40 }); + + const result = unwrapAgentResult(direct, logger); + + assert({ + given: 'direct JSON object (no envelope)', + should: 'return the parsed object', + actual: result, + expected: { passed: false, score: 40 } + }); + }); + + test('throws ParseError when output is not valid JSON', () => { + const logger = createMockLogger(); + + const error = Try(unwrapAgentResult, 'plain text response', logger); + + assert({ + given: 'plain text that is not valid JSON', + should: 'throw Error with ParseError cause', + actual: error?.cause?.name, + expected: 'ParseError' + }); + }); + + test('handles markdown-wrapped envelope', () => { + const logger = createMockLogger(); + const markdownEnvelope = '```json\n{"result": {"passed": true}}\n```'; + + const result = unwrapAgentResult(markdownEnvelope, logger); + + assert({ + given: 'markdown-wrapped JSON with result envelope', + should: 'extract, unwrap and return the inner object', + actual: result, + expected: { passed: true } + }); + }); +}); diff --git a/source/aggregation.js b/source/aggregation.js new file mode 100644 index 00000000..6f8c0cb9 --- /dev/null +++ b/source/aggregation.js @@ -0,0 +1,82 @@ +import { createError } from 'error-causes'; +import { ValidationError, ParseError } from './ai-errors.js'; +import { aggregationParamsSchema } from './constants.js'; + +/** + * Normalize a judge response (already parsed from TAP YAML) to ensure consistent + * structure with safe defaults for missing fields. 
/**
 * Normalize a judge response (already parsed from TAP YAML) to ensure consistent
 * structure with safe defaults for missing fields.
 *
 * - `passed` is true only when the response's `passed` is strictly `true`.
 * - `actual` / `expected` fall back to placeholder strings when null or undefined;
 *   a warning is logged when either is undefined.
 * - `score` is clamped to the 0-100 range; non-finite or missing scores become 0.
 *
 * @param {Object} options
 * @param {*} options.judgeResponse - Parsed judge output.
 * @param {string} options.requirement - Requirement text, used in logs and errors.
 * @param {number} options.runIndex - Zero-based run index (logged one-based).
 * @param {{ log: Function }} options.logger - Receives the missing-fields warning.
 * @returns {{ passed: boolean, actual: *, expected: *, score: number }}
 * @throws {Error} Created via createError with ParseError fields and code
 *   'JUDGE_INVALID_RESPONSE' if judgeResponse is not an object (null, string,
 *   undefined, etc.).
 */
export const normalizeJudgment = ({ judgeResponse, requirement, runIndex, logger }) => {
  if (typeof judgeResponse !== 'object' || judgeResponse === null) {
    throw createError({
      ...ParseError,
      message: 'Judge returned non-object response',
      code: 'JUDGE_INVALID_RESPONSE',
      requirement,
      runIndex,
      rawResponse: judgeResponse
    });
  }

  if (judgeResponse.actual === undefined || judgeResponse.expected === undefined) {
    logger.log(`Warning: Judge response missing fields for "${requirement}" run ${runIndex + 1}`);
  }

  return {
    passed: judgeResponse.passed === true,
    actual: judgeResponse.actual ?? 'No actual provided',
    expected: judgeResponse.expected ?? 'No expected provided',
    score: Number.isFinite(judgeResponse.score) ? Math.max(0, Math.min(100, judgeResponse.score)) : 0
  };
};

/**
 * Aggregate results from per-assertion test runs.
 * Each assertion is independently evaluated against the threshold.
 * Overall pass requires all assertions to meet the threshold; an empty
 * assertion list therefore passes.
 *
 * @param {Object} options
 * @param {Array<{ requirement: string, runResults: Array<{ passed?: boolean, score?: number }> }>} options.perAssertionResults
 * @param {number} options.threshold - Required pass percentage, validated by aggregationParamsSchema.
 * @param {number} options.runs - Runs per assertion, validated by aggregationParamsSchema.
 * @returns {{ passed: boolean, assertions: Array<Object> }} Overall verdict plus,
 *   per assertion: requirement, passed, passCount, totalRuns, averageScore
 *   (rounded to two decimals, 0 when there are no run results) and runResults.
 * @throws {Error} Created via createError with ValidationError fields and code
 *   'INVALID_AGGREGATION_PARAMS' when runs/threshold fail schema validation.
 */
export const aggregatePerAssertionResults = ({ perAssertionResults, threshold, runs }) => {
  let validated;
  try {
    validated = aggregationParamsSchema.parse({ runs, threshold });
  } catch (zodError) {
    const issues = zodError.issues || [];
    const messages = issues.map(issue =>
      `${issue.path.join('.')}: ${issue.message}`
    ).join('; ');

    throw createError({
      ...ValidationError,
      message: `Invalid parameters for aggregatePerAssertionResults: ${messages}`,
      code: 'INVALID_AGGREGATION_PARAMS',
      runs,
      threshold,
      cause: zodError
    });
  }

  const requiredPasses = Math.ceil((validated.runs * validated.threshold) / 100);

  const assertions = perAssertionResults.map(({ requirement, runResults }) => {
    const passCount = runResults.filter(r => r.passed).length;
    const totalScore = runResults.reduce((sum, r) => sum + (r.score ?? 0), 0);
    const averageScore = runResults.length > 0
      ? Math.round((totalScore / runResults.length) * 100) / 100
      : 0;

    return {
      requirement,
      passed: passCount >= requiredPasses,
      passCount,
      // Report the same run count that requiredPasses was derived from, so the
      // two cannot disagree if the schema applies defaults or coercion.
      // NOTE(review): schema definition is not visible here - confirm it has defaults.
      totalRuns: validated.runs,
      averageScore,
      runResults
    };
  });

  return {
    passed: assertions.every(a => a.passed),
    assertions
  };
};
actual: result.assertions[0].requirement, + expected: 'Given simple addition, should add correctly' + }); + }); + + test('fails when any assertion does not meet threshold', () => { + const perAssertionResults = [ + { + requirement: 'Given addition, should add correctly', + runResults: [ + { passed: true, output: 'ok' }, + { passed: true, output: 'ok' } + ] + }, + { + requirement: 'Given format, should output JSON', + runResults: [ + { passed: false, output: 'fail' }, + { passed: false, output: 'fail' } + ] + } + ]; + + const result = aggregatePerAssertionResults({ + perAssertionResults, + threshold: 75, + runs: 2 + }); + + assert({ + given: 'one assertion failing threshold', + should: 'return passed: false', + actual: result.passed, + expected: false + }); + + assert({ + given: 'the passing assertion', + should: 'mark it as passed', + actual: result.assertions[0].passed, + expected: true + }); + + assert({ + given: 'the failing assertion', + should: 'mark it as failed', + actual: result.assertions[1].passed, + expected: false + }); + + assert({ + given: 'the failing assertion', + should: 'have passCount 0', + actual: result.assertions[1].passCount, + expected: 0 + }); + }); + + test('includes per-assertion run results and totalRuns', () => { + const runResults = [ + { passed: true, output: 'run 1' }, + { passed: false, output: 'run 2' } + ]; + const perAssertionResults = [{ requirement: 'test assertion', runResults }]; + + const result = aggregatePerAssertionResults({ + perAssertionResults, + threshold: 50, + runs: 2 + }); + + assert({ + given: 'per-assertion run results', + should: 'include run results in the assertion', + actual: result.assertions[0].runResults, + expected: runResults + }); + + assert({ + given: 'per-assertion run results', + should: 'include totalRuns per assertion', + actual: result.assertions[0].totalRuns, + expected: 2 + }); + }); + + test('calculates averageScore from run results', () => { + const perAssertionResults = [ + { + requirement: 
'test with scores', + runResults: [ + { passed: true, score: 85 }, + { passed: true, score: 95 }, + { passed: true, score: 90 } + ] + } + ]; + + const result = aggregatePerAssertionResults({ + perAssertionResults, + threshold: 75, + runs: 3 + }); + + assert({ + given: 'three runs with scores 85, 95, 90', + should: 'calculate average score of 90', + actual: result.assertions[0].averageScore, + expected: 90 + }); + }); + + test('defaults missing scores to 0 when calculating average', () => { + const perAssertionResults = [ + { + requirement: 'test without scores', + runResults: [ + { passed: true }, + { passed: true } + ] + } + ]; + + const result = aggregatePerAssertionResults({ + perAssertionResults, + threshold: 75, + runs: 2 + }); + + assert({ + given: 'run results with no score fields', + should: 'report averageScore of 0', + actual: result.assertions[0].averageScore, + expected: 0 + }); + }); + + test('passes with empty assertions array (vacuous truth)', () => { + const result = aggregatePerAssertionResults({ + perAssertionResults: [], + threshold: 75, + runs: 4 + }); + + assert({ + given: 'empty perAssertionResults array', + should: 'return passed: true (all zero assertions meet threshold)', + actual: result.passed, + expected: true + }); + + assert({ + given: 'empty perAssertionResults array', + should: 'return empty assertions array', + actual: result.assertions, + expected: [] + }); + }); + + test.each([ + ['4 runs, 75% threshold', 4, 75, 3, 3, true], + ['4 runs, 75% threshold', 4, 75, 2, 2, false], + ['5 runs, 75% threshold', 5, 75, 4, 4, true], + ['5 runs, 75% threshold', 5, 75, 3, 3, false], + ['10 runs, 80% threshold', 10, 80, 8, 8, true], + ['4 runs, 80% threshold', 4, 80, 4, 4, true], + ['4 runs, 80% threshold', 4, 80, 3, 3, false], + ])('applies threshold correctly: %s with %i passes', (_, runs, threshold, passCount, totalPasses, expectedPass) => { + const runResults = [ + ...Array(passCount).fill({ passed: true, score: 100 }), + ...Array(runs - 
passCount).fill({ passed: false, score: 0 }) + ]; + + const result = aggregatePerAssertionResults({ + perAssertionResults: [{ requirement: 'test assertion', runResults }], + threshold, + runs + }); + + assert({ + given: `${_} with ${passCount} of ${runs} passes`, + should: expectedPass ? 'pass the assertion' : 'fail the assertion', + actual: result.assertions[0].passed, + expected: expectedPass + }); + + assert({ + given: `${_} with ${passCount} passes`, + should: `report passCount of ${totalPasses}`, + actual: result.assertions[0].passCount, + expected: totalPasses + }); + }); + + test.each([ + ['runs above maximum', { runs: 1001, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], + ['zero runs', { runs: 0, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], + ['negative runs', { runs: -1, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], + ['non-integer runs', { runs: 1.5, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], + ['NaN runs', { runs: NaN, threshold: 75 }, 'INVALID_AGGREGATION_PARAMS'], + ['threshold above maximum', { runs: 4, threshold: 150 }, 'INVALID_AGGREGATION_PARAMS'], + ['negative threshold', { runs: 4, threshold: -10 }, 'INVALID_AGGREGATION_PARAMS'], + ['NaN threshold', { runs: 4, threshold: NaN }, 'INVALID_AGGREGATION_PARAMS'], + ])('throws ValidationError for %s', (_, { runs, threshold }, expectedCode) => { + const perAssertionResults = [ + { requirement: 'test', runResults: [{ passed: true }] } + ]; + + const error = Try(aggregatePerAssertionResults, { perAssertionResults, threshold, runs }); + + assert({ + given: _, + should: 'have ValidationError name in cause', + actual: error?.cause?.name, + expected: 'ValidationError' + }); + + assert({ + given: _, + should: 'have correct error code in cause', + actual: error?.cause?.code, + expected: expectedCode + }); + }); +}); + +describe('normalizeJudgment()', () => { + const createMockLogger = () => ({ log: vi.fn() }); + + test('passes through complete valid input unchanged', () => { + const logger = 
createMockLogger(); + const judgeResponse = { passed: true, actual: 'Result from agent', expected: 'Expected output', score: 85 }; + + const result = normalizeJudgment({ judgeResponse, requirement: 'test assertion', runIndex: 0, logger }); + + assert({ + given: 'complete valid judgment with passed: true', + should: 'preserve passed as true', + actual: result.passed, + expected: true + }); + + assert({ + given: 'complete valid judgment', + should: 'preserve actual value', + actual: result.actual, + expected: 'Result from agent' + }); + + assert({ + given: 'complete valid judgment', + should: 'preserve expected value', + actual: result.expected, + expected: 'Expected output' + }); + + assert({ + given: 'complete valid judgment with score 85', + should: 'preserve score value', + actual: result.score, + expected: 85 + }); + }); + + test('defaults passed to false when missing', () => { + const logger = createMockLogger(); + const result = normalizeJudgment({ + judgeResponse: { actual: 'Result', expected: 'Expected', score: 50 }, + requirement: 'test', + runIndex: 0, + logger + }); + + assert({ + given: 'judgment missing passed field', + should: 'default passed to false', + actual: result.passed, + expected: false + }); + }); + + test('defaults missing actual and expected with warning log', () => { + const logger = createMockLogger(); + const result = normalizeJudgment({ + judgeResponse: { passed: true, score: 100 }, + requirement: 'test assertion', + runIndex: 2, + logger + }); + + assert({ + given: 'judgment missing actual', + should: 'default actual to "No actual provided"', + actual: result.actual, + expected: 'No actual provided' + }); + + assert({ + given: 'judgment missing expected', + should: 'default expected to "No expected provided"', + actual: result.expected, + expected: 'No expected provided' + }); + + assert({ + given: 'judgment missing actual and expected', + should: 'log warning with requirement and run number', + actual: logger.log.mock.calls[0][0], + 
expected: 'Warning: Judge response missing fields for "test assertion" run 3' + }); + }); + + test.each([ + ['score 150', 150, 100], + ['score -50', -50, 0], + ['NaN score', NaN, 0], + ])('normalizes %s correctly', (_, score, expected) => { + const logger = createMockLogger(); + const result = normalizeJudgment({ + judgeResponse: { passed: true, actual: 'Result', expected: 'Expected', score }, + requirement: 'test', + runIndex: 0, + logger + }); + + assert({ + given: `judgment with ${_}`, + should: `normalize score to ${expected}`, + actual: result.score, + expected + }); + }); + + test('defaults missing score to 0', () => { + const logger = createMockLogger(); + const result = normalizeJudgment({ + judgeResponse: { passed: true, actual: 'Result', expected: 'Expected' }, + requirement: 'test', + runIndex: 0, + logger + }); + + assert({ + given: 'judgment missing score', + should: 'default to 0', + actual: result.score, + expected: 0 + }); + }); + + test.each([ + ['null input', null], + ['string input', 'not an object'], + ['undefined input', undefined], + ])('throws ParseError for %s', (_, input) => { + const logger = createMockLogger(); + const error = Try(normalizeJudgment, { judgeResponse: input, requirement: 'test assertion', runIndex: 1, logger }); + + assert({ + given: _, + should: 'have ParseError name in cause', + actual: error?.cause?.name, + expected: 'ParseError' + }); + + assert({ + given: _, + should: 'have JUDGE_INVALID_RESPONSE code in cause', + actual: error?.cause?.code, + expected: 'JUDGE_INVALID_RESPONSE' + }); + }); + + test('includes requirement and runIndex in ParseError cause', () => { + const logger = createMockLogger(); + const error = Try(normalizeJudgment, { judgeResponse: null, requirement: 'test assertion', runIndex: 1, logger }); + + assert({ + given: 'null input', + should: 'include requirement in cause', + actual: error?.cause?.requirement, + expected: 'test assertion' + }); + + assert({ + given: 'null input', + should: 'include 
runIndex in cause', + actual: error?.cause?.runIndex, + expected: 1 + }); + + assert({ + given: 'null input', + should: 'include rawResponse in cause', + actual: error?.cause?.rawResponse, + expected: null + }); + }); +}); diff --git a/source/constants.js b/source/constants.js index 71c17b6d..2624ee0c 100644 --- a/source/constants.js +++ b/source/constants.js @@ -47,7 +47,7 @@ export const agentSchema = z.enum(constraints.supportedAgents, { message: `agent must be one of: ${constraints.supportedAgents.join(', ')}` }); -export const calculateRequiredPassesSchema = z.object({ +export const aggregationParamsSchema = z.object({ runs: runsSchema, threshold: thresholdSchema }); diff --git a/source/constants.test.js b/source/constants.test.js index b68a15a9..ce80e926 100644 --- a/source/constants.test.js +++ b/source/constants.test.js @@ -8,7 +8,7 @@ import { concurrencySchema, timeoutSchema, agentSchema, - calculateRequiredPassesSchema, + aggregationParamsSchema, aiTestOptionsSchema } from './constants.js'; @@ -346,9 +346,9 @@ describe('constants module', () => { }); }); - describe('calculateRequiredPassesSchema', () => { + describe('aggregationParamsSchema', () => { test('validates complete object with defaults', () => { - const result = calculateRequiredPassesSchema.parse({ + const result = aggregationParamsSchema.parse({ runs: 5, threshold: 80 }); @@ -362,7 +362,7 @@ describe('constants module', () => { }); test('reports multiple validation errors', () => { - const result = calculateRequiredPassesSchema.safeParse({ + const result = aggregationParamsSchema.safeParse({ runs: -1, threshold: 150 }); diff --git a/source/execute-agent.js b/source/execute-agent.js new file mode 100644 index 00000000..a22d9c11 --- /dev/null +++ b/source/execute-agent.js @@ -0,0 +1,184 @@ +import { spawn } from 'child_process'; +import { createError } from 'error-causes'; +import { ParseError, TimeoutError, AgentProcessError } from './ai-errors.js'; +import { createDebugLogger } from 
// Maximum number of stdout/stderr characters echoed into error details.
const maxOutputPreviewLength = 500;

/**
 * Race a promise against a timer.
 *
 * The timer is cleared as soon as the race settles, so a fast result does not
 * leave a pending timer keeping the event loop alive for the remaining `ms`.
 * The losing promise is not cancelled: work that must be stopped on timeout
 * (for example a child process) has to be stopped by the caller.
 *
 * @param {Promise} promise - The work to wait for.
 * @param {number} ms - Milliseconds to wait before rejecting.
 * @param {Function} errorFactory - Called on timeout; its return value is passed to createError.
 * @returns {Promise} Settles like `promise`, or rejects with the created error on timeout.
 */
const withTimeout = (promise, ms, errorFactory) => {
  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(() => reject(createError(errorFactory())), ms);
  });
  return Promise.race([promise, deadline]).finally(() => clearTimeout(timer));
};

/**
 * Accumulate a child process's stdout and stderr until it closes.
 *
 * @param {Object} proc - Object exposing `stdout`/`stderr` emitters and `close`/`error` events.
 * @returns {Promise<{ stdout: string, stderr: string, code: * }>} Resolves on
 *   'close' with the collected output and the code passed to that event;
 *   rejects with the value of an 'error' event.
 */
const collectProcessOutput = (proc) =>
  new Promise((resolve, reject) => {
    let stdout = '';
    let stderr = '';

    proc.stdout.on('data', (data) => { stdout += data.toString(); });
    proc.stderr.on('data', (data) => { stderr += data.toString(); });
    proc.on('close', (code) => { resolve({ stdout, stderr, code }); });
    proc.on('error', reject);
  });

/**
 * Spawn an agent CLI subprocess and collect output.
 * Logger is injected to avoid coupling: it is created once at executeAgent level.
 *
 * The prompt is appended as the final command-line argument, and stdin is
 * closed immediately after spawning.
 *
 * @param {Object} options
 * @param {{ command: string, args?: string[] }} options.agentConfig
 * @param {string} options.prompt
 * @param {Object} options.logger - Must provide `log` and `command`.
 * @returns {Promise<{ stdout: string, stderr: string, code: * }>}
 * @throws {Error} Created via createError with AgentProcessError fields when
 *   spawning or output collection fails.
 */
const spawnProcess = async ({ agentConfig, prompt, logger }) => {
  const { command, args = [] } = agentConfig;
  const allArgs = [...args, prompt];

  logger.log('\nExecuting agent command:');
  logger.command(command, args);
  logger.log(`Prompt length: ${prompt.length} characters`);

  try {
    const proc = spawn(command, allArgs);
    proc.stdin.end();
    return await collectProcessOutput(proc);
  } catch (err) {
    throw createError({
      ...AgentProcessError,
      message: `Failed to spawn agent process: ${err.message}`,
      command,
      args: args.join(' '),
      cause: err
    });
  }
};

/**
 * Try to unwrap a JSON envelope { result: ... } from a raw string. Falls back
 * to the original string if it does not start with `{` or is not valid JSON.
 * The unwrapped value is whatever unwrapEnvelope returns, so it is not
 * necessarily a string; callers must check.
 *
 * @param {string} output - Raw agent output.
 * @returns {*} The unwrapped value, or `output` unchanged.
 */
const unwrapRawOutput = (output) => {
  if (!output.trim().startsWith('{')) return output;
  try {
    return unwrapEnvelope(JSON.parse(output));
  } catch {
    return output;
  }
};
+ */ +const processAgentOutput = ({ agentConfig, rawOutput, logger }) => ({ stdout }) => { + const { command, args = [], parseOutput } = agentConfig; + + try { + const processedOutput = parseOutput ? parseOutput(stdout, logger) : stdout; + + if (rawOutput) { + logger.log('Raw output requested - unwrapping JSON envelope'); + const result = unwrapRawOutput(processedOutput); + + if (typeof result !== 'string') { + throw createError({ + ...ParseError, + message: `Raw output requested but result is not a string: ${typeof result}`, + resultType: typeof result + }); + } + + logger.log(`Returning raw output (${result.length} characters)`); + logger.flush(); + return result; + } + + const result = unwrapAgentResult(processedOutput, logger); + logger.result(result); + logger.flush(); + return result; + } catch (err) { + const truncatedStdout = stdout.length > maxOutputPreviewLength ? `${stdout.slice(0, maxOutputPreviewLength)}...` : stdout; + logger.log('JSON parsing failed:', err.message); + logger.flush(); + + throw createError({ + ...ParseError, + message: `Failed to parse agent output as JSON: ${err.message}`, + code: 'AGENT_OUTPUT_PARSE_ERROR', + command, + args: args.join(' '), + stdoutPreview: truncatedStdout, + cause: err + }); + } +}; + +const runAgentProcess = async ({ agentConfig, prompt, timeout, logger }) => { + const { command, args = [] } = agentConfig; + + const { stdout, stderr, code } = await withTimeout( + spawnProcess({ agentConfig, prompt, logger }), + timeout, + () => ({ + ...TimeoutError, + message: `Agent process timed out after ${timeout}ms. Command: ${command} ${args.join(' ')}`, + command, + args: args.join(' '), + timeout + }) + ); + + logger.log(`Process exited with code: ${code}`); + logger.log(`Stdout length: ${stdout.length} characters`); + logger.log(`Stderr length: ${stderr.length} characters`); + + if (code !== 0) { + const truncatedStdout = stdout.length > maxOutputPreviewLength ? 
`${stdout.slice(0, maxOutputPreviewLength)}...` : stdout; + const truncatedStderr = stderr.length > maxOutputPreviewLength ? `${stderr.slice(0, maxOutputPreviewLength)}...` : stderr; + + logger.log('Process failed with non-zero exit code'); + logger.flush(); + + throw createError({ + ...AgentProcessError, + message: `Agent process exited with code ${code}\n` + + `Command: ${command} ${args.join(' ')}\n` + + `Stderr: ${truncatedStderr}\n` + + `Stdout preview: ${truncatedStdout}`, + command, + args: args.join(' '), + exitCode: code, + stderr: truncatedStderr, + stdoutPreview: truncatedStdout + }); + } + + return { stdout }; +}; + +/** + * Execute an agent CLI subprocess and return parsed JSON output or raw string. + * @param {Object} options + * @param {Object} options.agentConfig - Agent configuration + * @param {string} options.agentConfig.command - Command to execute + * @param {Array} [options.agentConfig.args=[]] - Command arguments + * @param {Function} [options.agentConfig.parseOutput] - Optional stdout preprocessor + * @param {string} options.prompt - Prompt to send to the agent + * @param {number} [options.timeout=300000] - Timeout in ms (default: 5 minutes) + * @param {boolean} [options.debug=false] - Enable debug logging + * @param {string} [options.logFile] - Optional log file path for debug output + * @param {boolean} [options.rawOutput=false] - Return raw stdout string without JSON parsing + * @returns {Promise} Parsed JSON response or raw string if rawOutput=true + */ +export const executeAgent = async ({ + agentConfig, + prompt, + timeout = 300000, + debug = false, + logFile, + rawOutput = false +}) => { + const logger = createDebugLogger({ debug, logFile }); + const processResult = await runAgentProcess({ agentConfig, prompt, timeout, logger }); + return processAgentOutput({ agentConfig, rawOutput, logger })(processResult); +}; diff --git a/source/execute-agent.test.js b/source/execute-agent.test.js new file mode 100644 index 00000000..01b934a0 --- 
/dev/null +++ b/source/execute-agent.test.js @@ -0,0 +1,257 @@ +import { describe, test, vi, beforeEach } from 'vitest'; +import { assert } from './vitest.js'; +import { Try } from './riteway.js'; + +vi.mock('child_process', () => ({ + spawn: vi.fn() +})); + +// Import after mock is registered +const { spawn } = await import('child_process'); +const { executeAgent } = await import('./execute-agent.js'); + +/** + * Build a mock child process that emits stdout/stderr data then closes. + * @param {Object} options + * @param {string} [options.stdout=''] - Data to emit on stdout + * @param {string} [options.stderr=''] - Data to emit on stderr + * @param {number} [options.exitCode=0] - Exit code for the close event + */ +const createMockProcess = ({ stdout = '', stderr = '', exitCode = 0 } = {}) => { + const listeners = { stdout: {}, stderr: {}, proc: {} }; + + const proc = { + stdout: { + on: (event, cb) => { + listeners.stdout[event] = cb; + } + }, + stderr: { + on: (event, cb) => { + listeners.stderr[event] = cb; + } + }, + stdin: { end: vi.fn() }, + on: (event, cb) => { + listeners.proc[event] = cb; + } + }; + + // Emit events asynchronously after the next tick so all listeners are registered + setTimeout(() => { + if (stdout && listeners.stdout.data) listeners.stdout.data(stdout); + if (stderr && listeners.stderr.data) listeners.stderr.data(stderr); + if (listeners.proc.close) listeners.proc.close(exitCode); + }, 0); + + return proc; +}; + +const agentConfig = { + command: 'claude', + args: ['-p', '--output-format', 'json', '--no-session-persistence'] +}; + +describe('executeAgent()', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + test('returns parsed JSON from agent stdout', async () => { + const agentResponse = JSON.stringify({ passed: true, score: 90 }); + spawn.mockReturnValue(createMockProcess({ stdout: agentResponse })); + + const result = await executeAgent({ + agentConfig, + prompt: 'test prompt' + }); + + assert({ + given: 'agent stdout with 
valid JSON', + should: 'return parsed result object', + actual: result, + expected: { passed: true, score: 90 } + }); + }); + + test('unwraps Claude CLI envelope and returns parsed inner result', async () => { + const innerResult = { passed: true, score: 85 }; + const envelope = JSON.stringify({ result: JSON.stringify(innerResult) }); + spawn.mockReturnValue(createMockProcess({ stdout: envelope })); + + const result = await executeAgent({ + agentConfig, + prompt: 'test prompt' + }); + + assert({ + given: 'Claude CLI JSON envelope wrapping a stringified result', + should: 'unwrap and parse the inner result', + actual: result, + expected: innerResult + }); + }); + + test('returns raw string when rawOutput is true', async () => { + const rawText = 'This is raw agent output'; + spawn.mockReturnValue(createMockProcess({ stdout: rawText })); + + const result = await executeAgent({ + agentConfig, + prompt: 'test prompt', + rawOutput: true + }); + + assert({ + given: 'rawOutput: true and plain text stdout', + should: 'return the raw string as-is', + actual: result, + expected: rawText + }); + }); + + test('unwraps envelope when rawOutput is true', async () => { + const innerText = 'raw output from agent'; + const envelope = JSON.stringify({ result: innerText }); + spawn.mockReturnValue(createMockProcess({ stdout: envelope })); + + const result = await executeAgent({ + agentConfig, + prompt: 'test prompt', + rawOutput: true + }); + + assert({ + given: 'rawOutput: true and JSON envelope wrapping a string', + should: 'unwrap and return the inner string', + actual: result, + expected: innerText + }); + }); + + test('applies parseOutput preprocessor before parsing result', async () => { + const ndjsonOutput = '{"type":"text","part":{"text":"{\\"passed\\":true}"}}'; + const parseOutput = vi.fn(() => '{"passed":true}'); + spawn.mockReturnValue(createMockProcess({ stdout: ndjsonOutput })); + + const result = await executeAgent({ + agentConfig: { ...agentConfig, parseOutput }, + 
prompt: 'test prompt' + }); + + assert({ + given: 'agentConfig with parseOutput function', + should: 'call parseOutput exactly once', + actual: parseOutput.mock.calls.length, + expected: 1 + }); + + assert({ + given: 'agentConfig with parseOutput function', + should: 'pass the raw stdout as first argument', + actual: parseOutput.mock.calls[0][0], + expected: ndjsonOutput + }); + + assert({ + given: 'parseOutput returns valid JSON', + should: 'return the parsed result', + actual: result, + expected: { passed: true } + }); + }); + + test('throws AgentProcessError when exit code is non-zero', async () => { + spawn.mockReturnValue(createMockProcess({ + stdout: '', + stderr: 'Permission denied', + exitCode: 1 + })); + + const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt' }); + + assert({ + given: 'non-zero exit code from agent process', + should: 'throw Error with AgentProcessError cause', + actual: err?.cause?.name, + expected: 'AgentProcessError' + }); + + assert({ + given: 'non-zero exit code', + should: 'have AGENT_PROCESS_FAILURE code', + actual: err?.cause?.code, + expected: 'AGENT_PROCESS_FAILURE' + }); + + assert({ + given: 'non-zero exit code', + should: 'include exit code in cause', + actual: err?.cause?.exitCode, + expected: 1 + }); + }); + + test('throws TimeoutError when timeout is exceeded', async () => { + // Process that never closes + const proc = { + stdout: { on: vi.fn() }, + stderr: { on: vi.fn() }, + stdin: { end: vi.fn() }, + on: vi.fn() + }; + spawn.mockReturnValue(proc); + + const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt', timeout: 1 }); + + assert({ + given: 'agent process that exceeds timeout', + should: 'throw Error with TimeoutError cause', + actual: err?.cause?.name, + expected: 'TimeoutError' + }); + + assert({ + given: 'timeout exceeded', + should: 'have AGENT_TIMEOUT code', + actual: err?.cause?.code, + expected: 'AGENT_TIMEOUT' + }); + }); + + test('throws ParseError when stdout is not valid 
JSON (rawOutput: false)', async () => { + spawn.mockReturnValue(createMockProcess({ stdout: 'not valid json output' })); + + const err = await Try(executeAgent, { agentConfig, prompt: 'test prompt' }); + + assert({ + given: 'stdout that is not valid JSON', + should: 'throw Error with ParseError cause', + actual: err?.cause?.name, + expected: 'ParseError' + }); + }); + + test('spawns the agent with command, args, and prompt appended', async () => { + spawn.mockReturnValue(createMockProcess({ stdout: '{"ok":true}' })); + + await executeAgent({ + agentConfig, + prompt: 'my prompt' + }); + + assert({ + given: 'valid agentConfig with command and args', + should: 'spawn with the command', + actual: spawn.mock.calls[0][0], + expected: 'claude' + }); + + assert({ + given: 'valid agentConfig with command and args', + should: 'spawn with args including the prompt at the end', + actual: spawn.mock.calls[0][1], + expected: ['-p', '--output-format', 'json', '--no-session-persistence', 'my prompt'] + }); + }); +}); diff --git a/source/extraction-parser.js b/source/extraction-parser.js new file mode 100644 index 00000000..799f37e2 --- /dev/null +++ b/source/extraction-parser.js @@ -0,0 +1,127 @@ +import { createError } from 'error-causes'; +import { ExtractionParseError, ExtractionValidationError } from './ai-errors.js'; +import { readFile } from 'fs/promises'; +import { resolve } from 'path'; + +const assertionRequiredFields = ['id', 'requirement']; + +/** + * Resolve and read import files, concatenating their contents. + * + * SECURITY NOTE: Import paths are NOT validated for path traversal. + * This allows legitimate cross-project imports (e.g., shared prompt libraries). + * Test authors are responsible for not importing sensitive files (.env, credentials). + * See PR #394 remediation epic (Wave 1, Task 2) for design rationale. 
+ */ +export const resolveImportPaths = async (importPaths, projectRoot, debug) => { + if (debug) { + console.error(`[DEBUG] Found ${importPaths.length} imports to resolve`); + } + const importedContents = await Promise.all( + importPaths.map(async importPath => { + const resolvedPath = resolve(projectRoot, importPath); + if (debug) { + console.error(`[DEBUG] Reading import: ${importPath} -> ${resolvedPath}`); + } + try { + return await readFile(resolvedPath, 'utf-8'); + } catch (originalError) { + throw createError({ + name: 'ValidationError', + message: `Failed to read imported prompt file: ${importPath}`, + code: 'PROMPT_READ_FAILED', + path: importPath, + resolvedPath, + cause: originalError + }); + } + }) + ); + const result = importedContents.join('\n\n'); + if (debug) { + console.error(`[DEBUG] Imported content length: ${result.length} characters`); + } + return result; +}; + +/** Extract JSON from markdown code fences if present. */ +export const extractJSONFromMarkdown = (str) => { + const match = str.match(/```(?:json)?\s*\n([\s\S]*?)\n```/); + return match ? match[1] : str; +}; + +/** + * Try to parse a string as JSON, extracting from markdown code fences if needed. + * @throws {Error} If parsing fails + */ +export const tryParseJSON = (str) => { + try { + return JSON.parse(extractJSONFromMarkdown(str)); + } catch (originalError) { + throw createError({ + ...ExtractionParseError, + rawInput: str, + cause: originalError + }); + } +}; + +/** + * Parse and validate extraction output from the agent. + * Accepts either a raw JSON string or an already-parsed object. + * Handles markdown code fences if present. + */ +export const parseExtractionResult = (rawOutput) => { + const parsed = typeof rawOutput === 'string' + ? 
tryParseJSON(rawOutput) + : rawOutput; + + if (typeof parsed !== 'object' || parsed === null) { + throw createError({ + ...ExtractionValidationError, + message: 'Extraction result must be a JSON object', + rawOutput + }); + } + + if (parsed.userPrompt === undefined || parsed.userPrompt === null) { + throw createError({ + ...ExtractionValidationError, + message: 'Extraction result is missing required field: userPrompt', + rawOutput + }); + } + + if (!Array.isArray(parsed.importPaths)) { + throw createError({ + ...ExtractionValidationError, + message: 'Extraction result is missing required field: importPaths (must be an array)', + rawOutput + }); + } + + if (!Array.isArray(parsed.assertions)) { + throw createError({ + ...ExtractionValidationError, + message: 'Extraction result is missing required field: assertions (must be an array)', + rawOutput + }); + } + + // for loop: early throw on first invalid item; index needed for error context + for (let i = 0; i < parsed.assertions.length; i++) { + for (const field of assertionRequiredFields) { + if (parsed.assertions[i][field] === undefined || parsed.assertions[i][field] === null) { + throw createError({ + ...ExtractionValidationError, + message: `Assertion at index ${i} is missing required field: ${field}`, + assertionIndex: i, + missingField: field, + rawOutput + }); + } + } + } + + return parsed; +}; diff --git a/source/extraction-parser.test.js b/source/extraction-parser.test.js new file mode 100644 index 00000000..ba2dfb99 --- /dev/null +++ b/source/extraction-parser.test.js @@ -0,0 +1,226 @@ +import { describe, test } from 'vitest'; +import { assert } from './vitest.js'; +import { Try } from './riteway.js'; +import { parseExtractionResult } from './extraction-parser.js'; + +describe('parseExtractionResult()', () => { + test('parses valid extraction result with required fields', () => { + const validOutput = JSON.stringify({ + userPrompt: 'What is 2 + 2?', + importPaths: ['test.mdc'], + assertions: [ + { id: 1, 
requirement: 'Given simple addition, should add correctly' }, + { id: 2, requirement: 'Given format, should output JSON' } + ] + }); + + const result = parseExtractionResult(validOutput); + + assert({ + given: 'valid extraction result', + should: 'preserve the userPrompt field', + actual: result.userPrompt, + expected: 'What is 2 + 2?' + }); + + assert({ + given: 'valid extraction result', + should: 'preserve the importPaths array', + actual: Array.isArray(result.importPaths), + expected: true + }); + + assert({ + given: 'valid extraction result', + should: 'preserve importPaths values', + actual: result.importPaths[0], + expected: 'test.mdc' + }); + + assert({ + given: 'valid extraction result', + should: 'preserve assertions array length', + actual: result.assertions.length, + expected: 2 + }); + + assert({ + given: 'valid extraction result', + should: 'preserve assertion requirement field', + actual: result.assertions[0].requirement, + expected: 'Given simple addition, should add correctly' + }); + }); + + test('parses JSON wrapped in markdown code fences', () => { + const markdownWrapped = '```json\n{\n "userPrompt": "test prompt",\n "importPaths": [],\n "assertions": [\n {\n "id": 1,\n "requirement": "Given test, should pass"\n }\n ]\n}\n```'; + + const result = parseExtractionResult(markdownWrapped); + + assert({ + given: 'JSON wrapped in markdown code fences', + should: 'parse importPaths as an array', + actual: Array.isArray(result.importPaths), + expected: true + }); + + assert({ + given: 'JSON wrapped in markdown code fences', + should: 'preserve the userPrompt field', + actual: result.userPrompt, + expected: 'test prompt' + }); + + assert({ + given: 'JSON wrapped in markdown code fences', + should: 'preserve assertions array', + actual: result.assertions[0].requirement, + expected: 'Given test, should pass' + }); + }); + + test('parses JSON with surrounding explanation text and markdown fences', () => { + const withExplanation = 'Here is the extraction 
result you requested:\n\n```json\n{\n "userPrompt": "test prompt",\n "importPaths": [],\n "assertions": [\n {\n "id": 1,\n "requirement": "Given test, should pass"\n }\n ]\n}\n```\n\nLet me know if you need more help.'; + + const result = parseExtractionResult(withExplanation); + + assert({ + given: 'JSON with explanation text and markdown fences', + should: 'extract and parse the JSON object', + actual: result.userPrompt, + expected: 'test prompt' + }); + + assert({ + given: 'JSON with explanation text and markdown fences', + should: 'return the parsed content', + actual: result.assertions[0].requirement, + expected: 'Given test, should pass' + }); + }); + + test('accepts an already-parsed object', () => { + const parsed = { + userPrompt: 'test prompt', + importPaths: [], + assertions: [{ id: 1, requirement: 'Given a test, should pass' }] + }; + + const result = parseExtractionResult(parsed); + + assert({ + given: 'an already-parsed object instead of a JSON string', + should: 'validate and return the object directly', + actual: result.userPrompt, + expected: 'test prompt' + }); + + assert({ + given: 'an already-parsed object', + should: 'preserve the assertions', + actual: result.assertions[0].requirement, + expected: 'Given a test, should pass' + }); + }); + + test('throws ExtractionParseError on malformed non-JSON input', () => { + const error = Try(parseExtractionResult, 'This is not JSON at all'); + + assert({ + given: 'non-JSON input', + should: 'throw ExtractionParseError cause', + actual: error?.cause?.name, + expected: 'ExtractionParseError' + }); + + assert({ + given: 'non-JSON input', + should: 'have EXTRACTION_PARSE_FAILURE code', + actual: error?.cause?.code, + expected: 'EXTRACTION_PARSE_FAILURE' + }); + + assert({ + given: 'non-JSON input', + should: 'preserve original JSON SyntaxError as cause', + actual: error?.cause?.cause?.name, + expected: 'SyntaxError' + }); + }); + + test('throws ExtractionValidationError when result has wrong structure', () 
=> { + const error = Try(parseExtractionResult, JSON.stringify({ id: 1, description: 'test', prompt: 'test' })); + + assert({ + given: 'extraction result with invalid structure', + should: 'throw ExtractionValidationError cause', + actual: error?.cause?.name, + expected: 'ExtractionValidationError' + }); + + assert({ + given: 'extraction result with invalid structure', + should: 'have EXTRACTION_VALIDATION_FAILURE code', + actual: error?.cause?.code, + expected: 'EXTRACTION_VALIDATION_FAILURE' + }); + }); + + test.each([ + [ + 'missing importPaths', + { userPrompt: 'test', assertions: [] }, + 'importPaths' + ], + [ + 'missing userPrompt', + { importPaths: [], assertions: [] }, + 'userPrompt' + ], + [ + 'missing assertions', + { userPrompt: 'test', importPaths: [] }, + 'assertions' + ], + ])('throws when %s is missing', (_, input, missingField) => { + const error = Try(parseExtractionResult, JSON.stringify(input)); + + assert({ + given: `extraction result missing ${missingField}`, + should: 'throw ExtractionValidationError', + actual: error?.cause?.name, + expected: 'ExtractionValidationError' + }); + + assert({ + given: `extraction result missing ${missingField}`, + should: 'have descriptive error message', + actual: error?.message?.includes(missingField), + expected: true + }); + }); + + test('throws when assertion is missing required field', () => { + const missingAssertionFields = JSON.stringify({ + userPrompt: 'test', + importPaths: [], + assertions: [{ id: 1 }] + }); + + const error = Try(parseExtractionResult, missingAssertionFields); + + assert({ + given: 'assertion missing the requirement field', + should: 'throw ExtractionValidationError', + actual: error?.cause?.name, + expected: 'ExtractionValidationError' + }); + + assert({ + given: 'assertion missing the requirement field', + should: 'have error message indicating missing field', + actual: error?.message?.includes('requirement'), + expected: true + }); + }); +});