Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion apps/web/src/app/api/ai/chat/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,27 @@ export async function POST(request: Request) {
});
}

// Guard: if truncation left zero messages, the system prompt + tools alone exceed the budget.
// Sending an empty conversation to the model would produce a meaningless response or error.
if (includedMessages.length === 0) {
loggers.ai.error('AI Chat API: Context budget exhausted by system prompt and tools alone', {
model: currentModel,
provider: currentProvider,
contextWindow,
inputBudget,
systemPromptTokens,
toolTokens,
});
return NextResponse.json(
{
error: 'context_length_exceeded',
message: 'The system configuration (prompts and tools) exceeds this model\'s context window. Please switch to a model with a larger context window.',
details: 'context_length_exceeded',
},
{ status: 413 }
);
}

const modelMessages = convertToModelMessages(includedMessages as UIMessage[], {
tools: filteredTools // Use original tools - no wrapping needed
});
Expand Down Expand Up @@ -1248,7 +1269,7 @@ export async function POST(request: Request) {
error: 'context_length_exceeded',
message: wasTruncated
? 'The conversation still exceeds this model\'s context window even after trimming. Please start a new conversation.'
: 'The conversation is too long for this model\'s context window. Older messages have been trimmed — try sending your message again.',
: 'The conversation is too long for this model\'s context window. Please start a new conversation or switch to a model with a larger context window.',
details: 'context_length_exceeded',
},
{ status: 413 }
Expand Down
117 changes: 117 additions & 0 deletions apps/web/src/lib/ai/shared/__tests__/error-messages.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import { describe, it, expect } from 'vitest';
import {
getAIErrorMessage,
isContextLengthError,
isRateLimitError,
isAuthenticationError,
} from '../error-messages';

describe('isContextLengthError', () => {
  // Runs the detector over a batch of messages and checks a uniform verdict.
  const expectVerdict = (messages: string[], verdict: boolean): void => {
    for (const message of messages) {
      expect(isContextLengthError(message)).toBe(verdict);
    }
  };

  it('returns false for undefined/empty input', () => {
    expect(isContextLengthError(undefined)).toBe(false);
    expect(isContextLengthError('')).toBe(false);
  });

  it('detects context_length_exceeded API key', () => {
    expectVerdict(
      ['context_length_exceeded', 'Error: context_length_exceeded for model gpt-4o'],
      true
    );
  });

  it('detects human-readable context length variants', () => {
    expectVerdict(
      [
        'context length exceeded',
        'exceeds the context window',
        'maximum context length is 128000 tokens',
      ],
      true
    );
  });

  it('detects token limit errors', () => {
    expectVerdict(
      [
        'token limit exceeded',
        'number of tokens exceeds the maximum',
        'too many tokens in the request',
      ],
      true
    );
  });

  it('detects provider-specific "maximum tokens" phrasing', () => {
    expectVerdict(['maximum number of tokens allowed is 200000'], true);
  });

  it('detects HTTP 413 in status-code patterns only', () => {
    expectVerdict(
      ['HTTP 413', 'status 413', 'error 413: payload too large', 'code 413'],
      true
    );
  });

  it('does NOT false-positive on bare "413" in other contexts', () => {
    expectVerdict(
      [
        'processed 413 items successfully',
        'user ID 4130 not found',
        'port 4135 is in use',
      ],
      false
    );
  });

  it('does NOT match unrelated error messages', () => {
    expectVerdict(
      ['rate limit exceeded', 'Unauthorized', 'Internal server error', 'Provider returned error'],
      false
    );
  });
});

describe('isRateLimitError', () => {
  it('returns false for undefined/empty input', () => {
    expect(isRateLimitError(undefined)).toBe(false);
    expect(isRateLimitError('')).toBe(false);
  });

  it('detects rate limit errors', () => {
    const rateLimitedMessages = [
      'rate limit exceeded',
      '429 Too Many Requests',
      '402 Payment Required',
      'Failed after 3 retries',
      'Provider returned error',
    ];
    for (const message of rateLimitedMessages) {
      expect(isRateLimitError(message)).toBe(true);
    }
  });

  it('excludes context-length errors that contain "limit"', () => {
    // These mention "limit" but must route to the context-length category instead.
    const contextLengthMessages = [
      'token limit exceeded',
      'context_length_exceeded',
      'maximum context length limit',
    ];
    for (const message of contextLengthMessages) {
      expect(isRateLimitError(message)).toBe(false);
    }
  });
});

describe('isAuthenticationError', () => {
  it('detects auth errors', () => {
    for (const message of ['Unauthorized', '401 Unauthorized']) {
      expect(isAuthenticationError(message)).toBe(true);
    }
  });

  it('returns false for non-auth errors', () => {
    // Missing input and unrelated failures are both non-auth.
    expect(isAuthenticationError(undefined)).toBe(false);
    expect(isAuthenticationError('rate limit exceeded')).toBe(false);
  });
});

describe('getAIErrorMessage', () => {
  // Fallback copy used when no specific category matches.
  const genericMessage = 'Something went wrong. Please try again.';

  it('returns generic message for undefined input', () => {
    expect(getAIErrorMessage(undefined)).toBe(genericMessage);
  });

  it('returns auth message for Unauthorized errors', () => {
    expect(getAIErrorMessage('Unauthorized')).toBe(
      'Authentication failed. Please refresh the page and try again.'
    );
  });

  it('returns context-length message for context errors', () => {
    const message = getAIErrorMessage('context_length_exceeded');
    expect(message).toContain('context window');
    // The copy no longer claims messages were trimmed.
    expect(message).not.toContain('trimmed');
  });

  it('returns rate-limit message for rate errors', () => {
    const message = getAIErrorMessage('429 Too Many Requests');
    expect(message).toContain('rate limit');
  });

  it('returns generic message for unknown errors', () => {
    expect(getAIErrorMessage('some random error')).toBe(genericMessage);
  });
});
5 changes: 3 additions & 2 deletions apps/web/src/lib/ai/shared/error-messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export function getAIErrorMessage(errorMessage: string | undefined): string {

// Context length errors
if (isContextLengthError(errorMessage)) {
return 'The conversation is too long for this model\'s context window. Older messages have been trimmed to fit — try sending your message again.';
return 'The conversation is too long for this model\'s context window. Please start a new conversation or switch to a model with a larger context window.';
}

// Rate limit errors
Expand Down Expand Up @@ -55,7 +55,8 @@ export function isContextLengthError(errorMessage: string | undefined): boolean
msg.includes('token limit') ||
msg.includes('tokens exceeds') ||
msg.includes('too many tokens') ||
errorMessage.includes('413') ||
// Match HTTP 413 only in status-code patterns (e.g. "status 413", "HTTP 413", "code 413")
/\b(?:status|http|code|error)\s*413\b/i.test(errorMessage) ||
// OpenRouter / provider-specific phrasing
(msg.includes('maximum') && msg.includes('tokens'))
);
Expand Down
208 changes: 208 additions & 0 deletions packages/lib/src/monitoring/__tests__/ai-context-calculator.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
import { describe, it, expect } from 'vitest';
import {
estimateTokens,
estimateMessageTokens,
estimateSystemPromptTokens,
estimateToolDefinitionTokens,
getContextWindowSize,
determineMessagesToInclude,
type UIMessage,
} from '../ai-context-calculator';

describe('estimateTokens', () => {
  it('returns 0 for empty/falsy input', () => {
    for (const input of ['', null, undefined]) {
      expect(estimateTokens(input as unknown as string)).toBe(0);
    }
  });

  it('estimates ~4 chars/token for ASCII text', () => {
    const sample = 'Hello, world! This is a test.';
    // Pure ASCII: 4 chars per token, rounded up (28 chars → 7 tokens).
    expect(estimateTokens(sample)).toBe(Math.ceil(sample.length / 4));
  });

  it('uses ~2 chars/token for CJK-heavy text (>20% non-ASCII)', () => {
    const sample = '这是一个测试消息'; // 8 CJK characters, 100% non-ASCII
    expect(estimateTokens(sample)).toBe(Math.ceil(sample.length / 2));
  });

  it('uses 4 chars/token for mostly-ASCII text with minor non-ASCII', () => {
    // 80 ASCII chars + 5 non-ASCII = 5/85 ≈ 5.9% non-ASCII, below the 20% cutoff.
    const sample = 'a'.repeat(80) + '你好世界呢';
    expect(estimateTokens(sample)).toBe(Math.ceil(sample.length / 4));
  });

  it('switches to 2 chars/token at >20% non-ASCII threshold', () => {
    // 3 ASCII + 1 CJK = 25% non-ASCII, above the 20% cutoff.
    const sample = 'abc你';
    expect(estimateTokens(sample)).toBe(Math.ceil(sample.length / 2));
  });

  it('handles emoji as non-ASCII', () => {
    // Emoji are surrogate pairs, so each code unit has charCodeAt > 127.
    expect(estimateTokens('Hello 🌍🌍🌍🌍🌍')).toBeGreaterThan(0);
  });
});

describe('estimateMessageTokens', () => {
  it('returns overhead tokens for empty message', () => {
    const emptyMessage: UIMessage = { role: 'user', parts: [] };
    // 5 tokens for the role + 10 tokens of per-message overhead.
    expect(estimateMessageTokens(emptyMessage)).toBe(15);
  });

  it('counts text parts', () => {
    const text = 'Hello, world!';
    const message: UIMessage = { role: 'user', parts: [{ type: 'text', text }] };
    // role (5) + text estimate + per-message overhead (10)
    expect(estimateMessageTokens(message)).toBe(5 + estimateTokens(text) + 10);
  });

  it('counts tool invocation parts (tool-{name} format)', () => {
    const message: UIMessage = {
      role: 'assistant',
      parts: [
        {
          type: 'tool-search',
          toolCallId: 'call_123',
          toolName: 'search',
          input: { query: 'test' },
          output: { results: ['a', 'b'] },
          state: 'output-available',
        },
      ],
    };
    // role (5) + tool overhead (10) + toolName/input/output estimates + message overhead (10)
    expect(estimateMessageTokens(message)).toBeGreaterThan(25);
  });

  it('handles legacy tool-call/tool-result format via startsWith("tool-")', () => {
    const legacyMessage: UIMessage = {
      role: 'assistant',
      parts: [
        {
          type: 'tool-myTool',
          toolCallId: 'call_456',
          toolName: 'myTool',
          args: { param: 'value' },
          result: 'done',
        },
      ],
    };
    // Legacy `args` is picked up through the `input ?? args` fallback path.
    expect(estimateMessageTokens(legacyMessage)).toBeGreaterThan(15);
  });

  it('handles message with no parts', () => {
    const bareMessage: UIMessage = { role: 'user' };
    expect(estimateMessageTokens(bareMessage)).toBe(15);
  });
});

describe('estimateSystemPromptTokens', () => {
  it('returns 0 for empty prompt', () => {
    for (const emptyPrompt of [undefined, '']) {
      expect(estimateSystemPromptTokens(emptyPrompt)).toBe(0);
    }
  });

  it('estimates tokens for a prompt', () => {
    const systemPrompt = 'You are a helpful assistant.';
    // Delegates to the plain-text token estimator.
    expect(estimateSystemPromptTokens(systemPrompt)).toBe(estimateTokens(systemPrompt));
  });
});

describe('estimateToolDefinitionTokens', () => {
  it('returns 0 for empty tools', () => {
    expect(estimateToolDefinitionTokens(undefined)).toBe(0);
    expect(estimateToolDefinitionTokens({})).toBe(0);
  });

  it('estimates tokens from JSON serialization', () => {
    const tools = { search: { description: 'Search the web', parameters: { query: 'string' } } };
    const estimated = estimateToolDefinitionTokens(tools);
    expect(estimated).toBeGreaterThan(0);
    // The estimate is derived from the JSON-serialized tool definitions.
    expect(estimated).toBe(estimateTokens(JSON.stringify(tools)));
  });
});

describe('getContextWindowSize', () => {
  // Asserts the resolved context window for a model/provider pair.
  const expectWindow = (model: string, provider: string, size: number): void => {
    expect(getContextWindowSize(model, provider)).toBe(size);
  };

  it('returns correct size for known direct models', () => {
    expectWindow('gpt-4o', 'openai', 128_000);
    expectWindow('gpt-4', 'openai', 8_192);
  });

  it('returns correct size for Anthropic models', () => {
    expectWindow('claude-3-5-sonnet-20241022', 'anthropic', 200_000);
  });

  it('returns correct size for Google models', () => {
    expectWindow('gemini-2.5-pro', 'google', 2_000_000);
    expectWindow('gemini-2.5-flash', 'google', 1_000_000);
  });

  it('returns conservative default for unknown models', () => {
    expectWindow('unknown-model', 'unknown-provider', 200_000);
  });

  it('handles OpenRouter models', () => {
    expectWindow('anthropic/claude-3.5-sonnet', 'openrouter', 200_000);
  });
});

describe('determineMessagesToInclude', () => {
  // Builds a minimal single-text-part message; the id doubles as the text.
  const textMessage = (text: string, role: 'user' | 'assistant' = 'user'): UIMessage => ({
    id: text,
    role,
    parts: [{ type: 'text', text }],
  });

  it('includes all messages when they fit in budget', () => {
    const history = [textMessage('Hello'), textMessage('World')];
    const { includedMessages, wasTruncated } = determineMessagesToInclude(history, 10_000, 100, 100);
    expect(includedMessages).toHaveLength(2);
    expect(wasTruncated).toBe(false);
  });

  it('truncates oldest messages first when budget is tight', () => {
    const history = [
      textMessage('a'.repeat(1000)), // ~250 tokens + overhead
      textMessage('b'.repeat(1000)), // ~250 tokens + overhead
      textMessage('c'.repeat(100)), // ~25 tokens + overhead
    ];
    // 200-token window with 50 system + 50 tool tokens reserved → ~100 left for messages.
    const result = determineMessagesToInclude(history, 200, 50, 50);
    expect(result.wasTruncated).toBe(true);
    expect(result.includedMessages.length).toBeLessThan(3);
    // Whatever survives, the newest message must be the last one kept.
    if (result.includedMessages.length > 0) {
      const newest = result.includedMessages[result.includedMessages.length - 1];
      expect(newest.id).toBe('c'.repeat(100));
    }
  });

  it('returns empty array when budget is zero or negative', () => {
    const result = determineMessagesToInclude([textMessage('test')], 100, 60, 60);
    expect(result.includedMessages).toHaveLength(0);
    expect(result.wasTruncated).toBe(true);
  });

  it('preserves message order', () => {
    const history = [textMessage('first'), textMessage('second'), textMessage('third')];
    const result = determineMessagesToInclude(history, 50_000, 0, 0);
    expect(result.includedMessages.map(m => m.id)).toEqual(['first', 'second', 'third']);
  });
});
Loading