Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion apps/web/src/app/api/ai/chat/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,27 @@ export async function POST(request: Request) {
});
}

// Guard: if truncation left zero messages, the system prompt + tools alone exceed the budget.
// Sending an empty conversation to the model would produce a meaningless response or error.
if (includedMessages.length === 0) {
loggers.ai.error('AI Chat API: Context budget exhausted by system prompt and tools alone', {
model: currentModel,
provider: currentProvider,
contextWindow,
inputBudget,
systemPromptTokens,
toolTokens,
});
return NextResponse.json(
{
error: 'context_length_exceeded',
message: 'The system configuration (prompts and tools) exceeds this model\'s context window. Please switch to a model with a larger context window.',
details: 'context_length_exceeded',
},
{ status: 413 }
);
}

const modelMessages = convertToModelMessages(includedMessages as UIMessage[], {
tools: filteredTools // Use original tools - no wrapping needed
});
Expand Down Expand Up @@ -1248,7 +1269,7 @@ export async function POST(request: Request) {
error: 'context_length_exceeded',
message: wasTruncated
? 'The conversation still exceeds this model\'s context window even after trimming. Please start a new conversation.'
: 'The conversation is too long for this model\'s context window. Older messages have been trimmed — try sending your message again.',
: 'The conversation is too long for this model\'s context window. Please start a new conversation or switch to a model with a larger context window.',
details: 'context_length_exceeded',
},
{ status: 413 }
Expand Down
117 changes: 117 additions & 0 deletions apps/web/src/lib/ai/shared/__tests__/error-messages.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import { describe, it, expect } from 'vitest';
import {
getAIErrorMessage,
isContextLengthError,
isRateLimitError,
isAuthenticationError,
} from '../error-messages';

describe('isContextLengthError', () => {
  // Runs the detector over a batch of messages and checks a uniform verdict.
  const expectVerdict = (messages: string[], verdict: boolean): void => {
    for (const message of messages) {
      expect(isContextLengthError(message)).toBe(verdict);
    }
  };

  it('returns false for undefined/empty input', () => {
    expect(isContextLengthError(undefined)).toBe(false);
    expect(isContextLengthError('')).toBe(false);
  });

  it('detects context_length_exceeded API key', () => {
    expectVerdict(
      ['context_length_exceeded', 'Error: context_length_exceeded for model gpt-4o'],
      true
    );
  });

  it('detects human-readable context length variants', () => {
    expectVerdict(
      [
        'context length exceeded',
        'exceeds the context window',
        'maximum context length is 128000 tokens',
      ],
      true
    );
  });

  it('detects token limit errors', () => {
    expectVerdict(
      [
        'token limit exceeded',
        'number of tokens exceeds the maximum',
        'too many tokens in the request',
      ],
      true
    );
  });

  it('detects provider-specific "maximum tokens" phrasing', () => {
    expectVerdict(['maximum number of tokens allowed is 200000'], true);
  });

  it('detects HTTP 413 in status-code patterns only', () => {
    expectVerdict(
      ['HTTP 413', 'status 413', 'error 413: payload too large', 'code 413'],
      true
    );
  });

  it('does NOT false-positive on bare "413" in other contexts', () => {
    expectVerdict(
      [
        'processed 413 items successfully',
        'user ID 4130 not found',
        'port 4135 is in use',
      ],
      false
    );
  });

  it('does NOT match unrelated error messages', () => {
    expectVerdict(
      ['rate limit exceeded', 'Unauthorized', 'Internal server error', 'Provider returned error'],
      false
    );
  });
});

describe('isRateLimitError', () => {
  it('returns false for undefined/empty input', () => {
    expect(isRateLimitError(undefined)).toBe(false);
    expect(isRateLimitError('')).toBe(false);
  });

  it('detects rate limit errors', () => {
    const rateLimitedMessages = [
      'rate limit exceeded',
      '429 Too Many Requests',
      '402 Payment Required',
      'Failed after 3 retries',
      'Provider returned error',
    ];
    for (const message of rateLimitedMessages) {
      expect(isRateLimitError(message)).toBe(true);
    }
  });

  it('excludes context-length errors that contain "limit"', () => {
    // These mention "limit" but must route to the context-length category instead.
    const contextLengthMessages = [
      'token limit exceeded',
      'context_length_exceeded',
      'maximum context length limit',
    ];
    for (const message of contextLengthMessages) {
      expect(isRateLimitError(message)).toBe(false);
    }
  });
});

describe('isAuthenticationError', () => {
  it('detects auth errors', () => {
    for (const message of ['Unauthorized', '401 Unauthorized']) {
      expect(isAuthenticationError(message)).toBe(true);
    }
  });

  it('returns false for non-auth errors', () => {
    // Missing input and unrelated failures are both non-auth.
    expect(isAuthenticationError(undefined)).toBe(false);
    expect(isAuthenticationError('rate limit exceeded')).toBe(false);
  });
});

describe('getAIErrorMessage', () => {
  // Fallback copy used when no specific category matches.
  const genericMessage = 'Something went wrong. Please try again.';

  it('returns generic message for undefined input', () => {
    expect(getAIErrorMessage(undefined)).toBe(genericMessage);
  });

  it('returns auth message for Unauthorized errors', () => {
    expect(getAIErrorMessage('Unauthorized')).toBe(
      'Authentication failed. Please refresh the page and try again.'
    );
  });

  it('returns context-length message for context errors', () => {
    const message = getAIErrorMessage('context_length_exceeded');
    expect(message).toContain('context window');
    // The copy no longer claims messages were trimmed.
    expect(message).not.toContain('trimmed');
  });

  it('returns rate-limit message for rate errors', () => {
    const message = getAIErrorMessage('429 Too Many Requests');
    expect(message).toContain('rate limit');
  });

  it('returns generic message for unknown errors', () => {
    expect(getAIErrorMessage('some random error')).toBe(genericMessage);
  });
});
5 changes: 3 additions & 2 deletions apps/web/src/lib/ai/shared/error-messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export function getAIErrorMessage(errorMessage: string | undefined): string {

// Context length errors
if (isContextLengthError(errorMessage)) {
return 'The conversation is too long for this model\'s context window. Older messages have been trimmed to fit — try sending your message again.';
return 'The conversation is too long for this model\'s context window. Please start a new conversation or switch to a model with a larger context window.';
}

// Rate limit errors
Expand Down Expand Up @@ -55,7 +55,8 @@ export function isContextLengthError(errorMessage: string | undefined): boolean
msg.includes('token limit') ||
msg.includes('tokens exceeds') ||
msg.includes('too many tokens') ||
errorMessage.includes('413') ||
// Match HTTP 413 only in status-code patterns (e.g. "status 413", "HTTP 413", "code 413")
/\b(?:status|http|code|error)\s*413\b/i.test(errorMessage) ||
// OpenRouter / provider-specific phrasing
(msg.includes('maximum') && msg.includes('tokens'))
);
Expand Down
208 changes: 208 additions & 0 deletions packages/lib/src/monitoring/__tests__/ai-context-calculator.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
import { describe, it, expect } from 'vitest';
import {
estimateTokens,
estimateMessageTokens,
estimateSystemPromptTokens,
estimateToolDefinitionTokens,
getContextWindowSize,
determineMessagesToInclude,
type UIMessage,
} from '../ai-context-calculator';

describe('estimateTokens', () => {
  it('returns 0 for empty/falsy input', () => {
    for (const input of ['', null, undefined]) {
      expect(estimateTokens(input as unknown as string)).toBe(0);
    }
  });

  it('estimates ~4 chars/token for ASCII text', () => {
    const sample = 'Hello, world! This is a test.';
    // Pure ASCII: 4 chars per token, rounded up (28 chars → 7 tokens).
    expect(estimateTokens(sample)).toBe(Math.ceil(sample.length / 4));
  });

  it('uses ~2 chars/token for CJK-heavy text (>20% non-ASCII)', () => {
    const sample = '这是一个测试消息'; // 8 CJK characters, 100% non-ASCII
    expect(estimateTokens(sample)).toBe(Math.ceil(sample.length / 2));
  });

  it('uses 4 chars/token for mostly-ASCII text with minor non-ASCII', () => {
    // 80 ASCII chars + 5 non-ASCII = 5/85 ≈ 5.9% non-ASCII, below the 20% cutoff.
    const sample = 'a'.repeat(80) + '你好世界呢';
    expect(estimateTokens(sample)).toBe(Math.ceil(sample.length / 4));
  });

  it('switches to 2 chars/token at >20% non-ASCII threshold', () => {
    // 3 ASCII + 1 CJK = 25% non-ASCII, above the 20% cutoff.
    const sample = 'abc你';
    expect(estimateTokens(sample)).toBe(Math.ceil(sample.length / 2));
  });

  it('handles emoji as non-ASCII', () => {
    // Emoji are surrogate pairs, so each code unit has charCodeAt > 127.
    expect(estimateTokens('Hello 🌍🌍🌍🌍🌍')).toBeGreaterThan(0);
  });
});

describe('estimateMessageTokens', () => {
  it('returns overhead tokens for empty message', () => {
    const emptyMessage: UIMessage = { role: 'user', parts: [] };
    // 5 tokens for the role + 10 tokens of per-message overhead.
    expect(estimateMessageTokens(emptyMessage)).toBe(15);
  });

  it('counts text parts', () => {
    const text = 'Hello, world!';
    const message: UIMessage = { role: 'user', parts: [{ type: 'text', text }] };
    // role (5) + text estimate + per-message overhead (10)
    expect(estimateMessageTokens(message)).toBe(5 + estimateTokens(text) + 10);
  });

  it('counts tool invocation parts (tool-{name} format)', () => {
    const message: UIMessage = {
      role: 'assistant',
      parts: [
        {
          type: 'tool-search',
          toolCallId: 'call_123',
          toolName: 'search',
          input: { query: 'test' },
          output: { results: ['a', 'b'] },
          state: 'output-available',
        },
      ],
    };
    // role (5) + tool overhead (10) + toolName/input/output estimates + message overhead (10)
    expect(estimateMessageTokens(message)).toBeGreaterThan(25);
  });

  it('handles legacy tool-call/tool-result format via startsWith("tool-")', () => {
    const legacyMessage: UIMessage = {
      role: 'assistant',
      parts: [
        {
          type: 'tool-myTool',
          toolCallId: 'call_456',
          toolName: 'myTool',
          args: { param: 'value' },
          result: 'done',
        },
      ],
    };
    // Legacy `args` is picked up through the `input ?? args` fallback path.
    expect(estimateMessageTokens(legacyMessage)).toBeGreaterThan(15);
  });

  it('handles message with no parts', () => {
    const bareMessage: UIMessage = { role: 'user' };
    expect(estimateMessageTokens(bareMessage)).toBe(15);
  });
});

describe('estimateSystemPromptTokens', () => {
  it('returns 0 for empty prompt', () => {
    for (const emptyPrompt of [undefined, '']) {
      expect(estimateSystemPromptTokens(emptyPrompt)).toBe(0);
    }
  });

  it('estimates tokens for a prompt', () => {
    const systemPrompt = 'You are a helpful assistant.';
    // Delegates to the plain-text token estimator.
    expect(estimateSystemPromptTokens(systemPrompt)).toBe(estimateTokens(systemPrompt));
  });
});

describe('estimateToolDefinitionTokens', () => {
  it('returns 0 for empty tools', () => {
    expect(estimateToolDefinitionTokens(undefined)).toBe(0);
    expect(estimateToolDefinitionTokens({})).toBe(0);
  });

  it('estimates tokens from JSON serialization', () => {
    const tools = { search: { description: 'Search the web', parameters: { query: 'string' } } };
    const estimated = estimateToolDefinitionTokens(tools);
    expect(estimated).toBeGreaterThan(0);
    // The estimate is derived from the JSON-serialized tool definitions.
    expect(estimated).toBe(estimateTokens(JSON.stringify(tools)));
  });
});

describe('getContextWindowSize', () => {
  // Asserts the resolved context window for a model/provider pair.
  const expectWindow = (model: string, provider: string, size: number): void => {
    expect(getContextWindowSize(model, provider)).toBe(size);
  };

  it('returns correct size for known direct models', () => {
    expectWindow('gpt-4o', 'openai', 128_000);
    expectWindow('gpt-4', 'openai', 8_192);
  });

  it('returns correct size for Anthropic models', () => {
    expectWindow('claude-3-5-sonnet-20241022', 'anthropic', 200_000);
  });

  it('returns correct size for Google models', () => {
    expectWindow('gemini-2.5-pro', 'google', 2_000_000);
    expectWindow('gemini-2.5-flash', 'google', 1_000_000);
  });

  it('returns conservative default for unknown models', () => {
    expectWindow('unknown-model', 'unknown-provider', 200_000);
  });

  it('handles OpenRouter models', () => {
    expectWindow('anthropic/claude-3.5-sonnet', 'openrouter', 200_000);
  });
});

describe('determineMessagesToInclude', () => {
  // Builds a minimal single-text-part message; the id doubles as the text.
  const textMessage = (text: string, role: 'user' | 'assistant' = 'user'): UIMessage => ({
    id: text,
    role,
    parts: [{ type: 'text', text }],
  });

  it('includes all messages when they fit in budget', () => {
    const history = [textMessage('Hello'), textMessage('World')];
    const { includedMessages, wasTruncated } = determineMessagesToInclude(history, 10_000, 100, 100);
    expect(includedMessages).toHaveLength(2);
    expect(wasTruncated).toBe(false);
  });

  it('truncates oldest messages first when budget is tight', () => {
    const history = [
      textMessage('a'.repeat(1000)), // ~250 tokens + overhead
      textMessage('b'.repeat(1000)), // ~250 tokens + overhead
      textMessage('c'.repeat(100)), // ~25 tokens + overhead
    ];
    // 200-token window with 50 system + 50 tool tokens reserved → ~100 left for messages.
    const result = determineMessagesToInclude(history, 200, 50, 50);
    expect(result.wasTruncated).toBe(true);
    expect(result.includedMessages.length).toBeLessThan(3);
    // Whatever survives, the newest message must be the last one kept.
    if (result.includedMessages.length > 0) {
      const newest = result.includedMessages[result.includedMessages.length - 1];
      expect(newest.id).toBe('c'.repeat(100));
    }
  });

  it('returns empty array when budget is zero or negative', () => {
    const result = determineMessagesToInclude([textMessage('test')], 100, 60, 60);
    expect(result.includedMessages).toHaveLength(0);
    expect(result.wasTruncated).toBe(true);
  });

  it('preserves message order', () => {
    const history = [textMessage('first'), textMessage('second'), textMessage('third')];
    const result = determineMessagesToInclude(history, 50_000, 0, 0);
    expect(result.includedMessages.map(m => m.id)).toEqual(['first', 'second', 'third']);
  });
});
Loading