diff --git a/apps/web/src/app/api/ai/chat/route.ts b/apps/web/src/app/api/ai/chat/route.ts index c6becc4fd..fe5506de2 100644 --- a/apps/web/src/app/api/ai/chat/route.ts +++ b/apps/web/src/app/api/ai/chat/route.ts @@ -856,6 +856,27 @@ export async function POST(request: Request) { }); } + // Guard: if truncation left zero messages, the system prompt + tools alone exceed the budget. + // Sending an empty conversation to the model would produce a meaningless response or error. + if (includedMessages.length === 0) { + loggers.ai.error('AI Chat API: Context budget exhausted by system prompt and tools alone', { + model: currentModel, + provider: currentProvider, + contextWindow, + inputBudget, + systemPromptTokens, + toolTokens, + }); + return NextResponse.json( + { + error: 'context_length_exceeded', + message: 'The system configuration (prompts and tools) exceeds this model\'s context window. Please switch to a model with a larger context window.', + details: 'context_length_exceeded', + }, + { status: 413 } + ); + } + const modelMessages = convertToModelMessages(includedMessages as UIMessage[], { tools: filteredTools // Use original tools - no wrapping needed }); @@ -1248,7 +1269,7 @@ export async function POST(request: Request) { error: 'context_length_exceeded', message: wasTruncated ? 'The conversation still exceeds this model\'s context window even after trimming. Please start a new conversation.' - : 'The conversation is too long for this model\'s context window. Older messages have been trimmed — try sending your message again.', + : 'The conversation is too long for this model\'s context window. 
Please start a new conversation or switch to a model with a larger context window.', details: 'context_length_exceeded', }, { status: 413 } diff --git a/apps/web/src/lib/ai/shared/__tests__/error-messages.test.ts b/apps/web/src/lib/ai/shared/__tests__/error-messages.test.ts new file mode 100644 index 000000000..fd00dff04 --- /dev/null +++ b/apps/web/src/lib/ai/shared/__tests__/error-messages.test.ts @@ -0,0 +1,117 @@ +import { describe, it, expect } from 'vitest'; +import { + getAIErrorMessage, + isContextLengthError, + isRateLimitError, + isAuthenticationError, +} from '../error-messages'; + +describe('isContextLengthError', () => { + it('returns false for undefined/empty input', () => { + expect(isContextLengthError(undefined)).toBe(false); + expect(isContextLengthError('')).toBe(false); + }); + + it('detects context_length_exceeded API key', () => { + expect(isContextLengthError('context_length_exceeded')).toBe(true); + expect(isContextLengthError('Error: context_length_exceeded for model gpt-4o')).toBe(true); + }); + + it('detects human-readable context length variants', () => { + expect(isContextLengthError('context length exceeded')).toBe(true); + expect(isContextLengthError('exceeds the context window')).toBe(true); + expect(isContextLengthError('maximum context length is 128000 tokens')).toBe(true); + }); + + it('detects token limit errors', () => { + expect(isContextLengthError('token limit exceeded')).toBe(true); + expect(isContextLengthError('number of tokens exceeds the maximum')).toBe(true); + expect(isContextLengthError('too many tokens in the request')).toBe(true); + }); + + it('detects provider-specific "maximum tokens" phrasing', () => { + expect(isContextLengthError('maximum number of tokens allowed is 200000')).toBe(true); + }); + + it('detects HTTP 413 in status-code patterns only', () => { + expect(isContextLengthError('HTTP 413')).toBe(true); + expect(isContextLengthError('status 413')).toBe(true); + expect(isContextLengthError('error 413: 
payload too large')).toBe(true); + expect(isContextLengthError('code 413')).toBe(true); + }); + + it('does NOT false-positive on bare "413" in other contexts', () => { + expect(isContextLengthError('processed 413 items successfully')).toBe(false); + expect(isContextLengthError('user ID 4130 not found')).toBe(false); + expect(isContextLengthError('port 4135 is in use')).toBe(false); + }); + + it('does NOT match unrelated error messages', () => { + expect(isContextLengthError('rate limit exceeded')).toBe(false); + expect(isContextLengthError('Unauthorized')).toBe(false); + expect(isContextLengthError('Internal server error')).toBe(false); + expect(isContextLengthError('Provider returned error')).toBe(false); + }); +}); + +describe('isRateLimitError', () => { + it('returns false for undefined/empty input', () => { + expect(isRateLimitError(undefined)).toBe(false); + expect(isRateLimitError('')).toBe(false); + }); + + it('detects rate limit errors', () => { + expect(isRateLimitError('rate limit exceeded')).toBe(true); + expect(isRateLimitError('429 Too Many Requests')).toBe(true); + expect(isRateLimitError('402 Payment Required')).toBe(true); + expect(isRateLimitError('Failed after 3 retries')).toBe(true); + expect(isRateLimitError('Provider returned error')).toBe(true); + }); + + it('excludes context-length errors that contain "limit"', () => { + expect(isRateLimitError('token limit exceeded')).toBe(false); + expect(isRateLimitError('context_length_exceeded')).toBe(false); + expect(isRateLimitError('maximum context length limit')).toBe(false); + }); +}); + +describe('isAuthenticationError', () => { + it('detects auth errors', () => { + expect(isAuthenticationError('Unauthorized')).toBe(true); + expect(isAuthenticationError('401 Unauthorized')).toBe(true); + }); + + it('returns false for non-auth errors', () => { + expect(isAuthenticationError(undefined)).toBe(false); + expect(isAuthenticationError('rate limit exceeded')).toBe(false); + }); +}); + 
+describe('getAIErrorMessage', () => { + it('returns generic message for undefined input', () => { + expect(getAIErrorMessage(undefined)).toBe('Something went wrong. Please try again.'); + }); + + it('returns auth message for Unauthorized errors', () => { + expect(getAIErrorMessage('Unauthorized')).toBe( + 'Authentication failed. Please refresh the page and try again.' + ); + }); + + it('returns context-length message for context errors', () => { + const msg = getAIErrorMessage('context_length_exceeded'); + expect(msg).toContain('context window'); + expect(msg).not.toContain('trimmed'); + }); + + it('returns rate-limit message for rate errors', () => { + const msg = getAIErrorMessage('429 Too Many Requests'); + expect(msg).toContain('rate limit'); + }); + + it('returns generic message for unknown errors', () => { + expect(getAIErrorMessage('some random error')).toBe( + 'Something went wrong. Please try again.' + ); + }); +}); diff --git a/apps/web/src/lib/ai/shared/error-messages.ts b/apps/web/src/lib/ai/shared/error-messages.ts index 4d78199a7..c375e578e 100644 --- a/apps/web/src/lib/ai/shared/error-messages.ts +++ b/apps/web/src/lib/ai/shared/error-messages.ts @@ -15,7 +15,7 @@ export function getAIErrorMessage(errorMessage: string | undefined): string { // Context length errors if (isContextLengthError(errorMessage)) { - return 'The conversation is too long for this model\'s context window. Older messages have been trimmed to fit — try sending your message again.'; + return 'The conversation is too long for this model\'s context window. Please start a new conversation or switch to a model with a larger context window.'; } // Rate limit errors @@ -55,7 +55,8 @@ export function isContextLengthError(errorMessage: string | undefined): boolean msg.includes('token limit') || msg.includes('tokens exceeds') || msg.includes('too many tokens') || - errorMessage.includes('413') || + // Match HTTP 413 only in status-code patterns (e.g. 
"status 413", "HTTP 413", "code 413") + /\b(?:status|http|code|error)\s*413\b/i.test(errorMessage) || // OpenRouter / provider-specific phrasing (msg.includes('maximum') && msg.includes('tokens')) ); diff --git a/packages/lib/src/monitoring/__tests__/ai-context-calculator.test.ts b/packages/lib/src/monitoring/__tests__/ai-context-calculator.test.ts new file mode 100644 index 000000000..fadf6423c --- /dev/null +++ b/packages/lib/src/monitoring/__tests__/ai-context-calculator.test.ts @@ -0,0 +1,208 @@ +import { describe, it, expect } from 'vitest'; +import { + estimateTokens, + estimateMessageTokens, + estimateSystemPromptTokens, + estimateToolDefinitionTokens, + getContextWindowSize, + determineMessagesToInclude, + type UIMessage, +} from '../ai-context-calculator'; + +describe('estimateTokens', () => { + it('returns 0 for empty/falsy input', () => { + expect(estimateTokens('')).toBe(0); + expect(estimateTokens(null as unknown as string)).toBe(0); + expect(estimateTokens(undefined as unknown as string)).toBe(0); + }); + + it('estimates ~4 chars/token for ASCII text', () => { + const text = 'Hello, world! 
This is a test.'; + const tokens = estimateTokens(text); + // 28 chars / 4 = 7 + expect(tokens).toBe(Math.ceil(text.length / 4)); + }); + + it('uses ~2 chars/token for CJK-heavy text (>20% non-ASCII)', () => { + const text = '这是一个测试消息'; // 8 CJK characters, 100% non-ASCII + const tokens = estimateTokens(text); + expect(tokens).toBe(Math.ceil(text.length / 2)); + }); + + it('uses 4 chars/token for mostly-ASCII text with minor non-ASCII', () => { + // 80 ASCII chars + 5 non-ASCII = 5/85 ≈ 5.9% non-ASCII → use 4 chars/token + const text = 'a'.repeat(80) + '你好世界呢'; + const tokens = estimateTokens(text); + expect(tokens).toBe(Math.ceil(text.length / 4)); + }); + + it('switches to 2 chars/token at >20% non-ASCII threshold', () => { + // 3 ASCII + 1 CJK = 25% non-ASCII (above 20% threshold) + const text = 'abc你'; + const tokens = estimateTokens(text); + expect(tokens).toBe(Math.ceil(text.length / 2)); + }); + + it('handles emoji as non-ASCII', () => { + // Emoji are multi-byte, charCodeAt > 127 for surrogate pairs + const text = 'Hello 🌍🌍🌍🌍🌍'; // mixed with emoji + const tokens = estimateTokens(text); + expect(tokens).toBeGreaterThan(0); + }); +}); + +describe('estimateMessageTokens', () => { + it('returns overhead tokens for empty message', () => { + const msg: UIMessage = { role: 'user', parts: [] }; + // 5 (role) + 10 (overhead) = 15 + expect(estimateMessageTokens(msg)).toBe(15); + }); + + it('counts text parts', () => { + const msg: UIMessage = { + role: 'user', + parts: [{ type: 'text', text: 'Hello, world!' 
}], + }; + const tokens = estimateMessageTokens(msg); + // 5 (role) + estimateTokens('Hello, world!') + 10 (overhead) + expect(tokens).toBe(5 + estimateTokens('Hello, world!') + 10); + }); + + it('counts tool invocation parts (tool-{name} format)', () => { + const msg: UIMessage = { + role: 'assistant', + parts: [ + { + type: 'tool-search', + toolCallId: 'call_123', + toolName: 'search', + input: { query: 'test' }, + output: { results: ['a', 'b'] }, + state: 'output-available', + }, + ], + }; + const tokens = estimateMessageTokens(msg); + // Should include: 5 (role) + 10 (tool overhead) + toolName + input + output + 10 (msg overhead) + expect(tokens).toBeGreaterThan(25); + }); + + it('handles legacy tool-call/tool-result format via startsWith("tool-")', () => { + const msg: UIMessage = { + role: 'assistant', + parts: [ + { + type: 'tool-myTool', + toolCallId: 'call_456', + toolName: 'myTool', + args: { param: 'value' }, + result: 'done', + }, + ], + }; + const tokens = estimateMessageTokens(msg); + // args falls through to input ?? 
args path + expect(tokens).toBeGreaterThan(15); + }); + + it('handles message with no parts', () => { + const msg: UIMessage = { role: 'user' }; + expect(estimateMessageTokens(msg)).toBe(15); + }); +}); + +describe('estimateSystemPromptTokens', () => { + it('returns 0 for empty prompt', () => { + expect(estimateSystemPromptTokens(undefined)).toBe(0); + expect(estimateSystemPromptTokens('')).toBe(0); + }); + + it('estimates tokens for a prompt', () => { + const prompt = 'You are a helpful assistant.'; + expect(estimateSystemPromptTokens(prompt)).toBe(estimateTokens(prompt)); + }); +}); + +describe('estimateToolDefinitionTokens', () => { + it('returns 0 for empty tools', () => { + expect(estimateToolDefinitionTokens(undefined)).toBe(0); + expect(estimateToolDefinitionTokens({})).toBe(0); + }); + + it('estimates tokens from JSON serialization', () => { + const tools = { search: { description: 'Search the web', parameters: { query: 'string' } } }; + const tokens = estimateToolDefinitionTokens(tools); + expect(tokens).toBeGreaterThan(0); + expect(tokens).toBe(estimateTokens(JSON.stringify(tools))); + }); +}); + +describe('getContextWindowSize', () => { + it('returns correct size for known direct models', () => { + expect(getContextWindowSize('gpt-4o', 'openai')).toBe(128_000); + expect(getContextWindowSize('gpt-4', 'openai')).toBe(8_192); + }); + + it('returns correct size for Anthropic models', () => { + const size = getContextWindowSize('claude-3-5-sonnet-20241022', 'anthropic'); + expect(size).toBe(200_000); + }); + + it('returns correct size for Google models', () => { + expect(getContextWindowSize('gemini-2.5-pro', 'google')).toBe(2_000_000); + expect(getContextWindowSize('gemini-2.5-flash', 'google')).toBe(1_000_000); + }); + + it('returns conservative default for unknown models', () => { + expect(getContextWindowSize('unknown-model', 'unknown-provider')).toBe(200_000); + }); + + it('handles OpenRouter models', () => { + const size = 
getContextWindowSize('anthropic/claude-3.5-sonnet', 'openrouter'); + expect(size).toBe(200_000); + }); +}); + +describe('determineMessagesToInclude', () => { + const makeMsg = (text: string, role: 'user' | 'assistant' = 'user'): UIMessage => ({ + id: text, + role, + parts: [{ type: 'text', text }], + }); + + it('includes all messages when they fit in budget', () => { + const messages = [makeMsg('Hello'), makeMsg('World')]; + const result = determineMessagesToInclude(messages, 10_000, 100, 100); + expect(result.includedMessages).toHaveLength(2); + expect(result.wasTruncated).toBe(false); + }); + + it('truncates oldest messages first when budget is tight', () => { + const messages = [ + makeMsg('a'.repeat(1000)), // ~250 tokens + overhead + makeMsg('b'.repeat(1000)), // ~250 tokens + overhead + makeMsg('c'.repeat(100)), // ~25 tokens + overhead + ]; + // Budget of 100 tokens for messages (after system/tool subtracted) + const result = determineMessagesToInclude(messages, 200, 50, 50); + expect(result.wasTruncated).toBe(true); + expect(result.includedMessages.length).toBeLessThan(3); + // Most recent message should be included + if (result.includedMessages.length > 0) { + expect(result.includedMessages[result.includedMessages.length - 1].id).toBe('c'.repeat(100)); + } + }); + + it('returns empty array when budget is zero or negative', () => { + const messages = [makeMsg('test')]; + const result = determineMessagesToInclude(messages, 100, 60, 60); + expect(result.includedMessages).toHaveLength(0); + expect(result.wasTruncated).toBe(true); + }); + + it('preserves message order', () => { + const messages = [makeMsg('first'), makeMsg('second'), makeMsg('third')]; + const result = determineMessagesToInclude(messages, 50_000, 0, 0); + expect(result.includedMessages.map(m => m.id)).toEqual(['first', 'second', 'third']); + }); +}); diff --git a/packages/lib/src/monitoring/ai-context-calculator.ts b/packages/lib/src/monitoring/ai-context-calculator.ts index 
940c16737..11729ed8f 100644 --- a/packages/lib/src/monitoring/ai-context-calculator.ts +++ b/packages/lib/src/monitoring/ai-context-calculator.ts @@ -5,9 +5,14 @@ * Tracks which messages are included in each API call to determine real context window usage. */ +import { MODEL_CONTEXT_WINDOWS } from './model-context-windows'; + /** * Minimal UIMessage type for token estimation * (Compatible with Vercel AI SDK UIMessage) + * + * Tool parts in the Vercel AI SDK use type `tool-{toolName}` (e.g. "tool-search") + * with fields: toolCallId, toolName, input, output, state. */ export interface UIMessage { id?: string; @@ -15,8 +20,13 @@ export interface UIMessage { parts?: Array<{ type: string; text?: string; + // Tool invocation fields (Vercel AI SDK DynamicToolUIPart) toolCallId?: string; toolName?: string; + input?: unknown; + output?: unknown; + state?: string; + // Legacy fields for backwards compatibility args?: unknown; result?: unknown; }>; @@ -55,9 +65,11 @@ export interface ContextCalculation { export function estimateTokens(text: string): number { if (!text) return 0; - // Detect non-ASCII heavy content (CJK, emoji, etc.) - // CJK Unified Ideographs, Hiragana, Katakana, Hangul, etc. 
- const nonAsciiCount = (text.match(/[^\x00-\x7F]/g) || []).length; + // Count non-ASCII characters efficiently via charCode loop (avoids regex array allocation) + let nonAsciiCount = 0; + for (let i = 0; i < text.length; i++) { + if (text.charCodeAt(i) > 127) nonAsciiCount++; + } const nonAsciiRatio = nonAsciiCount / text.length; // Use 2 chars/token when >20% non-ASCII (CJK-heavy), else 4 chars/token @@ -106,22 +118,24 @@ export function estimateMessageTokens(message: UIMessage): number { for (const part of message.parts) { if (part.type === 'text' && part.text) { tokens += estimateTokens(part.text); - } else if (part.type === 'tool-call' && part.toolCallId) { - // Tool call: function name + args - tokens += 10; // Tool call ID + } else if (part.type.startsWith('tool-')) { + // Vercel AI SDK tool invocation parts: type is "tool-{toolName}" + // Fields: toolCallId, toolName, input, output, state + tokens += 10; // Tool call ID overhead if (part.toolName) { tokens += estimateTokens(part.toolName); } - if (part.args) { - tokens += estimateTokens(JSON.stringify(part.args)); + // Input (tool arguments) + const inputData = part.input ?? part.args; + if (inputData) { + tokens += estimateTokens(JSON.stringify(inputData)); } - } else if (part.type === 'tool-result' && part.toolCallId) { - // Tool result: result data - tokens += 10; // Tool call ID - if (part.result) { - const resultStr = typeof part.result === 'string' - ? part.result - : JSON.stringify(part.result); + // Output (tool result) — only present when state is 'output-available' + const outputData = part.output ?? part.result; + if (outputData) { + const resultStr = typeof outputData === 'string' + ? 
outputData + : JSON.stringify(outputData); tokens += estimateTokens(resultStr); } } @@ -135,127 +149,129 @@ export function estimateMessageTokens(message: UIMessage): number { } /** - * Get context window size for a model - * Returns the maximum number of tokens the model can handle + * Get context window size for a model. + * + * Uses MODEL_CONTEXT_WINDOWS from ai-monitoring.ts as the canonical source of truth. + * Falls back to heuristic matching for models not in the map. */ export function getContextWindowSize(model: string, provider?: string): number { - const providerLower = provider?.toLowerCase() || ''; + // 1. Try exact match against the canonical map (imported at bottom of file to avoid circular deps) + const canonical = getCanonicalContextWindow(model, provider); + if (canonical !== undefined) return canonical; + + // 2. Heuristic fallbacks for models not in the canonical map const modelLower = model.toLowerCase(); + const providerLower = provider?.toLowerCase() || ''; // OpenAI models if (providerLower === 'openai' || modelLower.includes('gpt')) { - // GPT-5.2 models (400k/256k context) if (modelLower.includes('gpt-5.2')) { - if (modelLower.includes('mini') || modelLower.includes('nano')) { - return 256_000; - } - return 400_000; - } - // GPT-5.1 models (400k context) - if (modelLower.includes('gpt-5.1')) { - return 400_000; + return (modelLower.includes('mini') || modelLower.includes('nano')) ? 256_000 : 400_000; } - // GPT-5.0 models (272k/128k context) + if (modelLower.includes('gpt-5.1')) return 400_000; if (modelLower.includes('gpt-5')) { - if (modelLower.includes('mini') || modelLower.includes('nano')) { - return 128_000; - } - return 272_000; + return (modelLower.includes('mini') || modelLower.includes('nano')) ? 
128_000 : 272_000; } - if (modelLower.includes('gpt-4o')) return 128_000; - if (modelLower.includes('gpt-4-turbo')) return 128_000; + if (modelLower.includes('gpt-4o') || modelLower.includes('gpt-4-turbo')) return 128_000; if (modelLower.includes('gpt-4')) return 8_192; if (modelLower.includes('gpt-3.5')) return 16_385; - return 200_000; // Default for newer OpenAI models + return 200_000; } // Anthropic models if (providerLower === 'anthropic' || modelLower.includes('claude')) { - if (modelLower.includes('claude-sonnet-4') || modelLower.includes('claude-4')) { - return 200_000; - } - if (modelLower.includes('claude-3-5') || modelLower.includes('claude-3')) { - return 200_000; - } - return 200_000; // Default for Anthropic + return 200_000; } // Google models if (providerLower === 'google' || modelLower.includes('gemini')) { - if (modelLower.includes('gemini-2.5-pro') || modelLower.includes('gemini-2-5-pro')) { - return 2_000_000; - } - if (modelLower.includes('gemini-2.5-flash') || modelLower.includes('gemini-2-5-flash')) { - return 1_000_000; - } - if (modelLower.includes('gemini-2.0-pro') || modelLower.includes('gemini-2-pro')) { - return 2_000_000; - } - if (modelLower.includes('gemini-2.0-flash') || modelLower.includes('gemini-2-flash')) { - return 1_000_000; - } + if (modelLower.includes('gemini-2.5-pro') || modelLower.includes('gemini-2-5-pro')) return 2_000_000; + if (modelLower.includes('gemini-2.5-flash') || modelLower.includes('gemini-2-5-flash')) return 1_000_000; + if (modelLower.includes('gemini-2.0-pro') || modelLower.includes('gemini-2-pro')) return 2_000_000; + if (modelLower.includes('gemini-2.0-flash') || modelLower.includes('gemini-2-flash')) return 1_000_000; if (modelLower.includes('gemini-1.5-pro')) return 2_000_000; if (modelLower.includes('gemini-1.5-flash')) return 1_000_000; if (modelLower.includes('gemini-pro')) return 32_000; - return 1_000_000; // Default for Google + return 1_000_000; } // xAI models if (providerLower === 'xai' || 
modelLower.includes('grok')) { if (modelLower.includes('grok-4-fast')) return 2_000_000; - if (modelLower.includes('grok')) return 128_000; return 128_000; } // PageSpace (GLM models) if (providerLower === 'pagespace' || modelLower.includes('glm')) { - if (modelLower.includes('glm-5')) return 200_000; - if (modelLower.includes('glm-4.7')) return 200_000; - if (modelLower.includes('glm-4.6')) return 200_000; if (modelLower.includes('glm-4.5')) return 128_000; - return 200_000; // Updated default for GLM + return 200_000; } // MiniMax models if (providerLower === 'minimax' || modelLower.includes('minimax')) { - if (modelLower.includes('m2.5')) return 1_000_000; - return 128_000; // Default for older MiniMax models + return 128_000; } - // OpenRouter - use model-specific limits where known, else 200k conservative default - if (providerLower === 'openrouter') { - // Claude models via OpenRouter - if (modelLower.includes('claude')) return 200_000; - // Gemini models via OpenRouter - if (modelLower.includes('gemini-2.5')) return 1_000_000; - if (modelLower.includes('gemini-2.0') || modelLower.includes('gemini-1.5')) return 1_000_000; - // GPT models via OpenRouter - if (modelLower.includes('gpt-5.2')) { - return modelLower.includes('mini') || modelLower.includes('nano') ? 256_000 : 400_000; - } - if (modelLower.includes('gpt-5.1')) return 400_000; - if (modelLower.includes('gpt-5')) { - return modelLower.includes('mini') || modelLower.includes('nano') ? 128_000 : 272_000; + // Unknown provider/model - conservative default + return 200_000; +} + +/** + * Attempt exact lookup in MODEL_CONTEXT_WINDOWS from ai-monitoring. + * Tries the model directly, then with provider prefix (e.g. "openai/gpt-5"). + */ +function getCanonicalContextWindow(model: string, provider?: string): number | undefined { + const windows = MODEL_CONTEXT_WINDOWS as Record<string, number>; + + // Direct match (e.g. 
"gpt-5.2" or "anthropic/claude-opus-4.5") + if (windows[model] !== undefined) { + return windows[model]; + } + + // Try with provider prefix (e.g. provider="openrouter", model="gpt-5.2" → "openai/gpt-5.2") + if (provider) { + const providerPrefixes = getProviderPrefixes(provider, model); + for (const prefix of providerPrefixes) { + const key = `${prefix}/${model}`; + if (windows[key] !== undefined) { + return windows[key]; + } } - if (modelLower.includes('gpt-4o') || modelLower.includes('gpt-4-turbo')) return 128_000; - // DeepSeek models - commonly 64k or 128k - if (modelLower.includes('deepseek-r1') || modelLower.includes('deepseek-v3')) return 128_000; - if (modelLower.includes('deepseek')) return 64_000; - // Qwen models - if (modelLower.includes('qwen-2.5') || modelLower.includes('qwq')) return 128_000; - if (modelLower.includes('qwen')) return 32_000; - // Llama models - if (modelLower.includes('llama-3') || modelLower.includes('llama3')) return 128_000; - if (modelLower.includes('llama')) return 32_000; - // Mistral models - if (modelLower.includes('mistral-large') || modelLower.includes('mistral-nemo')) return 128_000; - if (modelLower.includes('mistral')) return 32_000; - // OpenRouter platform hard cap is 400k for many endpoints - use 200k as safe default - return 200_000; } - // Unknown provider/model - conservative default - return 200_000; + return undefined; +} + +/** + * Map provider/model to possible MODEL_CONTEXT_WINDOWS key prefixes + */ +function getProviderPrefixes(provider: string, model: string): string[] { + const p = provider.toLowerCase(); + const m = model.toLowerCase(); + + if (p === 'openai') return ['openai']; + if (p === 'anthropic') return ['anthropic']; + if (p === 'google') return ['google']; + if (p === 'xai') return ['x-ai']; + if (p === 'minimax') return ['minimax']; + if (p === 'pagespace') return ['z-ai']; + + // OpenRouter: model strings already include the provider prefix (e.g. 
"anthropic/claude-3.5-sonnet") + // but some may be bare model names — try common prefixes based on model name + if (p === 'openrouter') { + if (m.includes('claude')) return ['anthropic']; + if (m.includes('gpt') || m.includes('o3') || m.includes('o4') || m.includes('o1')) return ['openai']; + if (m.includes('gemini')) return ['google']; + if (m.includes('grok')) return ['x-ai']; + if (m.includes('llama')) return ['meta-llama']; + if (m.includes('mistral') || m.includes('codestral') || m.includes('devstral')) return ['mistralai']; + if (m.includes('deepseek')) return ['deepseek']; + if (m.includes('qwen') || m.includes('qwq')) return ['qwen']; + if (m.includes('minimax')) return ['minimax']; + if (m.includes('glm')) return ['z-ai']; + } + + return []; } /** diff --git a/packages/lib/src/monitoring/ai-monitoring.ts b/packages/lib/src/monitoring/ai-monitoring.ts index ea7fa40de..57be6421e 100644 --- a/packages/lib/src/monitoring/ai-monitoring.ts +++ b/packages/lib/src/monitoring/ai-monitoring.ts @@ -6,6 +6,8 @@ import { db, aiUsageLogs, sql, and, eq, gte, lte } from '@pagespace/db'; import { writeAiUsage } from '../logging/logger-database'; import { loggers } from '../logging/logger-config'; +import { MODEL_CONTEXT_WINDOWS, getContextWindow } from './model-context-windows'; +export { MODEL_CONTEXT_WINDOWS, getContextWindow }; /** * AI Provider Pricing (per 1M tokens) @@ -188,198 +190,9 @@ export const AI_PRICING = { 'default': { input: 0, output: 0 } } as const; -/** - * Model Context Window Sizes (in tokens) - * Maximum context length for each model - * Updated November 2025 - */ -export const MODEL_CONTEXT_WINDOWS = { - // OpenRouter Models - Anthropic - 'anthropic/claude-opus-4.5': 200000, - 'anthropic/claude-sonnet-4.5': 200000, - 'anthropic/claude-haiku-4.5': 200000, - 'anthropic/claude-3.5-sonnet': 200000, - 'anthropic/claude-3-haiku': 200000, - 'anthropic/claude-opus-4.1': 200000, - - // OpenRouter Models - OpenAI - 'openai/gpt-5.2': 400000, - 
'openai/gpt-5.2-codex': 400000, - 'openai/gpt-5.2-mini': 256000, - 'openai/gpt-5.2-nano': 256000, - 'openai/gpt-5.1': 400000, - 'openai/gpt-5.1-codex': 400000, - 'openai/gpt-5.1-codex-mini': 400000, - 'openai/gpt-4o': 128000, - 'openai/gpt-4o-mini': 128000, - 'openai/o3-deep-research': 200000, - 'openai/o4-mini-deep-research': 200000, - 'openai/gpt-5': 272000, - 'openai/gpt-5-mini': 128000, - 'openai/gpt-5-nano': 128000, - 'openai/gpt-oss-120b': 128000, - 'openai/gpt-oss-20b': 128000, - - // OpenRouter Models - Other - 'meta-llama/llama-3.1-405b-instruct': 128000, - 'mistralai/mistral-medium-3.1': 128000, - 'mistralai/mistral-small-3.2-24b-instruct': 32000, - 'mistralai/codestral-2508': 32000, - 'mistralai/devstral-medium': 128000, - 'mistralai/devstral-small': 128000, - - // OpenRouter Models - Google - 'google/gemini-3-pro-preview': 1048576, - 'google/gemini-3-flash-preview': 1048576, - 'google/gemini-2.5-pro': 2000000, - 'google/gemini-2.5-flash': 1000000, - 'google/gemini-2.5-flash-lite': 1000000, - 'google/gemini-2.5-flash-lite-preview-06-17': 1000000, - 'google/gemini-2.0-pro': 2000000, - 'google/gemini-2.0-flash': 1000000, - - // OpenRouter Models - Chinese/Asian - 'z-ai/glm-4.7': 200000, - 'z-ai/glm-4.5v': 128000, - 'z-ai/glm-4.5': 128000, - 'z-ai/glm-4.5-air': 128000, - 'z-ai/glm-4-32b': 128000, - 'qwen/qwen3-max': 128000, - 'qwen/qwen3-235b-a22b-thinking-2507': 128000, - 'qwen/qwen3-235b-a22b-2507': 128000, - 'qwen/qwen3-coder': 128000, - 'moonshotai/kimi-k2': 128000, - 'minimax/minimax-m1': 128000, - 'z-ai/glm-5': 202752, - 'minimax/minimax-m2.5': 204800, - - // OpenRouter Models - DeepSeek - 'deepseek/deepseek-v3.1-terminus': 128000, - - // OpenRouter Models - AI21 - 'ai21/jamba-mini-1.7': 256000, - 'ai21/jamba-large-1.7': 256000, - - // OpenRouter Models - xAI - 'x-ai/grok-4-fast': 2000000, - 'x-ai/grok-4': 128000, - - // OpenRouter Models - Other - 'inception/mercury': 128000, - - // Google AI Direct Models - 'gemini-3-pro': 1048576, - 
'gemini-3-flash-preview': 1048576, - 'gemini-2.5-pro': 2000000, - 'gemini-2.5-flash': 1000000, - 'gemini-2.5-flash-lite': 1000000, - 'gemini-2.0-pro-exp': 2000000, - 'gemini-2.0-flash': 1000000, - 'gemini-2.0-flash-exp': 1000000, - 'gemini-2.0-flash-lite': 1000000, - 'gemini-1.5-flash': 1000000, - 'gemini-1.5-flash-8b': 1000000, - 'gemini-1.5-pro': 2000000, - - // OpenAI Direct Models - 'gpt-5.2': 400000, - 'gpt-5.2-codex': 400000, - 'gpt-5.2-mini': 256000, - 'gpt-5.2-nano': 256000, - 'gpt-5.1': 400000, - 'gpt-5.1-codex': 400000, - 'gpt-5': 272000, - 'gpt-5-mini': 128000, - 'gpt-5-nano': 128000, - 'gpt-4.1-2025-04-14': 400000, - 'gpt-4.1-mini-2025-04-14': 400000, - 'gpt-4.1-nano-2025-04-14': 400000, - 'gpt-4o': 128000, - 'gpt-4o-mini': 128000, - 'gpt-4o-audio-preview': 128000, - 'gpt-4-turbo': 128000, - 'gpt-4': 8192, - 'gpt-3.5-turbo': 16385, - 'o4-mini-2025-04-16': 200000, - 'o3': 200000, - 'o3-mini': 200000, - 'o1': 200000, - 'o1-mini': 200000, - 'o1-preview': 200000, - - // Anthropic Direct Models - 'claude-opus-4-5-20251124': 200000, - 'claude-sonnet-4-5': 200000, - 'claude-sonnet-4-5-20250929': 200000, - 'claude-haiku-4-5-20251001': 200000, - 'claude-opus-4-1-20250805': 200000, - 'claude-sonnet-4-1-20250805': 200000, - 'claude-3-7-sonnet-20250219': 200000, - 'claude-3-5-sonnet-20241022': 200000, - 'claude-3-5-sonnet-20240620': 200000, - 'claude-3-5-sonnet-latest': 200000, - 'claude-3-5-haiku-20241022': 200000, - 'claude-3-5-haiku-latest': 200000, - 'claude-3-opus-20240229': 200000, - 'claude-3-opus-latest': 200000, - 'claude-3-sonnet-20240229': 200000, - 'claude-3-haiku-20240307': 200000, - - // xAI Models - 'grok-4': 128000, - 'grok-4-fast-reasoning': 2000000, - 'grok-4-fast-non-reasoning': 2000000, - 'grok-code-fast-1': 128000, - 'grok-3': 128000, - 'grok-3-latest': 128000, - 'grok-3-fast': 128000, - 'grok-3-fast-latest': 128000, - 'grok-3-mini': 128000, - 'grok-3-mini-latest': 128000, - 'grok-3-mini-fast': 128000, - 'grok-3-mini-fast-latest': 128000, - 
'grok-2': 128000, - 'grok-2-latest': 128000, - 'grok-2-1212': 128000, - 'grok-2-vision': 128000, - 'grok-2-vision-latest': 128000, - 'grok-2-vision-1212': 128000, - 'grok-beta': 128000, - 'grok-vision-beta': 128000, - - // MiniMax Direct Models - 'MiniMax-M2.5': 1000000, - 'MiniMax-M2.1': 128000, - 'MiniMax-M2': 128000, - 'MiniMax-M2-Stable': 128000, - - // PageSpace/GLM Models - 'glm-5': 200000, - 'glm-4.7': 200000, - 'glm-4.6': 200000, - 'glm-4.5': 128000, - 'glm-4.5-air': 128000, - - // Ollama (local) - context varies by model and configuration - 'llama3.2': 128000, - 'llama3.2-vision': 128000, - 'llama3.1': 128000, - 'qwen2.5-coder': 32000, - 'deepseek-r1': 64000, - 'gemma2': 8192, - 'mistral': 32000, - 'phi3': 128000, - - // Default - 'default': 200000 // Updated default for newer models -} as const; - -/** - * Get context window size for a model - */ -export function getContextWindow(model: string): number { - return MODEL_CONTEXT_WINDOWS[model as keyof typeof MODEL_CONTEXT_WINDOWS] || MODEL_CONTEXT_WINDOWS.default; -} +// MODEL_CONTEXT_WINDOWS and getContextWindow are re-exported from model-context-windows.ts +// (imported at top of file) to maintain backwards compatibility. +// The canonical source of truth is packages/lib/src/monitoring/model-context-windows.ts. /** * Calculate cost based on tokens and model diff --git a/packages/lib/src/monitoring/model-context-windows.ts b/packages/lib/src/monitoring/model-context-windows.ts new file mode 100644 index 000000000..48cab95b4 --- /dev/null +++ b/packages/lib/src/monitoring/model-context-windows.ts @@ -0,0 +1,198 @@ +/** + * Model Context Window Sizes (in tokens) + * + * Canonical source of truth for all model context window limits. + * Shared between ai-monitoring.ts and ai-context-calculator.ts. + * + * This file is deliberately dependency-free so it can be imported + * from lightweight modules without pulling in database or logging deps. 
+ * + * Updated November 2025 + */ +export const MODEL_CONTEXT_WINDOWS = { + // OpenRouter Models - Anthropic + 'anthropic/claude-opus-4.5': 200000, + 'anthropic/claude-sonnet-4.5': 200000, + 'anthropic/claude-haiku-4.5': 200000, + 'anthropic/claude-3.5-sonnet': 200000, + 'anthropic/claude-3-haiku': 200000, + 'anthropic/claude-opus-4.1': 200000, + + // OpenRouter Models - OpenAI + 'openai/gpt-5.2': 400000, + 'openai/gpt-5.2-codex': 400000, + 'openai/gpt-5.2-mini': 256000, + 'openai/gpt-5.2-nano': 256000, + 'openai/gpt-5.1': 400000, + 'openai/gpt-5.1-codex': 400000, + 'openai/gpt-5.1-codex-mini': 400000, + 'openai/gpt-4o': 128000, + 'openai/gpt-4o-mini': 128000, + 'openai/o3-deep-research': 200000, + 'openai/o4-mini-deep-research': 200000, + 'openai/gpt-5': 272000, + 'openai/gpt-5-mini': 128000, + 'openai/gpt-5-nano': 128000, + 'openai/gpt-oss-120b': 128000, + 'openai/gpt-oss-20b': 128000, + + // OpenRouter Models - Other + 'meta-llama/llama-3.1-405b-instruct': 128000, + 'mistralai/mistral-medium-3.1': 128000, + 'mistralai/mistral-small-3.2-24b-instruct': 32000, + 'mistralai/codestral-2508': 32000, + 'mistralai/devstral-medium': 128000, + 'mistralai/devstral-small': 128000, + + // OpenRouter Models - Google + 'google/gemini-3-pro-preview': 1048576, + 'google/gemini-3-flash-preview': 1048576, + 'google/gemini-2.5-pro': 2000000, + 'google/gemini-2.5-flash': 1000000, + 'google/gemini-2.5-flash-lite': 1000000, + 'google/gemini-2.5-flash-lite-preview-06-17': 1000000, + 'google/gemini-2.0-pro': 2000000, + 'google/gemini-2.0-flash': 1000000, + + // OpenRouter Models - Chinese/Asian + 'z-ai/glm-4.7': 200000, + 'z-ai/glm-4.5v': 128000, + 'z-ai/glm-4.5': 128000, + 'z-ai/glm-4.5-air': 128000, + 'z-ai/glm-4-32b': 128000, + 'qwen/qwen3-max': 128000, + 'qwen/qwen3-235b-a22b-thinking-2507': 128000, + 'qwen/qwen3-235b-a22b-2507': 128000, + 'qwen/qwen3-coder': 128000, + 'moonshotai/kimi-k2': 128000, + 'minimax/minimax-m1': 128000, + 'z-ai/glm-5': 202752, + 'minimax/minimax-m2.5': 
204800, + + // OpenRouter Models - DeepSeek + 'deepseek/deepseek-v3.1-terminus': 128000, + + // OpenRouter Models - AI21 + 'ai21/jamba-mini-1.7': 256000, + 'ai21/jamba-large-1.7': 256000, + + // OpenRouter Models - xAI + 'x-ai/grok-4-fast': 2000000, + 'x-ai/grok-4': 128000, + + // OpenRouter Models - Other + 'inception/mercury': 128000, + + // Google AI Direct Models + 'gemini-3-pro': 1048576, + 'gemini-3-flash-preview': 1048576, + 'gemini-2.5-pro': 2000000, + 'gemini-2.5-flash': 1000000, + 'gemini-2.5-flash-lite': 1000000, + 'gemini-2.0-pro-exp': 2000000, + 'gemini-2.0-flash': 1000000, + 'gemini-2.0-flash-exp': 1000000, + 'gemini-2.0-flash-lite': 1000000, + 'gemini-1.5-flash': 1000000, + 'gemini-1.5-flash-8b': 1000000, + 'gemini-1.5-pro': 2000000, + + // OpenAI Direct Models + 'gpt-5.2': 400000, + 'gpt-5.2-codex': 400000, + 'gpt-5.2-mini': 256000, + 'gpt-5.2-nano': 256000, + 'gpt-5.1': 400000, + 'gpt-5.1-codex': 400000, + 'gpt-5': 272000, + 'gpt-5-mini': 128000, + 'gpt-5-nano': 128000, + 'gpt-4.1-2025-04-14': 400000, + 'gpt-4.1-mini-2025-04-14': 400000, + 'gpt-4.1-nano-2025-04-14': 400000, + 'gpt-4o': 128000, + 'gpt-4o-mini': 128000, + 'gpt-4o-audio-preview': 128000, + 'gpt-4-turbo': 128000, + 'gpt-4': 8192, + 'gpt-3.5-turbo': 16385, + 'o4-mini-2025-04-16': 200000, + 'o3': 200000, + 'o3-mini': 200000, + 'o1': 200000, + 'o1-mini': 200000, + 'o1-preview': 200000, + + // Anthropic Direct Models + 'claude-opus-4-5-20251124': 200000, + 'claude-sonnet-4-5': 200000, + 'claude-sonnet-4-5-20250929': 200000, + 'claude-haiku-4-5-20251001': 200000, + 'claude-opus-4-1-20250805': 200000, + 'claude-sonnet-4-1-20250805': 200000, + 'claude-3-7-sonnet-20250219': 200000, + 'claude-3-5-sonnet-20241022': 200000, + 'claude-3-5-sonnet-20240620': 200000, + 'claude-3-5-sonnet-latest': 200000, + 'claude-3-5-haiku-20241022': 200000, + 'claude-3-5-haiku-latest': 200000, + 'claude-3-opus-20240229': 200000, + 'claude-3-opus-latest': 200000, + 'claude-3-sonnet-20240229': 200000, + 
'claude-3-haiku-20240307': 200000, + + // xAI Models + 'grok-4': 128000, + 'grok-4-fast-reasoning': 2000000, + 'grok-4-fast-non-reasoning': 2000000, + 'grok-code-fast-1': 128000, + 'grok-3': 128000, + 'grok-3-latest': 128000, + 'grok-3-fast': 128000, + 'grok-3-fast-latest': 128000, + 'grok-3-mini': 128000, + 'grok-3-mini-latest': 128000, + 'grok-3-mini-fast': 128000, + 'grok-3-mini-fast-latest': 128000, + 'grok-2': 128000, + 'grok-2-latest': 128000, + 'grok-2-1212': 128000, + 'grok-2-vision': 128000, + 'grok-2-vision-latest': 128000, + 'grok-2-vision-1212': 128000, + 'grok-beta': 128000, + 'grok-vision-beta': 128000, + + // MiniMax Direct Models + 'MiniMax-M2.5': 1000000, + 'MiniMax-M2.1': 128000, + 'MiniMax-M2': 128000, + 'MiniMax-M2-Stable': 128000, + + // PageSpace/GLM Models + 'glm-5': 202752, + 'glm-4.7': 200000, + 'glm-4.6': 200000, + 'glm-4.5': 128000, + 'glm-4.5-air': 128000, + + // Ollama (local) - context varies by model and configuration + 'llama3.2': 128000, + 'llama3.2-vision': 128000, + 'llama3.1': 128000, + 'qwen2.5-coder': 32000, + 'deepseek-r1': 64000, + 'gemma2': 8192, + 'mistral': 32000, + 'phi3': 128000, + + // Default + 'default': 200000, +} as const; + +/** + * Get context window size for a model (simple key lookup) + */ +export function getContextWindow(model: string): number { + return MODEL_CONTEXT_WINDOWS[model as keyof typeof MODEL_CONTEXT_WINDOWS] || MODEL_CONTEXT_WINDOWS.default; +}