Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 54 additions & 8 deletions apps/web/src/app/api/ai/chat/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ import {
} from '@/lib/ai/core/stream-abort-registry';
import { validateUserMessageFileParts, hasFileParts } from '@/lib/ai/core/validate-image-parts';
import { hasVisionCapability } from '@/lib/ai/core/model-capabilities';
import {
determineMessagesToInclude,
getContextWindowSize,
estimateSystemPromptTokens,
estimateToolDefinitionTokens,
} from '@pagespace/lib/ai-context-calculator';
import { isContextLengthError } from '@/lib/ai/shared/error-messages';


// Allow streaming responses up to 5 minutes for complex AI agent interactions
Expand Down Expand Up @@ -741,13 +748,10 @@ export async function POST(request: Request) {
});
}

// Convert UIMessages to ModelMessages for the AI model
// First sanitize messages to remove tool parts without results (prevents "input-available" state errors)
// Sanitize messages to remove tool parts without results (prevents "input-available" state errors)
// NOTE: We use database-loaded messages, NOT messages from client
// modelMessages is computed after system prompt is built so we can apply context truncation
const sanitizedMessages = sanitizeMessagesForModel(conversationHistory);
const modelMessages = convertToModelMessages(sanitizedMessages, {
tools: filteredTools // Use original tools - no wrapping needed
});

// Fetch user personalization for AI system prompt injection
const personalization = await getUserPersonalization(userId);
Expand Down Expand Up @@ -818,8 +822,43 @@ export async function POST(request: Request) {
}

loggers.ai.debug('AI Chat API: Tools configured for Page AI', { toolCount: Object.keys(filteredTools).length });

// Context-length guard: proactively truncate oldest messages to fit within the model's context window.
// This prevents AI_APICallError from providers when a conversation grows too long.
// We build modelMessages here (after system prompt) so we have accurate token budgeting.
const fullSystemPrompt = systemPrompt + timestampSystemPrompt + pageTreePrompt;
const contextWindow = getContextWindowSize(currentModel, currentProvider);
const systemPromptTokens = estimateSystemPromptTokens(fullSystemPrompt);
// Cast needed because filteredTools is a ToolSet (Vercel AI SDK type) but calculator expects plain object
const toolTokens = estimateToolDefinitionTokens(filteredTools as Record<string, unknown>);
// Reserve 25% headroom for output tokens and tokenizer inaccuracies
const inputBudget = Math.floor(contextWindow * 0.75);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
const { includedMessages, wasTruncated } = determineMessagesToInclude(
sanitizedMessages,
inputBudget,
systemPromptTokens,
toolTokens
);

if (wasTruncated) {
loggers.ai.warn('AI Chat API: Conversation truncated to fit context window', {
originalMessageCount: sanitizedMessages.length,
includedMessageCount: includedMessages.length,
model: currentModel,
provider: currentProvider,
contextWindow,
inputBudget,
systemPromptTokens,
toolTokens,
});
}

const modelMessages = convertToModelMessages(includedMessages, {
tools: filteredTools // Use original tools - no wrapping needed
});

loggers.ai.info('AI Chat API: Starting streamText for Page AI', { model: currentModel, pageName: page.title });

// Create UI message stream with visual content injection support
// This handles the case where tools return visual content that needs to be injected into the stream
let result;
Expand Down Expand Up @@ -1199,8 +1238,15 @@ export async function POST(request: Request) {
});

// Return a proper error response
return NextResponse.json({
error: 'Failed to process chat request. Please try again.'
const errorMsg = error instanceof Error ? error.message : '';
if (isContextLengthError(errorMsg)) {
return NextResponse.json(
{ error: 'context_length_exceeded', details: errorMsg },
{ status: 413 }
);
}
return NextResponse.json({
error: 'Failed to process chat request. Please try again.'
}, { status: 500 });
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import React, { useEffect, useState, useRef, useMemo, useCallback } from 'react';
import { UIMessage } from 'ai';
import { getAIErrorMessage } from '@/lib/ai/shared/error-messages';
import { usePathname } from 'next/navigation';
import { Button } from '@/components/ui/button';
import { ChatInput, type ChatInputRef } from '@/components/ai/chat/input';
Expand Down Expand Up @@ -787,16 +788,7 @@ const SidebarChatTab: React.FC = () => {
{error && showError && (
<div className="p-2 bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded text-xs flex items-center justify-between">
<p className="text-red-700 dark:text-red-300">
{error.message?.includes('Unauthorized') || error.message?.includes('401')
? 'Authentication failed. Please refresh the page and try again.'
: (error.message?.toLowerCase().includes('rate') ||
error.message?.toLowerCase().includes('limit') ||
error.message?.includes('429') ||
error.message?.includes('402') ||
error.message?.includes('Failed after') ||
error.message?.includes('Provider returned error'))
? 'Free tier rate limit hit. Please try again in a few seconds or subscribe for premium models and access.'
: 'Something went wrong. Please try again.'}
{getAIErrorMessage(error.message)}
</p>
<button
onClick={() => setShowError(false)}
Expand Down
25 changes: 25 additions & 0 deletions apps/web/src/lib/ai/shared/error-messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ export function getAIErrorMessage(errorMessage: string | undefined): string {
return 'Authentication failed. Please refresh the page and try again.';
}

// Context length errors
if (isContextLengthError(errorMessage)) {
return 'The conversation is too long for this model\'s context window. Older messages have been trimmed to fit — try sending your message again.';
}

// Rate limit errors
if (
errorMessage.toLowerCase().includes('rate') ||
Expand All @@ -36,6 +41,26 @@ export function isAuthenticationError(errorMessage: string | undefined): boolean
return errorMessage.includes('Unauthorized') || errorMessage.includes('401');
}

/**
* Check if error is a context length / token limit error
*/
/**
 * Heuristically detect whether an error message indicates a context-length /
 * token-limit failure from an AI provider.
 *
 * Matching is intentionally broad because providers phrase this differently
 * (OpenAI's `context_length_exceeded`, human-readable "context window" text,
 * HTTP 413 status mentions, OpenRouter-style "maximum ... tokens" wording).
 *
 * @param errorMessage - Raw error message text; undefined/empty returns false.
 * @returns true when the message looks like a context-length error.
 */
export function isContextLengthError(errorMessage: string | undefined): boolean {
  if (!errorMessage) return false;

  const lowered = errorMessage.toLowerCase();

  // Case-insensitive phrases that reliably indicate a context-length failure.
  const contextPhrases = [
    'context_length', // API error key: context_length_exceeded
    'context length', // Human-readable variant
    'context window',
    'maximum context',
    'token limit',
    'tokens exceeds',
    'too many tokens',
  ];
  if (contextPhrases.some((phrase) => lowered.includes(phrase))) {
    return true;
  }

  // HTTP 413 (Payload Too Large) — checked against the raw message, as in
  // the original chained expression.
  if (errorMessage.includes('413')) {
    return true;
  }

  // OpenRouter / provider-specific phrasing, e.g. "maximum ... tokens".
  return lowered.includes('maximum') && lowered.includes('tokens');
}

/**
* Check if error is a rate limit error
*/
Expand Down
29 changes: 27 additions & 2 deletions packages/lib/src/monitoring/ai-context-calculator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,33 @@ export function getContextWindowSize(model: string, provider?: string): number {
return 128_000; // Default for older MiniMax models
}

// OpenRouter or unknown
return 200_000; // Conservative default
// OpenRouter - use model-specific limits where known, else 200k conservative default
if (providerLower === 'openrouter') {
// Claude models via OpenRouter
if (modelLower.includes('claude')) return 200_000;
// Gemini models via OpenRouter
if (modelLower.includes('gemini-2.5')) return 1_000_000;
if (modelLower.includes('gemini-2.0') || modelLower.includes('gemini-1.5')) return 1_000_000;
// GPT models via OpenRouter
if (modelLower.includes('gpt-4o') || modelLower.includes('gpt-4-turbo')) return 128_000;
// DeepSeek models - commonly 64k or 128k
if (modelLower.includes('deepseek-r1') || modelLower.includes('deepseek-v3')) return 128_000;
if (modelLower.includes('deepseek')) return 64_000;
// Qwen models
if (modelLower.includes('qwen-2.5') || modelLower.includes('qwq')) return 128_000;
if (modelLower.includes('qwen')) return 32_000;
// Llama models
if (modelLower.includes('llama-3') || modelLower.includes('llama3')) return 128_000;
if (modelLower.includes('llama')) return 32_000;
// Mistral models
if (modelLower.includes('mistral-large') || modelLower.includes('mistral-nemo')) return 128_000;
if (modelLower.includes('mistral')) return 32_000;
// OpenRouter platform hard cap is 400k for many endpoints - use 200k as safe default
return 200_000;
}

// Unknown provider/model - conservative default
return 200_000;
}

/**
Expand Down
Loading