Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 84 additions & 8 deletions apps/web/src/app/api/ai/chat/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ import {
} from '@/lib/ai/core/stream-abort-registry';
import { validateUserMessageFileParts, hasFileParts } from '@/lib/ai/core/validate-image-parts';
import { hasVisionCapability } from '@/lib/ai/core/model-capabilities';
import {
determineMessagesToInclude,
getContextWindowSize,
estimateSystemPromptTokens,
estimateToolDefinitionTokens,
} from '@pagespace/lib/ai-context-calculator';
import { isContextLengthError } from '@/lib/ai/shared/error-messages';


// Allow streaming responses up to 5 minutes for complex AI agent interactions
Expand All @@ -90,6 +97,7 @@ export async function POST(request: Request) {
let selectedProvider: string | undefined;
let selectedModel: string | undefined;
let usagePromise: Promise<LanguageModelUsage | undefined> | undefined;
let wasTruncated = false;
const usageLogger = loggers.ai.child({ module: 'page-ai-usage' });
const permissionLogger = loggers.ai.child({ module: 'page-ai-permissions' });

Expand Down Expand Up @@ -741,13 +749,10 @@ export async function POST(request: Request) {
});
}

// Convert UIMessages to ModelMessages for the AI model
// First sanitize messages to remove tool parts without results (prevents "input-available" state errors)
// Sanitize messages to remove tool parts without results (prevents "input-available" state errors)
// NOTE: We use database-loaded messages, NOT messages from client
// modelMessages is computed after system prompt is built so we can apply context truncation
const sanitizedMessages = sanitizeMessagesForModel(conversationHistory);
const modelMessages = convertToModelMessages(sanitizedMessages, {
tools: filteredTools // Use original tools - no wrapping needed
});

// Fetch user personalization for AI system prompt injection
const personalization = await getUserPersonalization(userId);
Expand Down Expand Up @@ -818,8 +823,66 @@ export async function POST(request: Request) {
}

loggers.ai.debug('AI Chat API: Tools configured for Page AI', { toolCount: Object.keys(filteredTools).length });

// Context-length guard: proactively truncate oldest messages to fit within the model's context window.
// This prevents AI_APICallError from providers when a conversation grows too long.
// We build modelMessages here (after system prompt) so we have accurate token budgeting.
const fullSystemPrompt = systemPrompt + timestampSystemPrompt + pageTreePrompt;
const contextWindow = getContextWindowSize(currentModel, currentProvider);
const systemPromptTokens = estimateSystemPromptTokens(fullSystemPrompt);
// Cast needed because filteredTools is a ToolSet (Vercel AI SDK type) but calculator expects plain object
const toolTokens = estimateToolDefinitionTokens(filteredTools as Record<string, unknown>);
// Reserve 25% headroom for output tokens and tokenizer inaccuracies
const inputBudget = Math.floor(contextWindow * 0.75);
const truncationResult = determineMessagesToInclude(
sanitizedMessages,
inputBudget,
systemPromptTokens,
toolTokens
);
const { includedMessages } = truncationResult;
wasTruncated = truncationResult.wasTruncated;

if (wasTruncated) {
loggers.ai.warn('AI Chat API: Conversation truncated to fit context window', {
originalMessageCount: sanitizedMessages.length,
includedMessageCount: includedMessages.length,
model: currentModel,
provider: currentProvider,
contextWindow,
inputBudget,
systemPromptTokens,
toolTokens,
});
}

// Guard: if truncation left zero messages, the latest message alone exceeds the budget
if (includedMessages.length === 0) {
loggers.ai.error('AI Chat API: No messages fit within context budget', {
model: currentModel,
provider: currentProvider,
contextWindow,
inputBudget,
systemPromptTokens,
toolTokens,
originalMessageCount: sanitizedMessages.length,
});
return NextResponse.json(
{
error: 'context_length_exceeded',
message: 'Your latest message is too large to fit within this model\'s context window. Try shortening your message or starting a new conversation.',
details: 'context_length_exceeded',
},
{ status: 413 }
);
}

const modelMessages = convertToModelMessages(includedMessages as UIMessage[], {
tools: filteredTools // Use original tools - no wrapping needed
});

loggers.ai.info('AI Chat API: Starting streamText for Page AI', { model: currentModel, pageName: page.title });

// Create UI message stream with visual content injection support
// This handles the case where tools return visual content that needs to be injected into the stream
let result;
Expand Down Expand Up @@ -1199,8 +1262,21 @@ export async function POST(request: Request) {
});

// Return a proper error response
return NextResponse.json({
error: 'Failed to process chat request. Please try again.'
const errorMsg = error instanceof Error ? error.message : '';
if (isContextLengthError(errorMsg)) {
return NextResponse.json(
{
error: 'context_length_exceeded',
message: wasTruncated
? 'The conversation still exceeds this model\'s context window even after trimming. Please start a new conversation.'
: 'The conversation is too long for this model\'s context window. Please start a new conversation or try a model with a larger context window.',
details: 'context_length_exceeded',
},
{ status: 413 }
);
}
return NextResponse.json({
error: 'Failed to process chat request. Please try again.'
}, { status: 500 });
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import React, { useEffect, useState, useRef, useMemo, useCallback } from 'react';
import { UIMessage } from 'ai';
import { getAIErrorMessage } from '@/lib/ai/shared/error-messages';
import { usePathname } from 'next/navigation';
import { Button } from '@/components/ui/button';
import { ChatInput, type ChatInputRef } from '@/components/ai/chat/input';
Expand Down Expand Up @@ -787,16 +788,7 @@ const SidebarChatTab: React.FC = () => {
{error && showError && (
<div className="p-2 bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded text-xs flex items-center justify-between">
<p className="text-red-700 dark:text-red-300">
{error.message?.includes('Unauthorized') || error.message?.includes('401')
? 'Authentication failed. Please refresh the page and try again.'
: (error.message?.toLowerCase().includes('rate') ||
error.message?.toLowerCase().includes('limit') ||
error.message?.includes('429') ||
error.message?.includes('402') ||
error.message?.includes('Failed after') ||
error.message?.includes('Provider returned error'))
? 'Free tier rate limit hit. Please try again in a few seconds or subscribe for premium models and access.'
: 'Something went wrong. Please try again.'}
{getAIErrorMessage(error.message)}
</p>
<button
onClick={() => setShowError(false)}
Expand Down
56 changes: 56 additions & 0 deletions apps/web/src/lib/ai/shared/error-messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,35 @@
export function getAIErrorMessage(errorMessage: string | undefined): string {
if (!errorMessage) return 'Something went wrong. Please try again.';

const msg = errorMessage.toLowerCase();

// Authentication errors
if (errorMessage.includes('Unauthorized') || errorMessage.includes('401')) {
return 'Authentication failed. Please refresh the page and try again.';
}

// Request size errors (distinct from context-window limits)
if (
msg.includes('request body too large') ||
msg.includes('payload too large') ||
msg.includes('entity too large')
) {
return 'Your request is too large. Try sending a shorter message or fewer/lower-size attachments.';
}

// Context length errors
if (isContextLengthError(errorMessage)) {
// Preserve server-provided guidance when present (e.g. "even after trimming", "latest message too large")
if (
msg.includes('latest message is too large') ||
msg.includes('even after trimming') ||
msg.includes('too long for this model')
) {
return errorMessage;
}
return 'The conversation is too long for this model\'s context window. Please start a new conversation or use a model with a larger context window.';
}

// Rate limit errors
if (
errorMessage.toLowerCase().includes('rate') ||
Expand All @@ -36,11 +60,43 @@ export function isAuthenticationError(errorMessage: string | undefined): boolean
return errorMessage.includes('Unauthorized') || errorMessage.includes('401');
}

/**
 * Check if error is a context length / token limit error
 */
export function isContextLengthError(errorMessage: string | undefined): boolean {
  if (!errorMessage) return false;
  const lowered = errorMessage.toLowerCase();

  // Non-context 413s (oversized request bodies) must not be mistaken
  // for context-window failures, so rule them out up front.
  const payloadPhrases = [
    'request body too large',
    'payload too large',
    'entity too large',
  ];
  if (payloadPhrases.some((phrase) => lowered.includes(phrase))) {
    return false;
  }

  // Phrases that unambiguously indicate a context-window / token-limit error.
  const contextPhrases = [
    'context_length',   // API error key: context_length_exceeded
    'context length',   // Human-readable variant
    'context window',
    'maximum context',
    'token limit',
    'tokens exceeds',
    'too many tokens',
  ];
  if (contextPhrases.some((phrase) => lowered.includes(phrase))) {
    return true;
  }

  // HTTP 413 status mentioned anywhere in the message, or the
  // OpenRouter / provider-specific "maximum ... tokens" phrasing.
  if (errorMessage.includes('413')) return true;
  return lowered.includes('maximum') && lowered.includes('tokens');
}

/**
* Check if error is a rate limit error
*/
export function isRateLimitError(errorMessage: string | undefined): boolean {
if (!errorMessage) return false;
// Exclude context-length errors that also contain "limit"
if (isContextLengthError(errorMessage)) return false;
return (
errorMessage.toLowerCase().includes('rate') ||
errorMessage.toLowerCase().includes('limit') ||
Expand Down
57 changes: 49 additions & 8 deletions packages/lib/src/monitoring/ai-context-calculator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,21 @@ export interface ContextCalculation {
}

/**
* Estimate tokens in a text string
* Uses 4 characters per token as a rough estimate
* This is conservative - actual token count may be slightly lower
* Estimate tokens in a text string.
* Uses ~4 chars/token for Latin text, ~2 chars/token when significant
* non-ASCII / CJK content is detected (CJK characters often tokenize to 1-2 tokens each).
*/
export function estimateTokens(text: string): number {
if (!text) return 0;

// GPT-style tokenization: ~4 characters per token
// This is conservative to avoid underestimating
return Math.ceil(text.length / 4);
// Detect non-ASCII heavy content (CJK, emoji, etc.)
// CJK Unified Ideographs, Hiragana, Katakana, Hangul, etc.
const nonAsciiCount = (text.match(/[^\x00-\x7F]/g) || []).length;
Copy link
Contributor

@coderabbitai coderabbitai bot Feb 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# Check if the file exists and read the content around line 60
fd "ai-context-calculator.ts" --type f

Repository: 2witstudios/PageSpace

Length of output: 117


🏁 Script executed:

# Once we confirm the file exists, read it to verify the exact content at line 60
cat -n packages/lib/src/monitoring/ai-context-calculator.ts | sed -n '55,65p'

Repository: 2witstudios/PageSpace

Length of output: 636


🏁 Script executed:

# Check for linter configuration in the packages/lib directory
find packages/lib -maxdepth 3 -name "*.json" -o -name "*.mjs" -o -name "biome.json" -o -name ".eslintrc*" | head -20

Repository: 2witstudios/PageSpace

Length of output: 150


🏁 Script executed:

# Check if Biome is configured at the root or in packages
find . -maxdepth 2 -name "biome.json" -o -name "biome.jsonc"

Repository: 2witstudios/PageSpace

Length of output: 77


🏁 Script executed:

# Read the biome.json configuration
cat biome.json

Repository: 2witstudios/PageSpace

Length of output: 1839


🏁 Script executed:

# Check the suggested fix in the review comment - verify if the logic is equivalent
python3 << 'EOF'
# Test if the suggested fix produces the same results as the original

# Original: text.match(/[^\x00-\x7F]/g) matches all non-ASCII characters
# Suggested: reduce with codePointAt check for > 0x7f

test_strings = [
    "hello",  # All ASCII
    "hello🌍",  # ASCII + emoji
    "你好",  # Chinese characters
    "café",  # ASCII + accented char
    "مرحبا",  # Arabic
]

for test_str in test_strings:
    # Simulate original regex behavior
    original_count = len([c for c in test_str if ord(c) > 0x7f])
    
    # Simulate suggested fix behavior
    suggested_count = 0
    for char in test_str:
        code_point = ord(char)
        if code_point > 0x7f:
            suggested_count += 1
    
    print(f"String: '{test_str}' | Original: {original_count} | Suggested: {suggested_count} | Match: {original_count == suggested_count}")
EOF

Repository: 2witstudios/PageSpace

Length of output: 357


🏁 Script executed:

# Check if noControlCharactersInRegex is part of Biome's recommended rules
# and verify if it would be active with the current config
web_search "Biome noControlCharactersInRegex rule"

Repository: 2witstudios/PageSpace

Length of output: 115


🏁 Script executed:

# Search for other similar patterns that might have the same issue
rg -n '\\x00|\\x[0-9A-Fa-f]{2}' --type=ts --type=tsx packages/

Repository: 2witstudios/PageSpace

Length of output: 92


🏁 Script executed:

# Verify the suggested fix doesn't have any edge cases with surrogate pairs or multi-byte chars
python3 << 'EOF'
# Test edge cases with the suggested fix

test_cases = [
    ("hello", 0),  # All ASCII
    ("café", 1),  # Single accented character
    ("🌍", 1),  # Single emoji (surrogate pair in JS)
    ("你好世界", 4),  # Chinese characters
    ("hello🌍world", 1),  # Mixed ASCII and emoji
    ("\x00\x01\x7f", 0),  # Control characters at edge of range
    ("", 0),  # Empty string
]

for test_str, expected in test_cases:
    # Suggested fix logic
    count = 0
    for char in test_str:
        code_point = ord(char)
        if code_point > 0x7f:
            count += 1
    
    status = "✓" if count == expected else "✗"
    print(f"{status} String: {repr(test_str)} | Expected: {expected} | Got: {count}")
EOF

Repository: 2witstudios/PageSpace

Length of output: 355


🏁 Script executed:

# Search for control character patterns in the codebase using ts file type only
rg -n '\\x00|\\x[0-9A-Fa-f]{2}' --type=ts packages/

Repository: 2witstudios/PageSpace

Length of output: 2413


🏁 Script executed:

# Verify Biome rule documentation is available and check if recommended includes the suspicious rule
python3 << 'EOF'
# Check Biome's default rule settings for noControlCharactersInRegex
# Based on the biome.json, the "suspicious" category doesn't explicitly disable the rule,
# which means it uses the recommended default

biome_config = {
    "suspicious": {
        "noArrayIndexKey": "off",
        "noExplicitAny": "off",
        "noImplicitAnyLet": "off",
        "noDoubleEquals": "off",
        "noGlobalIsNan": "off",
        "noAssignInExpressions": "off"
        # noControlCharactersInRegex is NOT listed, so it uses the default (recommended = true for suspicious)
    }
}

print("Analysis of biome.json configuration:")
print("- Linter is enabled: true")
print("- 'recommended' is enabled globally for linter rules")
print("- 'suspicious' category does NOT explicitly disable 'noControlCharactersInRegex'")
print("- Therefore, the rule IS active by default")
print("\nThe regex pattern /[^\\x00-\\x7F]/ contains control character escapes,")
print("which would trigger the 'noControlCharactersInRegex' rule in Biome.")
EOF

Repository: 2witstudios/PageSpace

Length of output: 443


Replace the regex pattern to resolve the noControlCharactersInRegex Biome linter violation.

The pattern /[^\x00-\x7F]/g at line 60 triggers Biome's suspicious/noControlCharactersInRegex rule, which is enabled by default. Use explicit code point checking instead:

Suggested fix
-  const nonAsciiCount = (text.match(/[^\x00-\x7F]/g) || []).length;
+  const nonAsciiCount = [...text].reduce((count, char) => {
+    const codePoint = char.codePointAt(0) ?? 0;
+    return count + (codePoint > 0x7f ? 1 : 0);
+  }, 0);
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
const nonAsciiCount = (text.match(/[^\x00-\x7F]/g) || []).length;
const nonAsciiCount = [...text].reduce((count, char) => {
const codePoint = char.codePointAt(0) ?? 0;
return count + (codePoint > 0x7f ? 1 : 0);
}, 0);
🧰 Tools
🪛 Biome (2.4.4)

[error] 60-60: Unexpected control character in a regular expression.

(lint/suspicious/noControlCharactersInRegex)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@packages/lib/src/monitoring/ai-context-calculator.ts` at line 60, The regex
/[^\x00-\x7F]/g used to compute nonAsciiCount violates the linter; replace that
regex-based approach with explicit code-point checking: iterate over the string
(e.g., using for...of or Array.from) and count characters whose codePointAt(0) >
0x7F, then store that count in nonAsciiCount. Update the expression that
computes nonAsciiCount accordingly so it handles full Unicode code points
(surrogate pairs) and removes the problematic regex.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Investigated — the regex /[^\x00-\x7F]/g does not violate any configured lint rule. CI ESLint passed green on this code. The no-control-regex rule (which is likely what was flagged) targets control characters in \x00-\x1F, not the full ASCII range check we're using here.

The regex approach is also slightly more conservative for surrogate pairs (emoji), since each surrogate half counts separately, inflating nonAsciiCount — which makes the token estimate more conservative (safer for truncation). No change needed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@2witstudios Thank you for investigating this thoroughly! You're absolutely right—I apologize for the false positive. The regex /[^\x00-\x7F]/g is valid and doesn't violate your configured lint rules. The no-control-regex rule (ESLint) indeed targets actual control characters in the \x00-\x1F range, not ASCII boundary checks like this one.

Your point about the conservative behavior with surrogate pairs is excellent—counting each surrogate half separately does inflate nonAsciiCount, which makes the token estimate err on the side of caution for truncation purposes. That's exactly the right approach here.

I've noted this for future reference. Thanks for the clarification!


✏️ Learnings added
Learnt from: 2witstudios
Repo: 2witstudios/PageSpace PR: 708
File: packages/lib/src/monitoring/ai-context-calculator.ts:60-60
Timestamp: 2026-02-27T15:18:17.845Z
Learning: In packages/lib/src/monitoring/ai-context-calculator.ts, the regex `/[^\x00-\x7F]/g` used in estimateTokens() to count non-ASCII characters is valid and does not violate lint rules (CI ESLint passes). The `no-control-regex` rule targets control characters in `\x00-\x1F`, not full ASCII range checks. The regex approach is intentionally conservative for surrogate pairs (emoji), counting each surrogate half separately to inflate nonAsciiCount, making token estimates safer for context truncation.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


🧠 Learnings used
Learnt from: 2witstudios
Repo: 2witstudios/PageSpace PR: 258
File: apps/realtime/src/validation.ts:0-0
Timestamp: 2026-01-27T03:45:52.322Z
Learning: Enforce using paralleldrive/cuid2 for ID generation across the TypeScript codebase (not UUIDs). IDs should follow the CUID2 format: lowercase alphanumeric starting with a letter, matching ^[a-z][a-z0-9]{1,31}$ with a maximum length of 32 characters. Audit code paths that generate IDs (e.g., new UUID usages) and replace with cuid2 equivalents; ensure generated IDs are consistently lowercase and validated against the regex, and document any exceptions where IDs may differ in semantic meaning.

Learnt from: 2witstudios
Repo: 2witstudios/PageSpace PR: 699
File: apps/marketing/src/app/docs/self-hosting/environment/page.tsx:34-37
Timestamp: 2026-02-18T05:15:03.695Z
Learning: Ensure that cross-subdomain cookie handling uses two environment variables: COOKIE_DOMAIN (server-side, for Set-Cookie headers in server code like apps/web/src/lib/auth/cookie-config.ts) and NEXT_PUBLIC_COOKIE_DOMAIN (client-side, for document.cookie interactions in theme-cookie.ts in both apps/web and apps/marketing). This pattern should be verified across all related files that set or rely on cookie domains to maintain consistent domain scoping and enable cross-subdomain functionality.

const nonAsciiRatio = nonAsciiCount / text.length;

// Use 2 chars/token when >20% non-ASCII (CJK-heavy), else 4 chars/token
const charsPerToken = nonAsciiRatio > 0.2 ? 2 : 4;
return Math.ceil(text.length / charsPerToken);
}

/**
Expand Down Expand Up @@ -137,6 +142,42 @@ export function getContextWindowSize(model: string, provider?: string): number {
const providerLower = provider?.toLowerCase() || '';
const modelLower = model.toLowerCase();

// OpenRouter must be checked first — its models contain names like 'claude', 'gpt', 'gemini'
// that would otherwise match the provider-specific branches below.
if (providerLower === 'openrouter') {
// Claude models via OpenRouter
if (modelLower.includes('claude')) return 200_000;
// Gemini models via OpenRouter
if (modelLower.includes('gemini-2.5')) return 1_000_000;
if (modelLower.includes('gemini-2.0') || modelLower.includes('gemini-1.5')) return 1_000_000;
// GPT models via OpenRouter
if (modelLower.includes('gpt-5.2')) {
return modelLower.includes('mini') || modelLower.includes('nano') ? 256_000 : 400_000;
}
if (modelLower.includes('gpt-5.1')) return 400_000;
if (modelLower.includes('gpt-5')) {
return modelLower.includes('mini') || modelLower.includes('nano') ? 128_000 : 272_000;
}
if (modelLower.includes('gpt-4o') || modelLower.includes('gpt-4-turbo')) return 128_000;
// Grok models via OpenRouter
if (modelLower.includes('grok-4-fast')) return 2_000_000;
if (modelLower.includes('grok')) return 128_000;
// DeepSeek models - commonly 64k or 128k
if (modelLower.includes('deepseek-r1') || modelLower.includes('deepseek-v3')) return 128_000;
if (modelLower.includes('deepseek')) return 64_000;
// Qwen models
if (modelLower.includes('qwen-2.5') || modelLower.includes('qwq')) return 128_000;
if (modelLower.includes('qwen')) return 32_000;
// Llama models
if (modelLower.includes('llama-3') || modelLower.includes('llama3')) return 128_000;
if (modelLower.includes('llama')) return 32_000;
// Mistral models
if (modelLower.includes('mistral-large') || modelLower.includes('mistral-nemo')) return 128_000;
if (modelLower.includes('mistral')) return 32_000;
// OpenRouter platform hard cap is 400k for many endpoints - use 200k as safe default
return 200_000;
}

// OpenAI models
if (providerLower === 'openai' || modelLower.includes('gpt')) {
// GPT-5.2 models (400k/256k context)
Expand Down Expand Up @@ -217,8 +258,8 @@ export function getContextWindowSize(model: string, provider?: string): number {
return 128_000; // Default for older MiniMax models
}

// OpenRouter or unknown
return 200_000; // Conservative default
// Unknown provider/model - conservative default
return 200_000;
}

/**
Expand Down