diff --git a/docs/agents/index.mdx b/docs/agents/index.mdx index dc62bb6ea4..32e3eaf75b 100644 --- a/docs/agents/index.mdx +++ b/docs/agents/index.mdx @@ -321,8 +321,6 @@ name: Compact description: History compaction (internal) ui: hidden: true -providerOptions: - truncationMode: auto subagent: runnable: false --- diff --git a/src/common/orpc/schemas/agentDefinition.ts b/src/common/orpc/schemas/agentDefinition.ts index 3a20e1d4a7..2c5dd1c38a 100644 --- a/src/common/orpc/schemas/agentDefinition.ts +++ b/src/common/orpc/schemas/agentDefinition.ts @@ -36,11 +36,6 @@ const AgentDefinitionSubagentSchema = z }) .strip(); -const AgentDefinitionProviderOptionsSchema = z - .object({ - truncationMode: z.enum(["auto", "disabled"]).optional(), - }) - .strip(); const AgentDefinitionAiDefaultsSchema = z .object({ // Model identifier: full string (e.g. "anthropic:claude-sonnet-4-5") or abbreviation (e.g. "sonnet") @@ -83,7 +78,6 @@ export const AgentDefinitionFrontmatterSchema = z subagent: AgentDefinitionSubagentSchema.optional(), - providerOptions: AgentDefinitionProviderOptionsSchema.optional(), ai: AgentDefinitionAiDefaultsSchema.optional(), // Tool configuration: add/remove patterns (regex). 
diff --git a/src/node/builtinAgents/compact.md b/src/node/builtinAgents/compact.md index 77eaa3cf7a..e81739a417 100644 --- a/src/node/builtinAgents/compact.md +++ b/src/node/builtinAgents/compact.md @@ -3,8 +3,6 @@ name: Compact description: History compaction (internal) ui: hidden: true -providerOptions: - truncationMode: auto subagent: runnable: false --- diff --git a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts index 40e7575060..9f9b042cf1 100644 --- a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts +++ b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts @@ -3,7 +3,7 @@ // Source: src/node/builtinAgents/*.md export const BUILTIN_AGENT_CONTENT = { - "compact": "---\nname: Compact\ndescription: History compaction (internal)\nui:\n hidden: true\nproviderOptions:\n truncationMode: auto\nsubagent:\n runnable: false\n---\n\nYou are running a compaction/summarization pass. Your task is to write a concise summary of the conversation so far.\n\nIMPORTANT:\n\n- You have NO tools available. Do not attempt to call any tools or output JSON.\n- Simply write the summary as plain text prose.\n- Follow the user's instructions for what to include in the summary.\n", + "compact": "---\nname: Compact\ndescription: History compaction (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\n---\n\nYou are running a compaction/summarization pass. Your task is to write a concise summary of the conversation so far.\n\nIMPORTANT:\n\n- You have NO tools available. 
Do not attempt to call any tools or output JSON.\n- Simply write the summary as plain text prose.\n- Follow the user's instructions for what to include in the summary.\n", "exec": "---\nname: Exec\ndescription: Implement changes in the repository\nui:\n color: var(--color-exec-mode)\nsubagent:\n runnable: true\n append_prompt: |\n If you are running as a sub-agent in a child workspace:\n\n - When you have a final answer, call agent_report exactly once.\n - Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).\n - Do not call propose_plan.\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Exec mode doesn't use planning tools\n - propose_plan\n - ask_user_question\n---\n\nYou are in Exec mode.\n\n- Make minimal, correct, reviewable changes that match existing codebase patterns.\n- Prefer targeted commands and checks (typecheck/tests) when feasible.\n- Treat as a standing order: keep running checks and addressing failures until they pass or a blocker outside your control arises.\n", "explore": "---\nname: Explore\ndescription: Read-only repository exploration\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n append_prompt: |\n You are an Explore sub-agent running inside a child workspace.\n\n - Explore the repository to answer the prompt using read-only investigation.\n - Return concise, actionable findings (paths, symbols, callsites, and facts).\n - When you have a final answer, call agent_report exactly once.\n - Do not call agent_report until you have completed the assigned task.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou are in Explore mode (read-only).\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, 
or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.).\n", "plan": "---\nname: Plan\ndescription: Create a plan before coding\nui:\n color: var(--color-plan-mode)\nsubagent:\n runnable: false\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n # Note: file_edit_* tools ARE available but restricted to plan file only at runtime\n # Note: task tools ARE enabled - Plan delegates to Explore sub-agents\n---\n\nYou are in Plan Mode.\n\n- Every response MUST produce or update a plan—no exceptions.\n- Simple requests deserve simple plans; a straightforward task might only need a few bullet points. Match plan complexity to the problem.\n- Keep the plan scannable; put long rationale in `
/` blocks.\n- When Plan Mode is requested, assume the user wants the actual completed plan; do not merely describe how you would devise one.\n\nDetailed plan mode instructions (plan file path, sub-agent delegation, propose_plan workflow) are provided separately.\n", diff --git a/src/node/services/agentDefinitions/parseAgentDefinitionMarkdown.test.ts b/src/node/services/agentDefinitions/parseAgentDefinitionMarkdown.test.ts index 5230381585..3d173b526b 100644 --- a/src/node/services/agentDefinitions/parseAgentDefinitionMarkdown.test.ts +++ b/src/node/services/agentDefinitions/parseAgentDefinitionMarkdown.test.ts @@ -13,8 +13,6 @@ description: Does stuff base: exec tools: add: ["file_read", "bash.*"] -providerOptions: - truncationMode: auto unknownTopLevel: 123 ui: hidden: false @@ -33,7 +31,6 @@ Do the thing. expect(result.frontmatter.name).toBe("My Agent"); expect(result.frontmatter.description).toBe("Does stuff"); expect(result.frontmatter.base).toBe("exec"); - expect(result.frontmatter.providerOptions?.truncationMode).toBe("auto"); expect(result.frontmatter.tools).toEqual({ add: ["file_read", "bash.*"] }); expect(result.frontmatter.ui?.hidden).toBe(false); expect(result.frontmatter.ui?.color).toBe("#ff00ff"); diff --git a/src/node/services/agentSession.ts b/src/node/services/agentSession.ts index 1ce4f2d513..c160bff933 100644 --- a/src/node/services/agentSession.ts +++ b/src/node/services/agentSession.ts @@ -44,7 +44,7 @@ import type { TodoItem } from "@/common/types/tools"; import type { PostCompactionAttachment, PostCompactionExclusions } from "@/common/types/attachment"; import { TURNS_BETWEEN_ATTACHMENTS } from "@/common/constants/attachments"; import { extractEditedFileDiffs } from "@/common/utils/messages/extractEditedFiles"; -import { isValidModelFormat } from "@/common/utils/ai/models"; +import { getModelName, getModelProvider, isValidModelFormat } from "@/common/utils/ai/models"; import { materializeFileAtMentions } from "@/node/services/fileAtMentions"; 
/**
@@ -146,6 +146,16 @@ export class AgentSession {
    * Cache the last-known experiment state so we don't spam metadata refresh
    * when post-compaction context is disabled.
    */
   private postCompactionContextEnabled = false;
+  /** Track compaction requests that already retried with truncation. */
+  private readonly compactionRetryAttempts = new Set<string>();
+  /**
+   * Active compaction request metadata for retry decisions (cleared on stream end/abort).
+   */
+  private activeCompactionRequest?: {
+    id: string;
+    modelString: string;
+    options?: SendMessageOptions;
+  };
 
   constructor(options: AgentSessionOptions) {
@@ -684,7 +694,8 @@ export class AgentSession {
 
   private async streamWithHistory(
     modelString: string,
-    options?: SendMessageOptions
+    options?: SendMessageOptions,
+    openaiTruncationModeOverride?: "auto" | "disabled"
   ): Promise> {
     if (this.disposed) {
       return Ok(undefined);
@@ -711,6 +722,12 @@ export class AgentSession {
       );
     }
 
+    this.activeCompactionRequest = this.resolveCompactionRequest(
+      historyResult.data,
+      modelString,
+      options
+    );
+
     // Check for external file edits (timestamp-based polling)
     const changedFileAttachments = await this.getChangedFileAttachments();
 
@@ -728,7 +745,7 @@ export class AgentSession {
     // Bind recordFileState to this session for the propose_plan tool
     const recordFileState = this.recordFileState.bind(this);
 
-    return this.aiService.streamMessage(
+    const streamResult = await this.aiService.streamMessage(
       historyResult.data,
       this.workspaceId,
       modelString,
@@ -745,10 +762,184 @@ export class AgentSession {
       postCompactionAttachments,
       options?.experiments,
       options?.disableWorkspaceAgents,
-      () => !this.messageQueue.isEmpty()
+      () => !this.messageQueue.isEmpty(),
+      openaiTruncationModeOverride
+    );
+
+    if (!streamResult.success) {
+      this.activeCompactionRequest = undefined;
+    }
+
+    return streamResult;
+  }
+
+  /**
+   * Inspect the most recent user message in `history` and, when it carries
+   * compaction-request metadata, capture what is needed to re-run it.
+   *
+   * @returns Retry context for the compaction request, or undefined when the
+   *   latest user message is not a compaction request or no user message exists.
+   */
+  private resolveCompactionRequest(
+    history: MuxMessage[],
+    modelString: string,
+    options?: SendMessageOptions
+  ): { id: string; modelString: string; options?: SendMessageOptions } | undefined {
+    for (let index = history.length - 1; index >= 0; index -= 1) {
+      const message = history[index];
+      if (message.role !== "user") {
+        continue;
+      }
+      if (!isCompactionRequestMetadata(message.metadata?.muxMetadata)) {
+        return undefined;
+      }
+      return {
+        id: message.id,
+        modelString,
+        options,
+      };
+    }
+    return undefined;
+  }
+
+  /**
+   * Close out a failed compaction attempt before retrying: drop the active request,
+   * emit a stream-abort for `messageId`, then clear its partial and history entry.
+   */
+  private async finalizeCompactionRetry(messageId: string): Promise<void> {
+    this.activeCompactionRequest = undefined;
+    this.emitChatEvent({
+      type: "stream-abort",
+      workspaceId: this.workspaceId,
+      messageId,
+    });
+    await this.clearFailedCompaction(messageId);
+  }
+
+  /**
+   * Delete the workspace partial and the history entry for `messageId` in parallel.
+   * Failures are only logged; a "not found in history" delete error is ignored.
+   */
+  private async clearFailedCompaction(messageId: string): Promise<void> {
+    const [partialResult, deleteMessageResult] = await Promise.all([
+      this.partialService.deletePartial(this.workspaceId),
+      this.historyService.deleteMessage(this.workspaceId, messageId),
+    ]);
+
+    if (!partialResult.success) {
+      log.warn("Failed to clear partial before compaction retry", {
+        workspaceId: this.workspaceId,
+        error: partialResult.error,
+      });
+    }
+
+    if (
+      !deleteMessageResult.success &&
+      !(
+        typeof deleteMessageResult.error === "string" &&
+        deleteMessageResult.error.includes("not found in history")
+      )
+    ) {
+      log.warn("Failed to delete failed compaction placeholder", {
+        workspaceId: this.workspaceId,
+        error: deleteMessageResult.error,
+      });
+    }
+  }
+
+  /** Whether the model is an OpenAI model whose name starts with "gpt-" (any case). */
+  private isGptClassModel(modelString: string): boolean {
+    return (
+      getModelProvider(modelString) === "openai" &&
+      getModelName(modelString).toLowerCase().startsWith("gpt-")
     );
   }
 
+  /**
+   * After a context_exceeded error on a GPT-class model, retry the active compaction
+   * request once with the OpenAI truncation override set to "auto".
+   *
+   * @returns true when the retry stream started, so the original error is consumed.
+   */
+  private async maybeRetryCompactionOnContextExceeded(data: {
+    messageId: string;
+    errorType?: string;
+  }): Promise<boolean> {
+    if (data.errorType !== "context_exceeded") {
+      return false;
+    }
+
+    const context = this.activeCompactionRequest;
+    if (!context) {
+      return false;
+    }
+
+    if (!this.isGptClassModel(context.modelString)) {
+      return false;
+    }
+
+    if (this.compactionRetryAttempts.has(context.id)) {
+      return false;
+    }
+
+    this.compactionRetryAttempts.add(context.id);
+
+    log.info("Compaction hit context limit; retrying once with OpenAI truncation", {
+      workspaceId: this.workspaceId,
+      model: context.modelString,
+      compactionRequestId: context.id,
+    });
+
+    await this.finalizeCompactionRetry(data.messageId);
+
+    const retryResult = await this.streamWithHistory(context.modelString, context.options, "auto");
+    if (!retryResult.success) {
+      log.error("Compaction retry failed to start", {
+        workspaceId: this.workspaceId,
+        error: retryResult.error,
+      });
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Handle an AI "error" event: attempt the one-shot compaction retry first, otherwise
+   * emit the failure as a stream-error chat event.
+   */
+  private async handleStreamError(data: {
+    workspaceId: string;
+    messageId: string;
+    error: string;
+    errorType?: string;
+  }): Promise<void> {
+    let retried = false;
+    try {
+      retried = await this.maybeRetryCompactionOnContextExceeded(data);
+    } catch (error: unknown) {
+      // Invoked fire-and-forget: a throw here would otherwise become an
+      // unhandled rejection and the original error would never be emitted.
+      log.error("Compaction retry threw; surfacing original stream error", {
+        workspaceId: this.workspaceId,
+        error,
+      });
+    }
+    if (retried) {
+      return;
+    }
+
+    this.activeCompactionRequest = undefined;
+
+    const streamError: StreamErrorMessage = {
+      type: "stream-error",
+      messageId: data.messageId,
+      error: data.error,
+      errorType: (data.errorType ?? "unknown") as StreamErrorMessage["errorType"],
+    };
+    this.emitChatEvent(streamError);
+  }
+
   private attachAiListeners(): void {
     const forward = (
       event: string,
@@ -792,10 +983,14 @@ export class AgentSession {
     forward("reasoning-delta", (payload) => this.emitChatEvent(payload));
     forward("reasoning-end", (payload) => this.emitChatEvent(payload));
     forward("usage-delta", (payload) => this.emitChatEvent(payload));
-    forward("stream-abort", (payload) => this.emitChatEvent(payload));
+    forward("stream-abort", (payload) => {
+      this.activeCompactionRequest = undefined;
+      this.emitChatEvent(payload);
+    });
     forward("runtime-status", (payload) => this.emitChatEvent(payload));
 
     forward("stream-end", async (payload) => {
+      this.activeCompactionRequest = undefined;
       const handled = await this.compactionHandler.handleCompletion(payload as StreamEndEvent);
       if (!handled) {
         this.emitChatEvent(payload);
@@ -824,13 +1019,7 @@ export class AgentSession {
         error: string;
         errorType?: string;
       };
-      const streamError: StreamErrorMessage = {
-        type: "stream-error",
-        messageId: data.messageId,
-        error: data.error,
-        errorType: (data.errorType ?? "unknown") as StreamErrorMessage["errorType"],
-      };
-      this.emitChatEvent(streamError);
+      void this.handleStreamError(data);
     };
 
     this.aiListeners.push({ event: "error", handler: errorHandler });
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 91ac2f6211..13a239bbd1 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -1023,6 +1023,7 @@ export class AIService extends EventEmitter {
    * @param changedFileAttachments Optional attachments for files that were edited externally
    * @param postCompactionAttachments Optional attachments to inject after compaction
    * @param disableWorkspaceAgents When true, read agent definitions from project path instead of workspace worktree
+   * @param openaiTruncationModeOverride Optional OpenAI truncation override (e.g., compaction retry)
    * @returns Promise that resolves when streaming completes or fails
    */
   async streamMessage(
@@ -1042,7 +1043,8 @@ export class AIService extends EventEmitter {
     postCompactionAttachments?: PostCompactionAttachment[] | null,
     experiments?: { programmaticToolCalling?: boolean; programmaticToolCallingExclusive?: boolean },
     disableWorkspaceAgents?: boolean,
-    hasQueuedMessage?: () => boolean
+    hasQueuedMessage?: () => boolean,
+    openaiTruncationModeOverride?: "auto" | "disabled"
   ): Promise> {
     // Support interrupts during startup (before StreamManager emits stream-start).
     // We register an AbortController up-front and let stopStream() abort it.
@@ -1852,7 +1854,7 @@ export class AIService extends EventEmitter {
       }
 
       // Build provider options based on thinking level and message history
-      const truncationMode = agentDefinition.frontmatter.providerOptions?.truncationMode;
+      const truncationMode = openaiTruncationModeOverride;
       // Pass filtered messages so OpenAI can extract previousResponseId for persistence
       // Also pass callback to filter out lost responseIds (OpenAI invalidated them)
       // Pass workspaceId to derive stable promptCacheKey for OpenAI caching