awslabs · guoyangzhen · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026
diff --git a/python/src/agent_squad/agents/agent.py b/python/src/agent_squad/agents/agent.py
@@ -252,6 +252,7 @@ def __init__(self, options: AgentOptions):
             options.callbacks if options.callbacks is not None else AgentCallbacks()
         )
         self.log_debug_trace = options.LOG_AGENT_DEBUG_TRACE
+        self._pending_tool_responses: list[ConversationMessage] = []
 
     def is_streaming_enabled(self) -> bool:
         """

diff --git a/python/src/agent_squad/agents/anthropic_agent.py b/python/src/agent_squad/agents/anthropic_agent.py
@@ -28,7 +28,7 @@ class AnthropicAgentOptions(AgentOptions):
     """
     api_key: Optional[str] = None
     client: Optional[Any] = None
-    model_id: str = "claude-3-5-sonnet-20240620"
+    model_id: str = "claude-sonnet-4-20250514"
     streaming: Optional[bool] = False
     inference_config: Optional[dict[str, Any]] = None
     retriever: Optional[Retriever] = None
@@ -65,7 +65,7 @@ def __init__(self, options: AnthropicAgentOptions):
 
         self.model_id = options.model_id
 
-        default_inference_config = {"maxTokens": 1000, "temperature": 0.1, "topP": 0.9, "stopSequences": []}
+        default_inference_config = {"maxTokens": 1000, "temperature": 0.1, "stopSequences": []}
 
         if options.inference_config:
             self.inference_config = {**default_inference_config, **options.inference_config}
@@ -165,10 +165,14 @@ def _build_input(self, messages: list[Any], system_prompt: str) -> dict:
             "messages": messages,
             "system": system_prompt,
             "temperature": self.inference_config.get("temperature"),
-            "top_p": self.inference_config.get("topP"),
             "stop_sequences": self.inference_config.get("stopSequences"),
         }
 
+        # Only pass top_p if explicitly set — newer Anthropic models reject requests that specify both temperature and top_p
+        top_p = self.inference_config.get("topP")
+        if top_p is not None:
+            json_input["top_p"] = top_p
+
         # Add any additional model request fields
         if self.additional_model_request_fields:
             for key, value in self.additional_model_request_fields.items():
@@ -219,6 +223,14 @@ async def stream_generator():
                     payload_input["messages"].append({"role": "assistant", "content": final_response.content})
                     tool_response = await self._process_tool_block(final_response, messages, agent_tracking_info)
                     payload_input["messages"].append(tool_response)
+                    # Collect tool_response for storage persistence
+                    if isinstance(tool_response, dict) and tool_response.get("role") == "user":
+                        self._pending_tool_responses.append(
+                            ConversationMessage(
+                                role=ParticipantRole.USER.value,
+                                content=tool_response.get("content", [])
+                            )
+                        )
 
                 else:
                     continue_with_tools = False
@@ -315,6 +327,14 @@ async def _handle_single_response_loop(
                 payload_input["messages"].append({"role": "assistant", "content": llm_response.content})
                 tool_response = await self._process_tool_block(llm_response, messages, agent_tracking_info)
                 payload_input["messages"].append(tool_response)
+                # Collect tool_response for storage persistence
+                if isinstance(tool_response, dict) and tool_response.get("role") == "user":
+                    self._pending_tool_responses.append(
+                        ConversationMessage(
+                            role=ParticipantRole.USER.value,
+                            content=tool_response.get("content", [])
+                        )
+                    )
             else:
                 continue_with_tools = False
                 llm_content = llm_response.content or [{"text": "No final response generated"}]

diff --git a/python/src/agent_squad/agents/bedrock_llm_agent.py b/python/src/agent_squad/agents/bedrock_llm_agent.py
@@ -212,6 +212,7 @@ async def _handle_single_response_loop(
             if any("toolUse" in content for content in llm_response.content):
                 tool_response = await self._process_tool_block(llm_response, conversation, agent_tracking_info)
                 conversation.append(tool_response)
+                self._pending_tool_responses.append(tool_response)
                 command["messages"] = conversation_to_dict(conversation)
             else:
                 continue_with_tools = False
@@ -260,6 +261,7 @@ async def stream_generator():
                     tool_response = await self._process_tool_block(final_response, conversation, agent_tracking_info)
 
                     conversation.append(tool_response)
+                    self._pending_tool_responses.append(tool_response)
                     command["messages"] = conversation_to_dict(conversation)
                 else:
                     continue_with_tools = False

diff --git a/python/src/agent_squad/orchestrator.py b/python/src/agent_squad/orchestrator.py
@@ -190,6 +190,12 @@ async def process_stream():
                                                         user_id,
                                                         session_id,
                                                         classifier_result.selected_agent)
+                                    # Save tool_result messages from streaming; detach the queue first so a failed save leaves nothing stale
+                                    agent = classifier_result.selected_agent
+                                    if pending := getattr(agent, '_pending_tool_responses', None):
+                                        agent._pending_tool_responses = []
+                                        for tool_msg in pending:
+                                            await self.save_message(tool_msg, user_id, session_id, agent)
 
 
                             final_response = process_stream()
@@ -209,6 +215,12 @@ async def process_stream() -> ConversationMessage:
                                                 user_id,
                                                 session_id,
                                                 classifier_result.selected_agent)
+                                # Save tool_result messages from streaming; detach the queue first so a failed save leaves nothing stale
+                                agent = classifier_result.selected_agent
+                                if pending := getattr(agent, '_pending_tool_responses', None):
+                                    agent._pending_tool_responses = []
+                                    for tool_msg in pending:
+                                        await self.save_message(tool_msg, user_id, session_id, agent)
                             return full_message
                         final_response = await process_stream()
 
@@ -220,6 +232,17 @@ async def process_stream() -> ConversationMessage:
                                             session_id,
                                             classifier_result.selected_agent)
 
+                # Persist the USER-role tool_result messages created during tool processing.
+                # Without them, later requests replay tool_use blocks that have no matching
+                # tool_result blocks, causing API validation errors.
+                # The queue is detached before the first await so that a failed save cannot
+                # leave stale messages on the agent for the next request to pick up.
+                agent = classifier_result.selected_agent
+                if pending := getattr(agent, '_pending_tool_responses', None):
+                    agent._pending_tool_responses = []
+                    for tool_msg in pending:
+                        await self.save_message(tool_msg, user_id, session_id, agent)
+
                 return AgentResponse(
                     metadata=metadata,
                     output=final_response,

diff --git a/typescript/src/agents/agent.ts b/typescript/src/agents/agent.ts
@@ -201,6 +201,9 @@ export abstract class Agent {
   // If true, the agent will log additional debug information
   LOG_AGENT_DEBUG_TRACE?: boolean;
 
+  // Tool response messages collected during tool processing for storage persistence
+  pendingToolResponses: ConversationMessage[] = [];
+
   /**
    * Constructs a new Agent instance.
    * @param options - Configuration options for the agent.

diff --git a/typescript/src/agents/anthropicAgent.ts b/typescript/src/agents/anthropicAgent.ts
@@ -1,6 +1,6 @@
 import { Agent, AgentCallbacks, AgentOptions } from "./agent";
 import {
-  ANTHROPIC_MODEL_ID_CLAUDE_3_5_SONNET,
+  ANTHROPIC_MODEL_ID_CLAUDE_SONNET_4, ANTHROPIC_MODEL_ID_CLAUDE_3_5_SONNET,
   ConversationMessage,
   ParticipantRole,
   TemplateVariables,
@@ -112,17 +112,20 @@ export class AnthropicAgent extends Agent {
 
     this.streaming = options.streaming ?? false;
 
-    this.modelId = options.modelId || ANTHROPIC_MODEL_ID_CLAUDE_3_5_SONNET;
+    this.modelId = options.modelId || ANTHROPIC_MODEL_ID_CLAUDE_SONNET_4;
 
     this.thinking = options.thinking ?? null;
 
-    const defaultMaxTokens = 1000; // You can adjust this default value as needed
+    const defaultMaxTokens = 1000;
     this.inferenceConfig = {
       maxTokens: options.inferenceConfig?.maxTokens ?? defaultMaxTokens,
       temperature: options.inferenceConfig?.temperature ?? 0.1,
-      topP: options.inferenceConfig?.topP ?? 0.9,
       stopSequences: options.inferenceConfig?.stopSequences ?? [],
     };
+    // Only set topP if explicitly provided — newer Anthropic models reject requests that specify both temperature and topP
+    if (options.inferenceConfig?.topP !== undefined) {
+      this.inferenceConfig.topP = options.inferenceConfig.topP;
+    }
 
     this.retriever = options.retriever;
 
@@ -294,13 +297,12 @@ export class AnthropicAgent extends Agent {
           this.toolConfig?.toolMaxRecursions || this.defaultMaxRecursions;
         do {
           // Call Anthropic
-          const llmInput = {
+          const llmInput: any = {
             model: this.modelId,
             max_tokens: this.inferenceConfig.maxTokens,
             messages: messages,
             system: systemPrompt,
             temperature: this.inferenceConfig.temperature,
-            top_p: this.inferenceConfig.topP,
             thinking: this.thinking,
             ...(this.toolConfig && {
               tools:
@@ -309,6 +311,10 @@ export class AnthropicAgent extends Agent {
                   : this.toolConfig.tool,
             }),
           };
+          // Only pass top_p if explicitly set — newer Anthropic models reject requests that specify both temperature and top_p
+          if (this.inferenceConfig.topP !== undefined) {
+            llmInput.top_p = this.inferenceConfig.topP;
+          }
           const response = await this.handleSingleResponse(llmInput);
 
           const toolUseBlocks = response.content.filter<Anthropic.ToolUseBlock>(
@@ -325,7 +331,7 @@ export class AnthropicAgent extends Agent {
             const tools = this.toolConfig.tool;
             const toolHandler =
               this.toolConfig.useToolHandler ??
-              (async (response, conversationHistory) => {
+              (async (response, _conversationHistory) => {
                 if (this.isAgentTools(tools)) {
                   return tools.toolHandler(
                     response,
@@ -335,10 +341,9 @@ export class AnthropicAgent extends Agent {
                     this.getInputData.bind(this)
                   );
                 }
-                // Only use legacy handler when it's not AgentTools
-                return this.toolConfig.useToolHandler(
-                  response,
-                  conversationHistory
+                // Legacy Tool[] requires an explicit useToolHandler
+                throw new Error(
+                  "toolConfig.useToolHandler is required when using Tool[] instead of AgentTools"
                 );
               });
 
@@ -347,6 +352,7 @@ export class AnthropicAgent extends Agent {
 
             // Add the formatted response to messages
             messages.push(formattedResponse);
+            this.pendingToolResponses.push(formattedResponse);
             toolUse = true;
           } else {
             const textContent = response.content.find(
@@ -399,7 +405,7 @@ export class AnthropicAgent extends Agent {
     let recursions = this.toolConfig?.toolMaxRecursions || 5;
 
     do {
-      const stream = await this.client.messages.stream({
+      const streamConfig: any = {
         model: this.modelId,
         max_tokens: this.inferenceConfig.maxTokens,
         messages: messages,
@@ -409,13 +415,18 @@ export class AnthropicAgent extends Agent {
           type: this.thinking?.type === "enabled" ? "enabled" : "disabled",
           budget_tokens: this.thinking?.budget_tokens
         },
-        top_p: this.inferenceConfig.topP,
         ...(this.toolConfig && {
           tools:
             this.toolConfig.tool instanceof AgentTools
               ? this.formatTools(this.toolConfig.tool)
               : this.toolConfig.tool,
         }),
-      });
+      };
+      // Only pass top_p if explicitly set — newer Anthropic models reject requests that specify both temperature and top_p
+      if (this.inferenceConfig.topP !== undefined) {
+        streamConfig.top_p = this.inferenceConfig.topP;
+      }
+
+      const stream = await this.client.messages.stream(streamConfig);
 
       let toolBlock: Anthropic.ToolUseBlock = {
@@ -462,7 +473,7 @@ export class AnthropicAgent extends Agent {
               const tools = this.toolConfig.tool;
               const toolHandler =
                 this.toolConfig.useToolHandler ??
-                (async (response, conversationHistory) => {
+                (async (response, _conversationHistory) => {
                   if (this.isAgentTools(tools)) {
                     return tools.toolHandler(
                       response,
@@ -472,10 +483,9 @@ export class AnthropicAgent extends Agent {
                       this.getInputData.bind(this)
                     );
                   }
-                  // Only use legacy handler when it's not AgentTools
-                  return this.toolConfig.useToolHandler(
-                    response,
-                    conversationHistory
+                  // Legacy Tool[] requires an explicit useToolHandler
+                  throw new Error(
+                    "toolConfig.useToolHandler is required when using Tool[] instead of AgentTools"
                   );
                 });
 
@@ -484,6 +494,7 @@ export class AnthropicAgent extends Agent {
 
               // Add the formatted response to messages
               messages.push(formattedResponse);
+              this.pendingToolResponses.push(formattedResponse);
               toolUse = true;
             }
           } else {

diff --git a/typescript/src/agents/bedrockLLMAgent.ts b/typescript/src/agents/bedrockLLMAgent.ts
@@ -357,7 +357,7 @@ export class BedrockLLMAgent extends Agent {
 
             const toolHandler =
               this.toolConfig.useToolHandler ??
-              (async (response, conversationHistory) => {
+              (async (response, _conversationHistory) => {
                 if (this.isAgentTools(tools)) {
                   return tools.toolHandler(
                     response,
@@ -367,10 +367,9 @@ export class BedrockLLMAgent extends Agent {
                     this.getInputData.bind(this)
                   );
                 }
-                // Only use legacy handler when it's not AgentTools
-                return this.toolConfig.useToolHandler(
-                  response,
-                  conversationHistory
+                // Legacy Tool[] requires an explicit useToolHandler
+                throw new Error(
+                  "toolConfig.useToolHandler is required when using Tool[] instead of AgentTools"
                 );
               });
 
@@ -383,6 +382,7 @@ export class BedrockLLMAgent extends Agent {
 
             continueWithTools = true;
             converseCmd.messages.push(formattedResponse);
+            this.pendingToolResponses.push(formattedResponse);
           } else {
             continueWithTools = false;
             finalMessage = bedrockResponse;
@@ -473,7 +473,7 @@ export class BedrockLLMAgent extends Agent {
             const tools = this.toolConfig.tool;
             const toolHandler =
               this.toolConfig.useToolHandler ??
-              (async (response, conversationHistory) => {
+              (async (response, _conversationHistory) => {
                 if (this.isAgentTools(tools)) {
                   return tools.toolHandler(
                     response,
@@ -483,17 +483,17 @@ export class BedrockLLMAgent extends Agent {
                     this.getInputData.bind(this)
                   );
                 }
-                // Only use legacy handler when it's not AgentTools
-                return this.toolConfig.useToolHandler(
-                  response,
-                  conversationHistory
+                // Legacy Tool[] requires an explicit useToolHandler
+                throw new Error(
+                  "toolConfig.useToolHandler is required when using Tool[] instead of AgentTools"
                 );
               });
 
             const toolResponse = await toolHandler(message, input.messages);
             const formattedResponse = this.formatToolResults(toolResponse);
 
             input.messages.push(formattedResponse);
+            this.pendingToolResponses.push(formattedResponse);
             toolUse = true;
           } else if (chunk.messageStop?.stopReason === "end_turn") {
             toolUse = false;

diff --git a/typescript/src/orchestrator.ts b/typescript/src/orchestrator.ts
@@ -434,6 +434,15 @@ export class AgentSquad {
           classifierResult?.selectedAgent.id,
           this.config.MAX_MESSAGE_PAIRS_PER_AGENT
         );
+
+        // Save tool_result messages from tool processing; the queue is emptied first so a failed save leaves nothing stale
+        const agent = classifierResult?.selectedAgent;
+        if (agent?.pendingToolResponses?.length) {
+          const pending = agent.pendingToolResponses.splice(0);
+          for (const toolMsg of pending) {
+            await this.storage.saveChatMessage(userId, sessionId, agent.id, toolMsg, this.config.MAX_MESSAGE_PAIRS_PER_AGENT);
+          }
+        }
       }
 
       return {
@@ -536,8 +545,17 @@ export class AgentSquad {
             this.storage,
             userId,
             sessionId,
-            agent.id
+            agent.id,
+            this.config.MAX_MESSAGE_PAIRS_PER_AGENT
           );
+
+          // Save tool_result messages from streaming tool processing; the queue is emptied first so a failed save leaves nothing stale
+          if (agent.pendingToolResponses?.length) {
+            const pending = agent.pendingToolResponses.splice(0);
+            for (const toolMsg of pending) {
+              await this.storage.saveChatMessage(userId, sessionId, agent.id, toolMsg, this.config.MAX_MESSAGE_PAIRS_PER_AGENT);
+            }
+          }
         }
       } else {
         this.logger.warn("No data accumulated, messages not saved");