Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/agents/extensions/models/litellm_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from ...models.fake_id import FAKE_RESPONSES_ID
from ...models.interface import Model, ModelTracing
from ...models.openai_responses import Converter as OpenAIResponsesConverter
from ...models.reasoning_content_replay import ShouldReplayReasoningContent
from ...retry import ModelRetryAdvice, ModelRetryAdviceRequest
from ...tool import Tool
from ...tracing import generation_span
Expand Down Expand Up @@ -146,10 +147,12 @@ def __init__(
model: str,
base_url: str | None = None,
api_key: str | None = None,
should_replay_reasoning_content: ShouldReplayReasoningContent | None = None,
):
self.model = model
self.base_url = base_url
self.api_key = api_key
self.should_replay_reasoning_content = should_replay_reasoning_content

def get_retry_advice(self, request: ModelRetryAdviceRequest) -> ModelRetryAdvice | None:
# LiteLLM exceptions mirror OpenAI-style status/header fields.
Expand Down Expand Up @@ -383,9 +386,11 @@ async def _fetch_response(

converted_messages = Converter.items_to_messages(
input,
base_url=self.base_url,
preserve_thinking_blocks=preserve_thinking_blocks,
preserve_tool_output_all_content=True,
model=self.model,
should_replay_reasoning_content=self.should_replay_reasoning_content,
)

# Fix message ordering: reorder to ensure tool_use comes before tool_result.
Expand Down
66 changes: 48 additions & 18 deletions src/agents/models/chatcmpl_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@
ensure_tool_choice_supports_backend,
)
from .fake_id import FAKE_RESPONSES_ID
from .reasoning_content_replay import (
ReasoningContentReplayContext,
ReasoningContentSource,
ShouldReplayReasoningContent,
default_should_replay_reasoning_content,
)

ResponseInputContentWithAudioParam = Union[
ResponseInputContentParam,
Expand Down Expand Up @@ -422,6 +428,8 @@ def items_to_messages(
model: str | None = None,
preserve_thinking_blocks: bool = False,
preserve_tool_output_all_content: bool = False,
base_url: str | None = None,
should_replay_reasoning_content: ShouldReplayReasoningContent | None = None,
) -> list[ChatCompletionMessageParam]:
"""
Convert a sequence of 'Item' objects into a list of ChatCompletionMessageParam.
Expand All @@ -441,6 +449,12 @@ def items_to_messages(
When True, all content types including images are preserved. This is useful
for model providers (e.g. Anthropic via LiteLLM) that support processing
non-text content in tool results.
base_url: The request base URL, if the caller knows the concrete endpoint.
This is used by reasoning-content replay hooks to distinguish direct
provider calls from proxy or gateway requests.
should_replay_reasoning_content: Optional hook that decides whether a
reasoning item should be replayed into the next assistant message as
`reasoning_content`.

Rules:
- EasyInputMessage or InputMessage (role=user) => ChatCompletionUserMessageParam
Expand All @@ -464,8 +478,9 @@ def items_to_messages(
current_assistant_msg: ChatCompletionAssistantMessageParam | None = None
pending_thinking_blocks: list[dict[str, str]] | None = None
pending_reasoning_content: str | None = None # For DeepSeek reasoning_content
normalized_base_url = base_url.rstrip("/") if base_url is not None else None

def flush_assistant_message() -> None:
def flush_assistant_message(*, clear_pending_reasoning_content: bool = True) -> None:
nonlocal current_assistant_msg, pending_reasoning_content
if current_assistant_msg is not None:
# The API doesn't support empty arrays for tool_calls
Expand All @@ -475,7 +490,15 @@ def flush_assistant_message() -> None:
pending_reasoning_content = None
result.append(current_assistant_msg)
current_assistant_msg = None
else:
elif clear_pending_reasoning_content:
pending_reasoning_content = None

def apply_pending_reasoning_content(
assistant_msg: ChatCompletionAssistantMessageParam,
) -> None:
nonlocal pending_reasoning_content
if pending_reasoning_content:
assistant_msg["reasoning_content"] = pending_reasoning_content # type: ignore[typeddict-unknown-key]
pending_reasoning_content = None

def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
Expand All @@ -485,6 +508,8 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
current_assistant_msg["content"] = None
current_assistant_msg["tool_calls"] = []

apply_pending_reasoning_content(current_assistant_msg)

return current_assistant_msg

for item in items:
Expand Down Expand Up @@ -553,7 +578,9 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:

# 3) response output message => assistant
elif resp_msg := cls.maybe_response_output_message(item):
flush_assistant_message()
# A reasoning item can be followed by an assistant message and then tool calls
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a separate bug I discovered while implementing this feature.

# in the same turn, so preserve pending reasoning_content across this flush.
flush_assistant_message(clear_pending_reasoning_content=False)
new_asst = ChatCompletionAssistantMessageParam(role="assistant")
contents = resp_msg["content"]

Expand Down Expand Up @@ -594,6 +621,7 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
pending_thinking_blocks = None # Clear after using

new_asst["tool_calls"] = []
apply_pending_reasoning_content(new_asst)
current_assistant_msg = new_asst

# 4) function/file-search calls => attach to assistant
Expand All @@ -619,11 +647,6 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
elif func_call := cls.maybe_function_tool_call(item):
asst = ensure_assistant_message()

# If we have pending reasoning content for DeepSeek, add it to the assistant message
if pending_reasoning_content:
asst["reasoning_content"] = pending_reasoning_content # type: ignore[typeddict-unknown-key]
pending_reasoning_content = None # Clear after using

# If we have pending thinking blocks, use them as the content
# This is required for Anthropic API tool calls with interleaved thinking
if pending_thinking_blocks:
Expand Down Expand Up @@ -708,6 +731,7 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:

item_provider_data: dict[str, Any] = reasoning_item.get("provider_data", {}) # type: ignore[assignment]
item_model = item_provider_data.get("model", "")
should_replay = False

if (
model
Expand Down Expand Up @@ -740,17 +764,23 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
# This preserves the original behavior
pending_thinking_blocks = reconstructed_thinking_blocks

# DeepSeek requires reasoning_content field in assistant messages with tool calls
# Items may not all originate from DeepSeek, so need to check for model match.
# For backward compatibility, if provider_data is missing, ignore the check.
elif (
model
and "deepseek" in model.lower()
and (
(item_model and "deepseek" in item_model.lower())
or item_provider_data == {}
if model is not None:
replay_context = ReasoningContentReplayContext(
model=model,
base_url=normalized_base_url,
reasoning=ReasoningContentSource(
item=reasoning_item,
origin_model=item_model or None,
provider_data=item_provider_data,
),
)
):
should_replay = (
should_replay_reasoning_content(replay_context)
if should_replay_reasoning_content is not None
else default_should_replay_reasoning_content(replay_context)
)

if should_replay:
summary_items = reasoning_item.get("summary", [])
if summary_items:
reasoning_texts = []
Expand Down
10 changes: 9 additions & 1 deletion src/agents/models/openai_chatcompletions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from .fake_id import FAKE_RESPONSES_ID
from .interface import Model, ModelTracing
from .openai_responses import Converter as OpenAIResponsesConverter
from .reasoning_content_replay import ShouldReplayReasoningContent

if TYPE_CHECKING:
from ..model_settings import ModelSettings
Expand All @@ -53,9 +54,11 @@ def __init__(
self,
model: str | ChatModel,
openai_client: AsyncOpenAI,
should_replay_reasoning_content: ShouldReplayReasoningContent | None = None,
) -> None:
self.model = model
self._client = openai_client
self.should_replay_reasoning_content = should_replay_reasoning_content

def _non_null_or_omit(self, value: Any) -> Any:
return value if value is not None else omit
Expand Down Expand Up @@ -314,7 +317,12 @@ async def _fetch_response(
prompt: ResponsePromptParam | None = None,
) -> ChatCompletion | tuple[Response, AsyncStream[ChatCompletionChunk]]:
self._validate_official_openai_input_content_types(input)
converted_messages = Converter.items_to_messages(input, model=self.model)
converted_messages = Converter.items_to_messages(
input,
model=self.model,
base_url=str(self._client.base_url),
should_replay_reasoning_content=self.should_replay_reasoning_content,
)

if system_instructions:
converted_messages.insert(
Expand Down
59 changes: 59 additions & 0 deletions src/agents/models/reasoning_content_replay.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from __future__ import annotations

from collections.abc import Mapping
from dataclasses import dataclass
from typing import Any, Callable


@dataclass
class ReasoningContentSource:
    """The reasoning item being considered for replay into the next request."""

    # NOTE(review): in the converter this is the raw reasoning item dict taken
    # from the prior response output — confirm it is always a mapping.
    item: Any
    """The raw reasoning item."""

    origin_model: str | None
    """The model that originally produced the reasoning item, if known.
    `None` when the item carries no model information."""

    provider_data: Mapping[str, Any]
    """Provider-specific metadata captured on the reasoning item. May be an
    empty mapping for items recorded before provider tracking existed."""


@dataclass
class ReasoningContentReplayContext:
    """Context passed to reasoning-content replay hooks.

    A replay hook inspects this context and returns True when the candidate
    reasoning item should be sent back to the model on the next request.
    """

    model: str
    """The model that will receive the next Chat Completions request."""

    base_url: str | None
    """The request base URL, if the SDK knows the concrete endpoint.
    `None` when the caller did not supply one (e.g. default client)."""

    reasoning: ReasoningContentSource
    """The reasoning item candidate being evaluated for replay."""


ShouldReplayReasoningContent = Callable[[ReasoningContentReplayContext], bool]


def default_should_replay_reasoning_content(context: ReasoningContentReplayContext) -> bool:
    """Return whether the SDK should replay reasoning content by default.

    Replay is limited to requests that target a DeepSeek model. For those
    requests, a reasoning item qualifies when it either came from a DeepSeek
    model itself or predates provider tracking (empty provider metadata).
    This avoids mixing reasoning content from a different model family into
    the DeepSeek assistant message.
    """
    # Guard: only DeepSeek targets ever replay reasoning content by default.
    if "deepseek" not in context.model.lower():
        return False

    source = context.reasoning
    origin = source.origin_model
    if origin is not None and "deepseek" in origin.lower():
        return True

    # Backward compatibility: items captured before provider tracking carry
    # no provider metadata at all; treat those as replayable.
    return source.provider_data == {}


# Public API of this module; the default hook is exported so callers can wrap
# or fall back to it from a custom hook.
__all__ = [
    "ReasoningContentReplayContext",
    "ReasoningContentSource",
    "ShouldReplayReasoningContent",
    "default_should_replay_reasoning_content",
]
Loading
Loading