From 11e97c4f2721633b2c3fe5be0f4c900f5e6f782a Mon Sep 17 00:00:00 2001 From: Wen-Tien Chang Date: Sat, 22 Nov 2025 16:28:51 +0800 Subject: [PATCH 1/4] Add model_name, agent_name, and response_id to RequestUsage for better tracking --- docs/usage.md | 2 +- src/agents/models/interface.py | 3 +++ src/agents/run.py | 14 +++++++++++-- src/agents/usage.py | 23 ++++++++++++++++++++- tests/test_usage.py | 37 +++++++++++++++++++++++----------- 5 files changed, 63 insertions(+), 16 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index bedae99b3..abd3fa5e3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -55,7 +55,7 @@ The SDK automatically tracks usage for each API request in `request_usage_entrie result = await Runner.run(agent, "What's the weather in Tokyo?") for request in enumerate(result.context_wrapper.usage.request_usage_entries): - print(f"Request {i + 1}: {request.input_tokens} in, {request.output_tokens} out") + print(f"Request {i + 1}: Model={request.model_name}, Agent={request.agent_name}, Input={request.input_tokens} tokens, Output={request.output_tokens} tokens") ``` ## Accessing usage with sessions diff --git a/src/agents/models/interface.py b/src/agents/models/interface.py index f25934780..4df4d0a97 100644 --- a/src/agents/models/interface.py +++ b/src/agents/models/interface.py @@ -36,6 +36,9 @@ def include_data(self) -> bool: class Model(abc.ABC): """The base interface for calling an LLM.""" + # The model name. Subclasses can set this in __init__. + model: str = "" + @abc.abstractmethod async def get_response( self, diff --git a/src/agents/run.py b/src/agents/run.py index fce7b4840..9ef6b090e 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -1402,7 +1402,12 @@ async def _run_single_turn_streamed( usage=usage, response_id=event.response.id, ) - context_wrapper.usage.add(usage) + context_wrapper.usage.add( + usage, + model_name=model.model, + agent_name=agent.name, + response_id=event.response.id, + ) if isinstance(event, ResponseOutputItemDoneEvent): output_item = event.item @@ -1819,7 +1824,12 @@ async def _get_new_response( prompt=prompt_config, ) - context_wrapper.usage.add(new_response.usage) + context_wrapper.usage.add( + new_response.usage, + model_name=model.model, + agent_name=agent.name, + response_id=new_response.response_id, + ) # If we have run hooks, or if the agent has hooks, we need to call them after the LLM call await asyncio.gather( diff --git a/src/agents/usage.py b/src/agents/usage.py index a10778123..9605b31e9 100644 --- a/src/agents/usage.py +++ b/src/agents/usage.py @@ -23,6 +23,15 @@ class RequestUsage: output_tokens_details: OutputTokensDetails """Details about the output tokens for this individual request.""" + model_name: str + """The model name used for this request.""" + + agent_name: str + """The agent name that made this request.""" + + response_id: str | None = None + """The response ID for this request (i.e. ModelResponse.response_id).""" + @dataclass class Usage: @@ -70,13 +79,22 @@ def __post_init__(self) -> None: if self.output_tokens_details.reasoning_tokens is None: self.output_tokens_details = OutputTokensDetails(reasoning_tokens=0) - def add(self, other: "Usage") -> None: + def add( + self, + other: "Usage", + model_name: str, + agent_name: str, + response_id: str | None = None, + ) -> None: """Add another Usage object to this one, aggregating all fields. This method automatically preserves request_usage_entries. Args: other: The Usage object to add to this one. + model_name: The model name used for this request. + agent_name: The agent name that made this request. + response_id: The response ID for this request. """ self.requests += other.requests if other.requests else 0 self.input_tokens += other.input_tokens if other.input_tokens else 0 @@ -101,6 +119,9 @@ def add(self, other: "Usage") -> None: total_tokens=other.total_tokens, input_tokens_details=other.input_tokens_details, output_tokens_details=other.output_tokens_details, + model_name=model_name, + agent_name=agent_name, + response_id=response_id, ) self.request_usage_entries.append(request_usage) elif other.request_usage_entries: diff --git a/tests/test_usage.py b/tests/test_usage.py index 9d89cc750..1c0157937 100644 --- a/tests/test_usage.py +++ b/tests/test_usage.py @@ -21,7 +21,7 @@ def test_usage_add_aggregates_all_fields(): total_tokens=15, ) - u1.add(u2) + u1.add(u2, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") assert u1.requests == 3 assert u1.input_tokens == 17 @@ -42,7 +42,7 @@ def test_usage_add_aggregates_with_none_values(): total_tokens=15, ) - u1.add(u2) + u1.add(u2, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") assert u1.requests == 2 assert u1.input_tokens == 7 @@ -60,6 +60,9 @@ def test_request_usage_creation(): total_tokens=300, input_tokens_details=InputTokensDetails(cached_tokens=10), output_tokens_details=OutputTokensDetails(reasoning_tokens=20), + model_name="gpt-5", + agent_name="test-agent", + response_id="resp-123", ) assert request_usage.input_tokens == 100 @@ -67,6 +70,9 @@ def test_request_usage_creation(): assert request_usage.total_tokens == 300 assert request_usage.input_tokens_details.cached_tokens == 10 assert request_usage.output_tokens_details.reasoning_tokens == 20 + assert request_usage.model_name == "gpt-5" + assert request_usage.agent_name == "test-agent" + assert request_usage.response_id == "resp-123" def test_usage_add_preserves_single_request(): @@ -81,7 +87,7 @@ def test_usage_add_preserves_single_request(): total_tokens=300, ) - u1.add(u2) + u1.add(u2, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") # Should preserve the request usage details assert len(u1.request_usage_entries) == 1 @@ -91,6 +97,9 @@ def test_usage_add_preserves_single_request(): assert request_usage.total_tokens == 300 assert request_usage.input_tokens_details.cached_tokens == 10 assert request_usage.output_tokens_details.reasoning_tokens == 20 + assert request_usage.model_name == "gpt-5" + assert request_usage.agent_name == "test-agent" + assert request_usage.response_id == "resp-1" def test_usage_add_ignores_zero_token_requests(): @@ -105,7 +114,7 @@ def test_usage_add_ignores_zero_token_requests(): total_tokens=0, ) - u1.add(u2) + u1.add(u2, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") # Should not create a request_usage_entry for zero tokens assert len(u1.request_usage_entries) == 0 @@ -123,7 +132,7 @@ def test_usage_add_ignores_multi_request_usage(): total_tokens=300, ) - u1.add(u2) + u1.add(u2, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") # Should not create a request usage entry for multi-request usage assert len(u1.request_usage_entries) == 0 @@ -141,7 +150,7 @@ def test_usage_add_merges_existing_request_usage_entries(): output_tokens_details=OutputTokensDetails(reasoning_tokens=20), total_tokens=300, ) - u1.add(u2) + u1.add(u2, model_name="gpt-5", agent_name="agent-1", response_id="resp-1") # Create second usage with request_usage_entries u3 = Usage( @@ -153,7 +162,7 @@ def test_usage_add_merges_existing_request_usage_entries(): total_tokens=125, ) - u1.add(u3) + u1.add(u3, model_name="gpt-5", agent_name="agent-2", response_id="resp-2") # Should have both request_usage_entries assert len(u1.request_usage_entries) == 2 @@ -163,12 +172,16 @@ def test_usage_add_merges_existing_request_usage_entries(): assert first.input_tokens == 100 assert first.output_tokens == 200 assert first.total_tokens == 300 + assert first.agent_name == "agent-1" + assert first.response_id == "resp-1" # Second request second = u1.request_usage_entries[1] assert second.input_tokens == 50 assert second.output_tokens == 75 assert second.total_tokens == 125 + assert second.agent_name == "agent-2" + assert second.response_id == "resp-2" def test_usage_add_with_pre_existing_request_usage_entries(): @@ -184,7 +197,7 @@ def test_usage_add_with_pre_existing_request_usage_entries(): output_tokens_details=OutputTokensDetails(reasoning_tokens=20), total_tokens=300, ) - u1.add(u2) + u1.add(u2, model_name="gpt-5", agent_name="agent-1", response_id="resp-1") # Create another usage with request_usage_entries u3 = Usage( @@ -197,7 +210,7 @@ def test_usage_add_with_pre_existing_request_usage_entries(): ) # Add u3 to u1 - u1.add(u3) + u1.add(u3, model_name="gpt-5", agent_name="agent-2", response_id="resp-2") # Should have both request_usage_entries assert len(u1.request_usage_entries) == 2 @@ -227,7 +240,7 @@ def test_anthropic_cost_calculation_scenario(): output_tokens_details=OutputTokensDetails(reasoning_tokens=0), total_tokens=150_000, ) - usage.add(req1) + usage.add(req1, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") # Second request: 150K input tokens req2 = Usage( @@ -238,7 +251,7 @@ def test_anthropic_cost_calculation_scenario(): output_tokens_details=OutputTokensDetails(reasoning_tokens=0), total_tokens=225_000, ) - usage.add(req2) + usage.add(req2, model_name="gpt-5", agent_name="test-agent", response_id="resp-2") # Third request: 80K input tokens req3 = Usage( @@ -249,7 +262,7 @@ def test_anthropic_cost_calculation_scenario(): output_tokens_details=OutputTokensDetails(reasoning_tokens=0), total_tokens=120_000, ) - usage.add(req3) + usage.add(req3, model_name="gpt-5", agent_name="test-agent", response_id="resp-3") # Verify aggregated totals assert usage.requests == 3 From 6e6ca81fc59159c5598180049ba447e8b680b4d6 Mon Sep 17 00:00:00 2001 From: Wen-Tien Chang Date: Sat, 22 Nov 2025 16:38:03 +0800 Subject: [PATCH 2/4] Add future annotations import to usage.py --- src/agents/usage.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/agents/usage.py b/src/agents/usage.py index 9605b31e9..5670e672f 100644 --- a/src/agents/usage.py +++ b/src/agents/usage.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import field from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails From cfa7c1e3999539cf1b98e4591db1a73b7305c7de Mon Sep 17 00:00:00 2001 From: Wen-Tien Chang Date: Sat, 22 Nov 2025 16:43:51 +0800 Subject: [PATCH 3/4] Remove unnecessary string quote from Usage type annotation --- src/agents/usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agents/usage.py b/src/agents/usage.py index 5670e672f..87656a7a1 100644 --- a/src/agents/usage.py +++ b/src/agents/usage.py @@ -83,7 +83,7 @@ def __post_init__(self) -> None: def add( self, - other: "Usage", + other: Usage, model_name: str, agent_name: str, response_id: str | None = None, From 8ecd03b1e1c2aa10c7f5a1ca69fd50b72dcf78ea Mon Sep 17 00:00:00 2001 From: Wen-Tien Chang Date: Sat, 22 Nov 2025 22:55:04 +0800 Subject: [PATCH 4/4] Refactor RequestUsage to use flexible metadata dict instead of specific fields --- docs/usage.md | 4 +- src/agents/run.py | 16 ++++-- src/agents/usage.py | 23 ++------ tests/test_usage.py | 136 ++++++++++++++++++++++++++++++++++++-------- 4 files changed, 129 insertions(+), 50 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index abd3fa5e3..84008832f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -54,8 +54,8 @@ The SDK automatically tracks usage for each API request in `request_usage_entrie ```python result = await Runner.run(agent, "What's the weather in Tokyo?") -for request in enumerate(result.context_wrapper.usage.request_usage_entries): - print(f"Request {i + 1}: Model={request.model_name}, Agent={request.agent_name}, Input={request.input_tokens} tokens, Output={request.output_tokens} tokens") +for i, request in enumerate(result.context_wrapper.usage.request_usage_entries): + print(f"Request {i + 1}: Input={request.input_tokens} tokens, Output={request.output_tokens} tokens, metadata={request.metadata}") ``` ## Accessing usage with sessions diff --git a/src/agents/run.py b/src/agents/run.py index 9ef6b090e..1f64becb6 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -1404,9 +1404,11 @@ async def _run_single_turn_streamed( ) context_wrapper.usage.add( usage, - model_name=model.model, - agent_name=agent.name, - response_id=event.response.id, + metadata={ + "model_name": model.model, + "agent_name": agent.name, + "response_id": event.response.id, + }, ) if isinstance(event, ResponseOutputItemDoneEvent): @@ -1826,9 +1828,11 @@ async def _get_new_response( context_wrapper.usage.add( new_response.usage, - model_name=model.model, - agent_name=agent.name, - response_id=new_response.response_id, + metadata={ + "model_name": model.model, + "agent_name": agent.name, + "response_id": new_response.response_id, + }, ) # If we have run hooks, or if the agent has hooks, we need to call them after the LLM call diff --git a/src/agents/usage.py b/src/agents/usage.py index 87656a7a1..dbc0076e9 100644 --- a/src/agents/usage.py +++ b/src/agents/usage.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import field +from typing import Any from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails from pydantic.dataclasses import dataclass @@ -25,14 +26,8 @@ class RequestUsage: output_tokens_details: OutputTokensDetails """Details about the output tokens for this individual request.""" - model_name: str - """The model name used for this request.""" - - agent_name: str - """The agent name that made this request.""" - - response_id: str | None = None - """The response ID for this request (i.e. ModelResponse.response_id).""" + metadata: dict[str, Any] = field(default_factory=dict) + """Additional metadata for this request (e.g., model_name, agent_name, response_id).""" @dataclass @@ -84,9 +79,7 @@ def __post_init__(self) -> None: def add( self, other: Usage, - model_name: str, - agent_name: str, - response_id: str | None = None, + metadata: dict[str, Any] | None = None, ) -> None: """Add another Usage object to this one, aggregating all fields. @@ -94,9 +87,7 @@ def add( Args: other: The Usage object to add to this one. - model_name: The model name used for this request. - agent_name: The agent name that made this request. - response_id: The response ID for this request. + metadata: Additional metadata for this request """ self.requests += other.requests if other.requests else 0 self.input_tokens += other.input_tokens if other.input_tokens else 0 @@ -121,9 +112,7 @@ def add( total_tokens=other.total_tokens, input_tokens_details=other.input_tokens_details, output_tokens_details=other.output_tokens_details, - model_name=model_name, - agent_name=agent_name, - response_id=response_id, + metadata=metadata or {}, ) self.request_usage_entries.append(request_usage) elif other.request_usage_entries: diff --git a/tests/test_usage.py b/tests/test_usage.py index 1c0157937..2d0dd086f 100644 --- a/tests/test_usage.py +++ b/tests/test_usage.py @@ -21,7 +21,14 @@ def test_usage_add_aggregates_all_fields(): total_tokens=15, ) - u1.add(u2, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") + u1.add( + u2, + metadata={ + "model_name": "gpt-5", + "agent_name": "test-agent", + "response_id": "resp-1", + }, + ) assert u1.requests == 3 assert u1.input_tokens == 17 @@ -42,7 +49,14 @@ def test_usage_add_aggregates_with_none_values(): total_tokens=15, ) - u1.add(u2, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") + u1.add( + u2, + metadata={ + "model_name": "gpt-5", + "agent_name": "test-agent", + "response_id": "resp-1", + }, + ) assert u1.requests == 2 assert u1.input_tokens == 7 @@ -60,9 +74,11 @@ def test_request_usage_creation(): total_tokens=300, input_tokens_details=InputTokensDetails(cached_tokens=10), output_tokens_details=OutputTokensDetails(reasoning_tokens=20), - model_name="gpt-5", - agent_name="test-agent", - response_id="resp-123", + metadata={ + "model_name": "gpt-5", + "agent_name": "test-agent", + "response_id": "resp-123", + }, ) assert request_usage.input_tokens == 100 @@ -70,9 +86,9 @@ def test_request_usage_creation(): assert request_usage.total_tokens == 300 assert request_usage.input_tokens_details.cached_tokens == 10 assert request_usage.output_tokens_details.reasoning_tokens == 20 - assert request_usage.model_name == "gpt-5" - assert request_usage.agent_name == "test-agent" - assert request_usage.response_id == "resp-123" + assert request_usage.metadata["model_name"] == "gpt-5" + assert request_usage.metadata["agent_name"] == "test-agent" + assert request_usage.metadata["response_id"] == "resp-123" def test_usage_add_preserves_single_request(): @@ -87,7 +103,14 @@ def test_usage_add_preserves_single_request(): total_tokens=300, ) - u1.add(u2, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") + u1.add( + u2, + metadata={ + "model_name": "gpt-5", + "agent_name": "test-agent", + "response_id": "resp-1", + }, + ) # Should preserve the request usage details assert len(u1.request_usage_entries) == 1 @@ -97,9 +120,9 @@ def test_usage_add_preserves_single_request(): assert request_usage.total_tokens == 300 assert request_usage.input_tokens_details.cached_tokens == 10 assert request_usage.output_tokens_details.reasoning_tokens == 20 - assert request_usage.model_name == "gpt-5" - assert request_usage.agent_name == "test-agent" - assert request_usage.response_id == "resp-1" + assert request_usage.metadata["model_name"] == "gpt-5" + assert request_usage.metadata["agent_name"] == "test-agent" + assert request_usage.metadata["response_id"] == "resp-1" def test_usage_add_ignores_zero_token_requests(): @@ -114,7 +137,14 @@ def test_usage_add_ignores_zero_token_requests(): total_tokens=0, ) - u1.add(u2, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") + u1.add( + u2, + metadata={ + "model_name": "gpt-5", + "agent_name": "test-agent", + "response_id": "resp-1", + }, + ) # Should not create a request_usage_entry for zero tokens assert len(u1.request_usage_entries) == 0 @@ -132,7 +162,14 @@ def test_usage_add_ignores_multi_request_usage(): total_tokens=300, ) - u1.add(u2, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") + u1.add( + u2, + metadata={ + "model_name": "gpt-5", + "agent_name": "test-agent", + "response_id": "resp-1", + }, + ) # Should not create a request usage entry for multi-request usage assert len(u1.request_usage_entries) == 0 @@ -150,7 +187,14 @@ def test_usage_add_merges_existing_request_usage_entries(): output_tokens_details=OutputTokensDetails(reasoning_tokens=20), total_tokens=300, ) - u1.add(u2, model_name="gpt-5", agent_name="agent-1", response_id="resp-1") + u1.add( + u2, + metadata={ + "model_name": "gpt-5", + "agent_name": "agent-1", + "response_id": "resp-1", + }, + ) # Create second usage with request_usage_entries u3 = Usage( @@ -162,7 +206,14 @@ def test_usage_add_merges_existing_request_usage_entries(): total_tokens=125, ) - u1.add(u3, model_name="gpt-5", agent_name="agent-2", response_id="resp-2") + u1.add( + u3, + metadata={ + "model_name": "gpt-5", + "agent_name": "agent-2", + "response_id": "resp-2", + }, + ) # Should have both request_usage_entries assert len(u1.request_usage_entries) == 2 @@ -172,16 +223,16 @@ def test_usage_add_merges_existing_request_usage_entries(): assert first.input_tokens == 100 assert first.output_tokens == 200 assert first.total_tokens == 300 - assert first.agent_name == "agent-1" - assert first.response_id == "resp-1" + assert first.metadata["agent_name"] == "agent-1" + assert first.metadata["response_id"] == "resp-1" # Second request second = u1.request_usage_entries[1] assert second.input_tokens == 50 assert second.output_tokens == 75 assert second.total_tokens == 125 - assert second.agent_name == "agent-2" - assert second.response_id == "resp-2" + assert second.metadata["agent_name"] == "agent-2" + assert second.metadata["response_id"] == "resp-2" def test_usage_add_with_pre_existing_request_usage_entries(): @@ -197,7 +248,14 @@ def test_usage_add_with_pre_existing_request_usage_entries(): output_tokens_details=OutputTokensDetails(reasoning_tokens=20), total_tokens=300, ) - u1.add(u2, model_name="gpt-5", agent_name="agent-1", response_id="resp-1") + u1.add( + u2, + metadata={ + "model_name": "gpt-5", + "agent_name": "agent-1", + "response_id": "resp-1", + }, + ) # Create another usage with request_usage_entries u3 = Usage( @@ -210,7 +268,14 @@ def test_usage_add_with_pre_existing_request_usage_entries(): ) # Add u3 to u1 - u1.add(u3, model_name="gpt-5", agent_name="agent-2", response_id="resp-2") + u1.add( + u3, + metadata={ + "model_name": "gpt-5", + "agent_name": "agent-2", + "response_id": "resp-2", + }, + ) # Should have both request_usage_entries assert len(u1.request_usage_entries) == 2 @@ -240,7 +305,14 @@ def test_anthropic_cost_calculation_scenario(): output_tokens_details=OutputTokensDetails(reasoning_tokens=0), total_tokens=150_000, ) - usage.add(req1, model_name="gpt-5", agent_name="test-agent", response_id="resp-1") + usage.add( + req1, + metadata={ + "model_name": "gpt-5", + "agent_name": "test-agent", + "response_id": "resp-1", + }, + ) # Second request: 150K input tokens req2 = Usage( @@ -251,7 +323,14 @@ def test_anthropic_cost_calculation_scenario(): output_tokens_details=OutputTokensDetails(reasoning_tokens=0), total_tokens=225_000, ) - usage.add(req2, model_name="gpt-5", agent_name="test-agent", response_id="resp-2") + usage.add( + req2, + metadata={ + "model_name": "gpt-5", + "agent_name": "test-agent", + "response_id": "resp-2", + }, + ) # Third request: 80K input tokens req3 = Usage( @@ -262,7 +341,14 @@ def test_anthropic_cost_calculation_scenario(): output_tokens_details=OutputTokensDetails(reasoning_tokens=0), total_tokens=120_000, ) - usage.add(req3, model_name="gpt-5", agent_name="test-agent", response_id="resp-3") + usage.add( + req3, + metadata={ + "model_name": "gpt-5", + "agent_name": "test-agent", + "response_id": "resp-3", + }, + ) # Verify aggregated totals assert usage.requests == 3