Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions src/agents/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,30 @@
from pydantic.dataclasses import dataclass


@dataclass
class IndividualRequestUsage:
    """Token accounting for exactly one API request.

    Aggregated totals hide how large each request was. Keeping a per-request
    record lets callers price every request on its own when a provider's rate
    depends on per-request token counts (e.g., Anthropic's 200K token threshold
    pricing).
    """

    input_tokens: int
    """Number of input tokens for this single request."""

    output_tokens: int
    """Number of output tokens for this single request."""

    total_tokens: int
    """Combined token count (input + output) for this single request."""

    input_tokens_details: InputTokensDetails
    """Further details about this request's input tokens."""

    output_tokens_details: OutputTokensDetails
    """Further details about this request's output tokens."""


@dataclass
class Usage:
requests: int = 0
Expand All @@ -27,7 +51,34 @@ class Usage:
total_tokens: int = 0
"""Total tokens sent and received, across all requests."""

individual_requests: list[IndividualRequestUsage] = field(default_factory=list)
"""List of individual request usage details for accurate per-request cost calculation.

This field preserves the token counts for each individual API request made during a run.
This is particularly useful for providers like Anthropic that have different pricing
tiers based on per-request token counts (e.g., different rates for requests with more
or fewer than 200K tokens).

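When `add()` receives a usage that represents a single request with non-zero tokens
(`requests == 1` and `total_tokens > 0`), it appends an entry to this list; otherwise
it merges any entries already recorded in the added usage's `individual_requests`.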

Example:
For a run that makes 3 API calls with 100K, 150K, and 80K input tokens each,
the aggregated `input_tokens` would be 330K, but `individual_requests` would
preserve the [100K, 150K, 80K] breakdown needed for accurate cost calculation.
"""

def add(self, other: "Usage") -> None:
"""Add another Usage object to this one, aggregating all fields.

This method automatically preserves individual request details for accurate
cost calculation. When adding a Usage object that represents a single request
(requests=1), it creates an IndividualRequestUsage entry to preserve the
per-request token breakdown.

Args:
other: The Usage object to add to this one.
"""
self.requests += other.requests if other.requests else 0
self.input_tokens += other.input_tokens if other.input_tokens else 0
self.output_tokens += other.output_tokens if other.output_tokens else 0
Expand All @@ -41,3 +92,18 @@ def add(self, other: "Usage") -> None:
reasoning_tokens=self.output_tokens_details.reasoning_tokens
+ other.output_tokens_details.reasoning_tokens
)

# Automatically preserve individual request details for accurate cost calculation.
# If the other Usage represents a single request with tokens, record it.
if other.requests == 1 and other.total_tokens > 0:
individual_usage = IndividualRequestUsage(
input_tokens=other.input_tokens,
output_tokens=other.output_tokens,
total_tokens=other.total_tokens,
input_tokens_details=other.input_tokens_details,
output_tokens_details=other.output_tokens_details,
)
self.individual_requests.append(individual_usage)
elif other.individual_requests:
# If the other Usage already has individual request breakdowns, merge them.
self.individual_requests.extend(other.individual_requests)
219 changes: 218 additions & 1 deletion tests/test_usage.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails

from agents.usage import Usage
from agents.usage import IndividualRequestUsage, Usage


def test_usage_add_aggregates_all_fields():
Expand Down Expand Up @@ -50,3 +50,220 @@ def test_usage_add_aggregates_with_none_values():
assert u1.total_tokens == 15
assert u1.input_tokens_details.cached_tokens == 4
assert u1.output_tokens_details.reasoning_tokens == 6


def test_individual_request_usage_creation():
    """IndividualRequestUsage exposes every value it was constructed with."""
    input_details = InputTokensDetails(cached_tokens=10)
    output_details = OutputTokensDetails(reasoning_tokens=20)

    record = IndividualRequestUsage(
        input_tokens=100,
        output_tokens=200,
        total_tokens=300,
        input_tokens_details=input_details,
        output_tokens_details=output_details,
    )

    # Scalar counts first, then the nested detail objects.
    assert (record.input_tokens, record.output_tokens, record.total_tokens) == (100, 200, 300)
    assert record.input_tokens_details.cached_tokens == 10
    assert record.output_tokens_details.reasoning_tokens == 20


def test_usage_add_preserves_single_request():
    """A single-request Usage passed to add() is recorded as one IndividualRequestUsage."""
    aggregate = Usage()
    single_request = Usage(
        requests=1,
        input_tokens=100,
        input_tokens_details=InputTokensDetails(cached_tokens=10),
        output_tokens=200,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
        total_tokens=300,
    )

    aggregate.add(single_request)

    # Exactly one entry is recorded, carrying over every per-request figure.
    assert len(aggregate.individual_requests) == 1
    entry = aggregate.individual_requests[0]
    assert (entry.input_tokens, entry.output_tokens, entry.total_tokens) == (100, 200, 300)
    assert entry.input_tokens_details.cached_tokens == 10
    assert entry.output_tokens_details.reasoning_tokens == 20


def test_usage_add_ignores_zero_token_requests():
    """A request that used no tokens at all leaves individual_requests empty."""
    aggregate = Usage()
    tokenless_request = Usage(
        requests=1,
        input_tokens=0,
        input_tokens_details=InputTokensDetails(cached_tokens=0),
        output_tokens=0,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
        total_tokens=0,
    )

    aggregate.add(tokenless_request)

    # With a zero total, nothing is recorded as an individual request.
    assert aggregate.individual_requests == []


def test_usage_add_ignores_multi_request_usage():
    """A Usage covering several requests is not recorded as one individual request."""
    aggregate = Usage()
    multi_request = Usage(
        requests=3,  # More than one request, so no single-request entry applies
        input_tokens=100,
        input_tokens_details=InputTokensDetails(cached_tokens=10),
        output_tokens=200,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
        total_tokens=300,
    )

    aggregate.add(multi_request)

    # The added usage carries no per-request breakdown, so none is recorded.
    assert aggregate.individual_requests == []


def test_usage_add_merges_existing_individual_requests():
    """Successive add() calls keep earlier individual_requests and append new ones in order."""
    aggregate = Usage()

    first_request = Usage(
        requests=1,
        input_tokens=100,
        input_tokens_details=InputTokensDetails(cached_tokens=10),
        output_tokens=200,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
        total_tokens=300,
    )
    second_request = Usage(
        requests=1,
        input_tokens=50,
        input_tokens_details=InputTokensDetails(cached_tokens=5),
        output_tokens=75,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=10),
        total_tokens=125,
    )

    aggregate.add(first_request)
    aggregate.add(second_request)

    # Both requests are present, in the order they were added.
    recorded = [
        (entry.input_tokens, entry.output_tokens, entry.total_tokens)
        for entry in aggregate.individual_requests
    ]
    assert recorded == [(100, 200, 300), (50, 75, 125)]


def test_usage_add_with_pre_existing_individual_requests():
    """Test adding a Usage object that already carries its own individual_requests.

    The Usage being added is itself an aggregate of two requests (requests == 2), so
    add() cannot record it as a single request; it must merge the per-request entries
    that the added Usage already holds.
    """
    u1 = Usage()
    u1.add(
        Usage(
            requests=1,
            input_tokens=100,
            input_tokens_details=InputTokensDetails(cached_tokens=10),
            output_tokens=200,
            output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
            total_tokens=300,
        )
    )

    # Build an aggregate whose individual_requests list is already populated.
    pre_aggregated = Usage()
    pre_aggregated.add(
        Usage(
            requests=1,
            input_tokens=50,
            input_tokens_details=InputTokensDetails(cached_tokens=5),
            output_tokens=75,
            output_tokens_details=OutputTokensDetails(reasoning_tokens=10),
            total_tokens=125,
        )
    )
    pre_aggregated.add(
        Usage(
            requests=1,
            input_tokens=30,
            input_tokens_details=InputTokensDetails(cached_tokens=3),
            output_tokens=45,
            output_tokens_details=OutputTokensDetails(reasoning_tokens=5),
            total_tokens=75,
        )
    )
    assert pre_aggregated.requests == 2
    assert len(pre_aggregated.individual_requests) == 2

    # Adding the multi-request aggregate must merge its existing breakdown.
    u1.add(pre_aggregated)

    assert u1.requests == 3
    assert u1.input_tokens == 180  # 100 + 50 + 30
    assert [entry.input_tokens for entry in u1.individual_requests] == [100, 50, 30]

    # The source aggregate keeps its own breakdown after being merged.
    assert len(pre_aggregated.individual_requests) == 2


def test_usage_individual_requests_default_empty():
    """A freshly constructed Usage starts with no recorded individual requests."""
    fresh_usage = Usage()

    assert fresh_usage.individual_requests == []


def test_anthropic_cost_calculation_scenario():
    """Three sub-200K requests keep their own token counts for tiered cost calculation."""
    # (input_tokens, output_tokens) for each simulated API call. Every figure is below
    # the 200K threshold, so each request would use the lower pricing tier.
    simulated_calls = [
        (100_000, 50_000),
        (150_000, 75_000),
        (80_000, 40_000),
    ]

    usage = Usage()
    for prompt_tokens, completion_tokens in simulated_calls:
        usage.add(
            Usage(
                requests=1,
                input_tokens=prompt_tokens,
                input_tokens_details=InputTokensDetails(cached_tokens=0),
                output_tokens=completion_tokens,
                output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
                total_tokens=prompt_tokens + completion_tokens,
            )
        )

    # Aggregated totals span all three calls.
    assert usage.requests == 3
    assert usage.input_tokens == 330_000  # 100K + 150K + 80K
    assert usage.output_tokens == 165_000  # 50K + 75K + 40K
    assert usage.total_tokens == 495_000  # 150K + 225K + 120K

    # The per-request breakdown survives aggregation, in call order.
    assert len(usage.individual_requests) == 3
    recorded_inputs = [entry.input_tokens for entry in usage.individual_requests]
    assert recorded_inputs == [100_000, 150_000, 80_000]

    # Every individual request stays under the 200K threshold.
    for entry in usage.individual_requests:
        assert entry.input_tokens < 200_000
        assert entry.output_tokens < 200_000