Merged
36 changes: 27 additions & 9 deletions litellm/cost_calculator.py
@@ -133,6 +133,25 @@ def _cost_per_token_custom_pricing_helper(
     return None
 
 
+def _transcription_usage_has_token_details(
+    usage_block: Optional[Usage],
+) -> bool:
+    if usage_block is None:
+        return False
+
+    prompt_tokens_val = getattr(usage_block, "prompt_tokens", 0) or 0
+    completion_tokens_val = getattr(usage_block, "completion_tokens", 0) or 0
+    prompt_details = getattr(usage_block, "prompt_tokens_details", None)
+
+    if prompt_details is not None:
+        audio_token_count = getattr(prompt_details, "audio_tokens", 0) or 0
+        text_token_count = getattr(prompt_details, "text_tokens", 0) or 0
+        if audio_token_count > 0 or text_token_count > 0:
+            return True
+
+    return (prompt_tokens_val > 0) or (completion_tokens_val > 0)
+
+
 def cost_per_token(  # noqa: PLR0915
     model: str = "",
     prompt_tokens: int = 0,
@@ -324,19 +343,18 @@ def cost_per_token(  # noqa: PLR0915
             usage=usage_block, model=model, custom_llm_provider=custom_llm_provider
         )
     elif call_type == "atranscription" or call_type == "transcription":
-
-        if model == "gpt-4o-mini-transcribe":
+        if _transcription_usage_has_token_details(usage_block):
             return openai_cost_per_token(
-                model=model,
+                model=model_without_prefix,
[Inline review thread on the model=model_without_prefix change]

Contributor: why do we need this change?

Collaborator (Author): [screenshot] openai_cost_per_token takes the model name without the provider prefix.

Contributor: this seems brittle. Accepting PR as patch, but it'd be great to make this more robust.

                 usage=usage_block,
                 service_tier=service_tier,
             )
-        else:
-            return openai_cost_per_second(
-                model=model,
-                custom_llm_provider=custom_llm_provider,
-                duration=audio_transcription_file_duration,
-            )
+
+        return openai_cost_per_second(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            duration=audio_transcription_file_duration,
+        )
     elif call_type == "search" or call_type == "asearch":
         # Search providers use per-query pricing
         from litellm.search import search_provider_cost_per_query
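On the reviewer's robustness point: the call works only because model_without_prefix was derived from model upstream. A minimal sketch of one way to make the stripping explicit at the call site (a hypothetical helper for illustration, assuming prefixes always take the "provider/model" form; this is not code from the PR):

    def strip_provider_prefix(model: str, custom_llm_provider: str) -> str:
        # "openai/gpt-4o-transcribe" -> "gpt-4o-transcribe";
        # a model name with no prefix is returned unchanged.
        prefix = f"{custom_llm_provider}/"
        if model.startswith(prefix):
            return model[len(prefix):]
        return model

    # strip_provider_prefix("openai/gpt-4o-transcribe", "openai") == "gpt-4o-transcribe"
    # strip_provider_prefix("whisper-1", "openai") == "whisper-1"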
49 changes: 49 additions & 0 deletions tests/test_litellm/test_cost_calculator.py
@@ -19,6 +19,7 @@
 )
 from litellm.types.llms.openai import OpenAIRealtimeStreamList
 from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage
+from litellm.utils import TranscriptionResponse
 
 
 def test_cost_calculator_with_response_cost_in_additional_headers():
@@ -77,6 +78,54 @@ def test_cost_calculator_with_usage():
     assert result == expected_cost, f"Got {result}, Expected {expected_cost}"
 
 
+def test_transcription_cost_uses_token_pricing():
+    from litellm import completion_cost
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    usage = Usage(
+        prompt_tokens=14,
+        completion_tokens=45,
+        total_tokens=59,
+        prompt_tokens_details=PromptTokensDetailsWrapper(
+            text_tokens=0, audio_tokens=14
+        ),
+    )
+    response = TranscriptionResponse(text="demo text")
+    response.usage = usage
+
+    cost = completion_cost(
+        completion_response=response,
+        model="gpt-4o-transcribe",
+        custom_llm_provider="openai",
+        call_type="atranscription",
+    )
+
+    expected_cost = (14 * 6e-06) + (45 * 1e-05)
+    assert pytest.approx(cost, rel=1e-6) == expected_cost
+
+
+def test_transcription_cost_falls_back_to_duration():
+    from litellm import completion_cost
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    response = TranscriptionResponse(text="demo text")
+    response.duration = 10.0
+
+    cost = completion_cost(
+        completion_response=response,
+        model="whisper-1",
+        custom_llm_provider="openai",
+        call_type="atranscription",
+    )
+
+    expected_cost = 10.0 * 0.0001
+    assert pytest.approx(cost, rel=1e-6) == expected_cost
+
+
 def test_handle_realtime_stream_cost_calculation():
     from litellm.cost_calculator import RealtimeAPITokenUsageProcessor
 
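As a back-of-the-envelope check of the two pricing paths the tests exercise (the rates below are the ones implied by the tests' expected_cost values, assumed here rather than read from litellm's cost map):

    # Token path: gpt-4o-transcribe usage with 14 audio prompt tokens
    # and 45 completion tokens.
    input_rate = 6e-06   # $/token for audio input (assumed from the test)
    output_rate = 1e-05  # $/token for output (assumed from the test)
    token_cost = 14 * input_rate + 45 * output_rate  # 0.000534

    # Duration path: whisper-1 priced per second of audio, 10 s file.
    per_second_rate = 0.0001  # $/second (assumed from the test)
    duration_cost = 10.0 * per_second_rate  # 0.001

    print(token_cost, duration_cost)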