1 change: 1 addition & 0 deletions litellm/llms/perplexity/chat/__init__.py
@@ -0,0 +1 @@
"""Perplexity chat completion transformations."""
101 changes: 71 additions & 30 deletions litellm/llms/perplexity/chat/transformation.py
@@ -1,25 +1,32 @@
"""
Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`
"""
"""Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`."""

from typing import Any, List, Optional, Tuple
from __future__ import annotations

from typing import TYPE_CHECKING, Any, List, Optional, Tuple

import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import Usage, PromptTokensDetailsWrapper
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
from litellm.types.utils import ModelResponse
from litellm.types.llms.openai import ChatCompletionAnnotation
from litellm.types.llms.openai import ChatCompletionAnnotationURLCitation
from litellm.secret_managers.main import get_secret_str
from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage

if TYPE_CHECKING:
import httpx

from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAnnotation,
ChatCompletionAnnotationURLCitation,
)


class PerplexityChatConfig(OpenAIGPTConfig):
"""Configuration for Perplexity chat completions."""

@property
def custom_llm_provider(self) -> Optional[str]:
def custom_llm_provider(self) -> str | None:
"""Return the custom LLM provider name."""
return "perplexity"

def _get_openai_compatible_provider_info(
@@ -72,7 +79,8 @@ def get_supported_openai_params(self, model: str) -> list:

return base_openai_params

def transform_response(

def transform_response( # noqa: PLR0913
self,
model: str,
raw_response: httpx.Response,
@@ -82,10 +90,11 @@ def transform_response(
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
encoding: Any,
encoding: Any,
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
json_mode: Optional[bool] = None,
) -> ModelResponse:
"""Transform Perplexity response to standard format."""
# Call the parent transform_response first to handle the standard transformation
model_response = super().transform_response(
model=model,
@@ -104,28 +113,29 @@ def transform_response(
# Extract and enhance usage with Perplexity-specific fields
try:
raw_response_json = raw_response.json()
self.add_cost_to_usage(model_response, raw_response_json)
self._enhance_usage_with_perplexity_fields(
model_response, raw_response_json
model_response, raw_response_json,
)
self._add_citations_as_annotations(model_response, raw_response_json)
except Exception as e:
except (ValueError, TypeError, KeyError) as e:
verbose_logger.debug(f"Error extracting Perplexity-specific usage fields: {e}")

return model_response

def _enhance_usage_with_perplexity_fields(
self, model_response: ModelResponse, raw_response_json: dict
def _enhance_usage_with_perplexity_fields(
self, model_response: ModelResponse, raw_response_json: dict,
) -> None:
"""
Extract citation tokens and search queries from Perplexity API response
and add them to the usage object using standard LiteLLM fields.
"""Extract citation tokens and search queries from Perplexity API response.

Add them to the usage object using standard LiteLLM fields.
"""
if not hasattr(model_response, "usage") or model_response.usage is None:
# Create a usage object if it doesn't exist (when usage was None)
model_response.usage = Usage( # type: ignore[attr-defined]
prompt_tokens=0,
completion_tokens=0,
total_tokens=0
total_tokens=0,
)

usage = model_response.usage # type: ignore[attr-defined]
@@ -146,7 +156,7 @@ def _enhance_usage_with_perplexity_fields(
# Extract search queries count from usage or response metadata
# Perplexity might include this in the usage object or as separate metadata
perplexity_usage = raw_response_json.get("usage", {})

# Try to extract search queries from usage field first, then root level
num_search_queries = perplexity_usage.get("num_search_queries")
if num_search_queries is None:
@@ -155,18 +165,18 @@ def _enhance_usage_with_perplexity_fields(
num_search_queries = perplexity_usage.get("search_queries")
if num_search_queries is None:
num_search_queries = raw_response_json.get("search_queries")

# Create or update prompt_tokens_details to include web search requests and citation tokens
if citation_tokens > 0 or (
num_search_queries is not None and num_search_queries > 0
):
if usage.prompt_tokens_details is None:
usage.prompt_tokens_details = PromptTokensDetailsWrapper()

# Store citation tokens count for cost calculation
if citation_tokens > 0:
setattr(usage, "citation_tokens", citation_tokens)
usage.citation_tokens = citation_tokens

# Store search queries count in the standard web_search_requests field
if num_search_queries is not None and num_search_queries > 0:
usage.prompt_tokens_details.web_search_requests = num_search_queries
@@ -248,4 +258,35 @@ def _add_citations_as_annotations(
if citations:
setattr(model_response, "citations", citations)
if search_results:
setattr(model_response, "search_results", search_results)
setattr(model_response, "search_results", search_results)

def add_cost_to_usage(self, model_response: ModelResponse, raw_response_json: dict) -> None:
"""Add the cost to the usage object."""
try:
usage_data = raw_response_json.get("usage")
if usage_data:
# Try different possible cost field locations
response_cost = None

# Check if cost is directly in usage (flat structure)
if "total_cost" in usage_data:
response_cost = usage_data["total_cost"]
# Check if cost is nested (cost.total_cost structure)
elif "cost" in usage_data and isinstance(usage_data["cost"], dict):
response_cost = usage_data["cost"].get("total_cost")
# Check if cost is a simple value
elif "cost" in usage_data:
response_cost = usage_data["cost"]

if response_cost is not None:
# Store cost in hidden params for the cost calculator to use
if not hasattr(model_response, "_hidden_params"):
model_response._hidden_params = {}
if "additional_headers" not in model_response._hidden_params:
model_response._hidden_params["additional_headers"] = {}
model_response._hidden_params["additional_headers"][
"llm_provider-x-litellm-response-cost"
] = float(response_cost)
except (ValueError, TypeError, KeyError) as e:
verbose_logger.debug(f"Error adding cost to usage: {e}")
# If we can't extract cost, continue without it - don't fail the response
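For context, here is a minimal sketch of exercising the new add_cost_to_usage method with the three usage payload shapes the branches above check for. This assumes the branch is installed; the payload values are illustrative, not confirmed Perplexity API responses.

# Illustrative sketch, not part of the diff: the three cost shapes handled by add_cost_to_usage.
from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig
from litellm.types.utils import ModelResponse

config = PerplexityChatConfig()
payloads = [
    {"usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15, "total_cost": 0.00015}},           # flat
    {"usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15, "cost": {"total_cost": 0.00025}}},  # nested
    {"usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15, "cost": 0.0002}},                   # simple value
]
for raw_json in payloads:
    response = ModelResponse()
    config.add_cost_to_usage(response, raw_json)
    # The extracted cost lands in hidden params for LiteLLM's cost calculator to read.
    print(response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"])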
27 changes: 26 additions & 1 deletion litellm/main.py
@@ -2017,11 +2017,36 @@ def completion( # type: ignore # noqa: PLR0915
logging.post_call(
input=messages, api_key=api_key, original_response=response
)
elif custom_llm_provider == "perplexity":
response = base_llm_http_handler.completion(
model=model,
messages=messages,
headers=headers,
model_response=model_response,
api_key=api_key,
api_base=api_base,
acompletion=acompletion,
logging_obj=logging,
optional_params=optional_params,
litellm_params=litellm_params,
shared_session=shared_session,
timeout=timeout,
client=client,
custom_llm_provider=custom_llm_provider,
encoding=encoding,
stream=stream,
provider_config=provider_config,
)

## LOGGING - Call after response has been processed by transform_response
logging.post_call(
input=messages, api_key=api_key, original_response=response
)

elif (
model in litellm.open_ai_chat_completion_models
or custom_llm_provider == "custom_openai"
or custom_llm_provider == "deepinfra"
or custom_llm_provider == "perplexity"
or custom_llm_provider == "nvidia_nim"
or custom_llm_provider == "cerebras"
or custom_llm_provider == "baseten"
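For reference, a minimal end-to-end sketch of a call that would take the new perplexity branch above once this change is applied. The model name and prompt are placeholders, and a Perplexity API key is assumed to be configured in the environment.

import litellm

# Illustrative only: with this change, perplexity/* models route through base_llm_http_handler.
response = litellm.completion(
    model="perplexity/sonar-pro",
    messages=[{"role": "user", "content": "Summarize LiteLLM in one sentence."}],
)
# If the provider reported a cost, it is surfaced via hidden params (see transformation.py above).
print(response._hidden_params.get("additional_headers", {}).get("llm_provider-x-litellm-response-cost"))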
@@ -707,4 +707,152 @@ def test_add_citations_as_annotations_no_message(self):
# Check that no annotations were created (message content is None)
assert choice.message.content is None
# No annotations should be created since content is None
assert not hasattr(choice.message, 'annotations') or choice.message.annotations is None
assert not hasattr(choice.message, 'annotations') or choice.message.annotations is None

# Tests for cost extraction functionality
def test_add_cost_to_usage_flat_structure(self):
"""Test cost extraction from flat usage structure."""
config = PerplexityChatConfig()

# Create a ModelResponse
model_response = ModelResponse()
model_response.usage = Usage(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)

# Mock raw response with flat cost structure
raw_response_json = {
"choices": [{"message": {"content": "Test response"}}],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150,
"total_cost": 0.00015
}
}

# Test cost extraction
config.add_cost_to_usage(model_response, raw_response_json)

# Check that cost was stored in hidden params
assert hasattr(model_response, "_hidden_params")
assert "additional_headers" in model_response._hidden_params
assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]

cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
assert cost == 0.00015

def test_add_cost_to_usage_nested_structure(self):
"""Test cost extraction from nested usage structure."""
config = PerplexityChatConfig()

# Create a ModelResponse
model_response = ModelResponse()
model_response.usage = Usage(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)

# Mock raw response with nested cost structure
raw_response_json = {
"choices": [{"message": {"content": "Test response"}}],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150,
"cost": {
"total_cost": 0.00025
}
}
}

# Test cost extraction
config.add_cost_to_usage(model_response, raw_response_json)

# Check that cost was stored in hidden params
assert hasattr(model_response, "_hidden_params")
assert "additional_headers" in model_response._hidden_params
assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]

cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
assert cost == 0.00025

def test_add_cost_to_usage_no_cost_data(self):
"""Test handling when no cost data is present."""
config = PerplexityChatConfig()

# Create a ModelResponse
model_response = ModelResponse()
model_response.usage = Usage(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)

# Mock raw response without cost
raw_response_json = {
"choices": [{"message": {"content": "Test response"}}],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150
}
}

# Test cost extraction - should not raise error
config.add_cost_to_usage(model_response, raw_response_json)

# Should not have cost in hidden params
if hasattr(model_response, "_hidden_params"):
assert "llm_provider-x-litellm-response-cost" not in model_response._hidden_params.get("additional_headers", {})

def test_transform_response_includes_cost_extraction(self):
"""Test that transform_response includes cost extraction."""
config = PerplexityChatConfig()

# Mock raw response
mock_response = Mock()
mock_response.json.return_value = {
"choices": [{"message": {"content": "Test response"}}],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150,
"total_cost": 0.00015
}
}
mock_response.headers = {}

# Create a ModelResponse
model_response = ModelResponse()
model_response.usage = Usage(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)
model_response.model = "perplexity/sonar-pro"

# Mock the parent transform_response to return our model_response
with patch.object(config.__class__.__bases__[0], 'transform_response', return_value=model_response):
result = config.transform_response(
model="perplexity/sonar-pro",
raw_response=mock_response,
model_response=model_response,
logging_obj=Mock(),
request_data={},
messages=[{"role": "user", "content": "Test"}],
optional_params={},
litellm_params={},
encoding=None,
)

# Check that cost was extracted and stored
assert hasattr(result, "_hidden_params")
assert "additional_headers" in result._hidden_params
assert "llm_provider-x-litellm-response-cost" in result._hidden_params["additional_headers"]

cost = result._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
assert cost == 0.00015