1 change: 1 addition & 0 deletions litellm/llms/perplexity/chat/__init__.py
@@ -0,0 +1 @@
"""Perplexity chat completion transformations."""
101 changes: 71 additions & 30 deletions litellm/llms/perplexity/chat/transformation.py
@@ -1,25 +1,32 @@
"""
Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`
"""
"""Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`."""

from typing import Any, List, Optional, Tuple
from __future__ import annotations

from typing import TYPE_CHECKING, Any, List, Optional, Tuple

import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import Usage, PromptTokensDetailsWrapper
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
from litellm.types.utils import ModelResponse
from litellm.types.llms.openai import ChatCompletionAnnotation
from litellm.types.llms.openai import ChatCompletionAnnotationURLCitation
from litellm.secret_managers.main import get_secret_str
from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage

if TYPE_CHECKING:
import httpx

from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAnnotation,
ChatCompletionAnnotationURLCitation,
)


class PerplexityChatConfig(OpenAIGPTConfig):
"""Configuration for Perplexity chat completions."""

@property
def custom_llm_provider(self) -> Optional[str]:
def custom_llm_provider(self) -> str | None:
"""Return the custom LLM provider name."""
return "perplexity"

def _get_openai_compatible_provider_info(
@@ -72,7 +79,8 @@ def get_supported_openai_params(self, model: str) -> list:

return base_openai_params

def transform_response(

def transform_response( # noqa: PLR0913
self,
model: str,
raw_response: httpx.Response,
@@ -82,10 +90,11 @@ def transform_response(
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
encoding: Any,
encoding: Any,
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
json_mode: Optional[bool] = None,
) -> ModelResponse:
"""Transform Perplexity response to standard format."""
# Call the parent transform_response first to handle the standard transformation
model_response = super().transform_response(
model=model,
@@ -104,28 +113,29 @@ def transform_response(
# Extract and enhance usage with Perplexity-specific fields
try:
raw_response_json = raw_response.json()
self.add_cost_to_usage(model_response, raw_response_json)
self._enhance_usage_with_perplexity_fields(
model_response, raw_response_json
model_response, raw_response_json,
)
self._add_citations_as_annotations(model_response, raw_response_json)
except Exception as e:
except (ValueError, TypeError, KeyError) as e:
verbose_logger.debug(f"Error extracting Perplexity-specific usage fields: {e}")

return model_response

def _enhance_usage_with_perplexity_fields(
self, model_response: ModelResponse, raw_response_json: dict
def _enhance_usage_with_perplexity_fields(
self, model_response: ModelResponse, raw_response_json: dict,
) -> None:
"""
Extract citation tokens and search queries from Perplexity API response
and add them to the usage object using standard LiteLLM fields.
"""Extract citation tokens and search queries from Perplexity API response.

Add them to the usage object using standard LiteLLM fields.
"""
if not hasattr(model_response, "usage") or model_response.usage is None:
# Create a usage object if it doesn't exist (when usage was None)
model_response.usage = Usage( # type: ignore[attr-defined]
prompt_tokens=0,
completion_tokens=0,
total_tokens=0
total_tokens=0,
)

usage = model_response.usage # type: ignore[attr-defined]
@@ -146,7 +156,7 @@ def _enhance_usage_with_perplexity_fields(
# Extract search queries count from usage or response metadata
# Perplexity might include this in the usage object or as separate metadata
perplexity_usage = raw_response_json.get("usage", {})

# Try to extract search queries from usage field first, then root level
num_search_queries = perplexity_usage.get("num_search_queries")
if num_search_queries is None:
@@ -155,18 +165,18 @@ def _enhance_usage_with_perplexity_fields(
num_search_queries = perplexity_usage.get("search_queries")
if num_search_queries is None:
num_search_queries = raw_response_json.get("search_queries")

# Create or update prompt_tokens_details to include web search requests and citation tokens
if citation_tokens > 0 or (
num_search_queries is not None and num_search_queries > 0
):
if usage.prompt_tokens_details is None:
usage.prompt_tokens_details = PromptTokensDetailsWrapper()

# Store citation tokens count for cost calculation
if citation_tokens > 0:
setattr(usage, "citation_tokens", citation_tokens)
usage.citation_tokens = citation_tokens

# Store search queries count in the standard web_search_requests field
if num_search_queries is not None and num_search_queries > 0:
usage.prompt_tokens_details.web_search_requests = num_search_queries
@@ -248,4 +258,35 @@ def _add_citations_as_annotations(
if citations:
setattr(model_response, "citations", citations)
if search_results:
setattr(model_response, "search_results", search_results)
setattr(model_response, "search_results", search_results)

def add_cost_to_usage(self, model_response: ModelResponse, raw_response_json: dict) -> None:
"""Add the cost to the usage object."""
try:
usage_data = raw_response_json.get("usage")
if usage_data:
# Try different possible cost field locations
response_cost = None

# Check if cost is directly in usage (flat structure)
if "total_cost" in usage_data:
response_cost = usage_data["total_cost"]
# Check if cost is nested (cost.total_cost structure)
elif "cost" in usage_data and isinstance(usage_data["cost"], dict):
response_cost = usage_data["cost"].get("total_cost")
# Check if cost is a simple value
elif "cost" in usage_data:
response_cost = usage_data["cost"]

if response_cost is not None:
# Store cost in hidden params for the cost calculator to use
if not hasattr(model_response, "_hidden_params"):
model_response._hidden_params = {}
if "additional_headers" not in model_response._hidden_params:
model_response._hidden_params["additional_headers"] = {}
model_response._hidden_params["additional_headers"][
"llm_provider-x-litellm-response-cost"
] = float(response_cost)
except (ValueError, TypeError, KeyError) as e:
verbose_logger.debug(f"Error adding cost to usage: {e}")
# If we can't extract cost, continue without it - don't fail the response
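For context, here is a minimal sketch of exercising the new add_cost_to_usage method with the three usage payload shapes the branches above check for. This assumes the branch is installed; the payload values are illustrative, not confirmed Perplexity API responses.

# Illustrative sketch, not part of the diff: the three cost shapes handled by add_cost_to_usage.
from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig
from litellm.types.utils import ModelResponse

config = PerplexityChatConfig()
payloads = [
    {"usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15, "total_cost": 0.00015}},           # flat
    {"usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15, "cost": {"total_cost": 0.00025}}},  # nested
    {"usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15, "cost": 0.0002}},                   # simple value
]
for raw_json in payloads:
    response = ModelResponse()
    config.add_cost_to_usage(response, raw_json)
    # The extracted cost lands in hidden params for LiteLLM's cost calculator to read.
    print(response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"])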
27 changes: 26 additions & 1 deletion litellm/main.py
@@ -2017,11 +2017,36 @@ def completion( # type: ignore # noqa: PLR0915
logging.post_call(
input=messages, api_key=api_key, original_response=response
)
elif custom_llm_provider == "perplexity":
response = base_llm_http_handler.completion(
model=model,
messages=messages,
headers=headers,
model_response=model_response,
api_key=api_key,
api_base=api_base,
acompletion=acompletion,
logging_obj=logging,
optional_params=optional_params,
litellm_params=litellm_params,
shared_session=shared_session,
timeout=timeout,
client=client,
custom_llm_provider=custom_llm_provider,
encoding=encoding,
stream=stream,
provider_config=provider_config,
)

## LOGGING - Call after response has been processed by transform_response
logging.post_call(
input=messages, api_key=api_key, original_response=response
)

elif (
model in litellm.open_ai_chat_completion_models
or custom_llm_provider == "custom_openai"
or custom_llm_provider == "deepinfra"
or custom_llm_provider == "perplexity"
or custom_llm_provider == "nvidia_nim"
or custom_llm_provider == "cerebras"
or custom_llm_provider == "baseten"
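For reference, a minimal end-to-end sketch of a call that would take the new perplexity branch above once this change is applied. The model name and prompt are placeholders, and a Perplexity API key is assumed to be configured in the environment.

import litellm

# Illustrative only: with this change, perplexity/* models route through base_llm_http_handler.
response = litellm.completion(
    model="perplexity/sonar-pro",
    messages=[{"role": "user", "content": "Summarize LiteLLM in one sentence."}],
)
# If the provider reported a cost, it is surfaced via hidden params (see transformation.py above).
print(response._hidden_params.get("additional_headers", {}).get("llm_provider-x-litellm-response-cost"))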
@@ -707,4 +707,152 @@ def test_add_citations_as_annotations_no_message(self):
# Check that no annotations were created (message content is None)
assert choice.message.content is None
# No annotations should be created since content is None
assert not hasattr(choice.message, 'annotations') or choice.message.annotations is None
assert not hasattr(choice.message, 'annotations') or choice.message.annotations is None

# Tests for cost extraction functionality
def test_add_cost_to_usage_flat_structure(self):
"""Test cost extraction from flat usage structure."""
config = PerplexityChatConfig()

# Create a ModelResponse
model_response = ModelResponse()
model_response.usage = Usage(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)

# Mock raw response with flat cost structure
raw_response_json = {
"choices": [{"message": {"content": "Test response"}}],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150,
"total_cost": 0.00015
}
}

# Test cost extraction
config.add_cost_to_usage(model_response, raw_response_json)

# Check that cost was stored in hidden params
assert hasattr(model_response, "_hidden_params")
assert "additional_headers" in model_response._hidden_params
assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]

cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
assert cost == 0.00015

def test_add_cost_to_usage_nested_structure(self):
"""Test cost extraction from nested usage structure."""
config = PerplexityChatConfig()

# Create a ModelResponse
model_response = ModelResponse()
model_response.usage = Usage(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)

# Mock raw response with nested cost structure
raw_response_json = {
"choices": [{"message": {"content": "Test response"}}],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150,
"cost": {
"total_cost": 0.00025
}
}
}

# Test cost extraction
config.add_cost_to_usage(model_response, raw_response_json)

# Check that cost was stored in hidden params
assert hasattr(model_response, "_hidden_params")
assert "additional_headers" in model_response._hidden_params
assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]

cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
assert cost == 0.00025

def test_add_cost_to_usage_no_cost_data(self):
"""Test handling when no cost data is present."""
config = PerplexityChatConfig()

# Create a ModelResponse
model_response = ModelResponse()
model_response.usage = Usage(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)

# Mock raw response without cost
raw_response_json = {
"choices": [{"message": {"content": "Test response"}}],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150
}
}

# Test cost extraction - should not raise error
config.add_cost_to_usage(model_response, raw_response_json)

# Should not have cost in hidden params
if hasattr(model_response, "_hidden_params"):
assert "llm_provider-x-litellm-response-cost" not in model_response._hidden_params.get("additional_headers", {})

def test_transform_response_includes_cost_extraction(self):
"""Test that transform_response includes cost extraction."""
config = PerplexityChatConfig()

# Mock raw response
mock_response = Mock()
mock_response.json.return_value = {
"choices": [{"message": {"content": "Test response"}}],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150,
"total_cost": 0.00015
}
}
mock_response.headers = {}

# Create a ModelResponse
model_response = ModelResponse()
model_response.usage = Usage(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)
model_response.model = "perplexity/sonar-pro"

# Mock the parent transform_response to return our model_response
with patch.object(config.__class__.__bases__[0], 'transform_response', return_value=model_response):
result = config.transform_response(
model="perplexity/sonar-pro",
raw_response=mock_response,
model_response=model_response,
logging_obj=Mock(),
request_data={},
messages=[{"role": "user", "content": "Test"}],
optional_params={},
litellm_params={},
encoding=None,
)

# Check that cost was extracted and stored
assert hasattr(result, "_hidden_params")
assert "additional_headers" in result._hidden_params
assert "llm_provider-x-litellm-response-cost" in result._hidden_params["additional_headers"]

cost = result._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
assert cost == 0.00015