Skip to content

Commit f8967ca

Browse files
authored
[Feat] adds support for openai responses api logging and adds another method to querybuilder for fetching specific prompt version (#91)
1 parent 3e10862 commit f8967ca

File tree

9 files changed

+417
-8
lines changed

9 files changed

+417
-8
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,33 +60,49 @@ See [cookbook/agno_agent.py](cookbook/agno_agent.py) for an example of tracing a
6060

6161
## Version changelog
6262

63+
### 3.12.0
64+
65+
- feat: Added support for OpenAI Responses API format in addition to Chat Completion API
66+
- feat: Added TTL-based caching (60s) for prompt version number single-condition fetches
67+
- feat: Added new `prompt_version_number()` method in QueryBuilder for convenient version-specific queries
68+
- improvement: Enhanced `generation_parser` to detect and handle OpenAI Responses API structure
69+
6370
### 3.11.4
71+
6472
- fix: Fixes race condition in LiveKit realtime tracing
6573
- fix: Fixes import errors for Gemini and Google realtime session imports
6674

6775
### 3.11.3
76+
6877
- fix: Fixed Nested Spans issue for Google ADK
6978

7079
### 3.11.2
80+
7181
- fix: Fixed Google ADK integration to support spans for agent handoffs and tool calls.
7282

7383
### 3.11.1
84+
7485
- feat: Added `ContainerManager` to Langchain to manage containers
7586
- fix: Fixes `trace.end` in Langchain integration `MaximLangchainTracer`
7687

7788
### 3.11.0
89+
7890
- feat: Added observability for google adk
7991

8092
### 3.10.10
93+
8194
- feat: Added case for `commit_user_turn` for LiveKit logs
8295

8396
### 3.10.9
97+
8498
- fix: Fixed session audio silences for LiveKit Realtime session implementation
8599

86100
### 3.10.8
101+
87102
- feat: Added Pydantic AI Single Line Integration
88103

89104
### 3.10.7
105+
90106
- fix: Fixes local data test runs
91107

92108
### 3.10.6

maxim/apis/maxim_apis.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ def __make_network_call(
309309
method, endpoint, body, headers, self.max_retries
310310
)
311311

312-
def get_prompt(self, id: str) -> VersionAndRulesWithPromptId:
312+
def get_prompt(self, id: str, prompt_version_number: Optional[int] = None) -> VersionAndRulesWithPromptId:
313313
"""
314314
Get a prompt by ID.
315315
@@ -323,9 +323,10 @@ def get_prompt(self, id: str) -> VersionAndRulesWithPromptId:
323323
Exception: If the request fails
324324
"""
325325
try:
326-
res = self.__make_network_call(
327-
method="GET", endpoint=f"/api/sdk/v4/prompts?promptId={id}"
328-
)
326+
endpoint = f"/api/sdk/v4/prompts?promptId={id}"
327+
if prompt_version_number is not None:
328+
endpoint += f"&promptVersionNumber={prompt_version_number}"
329+
res = self.__make_network_call(method="GET", endpoint=endpoint)
329330
data = json.loads(res.decode())["data"]
330331
return VersionAndRulesWithPromptId.from_dict(data)
331332
except httpx.HTTPStatusError as e:

maxim/logger/components/generation.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,9 @@ def convert_result(
699699
and result_dict["object"] == "text.completion"
700700
):
701701
raise ValueError("Text completion is not yet supported.")
702+
elif "object" in result_dict and result_dict["object"] == "response":
703+
# OpenAI Responses API format - return as-is for logging
704+
return result_dict
702705
return result
703706

704707
def result(self, result: Any):

maxim/logger/parsers/generation_parser.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ def parse_tool_calls(tool_calls_data):
5656
Returns:
5757
The parsed tool calls.
5858
"""
59-
if ChatCompletionMessageToolCall is not None and isinstance(tool_calls_data, ChatCompletionMessageToolCall):
59+
if ChatCompletionMessageToolCall is not None and isinstance(
60+
tool_calls_data, ChatCompletionMessageToolCall
61+
):
6062
validate_type(tool_calls_data.id, str, "id")
6163
validate_type(tool_calls_data.type, str, "type")
6264
parse_function_call(tool_calls_data.function)
@@ -217,10 +219,58 @@ def default_json_serializer(o: Any) -> Any:
217219
raise TypeError(f"Object of type {o.__class__.__name__} is not JSON serializable")
218220

219221

222+
def is_openai_response_structure(data: Any) -> bool:
    """
    Return True if *data* has the general top-level shape of an OpenAI
    Responses API result, False otherwise.

    The shape checked here is:
    - ``object``: the literal string ``"response"``
    - ``id``: str identifier
    - ``created_at``: numeric timestamp (int or float)
    - ``status``: str (e.g. ``"completed"``, ``"in_progress"``)
    - ``output``: list of output items
    - ``usage``: dict of token usage

    Only this top-level shape is validated; nested structures are not
    inspected.

    Args:
        data: The value to check.

    Returns:
        True when every required field is present with the expected type,
        False for anything else (including non-dict inputs).
    """
    if not isinstance(data, dict):
        return False

    # The "object" discriminator must be exactly "response"; this also
    # rejects a missing or non-string value in one comparison.
    if data.get("object") != "response":
        return False

    expected_shape = (
        ("id", str),
        ("created_at", (int, float)),
        ("status", str),
        ("output", list),
        ("usage", dict),
    )
    return all(
        field in data and isinstance(data[field], field_type)
        for field, field_type in expected_shape
    )
266+
267+
220268
def parse_result(data: Any) -> Dict[str, Any]:
221269
"""
222270
Parse result from a dictionary.
223271
272+
Supports both OpenAI Chat Completion API and OpenAI Responses API result structures.
273+
224274
Args:
225275
data: The dictionary to parse.
226276
@@ -229,6 +279,14 @@ def parse_result(data: Any) -> Dict[str, Any]:
229279
"""
230280
if not isinstance(data, dict):
231281
raise ValueError("Text completion is not supported.")
282+
283+
# Check if this is an OpenAI Responses API result structure
284+
if is_openai_response_structure(data):
285+
# For Responses API results, return as-is without deep validation
286+
# Only the general top-level shape is validated by is_openai_response_structure
287+
return data
288+
289+
# Otherwise, process as Chat Completion API result (existing behavior)
232290
validate_type(data.get("id"), str, "id")
233291
validate_optional_type(data.get("object"), str, "object")
234292
validate_type(data.get("created"), int, "created")

maxim/maxim.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from .scribe import scribe
5050
from .test_runs import TestRunBuilder
5151
from .version import current_version
52+
from .expiring_key_value_store import ExpiringKeyValueStore
5253

5354

5455
class ConfigDict(TypedDict, total=False):
@@ -174,6 +175,8 @@ def __init__(self, config: Union[Config, ConfigDict, None] = None):
174175
self.__loggers: Dict[str, Logger] = {}
175176
self.prompt_management = final_config.get("prompt_management", False)
176177
self.__cache = final_config.get("cache", MaximInMemoryCache())
178+
# Local TTL cache for promptVersionNumber single-condition fetches
179+
self.__prompt_version_by_number_cache: ExpiringKeyValueStore[Prompt] = ExpiringKeyValueStore()
177180
if self.prompt_management:
178181
self.__sync_thread = threading.Thread(target=self.__sync_timer)
179182
self.__sync_thread.daemon = True
@@ -748,6 +751,34 @@ def get_prompt(self, id: str, rule: QueryRule) -> Optional[RunnablePrompt]:
748751
raise Exception(
749752
"prompt_management is disabled. You can enable it by initializing Maxim with Config(...prompt_management=True)."
750753
)
754+
# First, check if this is a single-condition promptVersionNumber query
755+
try:
756+
parsed_rules = parse_incoming_query(rule.query)
757+
except Exception:
758+
parsed_rules = []
759+
if len(parsed_rules) == 1 and parsed_rules[0].field == "promptVersionNumber" and parsed_rules[0].operator == "=":
760+
version_number = None
761+
try:
762+
version_number = int(parsed_rules[0].value)
763+
except Exception:
764+
version_number = None
765+
if version_number is not None:
766+
cache_key = f"pvnum:{id}:{version_number}"
767+
cached_prompt = self.__prompt_version_by_number_cache.get(cache_key)
768+
if cached_prompt is not None:
769+
return RunnablePrompt(cached_prompt, self.maxim_api)
770+
# Fetch prompt only for the specific version
771+
version_and_rules_with_prompt_id = self.maxim_api.get_prompt(id, version_number)
772+
if len(version_and_rules_with_prompt_id.versions) == 0:
773+
return None
774+
specific = next((v for v in version_and_rules_with_prompt_id.versions if v.version == version_number), None)
775+
if specific is None:
776+
return None
777+
formatted = self.__format_prompt(specific)
778+
# Cache for 60 seconds
779+
self.__prompt_version_by_number_cache.set(cache_key, formatted, 60)
780+
return RunnablePrompt(formatted, self.maxim_api)
781+
751782
key = self.__get_cache_key("PROMPT", id)
752783
version_and_rules_with_prompt_id = self.__get_prompt_from_cache(key)
753784
if version_and_rules_with_prompt_id is None:

maxim/models/query_builder.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,21 @@ def tag(self, key: str, value: Union[str, int, bool], enforce: bool = False) ->
104104
self.query += f"{'!!' if enforce else ''}{key}={value}"
105105
return self
106106

107+
def prompt_version_number(self, number: int) -> 'QueryBuilder':
    """
    Adds a rule to fetch a specific prompt version by its numeric version.

    Args:
        number (int): The version number of the prompt to fetch.

    Returns:
        QueryBuilder: The current QueryBuilder instance for method chaining.
    """
    # Conditions are comma-separated; only prepend the separator when
    # earlier rules already exist in the query string.
    separator = "," if self.query else ""
    self.query = f"{self.query}{separator}promptVersionNumber={number}"
    return self
121+
107122
def build(self) -> QueryRule:
108123
"""
109124
Builds the final query rule.

0 commit comments

Comments
 (0)