From 62930024cc5d5c2a93c9b285fcc7abaa2ad097ec Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 30 Aug 2025 10:02:22 -0400 Subject: [PATCH 1/9] Enhance OpenAIClient to support Responses API mode - Added configuration option for API_MODE in config.ini to enable Responses API for local runs. - Updated OpenAIClient to handle both 'responses' and 'legacy' modes, preserving existing behavior while allowing for new functionality. - Implemented parameter mapping for Responses API, ensuring compatibility with legacy chat parameters. This change aims to improve flexibility in API usage and facilitate a smoother transition to the new Responses API. --- config.ini | 4 + tests/unit/test_openai_utils_responses.py | 72 +++++++++++++ tinytroupe/config.ini | 2 + tinytroupe/openai_utils.py | 118 +++++++++++++++------- 4 files changed, 162 insertions(+), 34 deletions(-) create mode 100644 config.ini create mode 100644 tests/unit/test_openai_utils_responses.py diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..473b87d --- /dev/null +++ b/config.ini @@ -0,0 +1,4 @@ +[OpenAI] +API_MODE=responses + + diff --git a/tests/unit/test_openai_utils_responses.py b/tests/unit/test_openai_utils_responses.py new file mode 100644 index 0000000..edb1096 --- /dev/null +++ b/tests/unit/test_openai_utils_responses.py @@ -0,0 +1,72 @@ +import types +from unittest.mock import patch + +import tinytroupe.openai_utils as openai_utils + + +class _StubResponsesClient: + def __init__(self): + self.last_params = None + + class _Responses: + def __init__(self, outer): + self._outer = outer + + def create(self, **kwargs): + # Capture params for assertions + self._outer.last_params = kwargs + + # Return minimal object with output_text like the SDK does + return types.SimpleNamespace(output_text="ok") + + @property + def responses(self): + return _StubResponsesClient._Responses(self) + + +def test_send_message_uses_responses_api_when_api_mode_is_responses(): + stub = _StubResponsesClient() + + # Patch setup to force responses mode and inject stub client + original_setup = openai_utils.OpenAIClient._setup_from_config + + def _setup_with_responses(self): + self.client = stub + self.api_mode = "responses" + + try: + openai_utils.OpenAIClient._setup_from_config = _setup_with_responses + + client = openai_utils.OpenAIClient() + + messages = [ + {"role": "system", "content": "You are terse."}, + {"role": "user", "content": "Say ok."}, + ] + + result = client.send_message( + current_messages=messages, + model="gpt-4.1-mini", + temperature=0.2, + max_tokens=128, + ) + + # Verify mapping to Responses API + assert stub.last_params is not None + assert stub.last_params.get("model") == "gpt-4.1-mini" + assert stub.last_params.get("temperature") == 0.2 + assert stub.last_params.get("max_output_tokens") == 128 + + input_msgs = stub.last_params.get("input") + assert isinstance(input_msgs, list) and len(input_msgs) == 2 + assert input_msgs[0]["role"] == "system" + assert input_msgs[1]["role"] == "user" + assert input_msgs[1]["content"][0]["text"] == "Say ok." 
+ + # Verify extractor returns assistant content + assert result["content"].lower().startswith("ok") + + finally: + openai_utils.OpenAIClient._setup_from_config = original_setup + + diff --git a/tinytroupe/config.ini b/tinytroupe/config.ini index 353bdb0..6c45ac5 100644 --- a/tinytroupe/config.ini +++ b/tinytroupe/config.ini @@ -1,4 +1,6 @@ [OpenAI] +# Enable Responses API path for local runs +API_MODE=responses # # OpenAI or Azure OpenAI Service # diff --git a/tinytroupe/openai_utils.py b/tinytroupe/openai_utils.py index c7a04cb..0c5556d 100644 --- a/tinytroupe/openai_utils.py +++ b/tinytroupe/openai_utils.py @@ -53,6 +53,11 @@ def _setup_from_config(self): Sets up the OpenAI API configurations for this client. """ self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + # API mode: 'responses' or 'legacy' (default to legacy to preserve behavior unless configured) + try: + self.api_mode = config["OpenAI"].get("API_MODE", "legacy").strip().lower() + except Exception: + self.api_mode = "legacy" @config_manager.config_defaults( model="model", @@ -228,55 +233,100 @@ def _raw_model_call(self, model, chat_api_params): Calls the OpenAI API with the given parameters. Subclasses should override this method to implement their own API calls. """ + + # If we are in LEGACY mode or response_format is provided (Pydantic path today), use legacy chat.completions + # This preserves current behavior for structured outputs while we migrate modules incrementally. + use_legacy = (self.api_mode != "responses") or ("response_format" in chat_api_params and chat_api_params["response_format"] is not None) + + if use_legacy: + # adjust parameters depending on the model (legacy path) + if self._is_reasoning_model(model): + # Reasoning models have slightly different parameters + if "stream" in chat_api_params: + del chat_api_params["stream"] + for k in ["temperature", "top_p", "frequency_penalty", "presence_penalty"]: + if k in chat_api_params: + del chat_api_params[k] + if "max_tokens" in chat_api_params: + chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] + del chat_api_params["max_tokens"] + chat_api_params["reasoning_effort"] = default["reasoning_effort"] + + logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} + + if "response_format" in chat_api_params and chat_api_params["response_format"] is not None: + # to enforce the response format via pydantic, we need to use the parse helper + if "stream" in chat_api_params: + del chat_api_params["stream"] + logger.debug(f"Calling legacy .parse with params: {logged_params} (messages omitted).") + return self.client.beta.chat.completions.parse(**chat_api_params) + else: + logger.debug(f"Calling legacy chat.completions.create with params: {logged_params} (messages omitted).") + return self.client.chat.completions.create(**chat_api_params) - # adjust parameters depending on the model - if self._is_reasoning_model(model): - # Reasoning models have slightly different parameters - del chat_api_params["stream"] - del chat_api_params["temperature"] - del chat_api_params["top_p"] - del chat_api_params["frequency_penalty"] - del chat_api_params["presence_penalty"] - - chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] - del chat_api_params["max_tokens"] - - chat_api_params["reasoning_effort"] = default["reasoning_effort"] - + # RESPONSES API path (no response_format in params) + responses_params = self._map_messages_to_responses_params(model, chat_api_params) + logged_params = {k: v for k, v in 
responses_params.items() if k != "input"} + logger.debug(f"Calling Responses API with params: {logged_params} (input omitted).") + return self.client.responses.create(**responses_params) - # To make the log cleaner, we remove the messages from the logged parameters - logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} + def _map_messages_to_responses_params(self, model, chat_api_params): + """ + Convert legacy chat parameters into Responses API parameters. + - messages -> input (list of message dicts with content blocks) + - max_tokens -> max_output_tokens + - for reasoning models: set reasoning.effort and drop unsupported fields + """ + # Map messages to Responses input + messages = chat_api_params.get("messages", []) + input_msgs = [] + for m in messages: + content = m.get("content", "") + input_msgs.append({ + "role": m.get("role", "user"), + "content": [{"type": "text", "text": content}] + }) + + params = { + "model": chat_api_params.get("model"), + "input": input_msgs, + } - if "response_format" in chat_api_params: - # to enforce the response format via pydantic, we need to use a different method + # Temperature and sampling parameters are supported for non-reasoning models + if not self._is_reasoning_model(model): + if chat_api_params.get("temperature") is not None: + params["temperature"] = chat_api_params.get("temperature") + if chat_api_params.get("top_p") is not None: + params["top_p"] = chat_api_params.get("top_p") - if "stream" in chat_api_params: - del chat_api_params["stream"] + # Map token limits + if chat_api_params.get("max_tokens") is not None: + params["max_output_tokens"] = chat_api_params.get("max_tokens") - logger.debug(f"Calling LLM model (using .parse too) with these parameters: {logged_params}. Not showing 'messages' parameter.") - # complete message - logger.debug(f" --> Complete messages sent to LLM: {chat_api_params['messages']}") + # Reasoning model adjustments + if self._is_reasoning_model(model): + params["reasoning"] = {"effort": default.get("reasoning_effort", "medium")} - result_message = self.client.beta.chat.completions.parse( - **chat_api_params - ) + # Timeout is still honored via request options in SDK; keep here if needed by higher layer + # Stop sequences are not yet mapped here; can be added if required. - return result_message - - else: - logger.debug(f"Calling LLM model with these parameters: {logged_params}. Not showing 'messages' parameter.") - return self.client.chat.completions.create( - **chat_api_params - ) + return params def _is_reasoning_model(self, model): - return "o1" in model or "o3" in model + return ("o1" in model) or ("o3" in model) or ("gpt-5" in model) def _raw_model_response_extractor(self, response): """ Extracts the response from the API response. Subclasses should override this method to implement their own response extraction. """ + # Responses API: has output_text and output messages + try: + if hasattr(response, "output_text") and response.output_text is not None: + return {"role": "assistant", "content": response.output_text} + except Exception: + pass + # Legacy chat.completions path return response.choices[0].message.to_dict() def _count_tokens(self, messages: list, model: str): From 32f26b4cf422569bec93b914179478ad77f9ffc5 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 30 Aug 2025 11:21:18 -0400 Subject: [PATCH 2/9] - Introduced Pydantic support for structured output in Responses API. - Updated response extraction logic to prefer typed parsed output when available. 
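For reference, a minimal sketch of the structured-output flow this patch enables, assuming a recent openai Python SDK that exposes responses.parse and output_parsed, a Pydantic v2 model, and OPENAI_API_KEY in the environment; the Action model and the model name are illustrative, not part of the diff:

    from openai import OpenAI
    from pydantic import BaseModel

    class Action(BaseModel):
        type: str
        content: str

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    # responses.parse accepts the Pydantic class via text_format and returns typed output
    response = client.responses.parse(
        model="gpt-4.1-mini",  # illustrative model name
        input=[{"role": "user", "content": "Emit a short THINK action."}],
        text_format=Action,
    )

    parsed = response.output_parsed    # an Action instance when parsing succeeds
    print(parsed.model_dump_json())    # the JSON string the extractor now prefers as content

If parsing does not produce a typed object, the extractor falls back to output_text, as shown in the diff below.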
--- tinytroupe/openai_utils.py | 41 ++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/tinytroupe/openai_utils.py b/tinytroupe/openai_utils.py index 0c5556d..4afec87 100644 --- a/tinytroupe/openai_utils.py +++ b/tinytroupe/openai_utils.py @@ -9,6 +9,7 @@ import tiktoken +from pydantic import BaseModel from tinytroupe import utils from tinytroupe.control import transactional from tinytroupe import default @@ -234,9 +235,8 @@ def _raw_model_call(self, model, chat_api_params): override this method to implement their own API calls. """ - # If we are in LEGACY mode or response_format is provided (Pydantic path today), use legacy chat.completions - # This preserves current behavior for structured outputs while we migrate modules incrementally. - use_legacy = (self.api_mode != "responses") or ("response_format" in chat_api_params and chat_api_params["response_format"] is not None) + # Prefer Responses API when enabled; otherwise use legacy chat.completions + use_legacy = (self.api_mode != "responses") if use_legacy: # adjust parameters depending on the model (legacy path) @@ -264,16 +264,26 @@ def _raw_model_call(self, model, chat_api_params): logger.debug(f"Calling legacy chat.completions.create with params: {logged_params} (messages omitted).") return self.client.chat.completions.create(**chat_api_params) - # RESPONSES API path (no response_format in params) + # RESPONSES API path + rf = chat_api_params.get("response_format") + is_pydantic = isinstance(rf, type) and issubclass(rf, BaseModel) + responses_params = self._map_messages_to_responses_params(model, chat_api_params) logged_params = {k: v for k, v in responses_params.items() if k != "input"} - logger.debug(f"Calling Responses API with params: {logged_params} (input omitted).") + + if is_pydantic: + # Structured output via responses.parse with Pydantic text_format + logger.debug(f"Calling Responses API parse with params: {logged_params} (input omitted).") + return self.client.responses.parse(text_format=rf, **responses_params) + + # Unstructured path + logger.debug(f"Calling Responses API create with params: {logged_params} (input omitted).") return self.client.responses.create(**responses_params) def _map_messages_to_responses_params(self, model, chat_api_params): """ Convert legacy chat parameters into Responses API parameters. - - messages -> input (list of message dicts with content blocks) + - messages -> input (list of message dicts with plain string content) - max_tokens -> max_output_tokens - for reasoning models: set reasoning.effort and drop unsupported fields """ @@ -284,7 +294,7 @@ def _map_messages_to_responses_params(self, model, chat_api_params): content = m.get("content", "") input_msgs.append({ "role": m.get("role", "user"), - "content": [{"type": "text", "text": content}] + "content": content, }) params = { @@ -307,6 +317,8 @@ def _map_messages_to_responses_params(self, model, chat_api_params): if self._is_reasoning_model(model): params["reasoning"] = {"effort": default.get("reasoning_effort", "medium")} + # Do not attach response_format to Responses API params; use responses.parse(text_format=...) instead + # Timeout is still honored via request options in SDK; keep here if needed by higher layer # Stop sequences are not yet mapped here; can be added if required. @@ -320,8 +332,21 @@ def _raw_model_response_extractor(self, response): Extracts the response from the API response. Subclasses should override this method to implement their own response extraction. 
""" - # Responses API: has output_text and output messages + # Responses API: prefer typed parsed output when available try: + if hasattr(response, "output_parsed") and response.output_parsed is not None: + parsed = response.output_parsed + try: + # Pydantic v2 BaseModel + content_text = parsed.model_dump_json() + except Exception: + try: + # Fallback to dict serialization + import json + content_text = json.dumps(getattr(parsed, "dict", lambda: parsed)()) + except Exception: + content_text = str(parsed) + return {"role": "assistant", "content": content_text} if hasattr(response, "output_text") and response.output_text is not None: return {"role": "assistant", "content": response.output_text} except Exception: From 4c009bf3fa1b05a6912122cd4cb81b9d260dc350 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 6 Sep 2025 20:58:17 -0400 Subject: [PATCH 3/9] PR2 prep: add DEBUG config override and structured outputs demo script --- config.ini | 52 ++++++++++++++++++++++++++++++ scripts/structured_outputs_demo.py | 31 ++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 config.ini create mode 100644 scripts/structured_outputs_demo.py diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..544a8c8 --- /dev/null +++ b/config.ini @@ -0,0 +1,52 @@ +[OpenAI] +API_TYPE=openai +MODEL=gpt-4.1-mini +REASONING_MODEL=o3-mini +EMBEDDING_MODEL=text-embedding-3-small + +# Keep calls fast and inexpensive for the demo +MAX_TOKENS=800 +TEMPERATURE=1.0 +FREQ_PENALTY=0.0 +PRESENCE_PENALTY=0.0 +TIMEOUT=120 +MAX_ATTEMPTS=1 +WAITING_TIME=0 +EXPONENTIAL_BACKOFF_FACTOR=5 + +REASONING_EFFORT=low + +CACHE_API_CALLS=False +CACHE_FILE_NAME=openai_api_cache.pickle + +MAX_CONTENT_DISPLAY_LENGTH=2000 + +[Simulation] +PARALLEL_AGENT_GENERATION=True +PARALLEL_AGENT_ACTIONS=True +RAI_HARMFUL_CONTENT_PREVENTION=True +RAI_COPYRIGHT_INFRINGEMENT_PREVENTION=True + +[Cognition] +ENABLE_MEMORY_CONSOLIDATION=False +MIN_EPISODE_LENGTH=5 +MAX_EPISODE_LENGTH=20 +EPISODIC_MEMORY_FIXED_PREFIX_LENGTH=5 +EPISODIC_MEMORY_LOOKBACK_LENGTH=10 + +[ActionGenerator] +MAX_ATTEMPTS=1 +ENABLE_QUALITY_CHECKS=False +ENABLE_REGENERATION=False +ENABLE_DIRECT_CORRECTION=False +ENABLE_QUALITY_CHECK_FOR_PERSONA_ADHERENCE=False +ENABLE_QUALITY_CHECK_FOR_SELFCONSISTENCY=False +ENABLE_QUALITY_CHECK_FOR_FLUENCY=False +ENABLE_QUALITY_CHECK_FOR_SUITABILITY=False +ENABLE_QUALITY_CHECK_FOR_SIMILARITY=False +CONTINUE_ON_FAILURE=True +QUALITY_THRESHOLD=5 + +[Logging] +LOGLEVEL=DEBUG + diff --git a/scripts/structured_outputs_demo.py b/scripts/structured_outputs_demo.py new file mode 100644 index 0000000..d41b0f0 --- /dev/null +++ b/scripts/structured_outputs_demo.py @@ -0,0 +1,31 @@ +import os +import logging + +from tinytroupe.agent import TinyPerson + + +def main(): + # Ensure DEBUG logs + logger = logging.getLogger("tinytroupe") + logger.setLevel(logging.DEBUG) + + # Require API key + if not os.getenv("OPENAI_API_KEY"): + raise RuntimeError("OPENAI_API_KEY must be set in the environment.") + + # Create a simple agent and act once to force an action generation + agent = TinyPerson(name="DemoAgent") + agent.listen("You're in a coffee shop. 
Order a cappuccino politely.") + + # Act once; structured output is enforced by ActionGenerator with Pydantic models + outputs = agent.act(until_done=False, n=1, return_actions=True, communication_display=False) + + # Print the raw structured response for inspection + print("\n=== Structured Output ===") + print(outputs[-1]) + + +if __name__ == "__main__": + main() + + From 0b57eff93c97d81bee5f62ded949aaf42a3e3f70 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Mon, 8 Sep 2025 21:11:51 -0400 Subject: [PATCH 4/9] Enhance API key loading and response handling. Updated action generator to prefer structured responses and handle model refusals more gracefully. --- scripts/structured_outputs_demo.py | 43 ++++- ...est_action_generator_structured_outputs.py | 57 +++++++ tinytroupe/agent/action_generator.py | 43 ++++- tinytroupe/openai_utils.py | 148 +++++++++++++++--- 4 files changed, 263 insertions(+), 28 deletions(-) create mode 100644 tests/unit/test_action_generator_structured_outputs.py diff --git a/scripts/structured_outputs_demo.py b/scripts/structured_outputs_demo.py index d41b0f0..0a085b0 100644 --- a/scripts/structured_outputs_demo.py +++ b/scripts/structured_outputs_demo.py @@ -1,17 +1,58 @@ import os import logging +from pathlib import Path from tinytroupe.agent import TinyPerson +def _load_api_key_from_dotenv_if_missing(): + if os.getenv("OPENAI_API_KEY"): + return + + # Try to read from .env in TinyTroupe/ and project root without overwriting env + candidate_paths = [ + Path(__file__).resolve().parent.parent / ".env", # project_root/TinyTroupe/../.env + Path(__file__).resolve().parent / ".env", # TinyTroupe/.env + Path.cwd() / ".env", # current working dir .env + ] + + api_key = None + for dotenv_path in candidate_paths: + try: + if dotenv_path.exists(): + with open(dotenv_path, "r", encoding="utf-8", errors="replace") as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + if "=" in line: + k, v = line.split("=", 1) + k = k.strip() + v = v.strip().strip('"').strip("'") + if k == "OPENAI_API_KEY" and v: + api_key = v + break + if api_key: + break + except Exception: + continue + + if api_key and not os.getenv("OPENAI_API_KEY"): + # Set only for this process; do not overwrite existing env + os.environ["OPENAI_API_KEY"] = api_key + + def main(): # Ensure DEBUG logs logger = logging.getLogger("tinytroupe") logger.setLevel(logging.DEBUG) + # Load from .env if needed (non-destructive) + _load_api_key_from_dotenv_if_missing() + # Require API key if not os.getenv("OPENAI_API_KEY"): - raise RuntimeError("OPENAI_API_KEY must be set in the environment.") + raise RuntimeError("OPENAI_API_KEY must be set in the environment or in a local .env file.") # Create a simple agent and act once to force an action generation agent = TinyPerson(name="DemoAgent") diff --git a/tests/unit/test_action_generator_structured_outputs.py b/tests/unit/test_action_generator_structured_outputs.py new file mode 100644 index 0000000..1f9a022 --- /dev/null +++ b/tests/unit/test_action_generator_structured_outputs.py @@ -0,0 +1,57 @@ +import pytest + +from tinytroupe.agent.action_generator import ActionGenerator, ActionRefusedException +from tinytroupe.agent import TinyPerson, CognitiveActionModel + + +class FakeClient: + def __init__(self, message): + self._message = message + + def send_message(self, *args, **kwargs): + return self._message + + +def test_prefers_parsed_payload(monkeypatch): + TinyPerson.clear_agents() + # Build a parsed payload consistent with 
CognitiveActionModel + parsed = { + "action": {"type": "THINK", "content": "test content", "target": ""}, + "cognitive_state": { + "goals": "g", + "context": ["c"], + "attention": "a", + "emotions": "e", + }, + } + + message = {"role": "assistant", "content": "{\"action\":{}}", "parsed": parsed} + + # Patch client used by action generator to return our fake message + from tinytroupe import openai_utils + + monkeypatch.setattr(openai_utils, "client", lambda: FakeClient(message)) + + agent = TinyPerson(name="Tester") + ag = ActionGenerator() + + action, role, content = ag._generate_tentative_action(agent, agent.current_messages)[0:3] + + assert content == parsed + assert action == parsed["action"] + assert role == "assistant" + + +def test_refusal_raises(monkeypatch): + TinyPerson.clear_agents() + message = {"role": "assistant", "content": "{}", "refusal": "safety refusal"} + + from tinytroupe import openai_utils + + monkeypatch.setattr(openai_utils, "client", lambda: FakeClient(message)) + + agent = TinyPerson(name="Tester") + ag = ActionGenerator() + + with pytest.raises(ActionRefusedException): + ag._generate_tentative_action(agent, agent.current_messages) diff --git a/tinytroupe/agent/action_generator.py b/tinytroupe/agent/action_generator.py index 08aca7f..c38ddf2 100644 --- a/tinytroupe/agent/action_generator.py +++ b/tinytroupe/agent/action_generator.py @@ -288,7 +288,19 @@ def _generate_tentative_action(self, agent, current_messages, feedback_from_prev if not self.enable_reasoning_step: logger.debug(f"[{agent.name}] Reasoning step disabled.") - next_message = openai_utils.client().send_message(current_messages_context, response_format=CognitiveActionModel) + # Prefer Responses API JSON Schema when API_MODE=responses; fallback to Pydantic class on legacy + response_format = CognitiveActionModel + try: + # If running in responses mode, provide a JSON Schema envelope with strict mode + from pydantic import TypeAdapter + schema = TypeAdapter(CognitiveActionModel).json_schema() + response_format = { + "type": "json_schema", + "json_schema": {"name": "CognitiveActionModel", "schema": schema, "strict": True}, + } + except Exception: + pass + next_message = openai_utils.client().send_message(current_messages_context, response_format=response_format) else: logger.debug(f"[{agent.name}] Reasoning step enabled.") @@ -302,11 +314,31 @@ def _generate_tentative_action(self, agent, current_messages, feedback_from_prev current_messages_context.append({"role": "system", "content": "Use the \"reasoning\" field to add any reasoning process you might wish to use before generating the next action and cognitive state. 
"}) - next_message = openai_utils.client().send_message(current_messages_context, response_format=CognitiveActionModelWithReasoning) + response_format = CognitiveActionModelWithReasoning + try: + from pydantic import TypeAdapter + schema = TypeAdapter(CognitiveActionModelWithReasoning).json_schema() + response_format = { + "type": "json_schema", + "json_schema": {"name": "CognitiveActionModelWithReasoning", "schema": schema, "strict": True}, + } + except Exception: + pass + next_message = openai_utils.client().send_message(current_messages_context, response_format=response_format) logger.debug(f"[{agent.name}] Received message: {next_message}") - role, content = next_message["role"], utils.extract_json(next_message["content"]) + # Prefer typed parsed payload when available; otherwise, fall back to JSON extraction + role = next_message.get("role", "assistant") + + # Handle explicit refusal from provider payloads when present + refusal = next_message.get("refusal") + if refusal: + # Log and raise a specialized exception to surface actionable errors + logger.warning(f"[{agent.name}] Model refusal received: {refusal}") + raise ActionRefusedException(refusal) + + content = next_message.get("parsed") or utils.extract_json(next_message["content"]) action = content['action'] logger.debug(f"{agent.name}'s action: {action}") @@ -530,3 +562,8 @@ class PoorQualityActionException(Exception): def __init__(self, message="The generated action is of poor quality"): self.message = message super().__init__(self.message) + + +class ActionRefusedException(Exception): + def __init__(self, refusal_message: str = "The model refused to generate an action"): + super().__init__(refusal_message) diff --git a/tinytroupe/openai_utils.py b/tinytroupe/openai_utils.py index c7a04cb..7e93970 100644 --- a/tinytroupe/openai_utils.py +++ b/tinytroupe/openai_utils.py @@ -229,55 +229,155 @@ def _raw_model_call(self, model, chat_api_params): override this method to implement their own API calls. 
""" - # adjust parameters depending on the model + # Choose API mode (legacy chat vs responses) + api_mode = config["OpenAI"].get("API_MODE", "legacy").lower() + + # adjust parameters depending on the model (legacy path expectations) if self._is_reasoning_model(model): # Reasoning models have slightly different parameters - del chat_api_params["stream"] - del chat_api_params["temperature"] - del chat_api_params["top_p"] - del chat_api_params["frequency_penalty"] - del chat_api_params["presence_penalty"] + if api_mode == "legacy": + if "stream" in chat_api_params: del chat_api_params["stream"] + if "temperature" in chat_api_params: del chat_api_params["temperature"] + if "top_p" in chat_api_params: del chat_api_params["top_p"] + if "frequency_penalty" in chat_api_params: del chat_api_params["frequency_penalty"] + if "presence_penalty" in chat_api_params: del chat_api_params["presence_penalty"] - chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] - del chat_api_params["max_tokens"] + chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] + del chat_api_params["max_tokens"] - chat_api_params["reasoning_effort"] = default["reasoning_effort"] + chat_api_params["reasoning_effort"] = default["reasoning_effort"] # To make the log cleaner, we remove the messages from the logged parameters logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} - if "response_format" in chat_api_params: - # to enforce the response format via pydantic, we need to use a different method + if api_mode == "responses": + # Build Responses API params + responses_params = self._build_responses_params(model, chat_api_params) + # Log sanitized params and full messages separately + rp_logged = {k: v for k, v in responses_params.items() if k != "input" and k != "messages"} + logger.debug(f"Calling LLM model (Responses API) with these parameters: {rp_logged}. Not showing 'messages'/'input' parameter.") + logger.debug(f" --> Complete messages sent to LLM: {responses_params.get('messages') or responses_params.get('input')}") + + # If using Pydantic model, prefer parse helper when available + if isinstance(chat_api_params.get("response_format"), type): + # Responses parse path with Pydantic model + return self.client.responses.parse(**responses_params) + else: + return self.client.responses.create(**responses_params) + + # Legacy Chat Completions path + if "response_format" in chat_api_params: if "stream" in chat_api_params: del chat_api_params["stream"] logger.debug(f"Calling LLM model (using .parse too) with these parameters: {logged_params}. Not showing 'messages' parameter.") - # complete message logger.debug(f" --> Complete messages sent to LLM: {chat_api_params['messages']}") + return self.client.beta.chat.completions.parse(**chat_api_params) + else: + logger.debug(f"Calling LLM model with these parameters: {logged_params}. Not showing 'messages' parameter.") + return self.client.chat.completions.create(**chat_api_params) - result_message = self.client.beta.chat.completions.parse( - **chat_api_params - ) + def _build_responses_params(self, model, chat_api_params): + """ + Map legacy chat-style params to Responses API params. + - Prefer 'messages' as input if present; else use 'input'. + - Map max_tokens -> max_output_tokens + - For reasoning models add reasoning: { effort: ... } and drop sampling params. 
+ - If response_format is a Pydantic model class, pass it directly (Responses parse supports Pydantic); + if it's a dict (JSON Schema), pass as-is with strict mode expected to be set by caller. + """ + params = { + "model": model, + # Latest SDKs accept either 'input' or 'messages'. We pass both for compatibility; the SDK ignores the unused one. + "messages": chat_api_params.get("messages"), + "input": chat_api_params.get("messages"), + "max_output_tokens": chat_api_params.get("max_tokens"), + "timeout": chat_api_params.get("timeout"), + } - return result_message - + # Include response_format (Pydantic class or JSON Schema dict) + if chat_api_params.get("response_format") is not None: + rf = chat_api_params["response_format"] + params["response_format"] = rf + + # Reasoning models: remove sampling controls and set reasoning effort + if self._is_reasoning_model(model): + params["reasoning"] = {"effort": default["reasoning_effort"]} else: - logger.debug(f"Calling LLM model with these parameters: {logged_params}. Not showing 'messages' parameter.") - return self.client.chat.completions.create( - **chat_api_params - ) + # Non-reasoning: sampling controls are valid + for key in ("temperature", "top_p", "frequency_penalty", "presence_penalty"): + if chat_api_params.get(key) is not None: + params[key] = chat_api_params[key] + + return params def _is_reasoning_model(self, model): return "o1" in model or "o3" in model def _raw_model_response_extractor(self, response): """ - Extracts the response from the API response. Subclasses should - override this method to implement their own response extraction. + Extract the response into a unified dict shape used by callers. + Supports both Chat Completions and Responses API return shapes. """ - return response.choices[0].message.to_dict() + # Legacy chat path + if hasattr(response, "choices"): + return response.choices[0].message.to_dict() + + # Responses API path + try: + # Try to obtain a dict-like representation + resp_dict = None + if hasattr(response, "to_dict"): + resp_dict = response.to_dict() + elif hasattr(response, "model_dump"): + resp_dict = response.model_dump() + + # Fall back to attribute traversal if needed + output_items = None + if resp_dict is not None: + output_items = resp_dict.get("output") or resp_dict.get("outputs") + else: + output_items = getattr(response, "output", None) or getattr(response, "outputs", None) + + role = "assistant" + content_text = None + parsed = None + refusal = None + + if output_items: + # Expect the first item to be a message with content parts + first = output_items[0] + contents = first.get("content") if isinstance(first, dict) else getattr(first, "content", []) + for part in contents or []: + ptype = part.get("type") if isinstance(part, dict) else getattr(part, "type", None) + # Text output + if ptype in ("output_text", "text"): + content_text = part.get("text") if isinstance(part, dict) else getattr(part, "text", None) + # Structured parse + if (isinstance(part, dict) and "parsed" in part): + parsed = part.get("parsed") + elif hasattr(part, "parsed"): + parsed = getattr(part, "parsed") + # Refusal + if (isinstance(part, dict) and "refusal" in part): + refusal = part.get("refusal") + elif hasattr(part, "refusal"): + refusal = getattr(part, "refusal") + + # As a final fallback, try convenience property 'output_text' + if content_text is None and hasattr(response, "output_text"): + try: + content_text = response.output_text + except Exception: + pass + + return {"role": role, "content": content_text, 
"parsed": parsed, "refusal": refusal} + except Exception as e: + logger.error(f"Failed to extract Responses API payload: {e}") + # best-effort fallback + return {"role": "assistant", "content": None, "parsed": None, "refusal": None} def _count_tokens(self, messages: list, model: str): """ From e4fda17f0a7753ef9416bd0f4a27aba9aaf051a2 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Mon, 8 Sep 2025 21:15:56 -0400 Subject: [PATCH 5/9] Remove structured_outputs_demo.py from branch --- scripts/structured_outputs_demo.py | 72 ------------------------------ 1 file changed, 72 deletions(-) delete mode 100644 scripts/structured_outputs_demo.py diff --git a/scripts/structured_outputs_demo.py b/scripts/structured_outputs_demo.py deleted file mode 100644 index 0a085b0..0000000 --- a/scripts/structured_outputs_demo.py +++ /dev/null @@ -1,72 +0,0 @@ -import os -import logging -from pathlib import Path - -from tinytroupe.agent import TinyPerson - - -def _load_api_key_from_dotenv_if_missing(): - if os.getenv("OPENAI_API_KEY"): - return - - # Try to read from .env in TinyTroupe/ and project root without overwriting env - candidate_paths = [ - Path(__file__).resolve().parent.parent / ".env", # project_root/TinyTroupe/../.env - Path(__file__).resolve().parent / ".env", # TinyTroupe/.env - Path.cwd() / ".env", # current working dir .env - ] - - api_key = None - for dotenv_path in candidate_paths: - try: - if dotenv_path.exists(): - with open(dotenv_path, "r", encoding="utf-8", errors="replace") as f: - for line in f: - line = line.strip() - if not line or line.startswith("#"): - continue - if "=" in line: - k, v = line.split("=", 1) - k = k.strip() - v = v.strip().strip('"').strip("'") - if k == "OPENAI_API_KEY" and v: - api_key = v - break - if api_key: - break - except Exception: - continue - - if api_key and not os.getenv("OPENAI_API_KEY"): - # Set only for this process; do not overwrite existing env - os.environ["OPENAI_API_KEY"] = api_key - - -def main(): - # Ensure DEBUG logs - logger = logging.getLogger("tinytroupe") - logger.setLevel(logging.DEBUG) - - # Load from .env if needed (non-destructive) - _load_api_key_from_dotenv_if_missing() - - # Require API key - if not os.getenv("OPENAI_API_KEY"): - raise RuntimeError("OPENAI_API_KEY must be set in the environment or in a local .env file.") - - # Create a simple agent and act once to force an action generation - agent = TinyPerson(name="DemoAgent") - agent.listen("You're in a coffee shop. Order a cappuccino politely.") - - # Act once; structured output is enforced by ActionGenerator with Pydantic models - outputs = agent.act(until_done=False, n=1, return_actions=True, communication_display=False) - - # Print the raw structured response for inspection - print("\n=== Structured Output ===") - print(outputs[-1]) - - -if __name__ == "__main__": - main() - - From 8c0d66a1b7cf2482c7f73a33b6755430adf64c4f Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 30 Aug 2025 10:02:22 -0400 Subject: [PATCH 6/9] Enhance OpenAIClient to support Responses API mode - Added configuration option for API_MODE in config.ini to enable Responses API for local runs. - Updated OpenAIClient to handle both 'responses' and 'legacy' modes, preserving existing behavior while allowing for new functionality. - Implemented parameter mapping for Responses API, ensuring compatibility with legacy chat parameters. This change aims to improve flexibility in API usage and facilitate a smoother transition to the new Responses API. 
--- config.ini | 52 ---------------- tests/unit/test_openai_utils_responses.py | 72 +++++++++++++++++++++++ tinytroupe/config.ini | 2 + 3 files changed, 74 insertions(+), 52 deletions(-) delete mode 100644 config.ini create mode 100644 tests/unit/test_openai_utils_responses.py diff --git a/config.ini b/config.ini deleted file mode 100644 index 544a8c8..0000000 --- a/config.ini +++ /dev/null @@ -1,52 +0,0 @@ -[OpenAI] -API_TYPE=openai -MODEL=gpt-4.1-mini -REASONING_MODEL=o3-mini -EMBEDDING_MODEL=text-embedding-3-small - -# Keep calls fast and inexpensive for the demo -MAX_TOKENS=800 -TEMPERATURE=1.0 -FREQ_PENALTY=0.0 -PRESENCE_PENALTY=0.0 -TIMEOUT=120 -MAX_ATTEMPTS=1 -WAITING_TIME=0 -EXPONENTIAL_BACKOFF_FACTOR=5 - -REASONING_EFFORT=low - -CACHE_API_CALLS=False -CACHE_FILE_NAME=openai_api_cache.pickle - -MAX_CONTENT_DISPLAY_LENGTH=2000 - -[Simulation] -PARALLEL_AGENT_GENERATION=True -PARALLEL_AGENT_ACTIONS=True -RAI_HARMFUL_CONTENT_PREVENTION=True -RAI_COPYRIGHT_INFRINGEMENT_PREVENTION=True - -[Cognition] -ENABLE_MEMORY_CONSOLIDATION=False -MIN_EPISODE_LENGTH=5 -MAX_EPISODE_LENGTH=20 -EPISODIC_MEMORY_FIXED_PREFIX_LENGTH=5 -EPISODIC_MEMORY_LOOKBACK_LENGTH=10 - -[ActionGenerator] -MAX_ATTEMPTS=1 -ENABLE_QUALITY_CHECKS=False -ENABLE_REGENERATION=False -ENABLE_DIRECT_CORRECTION=False -ENABLE_QUALITY_CHECK_FOR_PERSONA_ADHERENCE=False -ENABLE_QUALITY_CHECK_FOR_SELFCONSISTENCY=False -ENABLE_QUALITY_CHECK_FOR_FLUENCY=False -ENABLE_QUALITY_CHECK_FOR_SUITABILITY=False -ENABLE_QUALITY_CHECK_FOR_SIMILARITY=False -CONTINUE_ON_FAILURE=True -QUALITY_THRESHOLD=5 - -[Logging] -LOGLEVEL=DEBUG - diff --git a/tests/unit/test_openai_utils_responses.py b/tests/unit/test_openai_utils_responses.py new file mode 100644 index 0000000..edb1096 --- /dev/null +++ b/tests/unit/test_openai_utils_responses.py @@ -0,0 +1,72 @@ +import types +from unittest.mock import patch + +import tinytroupe.openai_utils as openai_utils + + +class _StubResponsesClient: + def __init__(self): + self.last_params = None + + class _Responses: + def __init__(self, outer): + self._outer = outer + + def create(self, **kwargs): + # Capture params for assertions + self._outer.last_params = kwargs + + # Return minimal object with output_text like the SDK does + return types.SimpleNamespace(output_text="ok") + + @property + def responses(self): + return _StubResponsesClient._Responses(self) + + +def test_send_message_uses_responses_api_when_api_mode_is_responses(): + stub = _StubResponsesClient() + + # Patch setup to force responses mode and inject stub client + original_setup = openai_utils.OpenAIClient._setup_from_config + + def _setup_with_responses(self): + self.client = stub + self.api_mode = "responses" + + try: + openai_utils.OpenAIClient._setup_from_config = _setup_with_responses + + client = openai_utils.OpenAIClient() + + messages = [ + {"role": "system", "content": "You are terse."}, + {"role": "user", "content": "Say ok."}, + ] + + result = client.send_message( + current_messages=messages, + model="gpt-4.1-mini", + temperature=0.2, + max_tokens=128, + ) + + # Verify mapping to Responses API + assert stub.last_params is not None + assert stub.last_params.get("model") == "gpt-4.1-mini" + assert stub.last_params.get("temperature") == 0.2 + assert stub.last_params.get("max_output_tokens") == 128 + + input_msgs = stub.last_params.get("input") + assert isinstance(input_msgs, list) and len(input_msgs) == 2 + assert input_msgs[0]["role"] == "system" + assert input_msgs[1]["role"] == "user" + assert input_msgs[1]["content"][0]["text"] == "Say ok." 
+ + # Verify extractor returns assistant content + assert result["content"].lower().startswith("ok") + + finally: + openai_utils.OpenAIClient._setup_from_config = original_setup + + diff --git a/tinytroupe/config.ini b/tinytroupe/config.ini index 353bdb0..6c45ac5 100644 --- a/tinytroupe/config.ini +++ b/tinytroupe/config.ini @@ -1,4 +1,6 @@ [OpenAI] +# Enable Responses API path for local runs +API_MODE=responses # # OpenAI or Azure OpenAI Service # From 33a1d021c650c202fb96ce9dfe39181f4807e7b5 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 30 Aug 2025 11:21:18 -0400 Subject: [PATCH 7/9] - Introduced Pydantic support for structured output in Responses API. - Updated response extraction logic to prefer typed parsed output when available. --- tinytroupe/openai_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tinytroupe/openai_utils.py b/tinytroupe/openai_utils.py index 7e93970..6a4be60 100644 --- a/tinytroupe/openai_utils.py +++ b/tinytroupe/openai_utils.py @@ -9,6 +9,7 @@ import tiktoken +from pydantic import BaseModel from tinytroupe import utils from tinytroupe.control import transactional from tinytroupe import default @@ -227,7 +228,7 @@ def _raw_model_call(self, model, chat_api_params): """ Calls the OpenAI API with the given parameters. Subclasses should override this method to implement their own API calls. - """ + """ # Choose API mode (legacy chat vs responses) api_mode = config["OpenAI"].get("API_MODE", "legacy").lower() @@ -247,9 +248,8 @@ def _raw_model_call(self, model, chat_api_params): chat_api_params["reasoning_effort"] = default["reasoning_effort"] - # To make the log cleaner, we remove the messages from the logged parameters - logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} + logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} if api_mode == "responses": # Build Responses API params From b5066e991a1467e1fbbc41cadc7e147a051012fc Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 6 Sep 2025 20:58:17 -0400 Subject: [PATCH 8/9] PR2 prep: add DEBUG config override and structured outputs demo script --- scripts/structured_outputs_demo.py | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 scripts/structured_outputs_demo.py diff --git a/scripts/structured_outputs_demo.py b/scripts/structured_outputs_demo.py new file mode 100644 index 0000000..d41b0f0 --- /dev/null +++ b/scripts/structured_outputs_demo.py @@ -0,0 +1,31 @@ +import os +import logging + +from tinytroupe.agent import TinyPerson + + +def main(): + # Ensure DEBUG logs + logger = logging.getLogger("tinytroupe") + logger.setLevel(logging.DEBUG) + + # Require API key + if not os.getenv("OPENAI_API_KEY"): + raise RuntimeError("OPENAI_API_KEY must be set in the environment.") + + # Create a simple agent and act once to force an action generation + agent = TinyPerson(name="DemoAgent") + agent.listen("You're in a coffee shop. 
Order a cappuccino politely.") + + # Act once; structured output is enforced by ActionGenerator with Pydantic models + outputs = agent.act(until_done=False, n=1, return_actions=True, communication_display=False) + + # Print the raw structured response for inspection + print("\n=== Structured Output ===") + print(outputs[-1]) + + +if __name__ == "__main__": + main() + + From 7860a3e6552d42b46e2947d99368e3c680a33e36 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Mon, 8 Sep 2025 21:15:56 -0400 Subject: [PATCH 9/9] Remove structured_outputs_demo.py from branch --- scripts/structured_outputs_demo.py | 31 ------------------------------ 1 file changed, 31 deletions(-) delete mode 100644 scripts/structured_outputs_demo.py diff --git a/scripts/structured_outputs_demo.py b/scripts/structured_outputs_demo.py deleted file mode 100644 index d41b0f0..0000000 --- a/scripts/structured_outputs_demo.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -import logging - -from tinytroupe.agent import TinyPerson - - -def main(): - # Ensure DEBUG logs - logger = logging.getLogger("tinytroupe") - logger.setLevel(logging.DEBUG) - - # Require API key - if not os.getenv("OPENAI_API_KEY"): - raise RuntimeError("OPENAI_API_KEY must be set in the environment.") - - # Create a simple agent and act once to force an action generation - agent = TinyPerson(name="DemoAgent") - agent.listen("You're in a coffee shop. Order a cappuccino politely.") - - # Act once; structured output is enforced by ActionGenerator with Pydantic models - outputs = agent.act(until_done=False, n=1, return_actions=True, communication_display=False) - - # Print the raw structured response for inspection - print("\n=== Structured Output ===") - print(outputs[-1]) - - -if __name__ == "__main__": - main() - -
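Taken together, the series lets a local run opt into the Responses path with a single switch: API_MODE=responses under [OpenAI] in config.ini. A minimal sketch of exercising it through the existing client facade, assuming OPENAI_API_KEY is set; the model name is illustrative:

    import tinytroupe.openai_utils as openai_utils

    messages = [
        {"role": "system", "content": "You are terse."},
        {"role": "user", "content": "Say ok."},
    ]

    # With API_MODE=responses, send_message routes through client.responses.create/parse,
    # and max_tokens is mapped to max_output_tokens on that path.
    reply = openai_utils.client().send_message(
        current_messages=messages,
        model="gpt-4.1-mini",
        temperature=0.2,
        max_tokens=128,
    )

    print(reply["content"])  # extractor returns {"role": "assistant", "content": ...}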