From 62930024cc5d5c2a93c9b285fcc7abaa2ad097ec Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 30 Aug 2025 10:02:22 -0400 Subject: [PATCH 1/9] Enhance OpenAIClient to support Responses API mode - Added configuration option for API_MODE in config.ini to enable Responses API for local runs. - Updated OpenAIClient to handle both 'responses' and 'legacy' modes, preserving existing behavior while allowing for new functionality. - Implemented parameter mapping for Responses API, ensuring compatibility with legacy chat parameters. This change aims to improve flexibility in API usage and facilitate a smoother transition to the new Responses API. --- config.ini | 4 + tests/unit/test_openai_utils_responses.py | 72 +++++++++++++ tinytroupe/config.ini | 2 + tinytroupe/openai_utils.py | 118 +++++++++++++++------- 4 files changed, 162 insertions(+), 34 deletions(-) create mode 100644 config.ini create mode 100644 tests/unit/test_openai_utils_responses.py diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..473b87d --- /dev/null +++ b/config.ini @@ -0,0 +1,4 @@ +[OpenAI] +API_MODE=responses + + diff --git a/tests/unit/test_openai_utils_responses.py b/tests/unit/test_openai_utils_responses.py new file mode 100644 index 0000000..edb1096 --- /dev/null +++ b/tests/unit/test_openai_utils_responses.py @@ -0,0 +1,72 @@ +import types +from unittest.mock import patch + +import tinytroupe.openai_utils as openai_utils + + +class _StubResponsesClient: + def __init__(self): + self.last_params = None + + class _Responses: + def __init__(self, outer): + self._outer = outer + + def create(self, **kwargs): + # Capture params for assertions + self._outer.last_params = kwargs + + # Return minimal object with output_text like the SDK does + return types.SimpleNamespace(output_text="ok") + + @property + def responses(self): + return _StubResponsesClient._Responses(self) + + +def test_send_message_uses_responses_api_when_api_mode_is_responses(): + stub = _StubResponsesClient() + + # Patch setup to force responses mode and inject stub client + original_setup = openai_utils.OpenAIClient._setup_from_config + + def _setup_with_responses(self): + self.client = stub + self.api_mode = "responses" + + try: + openai_utils.OpenAIClient._setup_from_config = _setup_with_responses + + client = openai_utils.OpenAIClient() + + messages = [ + {"role": "system", "content": "You are terse."}, + {"role": "user", "content": "Say ok."}, + ] + + result = client.send_message( + current_messages=messages, + model="gpt-4.1-mini", + temperature=0.2, + max_tokens=128, + ) + + # Verify mapping to Responses API + assert stub.last_params is not None + assert stub.last_params.get("model") == "gpt-4.1-mini" + assert stub.last_params.get("temperature") == 0.2 + assert stub.last_params.get("max_output_tokens") == 128 + + input_msgs = stub.last_params.get("input") + assert isinstance(input_msgs, list) and len(input_msgs) == 2 + assert input_msgs[0]["role"] == "system" + assert input_msgs[1]["role"] == "user" + assert input_msgs[1]["content"][0]["text"] == "Say ok." 
+ + # Verify extractor returns assistant content + assert result["content"].lower().startswith("ok") + + finally: + openai_utils.OpenAIClient._setup_from_config = original_setup + + diff --git a/tinytroupe/config.ini b/tinytroupe/config.ini index 353bdb0..6c45ac5 100644 --- a/tinytroupe/config.ini +++ b/tinytroupe/config.ini @@ -1,4 +1,6 @@ [OpenAI] +# Enable Responses API path for local runs +API_MODE=responses # # OpenAI or Azure OpenAI Service # diff --git a/tinytroupe/openai_utils.py b/tinytroupe/openai_utils.py index c7a04cb..0c5556d 100644 --- a/tinytroupe/openai_utils.py +++ b/tinytroupe/openai_utils.py @@ -53,6 +53,11 @@ def _setup_from_config(self): Sets up the OpenAI API configurations for this client. """ self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + # API mode: 'responses' or 'legacy' (default to legacy to preserve behavior unless configured) + try: + self.api_mode = config["OpenAI"].get("API_MODE", "legacy").strip().lower() + except Exception: + self.api_mode = "legacy" @config_manager.config_defaults( model="model", @@ -228,55 +233,100 @@ def _raw_model_call(self, model, chat_api_params): Calls the OpenAI API with the given parameters. Subclasses should override this method to implement their own API calls. """ + + # If we are in LEGACY mode or response_format is provided (Pydantic path today), use legacy chat.completions + # This preserves current behavior for structured outputs while we migrate modules incrementally. + use_legacy = (self.api_mode != "responses") or ("response_format" in chat_api_params and chat_api_params["response_format"] is not None) + + if use_legacy: + # adjust parameters depending on the model (legacy path) + if self._is_reasoning_model(model): + # Reasoning models have slightly different parameters + if "stream" in chat_api_params: + del chat_api_params["stream"] + for k in ["temperature", "top_p", "frequency_penalty", "presence_penalty"]: + if k in chat_api_params: + del chat_api_params[k] + if "max_tokens" in chat_api_params: + chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] + del chat_api_params["max_tokens"] + chat_api_params["reasoning_effort"] = default["reasoning_effort"] + + logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} + + if "response_format" in chat_api_params and chat_api_params["response_format"] is not None: + # to enforce the response format via pydantic, we need to use the parse helper + if "stream" in chat_api_params: + del chat_api_params["stream"] + logger.debug(f"Calling legacy .parse with params: {logged_params} (messages omitted).") + return self.client.beta.chat.completions.parse(**chat_api_params) + else: + logger.debug(f"Calling legacy chat.completions.create with params: {logged_params} (messages omitted).") + return self.client.chat.completions.create(**chat_api_params) - # adjust parameters depending on the model - if self._is_reasoning_model(model): - # Reasoning models have slightly different parameters - del chat_api_params["stream"] - del chat_api_params["temperature"] - del chat_api_params["top_p"] - del chat_api_params["frequency_penalty"] - del chat_api_params["presence_penalty"] - - chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] - del chat_api_params["max_tokens"] - - chat_api_params["reasoning_effort"] = default["reasoning_effort"] - + # RESPONSES API path (no response_format in params) + responses_params = self._map_messages_to_responses_params(model, chat_api_params) + logged_params = {k: v for k, v in 
responses_params.items() if k != "input"} + logger.debug(f"Calling Responses API with params: {logged_params} (input omitted).") + return self.client.responses.create(**responses_params) - # To make the log cleaner, we remove the messages from the logged parameters - logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} + def _map_messages_to_responses_params(self, model, chat_api_params): + """ + Convert legacy chat parameters into Responses API parameters. + - messages -> input (list of message dicts with content blocks) + - max_tokens -> max_output_tokens + - for reasoning models: set reasoning.effort and drop unsupported fields + """ + # Map messages to Responses input + messages = chat_api_params.get("messages", []) + input_msgs = [] + for m in messages: + content = m.get("content", "") + input_msgs.append({ + "role": m.get("role", "user"), + "content": [{"type": "text", "text": content}] + }) + + params = { + "model": chat_api_params.get("model"), + "input": input_msgs, + } - if "response_format" in chat_api_params: - # to enforce the response format via pydantic, we need to use a different method + # Temperature and sampling parameters are supported for non-reasoning models + if not self._is_reasoning_model(model): + if chat_api_params.get("temperature") is not None: + params["temperature"] = chat_api_params.get("temperature") + if chat_api_params.get("top_p") is not None: + params["top_p"] = chat_api_params.get("top_p") - if "stream" in chat_api_params: - del chat_api_params["stream"] + # Map token limits + if chat_api_params.get("max_tokens") is not None: + params["max_output_tokens"] = chat_api_params.get("max_tokens") - logger.debug(f"Calling LLM model (using .parse too) with these parameters: {logged_params}. Not showing 'messages' parameter.") - # complete message - logger.debug(f" --> Complete messages sent to LLM: {chat_api_params['messages']}") + # Reasoning model adjustments + if self._is_reasoning_model(model): + params["reasoning"] = {"effort": default.get("reasoning_effort", "medium")} - result_message = self.client.beta.chat.completions.parse( - **chat_api_params - ) + # Timeout is still honored via request options in SDK; keep here if needed by higher layer + # Stop sequences are not yet mapped here; can be added if required. - return result_message - - else: - logger.debug(f"Calling LLM model with these parameters: {logged_params}. Not showing 'messages' parameter.") - return self.client.chat.completions.create( - **chat_api_params - ) + return params def _is_reasoning_model(self, model): - return "o1" in model or "o3" in model + return ("o1" in model) or ("o3" in model) or ("gpt-5" in model) def _raw_model_response_extractor(self, response): """ Extracts the response from the API response. Subclasses should override this method to implement their own response extraction. """ + # Responses API: has output_text and output messages + try: + if hasattr(response, "output_text") and response.output_text is not None: + return {"role": "assistant", "content": response.output_text} + except Exception: + pass + # Legacy chat.completions path return response.choices[0].message.to_dict() def _count_tokens(self, messages: list, model: str): From 32f26b4cf422569bec93b914179478ad77f9ffc5 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 30 Aug 2025 11:21:18 -0400 Subject: [PATCH 2/9] - Introduced Pydantic support for structured output in Responses API. - Updated response extraction logic to prefer typed parsed output when available. 
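For reference, a minimal sketch of the structured-output flow this patch enables, assuming a recent openai Python SDK that exposes responses.parse and output_parsed, a Pydantic v2 model, and OPENAI_API_KEY in the environment; the Action model and the model name are illustrative, not part of the diff:

    from openai import OpenAI
    from pydantic import BaseModel

    class Action(BaseModel):
        type: str
        content: str

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    # responses.parse accepts the Pydantic class via text_format and returns typed output
    response = client.responses.parse(
        model="gpt-4.1-mini",  # illustrative model name
        input=[{"role": "user", "content": "Emit a short THINK action."}],
        text_format=Action,
    )

    parsed = response.output_parsed    # an Action instance when parsing succeeds
    print(parsed.model_dump_json())    # the JSON string the extractor now prefers as content

If parsing does not produce a typed object, the extractor falls back to output_text, as shown in the diff below.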
--- tinytroupe/openai_utils.py | 41 ++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/tinytroupe/openai_utils.py b/tinytroupe/openai_utils.py index 0c5556d..4afec87 100644 --- a/tinytroupe/openai_utils.py +++ b/tinytroupe/openai_utils.py @@ -9,6 +9,7 @@ import tiktoken +from pydantic import BaseModel from tinytroupe import utils from tinytroupe.control import transactional from tinytroupe import default @@ -234,9 +235,8 @@ def _raw_model_call(self, model, chat_api_params): override this method to implement their own API calls. """ - # If we are in LEGACY mode or response_format is provided (Pydantic path today), use legacy chat.completions - # This preserves current behavior for structured outputs while we migrate modules incrementally. - use_legacy = (self.api_mode != "responses") or ("response_format" in chat_api_params and chat_api_params["response_format"] is not None) + # Prefer Responses API when enabled; otherwise use legacy chat.completions + use_legacy = (self.api_mode != "responses") if use_legacy: # adjust parameters depending on the model (legacy path) @@ -264,16 +264,26 @@ def _raw_model_call(self, model, chat_api_params): logger.debug(f"Calling legacy chat.completions.create with params: {logged_params} (messages omitted).") return self.client.chat.completions.create(**chat_api_params) - # RESPONSES API path (no response_format in params) + # RESPONSES API path + rf = chat_api_params.get("response_format") + is_pydantic = isinstance(rf, type) and issubclass(rf, BaseModel) + responses_params = self._map_messages_to_responses_params(model, chat_api_params) logged_params = {k: v for k, v in responses_params.items() if k != "input"} - logger.debug(f"Calling Responses API with params: {logged_params} (input omitted).") + + if is_pydantic: + # Structured output via responses.parse with Pydantic text_format + logger.debug(f"Calling Responses API parse with params: {logged_params} (input omitted).") + return self.client.responses.parse(text_format=rf, **responses_params) + + # Unstructured path + logger.debug(f"Calling Responses API create with params: {logged_params} (input omitted).") return self.client.responses.create(**responses_params) def _map_messages_to_responses_params(self, model, chat_api_params): """ Convert legacy chat parameters into Responses API parameters. - - messages -> input (list of message dicts with content blocks) + - messages -> input (list of message dicts with plain string content) - max_tokens -> max_output_tokens - for reasoning models: set reasoning.effort and drop unsupported fields """ @@ -284,7 +294,7 @@ def _map_messages_to_responses_params(self, model, chat_api_params): content = m.get("content", "") input_msgs.append({ "role": m.get("role", "user"), - "content": [{"type": "text", "text": content}] + "content": content, }) params = { @@ -307,6 +317,8 @@ def _map_messages_to_responses_params(self, model, chat_api_params): if self._is_reasoning_model(model): params["reasoning"] = {"effort": default.get("reasoning_effort", "medium")} + # Do not attach response_format to Responses API params; use responses.parse(text_format=...) instead + # Timeout is still honored via request options in SDK; keep here if needed by higher layer # Stop sequences are not yet mapped here; can be added if required. @@ -320,8 +332,21 @@ def _raw_model_response_extractor(self, response): Extracts the response from the API response. Subclasses should override this method to implement their own response extraction. 
""" - # Responses API: has output_text and output messages + # Responses API: prefer typed parsed output when available try: + if hasattr(response, "output_parsed") and response.output_parsed is not None: + parsed = response.output_parsed + try: + # Pydantic v2 BaseModel + content_text = parsed.model_dump_json() + except Exception: + try: + # Fallback to dict serialization + import json + content_text = json.dumps(getattr(parsed, "dict", lambda: parsed)()) + except Exception: + content_text = str(parsed) + return {"role": "assistant", "content": content_text} if hasattr(response, "output_text") and response.output_text is not None: return {"role": "assistant", "content": response.output_text} except Exception: From 4c009bf3fa1b05a6912122cd4cb81b9d260dc350 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 6 Sep 2025 20:58:17 -0400 Subject: [PATCH 3/9] PR2 prep: add DEBUG config override and structured outputs demo script --- config.ini | 52 ++++++++++++++++++++++++++++++ scripts/structured_outputs_demo.py | 31 ++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 config.ini create mode 100644 scripts/structured_outputs_demo.py diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..544a8c8 --- /dev/null +++ b/config.ini @@ -0,0 +1,52 @@ +[OpenAI] +API_TYPE=openai +MODEL=gpt-4.1-mini +REASONING_MODEL=o3-mini +EMBEDDING_MODEL=text-embedding-3-small + +# Keep calls fast and inexpensive for the demo +MAX_TOKENS=800 +TEMPERATURE=1.0 +FREQ_PENALTY=0.0 +PRESENCE_PENALTY=0.0 +TIMEOUT=120 +MAX_ATTEMPTS=1 +WAITING_TIME=0 +EXPONENTIAL_BACKOFF_FACTOR=5 + +REASONING_EFFORT=low + +CACHE_API_CALLS=False +CACHE_FILE_NAME=openai_api_cache.pickle + +MAX_CONTENT_DISPLAY_LENGTH=2000 + +[Simulation] +PARALLEL_AGENT_GENERATION=True +PARALLEL_AGENT_ACTIONS=True +RAI_HARMFUL_CONTENT_PREVENTION=True +RAI_COPYRIGHT_INFRINGEMENT_PREVENTION=True + +[Cognition] +ENABLE_MEMORY_CONSOLIDATION=False +MIN_EPISODE_LENGTH=5 +MAX_EPISODE_LENGTH=20 +EPISODIC_MEMORY_FIXED_PREFIX_LENGTH=5 +EPISODIC_MEMORY_LOOKBACK_LENGTH=10 + +[ActionGenerator] +MAX_ATTEMPTS=1 +ENABLE_QUALITY_CHECKS=False +ENABLE_REGENERATION=False +ENABLE_DIRECT_CORRECTION=False +ENABLE_QUALITY_CHECK_FOR_PERSONA_ADHERENCE=False +ENABLE_QUALITY_CHECK_FOR_SELFCONSISTENCY=False +ENABLE_QUALITY_CHECK_FOR_FLUENCY=False +ENABLE_QUALITY_CHECK_FOR_SUITABILITY=False +ENABLE_QUALITY_CHECK_FOR_SIMILARITY=False +CONTINUE_ON_FAILURE=True +QUALITY_THRESHOLD=5 + +[Logging] +LOGLEVEL=DEBUG + diff --git a/scripts/structured_outputs_demo.py b/scripts/structured_outputs_demo.py new file mode 100644 index 0000000..d41b0f0 --- /dev/null +++ b/scripts/structured_outputs_demo.py @@ -0,0 +1,31 @@ +import os +import logging + +from tinytroupe.agent import TinyPerson + + +def main(): + # Ensure DEBUG logs + logger = logging.getLogger("tinytroupe") + logger.setLevel(logging.DEBUG) + + # Require API key + if not os.getenv("OPENAI_API_KEY"): + raise RuntimeError("OPENAI_API_KEY must be set in the environment.") + + # Create a simple agent and act once to force an action generation + agent = TinyPerson(name="DemoAgent") + agent.listen("You're in a coffee shop. 
Order a cappuccino politely.") + + # Act once; structured output is enforced by ActionGenerator with Pydantic models + outputs = agent.act(until_done=False, n=1, return_actions=True, communication_display=False) + + # Print the raw structured response for inspection + print("\n=== Structured Output ===") + print(outputs[-1]) + + +if __name__ == "__main__": + main() + + From 0b57eff93c97d81bee5f62ded949aaf42a3e3f70 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Mon, 8 Sep 2025 21:11:51 -0400 Subject: [PATCH 4/9] Enhance API key loading and response handling. Updated action generator to prefer structured responses and handle model refusals more gracefully. --- scripts/structured_outputs_demo.py | 43 ++++- ...est_action_generator_structured_outputs.py | 57 +++++++ tinytroupe/agent/action_generator.py | 43 ++++- tinytroupe/openai_utils.py | 148 +++++++++++++++--- 4 files changed, 263 insertions(+), 28 deletions(-) create mode 100644 tests/unit/test_action_generator_structured_outputs.py diff --git a/scripts/structured_outputs_demo.py b/scripts/structured_outputs_demo.py index d41b0f0..0a085b0 100644 --- a/scripts/structured_outputs_demo.py +++ b/scripts/structured_outputs_demo.py @@ -1,17 +1,58 @@ import os import logging +from pathlib import Path from tinytroupe.agent import TinyPerson +def _load_api_key_from_dotenv_if_missing(): + if os.getenv("OPENAI_API_KEY"): + return + + # Try to read from .env in TinyTroupe/ and project root without overwriting env + candidate_paths = [ + Path(__file__).resolve().parent.parent / ".env", # project_root/TinyTroupe/../.env + Path(__file__).resolve().parent / ".env", # TinyTroupe/.env + Path.cwd() / ".env", # current working dir .env + ] + + api_key = None + for dotenv_path in candidate_paths: + try: + if dotenv_path.exists(): + with open(dotenv_path, "r", encoding="utf-8", errors="replace") as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + if "=" in line: + k, v = line.split("=", 1) + k = k.strip() + v = v.strip().strip('"').strip("'") + if k == "OPENAI_API_KEY" and v: + api_key = v + break + if api_key: + break + except Exception: + continue + + if api_key and not os.getenv("OPENAI_API_KEY"): + # Set only for this process; do not overwrite existing env + os.environ["OPENAI_API_KEY"] = api_key + + def main(): # Ensure DEBUG logs logger = logging.getLogger("tinytroupe") logger.setLevel(logging.DEBUG) + # Load from .env if needed (non-destructive) + _load_api_key_from_dotenv_if_missing() + # Require API key if not os.getenv("OPENAI_API_KEY"): - raise RuntimeError("OPENAI_API_KEY must be set in the environment.") + raise RuntimeError("OPENAI_API_KEY must be set in the environment or in a local .env file.") # Create a simple agent and act once to force an action generation agent = TinyPerson(name="DemoAgent") diff --git a/tests/unit/test_action_generator_structured_outputs.py b/tests/unit/test_action_generator_structured_outputs.py new file mode 100644 index 0000000..1f9a022 --- /dev/null +++ b/tests/unit/test_action_generator_structured_outputs.py @@ -0,0 +1,57 @@ +import pytest + +from tinytroupe.agent.action_generator import ActionGenerator, ActionRefusedException +from tinytroupe.agent import TinyPerson, CognitiveActionModel + + +class FakeClient: + def __init__(self, message): + self._message = message + + def send_message(self, *args, **kwargs): + return self._message + + +def test_prefers_parsed_payload(monkeypatch): + TinyPerson.clear_agents() + # Build a parsed payload consistent with 
CognitiveActionModel + parsed = { + "action": {"type": "THINK", "content": "test content", "target": ""}, + "cognitive_state": { + "goals": "g", + "context": ["c"], + "attention": "a", + "emotions": "e", + }, + } + + message = {"role": "assistant", "content": "{\"action\":{}}", "parsed": parsed} + + # Patch client used by action generator to return our fake message + from tinytroupe import openai_utils + + monkeypatch.setattr(openai_utils, "client", lambda: FakeClient(message)) + + agent = TinyPerson(name="Tester") + ag = ActionGenerator() + + action, role, content = ag._generate_tentative_action(agent, agent.current_messages)[0:3] + + assert content == parsed + assert action == parsed["action"] + assert role == "assistant" + + +def test_refusal_raises(monkeypatch): + TinyPerson.clear_agents() + message = {"role": "assistant", "content": "{}", "refusal": "safety refusal"} + + from tinytroupe import openai_utils + + monkeypatch.setattr(openai_utils, "client", lambda: FakeClient(message)) + + agent = TinyPerson(name="Tester") + ag = ActionGenerator() + + with pytest.raises(ActionRefusedException): + ag._generate_tentative_action(agent, agent.current_messages) diff --git a/tinytroupe/agent/action_generator.py b/tinytroupe/agent/action_generator.py index 08aca7f..c38ddf2 100644 --- a/tinytroupe/agent/action_generator.py +++ b/tinytroupe/agent/action_generator.py @@ -288,7 +288,19 @@ def _generate_tentative_action(self, agent, current_messages, feedback_from_prev if not self.enable_reasoning_step: logger.debug(f"[{agent.name}] Reasoning step disabled.") - next_message = openai_utils.client().send_message(current_messages_context, response_format=CognitiveActionModel) + # Prefer Responses API JSON Schema when API_MODE=responses; fallback to Pydantic class on legacy + response_format = CognitiveActionModel + try: + # If running in responses mode, provide a JSON Schema envelope with strict mode + from pydantic import TypeAdapter + schema = TypeAdapter(CognitiveActionModel).json_schema() + response_format = { + "type": "json_schema", + "json_schema": {"name": "CognitiveActionModel", "schema": schema, "strict": True}, + } + except Exception: + pass + next_message = openai_utils.client().send_message(current_messages_context, response_format=response_format) else: logger.debug(f"[{agent.name}] Reasoning step enabled.") @@ -302,11 +314,31 @@ def _generate_tentative_action(self, agent, current_messages, feedback_from_prev current_messages_context.append({"role": "system", "content": "Use the \"reasoning\" field to add any reasoning process you might wish to use before generating the next action and cognitive state. 
"}) - next_message = openai_utils.client().send_message(current_messages_context, response_format=CognitiveActionModelWithReasoning) + response_format = CognitiveActionModelWithReasoning + try: + from pydantic import TypeAdapter + schema = TypeAdapter(CognitiveActionModelWithReasoning).json_schema() + response_format = { + "type": "json_schema", + "json_schema": {"name": "CognitiveActionModelWithReasoning", "schema": schema, "strict": True}, + } + except Exception: + pass + next_message = openai_utils.client().send_message(current_messages_context, response_format=response_format) logger.debug(f"[{agent.name}] Received message: {next_message}") - role, content = next_message["role"], utils.extract_json(next_message["content"]) + # Prefer typed parsed payload when available; otherwise, fall back to JSON extraction + role = next_message.get("role", "assistant") + + # Handle explicit refusal from provider payloads when present + refusal = next_message.get("refusal") + if refusal: + # Log and raise a specialized exception to surface actionable errors + logger.warning(f"[{agent.name}] Model refusal received: {refusal}") + raise ActionRefusedException(refusal) + + content = next_message.get("parsed") or utils.extract_json(next_message["content"]) action = content['action'] logger.debug(f"{agent.name}'s action: {action}") @@ -530,3 +562,8 @@ class PoorQualityActionException(Exception): def __init__(self, message="The generated action is of poor quality"): self.message = message super().__init__(self.message) + + +class ActionRefusedException(Exception): + def __init__(self, refusal_message: str = "The model refused to generate an action"): + super().__init__(refusal_message) diff --git a/tinytroupe/openai_utils.py b/tinytroupe/openai_utils.py index c7a04cb..7e93970 100644 --- a/tinytroupe/openai_utils.py +++ b/tinytroupe/openai_utils.py @@ -229,55 +229,155 @@ def _raw_model_call(self, model, chat_api_params): override this method to implement their own API calls. 
""" - # adjust parameters depending on the model + # Choose API mode (legacy chat vs responses) + api_mode = config["OpenAI"].get("API_MODE", "legacy").lower() + + # adjust parameters depending on the model (legacy path expectations) if self._is_reasoning_model(model): # Reasoning models have slightly different parameters - del chat_api_params["stream"] - del chat_api_params["temperature"] - del chat_api_params["top_p"] - del chat_api_params["frequency_penalty"] - del chat_api_params["presence_penalty"] + if api_mode == "legacy": + if "stream" in chat_api_params: del chat_api_params["stream"] + if "temperature" in chat_api_params: del chat_api_params["temperature"] + if "top_p" in chat_api_params: del chat_api_params["top_p"] + if "frequency_penalty" in chat_api_params: del chat_api_params["frequency_penalty"] + if "presence_penalty" in chat_api_params: del chat_api_params["presence_penalty"] - chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] - del chat_api_params["max_tokens"] + chat_api_params["max_completion_tokens"] = chat_api_params["max_tokens"] + del chat_api_params["max_tokens"] - chat_api_params["reasoning_effort"] = default["reasoning_effort"] + chat_api_params["reasoning_effort"] = default["reasoning_effort"] # To make the log cleaner, we remove the messages from the logged parameters logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} - if "response_format" in chat_api_params: - # to enforce the response format via pydantic, we need to use a different method + if api_mode == "responses": + # Build Responses API params + responses_params = self._build_responses_params(model, chat_api_params) + # Log sanitized params and full messages separately + rp_logged = {k: v for k, v in responses_params.items() if k != "input" and k != "messages"} + logger.debug(f"Calling LLM model (Responses API) with these parameters: {rp_logged}. Not showing 'messages'/'input' parameter.") + logger.debug(f" --> Complete messages sent to LLM: {responses_params.get('messages') or responses_params.get('input')}") + + # If using Pydantic model, prefer parse helper when available + if isinstance(chat_api_params.get("response_format"), type): + # Responses parse path with Pydantic model + return self.client.responses.parse(**responses_params) + else: + return self.client.responses.create(**responses_params) + + # Legacy Chat Completions path + if "response_format" in chat_api_params: if "stream" in chat_api_params: del chat_api_params["stream"] logger.debug(f"Calling LLM model (using .parse too) with these parameters: {logged_params}. Not showing 'messages' parameter.") - # complete message logger.debug(f" --> Complete messages sent to LLM: {chat_api_params['messages']}") + return self.client.beta.chat.completions.parse(**chat_api_params) + else: + logger.debug(f"Calling LLM model with these parameters: {logged_params}. Not showing 'messages' parameter.") + return self.client.chat.completions.create(**chat_api_params) - result_message = self.client.beta.chat.completions.parse( - **chat_api_params - ) + def _build_responses_params(self, model, chat_api_params): + """ + Map legacy chat-style params to Responses API params. + - Prefer 'messages' as input if present; else use 'input'. + - Map max_tokens -> max_output_tokens + - For reasoning models add reasoning: { effort: ... } and drop sampling params. 
+ - If response_format is a Pydantic model class, pass it directly (Responses parse supports Pydantic); + if it's a dict (JSON Schema), pass as-is with strict mode expected to be set by caller. + """ + params = { + "model": model, + # Latest SDKs accept either 'input' or 'messages'. We pass both for compatibility; the SDK ignores the unused one. + "messages": chat_api_params.get("messages"), + "input": chat_api_params.get("messages"), + "max_output_tokens": chat_api_params.get("max_tokens"), + "timeout": chat_api_params.get("timeout"), + } - return result_message - + # Include response_format (Pydantic class or JSON Schema dict) + if chat_api_params.get("response_format") is not None: + rf = chat_api_params["response_format"] + params["response_format"] = rf + + # Reasoning models: remove sampling controls and set reasoning effort + if self._is_reasoning_model(model): + params["reasoning"] = {"effort": default["reasoning_effort"]} else: - logger.debug(f"Calling LLM model with these parameters: {logged_params}. Not showing 'messages' parameter.") - return self.client.chat.completions.create( - **chat_api_params - ) + # Non-reasoning: sampling controls are valid + for key in ("temperature", "top_p", "frequency_penalty", "presence_penalty"): + if chat_api_params.get(key) is not None: + params[key] = chat_api_params[key] + + return params def _is_reasoning_model(self, model): return "o1" in model or "o3" in model def _raw_model_response_extractor(self, response): """ - Extracts the response from the API response. Subclasses should - override this method to implement their own response extraction. + Extract the response into a unified dict shape used by callers. + Supports both Chat Completions and Responses API return shapes. """ - return response.choices[0].message.to_dict() + # Legacy chat path + if hasattr(response, "choices"): + return response.choices[0].message.to_dict() + + # Responses API path + try: + # Try to obtain a dict-like representation + resp_dict = None + if hasattr(response, "to_dict"): + resp_dict = response.to_dict() + elif hasattr(response, "model_dump"): + resp_dict = response.model_dump() + + # Fall back to attribute traversal if needed + output_items = None + if resp_dict is not None: + output_items = resp_dict.get("output") or resp_dict.get("outputs") + else: + output_items = getattr(response, "output", None) or getattr(response, "outputs", None) + + role = "assistant" + content_text = None + parsed = None + refusal = None + + if output_items: + # Expect the first item to be a message with content parts + first = output_items[0] + contents = first.get("content") if isinstance(first, dict) else getattr(first, "content", []) + for part in contents or []: + ptype = part.get("type") if isinstance(part, dict) else getattr(part, "type", None) + # Text output + if ptype in ("output_text", "text"): + content_text = part.get("text") if isinstance(part, dict) else getattr(part, "text", None) + # Structured parse + if (isinstance(part, dict) and "parsed" in part): + parsed = part.get("parsed") + elif hasattr(part, "parsed"): + parsed = getattr(part, "parsed") + # Refusal + if (isinstance(part, dict) and "refusal" in part): + refusal = part.get("refusal") + elif hasattr(part, "refusal"): + refusal = getattr(part, "refusal") + + # As a final fallback, try convenience property 'output_text' + if content_text is None and hasattr(response, "output_text"): + try: + content_text = response.output_text + except Exception: + pass + + return {"role": role, "content": content_text, 
"parsed": parsed, "refusal": refusal} + except Exception as e: + logger.error(f"Failed to extract Responses API payload: {e}") + # best-effort fallback + return {"role": "assistant", "content": None, "parsed": None, "refusal": None} def _count_tokens(self, messages: list, model: str): """ From e4fda17f0a7753ef9416bd0f4a27aba9aaf051a2 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Mon, 8 Sep 2025 21:15:56 -0400 Subject: [PATCH 5/9] Remove structured_outputs_demo.py from branch --- scripts/structured_outputs_demo.py | 72 ------------------------------ 1 file changed, 72 deletions(-) delete mode 100644 scripts/structured_outputs_demo.py diff --git a/scripts/structured_outputs_demo.py b/scripts/structured_outputs_demo.py deleted file mode 100644 index 0a085b0..0000000 --- a/scripts/structured_outputs_demo.py +++ /dev/null @@ -1,72 +0,0 @@ -import os -import logging -from pathlib import Path - -from tinytroupe.agent import TinyPerson - - -def _load_api_key_from_dotenv_if_missing(): - if os.getenv("OPENAI_API_KEY"): - return - - # Try to read from .env in TinyTroupe/ and project root without overwriting env - candidate_paths = [ - Path(__file__).resolve().parent.parent / ".env", # project_root/TinyTroupe/../.env - Path(__file__).resolve().parent / ".env", # TinyTroupe/.env - Path.cwd() / ".env", # current working dir .env - ] - - api_key = None - for dotenv_path in candidate_paths: - try: - if dotenv_path.exists(): - with open(dotenv_path, "r", encoding="utf-8", errors="replace") as f: - for line in f: - line = line.strip() - if not line or line.startswith("#"): - continue - if "=" in line: - k, v = line.split("=", 1) - k = k.strip() - v = v.strip().strip('"').strip("'") - if k == "OPENAI_API_KEY" and v: - api_key = v - break - if api_key: - break - except Exception: - continue - - if api_key and not os.getenv("OPENAI_API_KEY"): - # Set only for this process; do not overwrite existing env - os.environ["OPENAI_API_KEY"] = api_key - - -def main(): - # Ensure DEBUG logs - logger = logging.getLogger("tinytroupe") - logger.setLevel(logging.DEBUG) - - # Load from .env if needed (non-destructive) - _load_api_key_from_dotenv_if_missing() - - # Require API key - if not os.getenv("OPENAI_API_KEY"): - raise RuntimeError("OPENAI_API_KEY must be set in the environment or in a local .env file.") - - # Create a simple agent and act once to force an action generation - agent = TinyPerson(name="DemoAgent") - agent.listen("You're in a coffee shop. Order a cappuccino politely.") - - # Act once; structured output is enforced by ActionGenerator with Pydantic models - outputs = agent.act(until_done=False, n=1, return_actions=True, communication_display=False) - - # Print the raw structured response for inspection - print("\n=== Structured Output ===") - print(outputs[-1]) - - -if __name__ == "__main__": - main() - - From 8c0d66a1b7cf2482c7f73a33b6755430adf64c4f Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 30 Aug 2025 10:02:22 -0400 Subject: [PATCH 6/9] Enhance OpenAIClient to support Responses API mode - Added configuration option for API_MODE in config.ini to enable Responses API for local runs. - Updated OpenAIClient to handle both 'responses' and 'legacy' modes, preserving existing behavior while allowing for new functionality. - Implemented parameter mapping for Responses API, ensuring compatibility with legacy chat parameters. This change aims to improve flexibility in API usage and facilitate a smoother transition to the new Responses API. 
--- config.ini | 52 ---------------- tests/unit/test_openai_utils_responses.py | 72 +++++++++++++++++++++++ tinytroupe/config.ini | 2 + 3 files changed, 74 insertions(+), 52 deletions(-) delete mode 100644 config.ini create mode 100644 tests/unit/test_openai_utils_responses.py diff --git a/config.ini b/config.ini deleted file mode 100644 index 544a8c8..0000000 --- a/config.ini +++ /dev/null @@ -1,52 +0,0 @@ -[OpenAI] -API_TYPE=openai -MODEL=gpt-4.1-mini -REASONING_MODEL=o3-mini -EMBEDDING_MODEL=text-embedding-3-small - -# Keep calls fast and inexpensive for the demo -MAX_TOKENS=800 -TEMPERATURE=1.0 -FREQ_PENALTY=0.0 -PRESENCE_PENALTY=0.0 -TIMEOUT=120 -MAX_ATTEMPTS=1 -WAITING_TIME=0 -EXPONENTIAL_BACKOFF_FACTOR=5 - -REASONING_EFFORT=low - -CACHE_API_CALLS=False -CACHE_FILE_NAME=openai_api_cache.pickle - -MAX_CONTENT_DISPLAY_LENGTH=2000 - -[Simulation] -PARALLEL_AGENT_GENERATION=True -PARALLEL_AGENT_ACTIONS=True -RAI_HARMFUL_CONTENT_PREVENTION=True -RAI_COPYRIGHT_INFRINGEMENT_PREVENTION=True - -[Cognition] -ENABLE_MEMORY_CONSOLIDATION=False -MIN_EPISODE_LENGTH=5 -MAX_EPISODE_LENGTH=20 -EPISODIC_MEMORY_FIXED_PREFIX_LENGTH=5 -EPISODIC_MEMORY_LOOKBACK_LENGTH=10 - -[ActionGenerator] -MAX_ATTEMPTS=1 -ENABLE_QUALITY_CHECKS=False -ENABLE_REGENERATION=False -ENABLE_DIRECT_CORRECTION=False -ENABLE_QUALITY_CHECK_FOR_PERSONA_ADHERENCE=False -ENABLE_QUALITY_CHECK_FOR_SELFCONSISTENCY=False -ENABLE_QUALITY_CHECK_FOR_FLUENCY=False -ENABLE_QUALITY_CHECK_FOR_SUITABILITY=False -ENABLE_QUALITY_CHECK_FOR_SIMILARITY=False -CONTINUE_ON_FAILURE=True -QUALITY_THRESHOLD=5 - -[Logging] -LOGLEVEL=DEBUG - diff --git a/tests/unit/test_openai_utils_responses.py b/tests/unit/test_openai_utils_responses.py new file mode 100644 index 0000000..edb1096 --- /dev/null +++ b/tests/unit/test_openai_utils_responses.py @@ -0,0 +1,72 @@ +import types +from unittest.mock import patch + +import tinytroupe.openai_utils as openai_utils + + +class _StubResponsesClient: + def __init__(self): + self.last_params = None + + class _Responses: + def __init__(self, outer): + self._outer = outer + + def create(self, **kwargs): + # Capture params for assertions + self._outer.last_params = kwargs + + # Return minimal object with output_text like the SDK does + return types.SimpleNamespace(output_text="ok") + + @property + def responses(self): + return _StubResponsesClient._Responses(self) + + +def test_send_message_uses_responses_api_when_api_mode_is_responses(): + stub = _StubResponsesClient() + + # Patch setup to force responses mode and inject stub client + original_setup = openai_utils.OpenAIClient._setup_from_config + + def _setup_with_responses(self): + self.client = stub + self.api_mode = "responses" + + try: + openai_utils.OpenAIClient._setup_from_config = _setup_with_responses + + client = openai_utils.OpenAIClient() + + messages = [ + {"role": "system", "content": "You are terse."}, + {"role": "user", "content": "Say ok."}, + ] + + result = client.send_message( + current_messages=messages, + model="gpt-4.1-mini", + temperature=0.2, + max_tokens=128, + ) + + # Verify mapping to Responses API + assert stub.last_params is not None + assert stub.last_params.get("model") == "gpt-4.1-mini" + assert stub.last_params.get("temperature") == 0.2 + assert stub.last_params.get("max_output_tokens") == 128 + + input_msgs = stub.last_params.get("input") + assert isinstance(input_msgs, list) and len(input_msgs) == 2 + assert input_msgs[0]["role"] == "system" + assert input_msgs[1]["role"] == "user" + assert input_msgs[1]["content"][0]["text"] == "Say ok." 
+ + # Verify extractor returns assistant content + assert result["content"].lower().startswith("ok") + + finally: + openai_utils.OpenAIClient._setup_from_config = original_setup + + diff --git a/tinytroupe/config.ini b/tinytroupe/config.ini index 353bdb0..6c45ac5 100644 --- a/tinytroupe/config.ini +++ b/tinytroupe/config.ini @@ -1,4 +1,6 @@ [OpenAI] +# Enable Responses API path for local runs +API_MODE=responses # # OpenAI or Azure OpenAI Service # From 33a1d021c650c202fb96ce9dfe39181f4807e7b5 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 30 Aug 2025 11:21:18 -0400 Subject: [PATCH 7/9] - Introduced Pydantic support for structured output in Responses API. - Updated response extraction logic to prefer typed parsed output when available. --- tinytroupe/openai_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tinytroupe/openai_utils.py b/tinytroupe/openai_utils.py index 7e93970..6a4be60 100644 --- a/tinytroupe/openai_utils.py +++ b/tinytroupe/openai_utils.py @@ -9,6 +9,7 @@ import tiktoken +from pydantic import BaseModel from tinytroupe import utils from tinytroupe.control import transactional from tinytroupe import default @@ -227,7 +228,7 @@ def _raw_model_call(self, model, chat_api_params): """ Calls the OpenAI API with the given parameters. Subclasses should override this method to implement their own API calls. - """ + """ # Choose API mode (legacy chat vs responses) api_mode = config["OpenAI"].get("API_MODE", "legacy").lower() @@ -247,9 +248,8 @@ def _raw_model_call(self, model, chat_api_params): chat_api_params["reasoning_effort"] = default["reasoning_effort"] - # To make the log cleaner, we remove the messages from the logged parameters - logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} + logged_params = {k: v for k, v in chat_api_params.items() if k != "messages"} if api_mode == "responses": # Build Responses API params From b5066e991a1467e1fbbc41cadc7e147a051012fc Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Sat, 6 Sep 2025 20:58:17 -0400 Subject: [PATCH 8/9] PR2 prep: add DEBUG config override and structured outputs demo script --- scripts/structured_outputs_demo.py | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 scripts/structured_outputs_demo.py diff --git a/scripts/structured_outputs_demo.py b/scripts/structured_outputs_demo.py new file mode 100644 index 0000000..d41b0f0 --- /dev/null +++ b/scripts/structured_outputs_demo.py @@ -0,0 +1,31 @@ +import os +import logging + +from tinytroupe.agent import TinyPerson + + +def main(): + # Ensure DEBUG logs + logger = logging.getLogger("tinytroupe") + logger.setLevel(logging.DEBUG) + + # Require API key + if not os.getenv("OPENAI_API_KEY"): + raise RuntimeError("OPENAI_API_KEY must be set in the environment.") + + # Create a simple agent and act once to force an action generation + agent = TinyPerson(name="DemoAgent") + agent.listen("You're in a coffee shop. 
Order a cappuccino politely.") + + # Act once; structured output is enforced by ActionGenerator with Pydantic models + outputs = agent.act(until_done=False, n=1, return_actions=True, communication_display=False) + + # Print the raw structured response for inspection + print("\n=== Structured Output ===") + print(outputs[-1]) + + +if __name__ == "__main__": + main() + + From 7860a3e6552d42b46e2947d99368e3c680a33e36 Mon Sep 17 00:00:00 2001 From: Ricky Kirkendall Date: Mon, 8 Sep 2025 21:15:56 -0400 Subject: [PATCH 9/9] Remove structured_outputs_demo.py from branch --- scripts/structured_outputs_demo.py | 31 ------------------------------ 1 file changed, 31 deletions(-) delete mode 100644 scripts/structured_outputs_demo.py diff --git a/scripts/structured_outputs_demo.py b/scripts/structured_outputs_demo.py deleted file mode 100644 index d41b0f0..0000000 --- a/scripts/structured_outputs_demo.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -import logging - -from tinytroupe.agent import TinyPerson - - -def main(): - # Ensure DEBUG logs - logger = logging.getLogger("tinytroupe") - logger.setLevel(logging.DEBUG) - - # Require API key - if not os.getenv("OPENAI_API_KEY"): - raise RuntimeError("OPENAI_API_KEY must be set in the environment.") - - # Create a simple agent and act once to force an action generation - agent = TinyPerson(name="DemoAgent") - agent.listen("You're in a coffee shop. Order a cappuccino politely.") - - # Act once; structured output is enforced by ActionGenerator with Pydantic models - outputs = agent.act(until_done=False, n=1, return_actions=True, communication_display=False) - - # Print the raw structured response for inspection - print("\n=== Structured Output ===") - print(outputs[-1]) - - -if __name__ == "__main__": - main() - -
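Taken together, the series lets a local run opt into the Responses path with a single switch: API_MODE=responses under [OpenAI] in config.ini. A minimal sketch of exercising it through the existing client facade, assuming OPENAI_API_KEY is set; the model name is illustrative:

    import tinytroupe.openai_utils as openai_utils

    messages = [
        {"role": "system", "content": "You are terse."},
        {"role": "user", "content": "Say ok."},
    ]

    # With API_MODE=responses, send_message routes through client.responses.create/parse,
    # and max_tokens is mapped to max_output_tokens on that path.
    reply = openai_utils.client().send_message(
        current_messages=messages,
        model="gpt-4.1-mini",
        temperature=0.2,
        max_tokens=128,
    )

    print(reply["content"])  # extractor returns {"role": "assistant", "content": ...}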