From 255670b34d993a7995fa1071143e13b76344d594 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 30 Sep 2025 21:53:46 +0800 Subject: [PATCH 01/27] update --- camel/agents/chat_agent.py | 240 ++++++++++++++++++++++++++++--------- pyproject.toml | 2 + uv.lock | 4 + 3 files changed, 190 insertions(+), 56 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index b15e5387d7..5a98140d6d 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -43,6 +43,7 @@ from openai import ( AsyncStream, + BadRequestError, RateLimitError, Stream, ) @@ -858,11 +859,23 @@ def _write_single_record( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=EmptyMemoryWarning) - _, ctx_tokens = self.memory.get_context() + context_messages, _ = self.memory.get_context() + + # Calculate system prompt tokens + sys_prompt_tokens = 0 + for msg in context_messages: + if msg.get('role') in {'system'}: + sys_prompt_tokens += ( + token_counter.count_tokens_from_messages([msg]) + ) + else: + break # Stop after first non-system message - remaining_budget = max(0, token_limit - ctx_tokens) + # Effective limit = total limit - system prompt tokens + effective_limit = token_limit - sys_prompt_tokens - if current_tokens <= remaining_budget: + # Only chunk if message exceeds effective limit + if current_tokens <= effective_limit: _write_single_record(message, role, base_ts) return except Exception as e: @@ -875,8 +888,8 @@ def _write_single_record( # 4. Perform slicing logger.warning( - f"Message with {current_tokens} tokens exceeds remaining budget " - f"of {remaining_budget}. Slicing into smaller chunks." + f"Message with {current_tokens} tokens exceeds effective limit " + f"of {effective_limit}, Slicing into smaller chunks." ) text_to_chunk: Optional[str] = None @@ -905,41 +918,20 @@ def _write_single_record( _write_single_record(message, role, base_ts) # Nothing to chunk return - # 1. Base chunk size: one-tenth of the smaller of (a) total token - # limit and (b) current remaining budget. This prevents us from - # creating chunks that are guaranteed to overflow the - # immediate context window. - base_chunk_size = max(1, remaining_budget) // 10 - - # 2. Each chunk gets a textual prefix such as: - # "[chunk 3/12 of a long message]\n" - # The prefix itself consumes tokens, so if we do not subtract its - # length the *total* tokens of the outgoing message (prefix + body) - # can exceed the intended bound. We estimate the prefix length - # with a representative example that is safely long enough for the - # vast majority of cases (three-digit indices). - sample_prefix = "[chunk 1/1000 of a long message]\n" - prefix_token_len = len(token_counter.encode(sample_prefix)) - - # 3. The real capacity for the message body is therefore the base - # chunk size minus the prefix length. Fallback to at least one - # token to avoid zero or negative sizes. - chunk_body_limit = max(1, base_chunk_size - prefix_token_len) - - # 4. Calculate how many chunks we will need with this body size. 
- num_chunks = math.ceil(len(all_token_ids) / chunk_body_limit) + # TODO: Research more about how to set a more appropriate chunk size + chunk_size = int(effective_limit * 0.05) + + # Calculate number of chunks needed + num_chunks = math.ceil(len(all_token_ids) / chunk_size) group_id = str(_uuid.uuid4()) for i in range(num_chunks): - start_idx = i * chunk_body_limit - end_idx = start_idx + chunk_body_limit + start_idx = i * chunk_size + end_idx = start_idx + chunk_size chunk_token_ids = all_token_ids[start_idx:end_idx] chunk_body = token_counter.decode(chunk_token_ids) - prefix = f"[chunk {i + 1}/{num_chunks} of a long message]\n" - new_body = prefix + chunk_body - if is_function_result and isinstance( message, FunctionCallingMessage ): @@ -950,11 +942,11 @@ def _write_single_record( content=message.content, func_name=message.func_name, args=message.args, - result=new_body, + result=chunk_body, tool_call_id=message.tool_call_id, ) else: - new_msg = message.create_new_instance(new_body) + new_msg = message.create_new_instance(chunk_body) meta = (new_msg.meta_dict or {}).copy() meta.update( @@ -1662,6 +1654,8 @@ def _step_impl( step_token_usage = self._create_token_usage_tracker() iteration_count: int = 0 prev_num_openai_messages: int = 0 + # Track if we've already retried due to token limit + token_limit_retry_attempted: bool = False while True: if self.pause_event is not None and not self.pause_event.is_set(): @@ -1675,17 +1669,82 @@ def _step_impl( return self._step_terminate( e.args[1], tool_call_records, "max_tokens_exceeded" ) - # Get response from model backend - response = self._get_model_response( - openai_messages, - num_tokens=num_tokens, - current_iteration=iteration_count, - response_format=response_format, - tool_schemas=[] - if disable_tools - else self._get_full_tool_schemas(), - prev_num_openai_messages=prev_num_openai_messages, - ) + # Get response from model backend with token limit error handling + try: + response = self._get_model_response( + openai_messages, + num_tokens=num_tokens, + current_iteration=iteration_count, + response_format=response_format, + tool_schemas=[] + if disable_tools + else self._get_full_tool_schemas(), + prev_num_openai_messages=prev_num_openai_messages, + ) + except (BadRequestError, Exception) as e: + # Check if this is a token limit error + error_message = str(e).lower() + is_token_limit_error = ( + 'context_length_exceeded' in error_message + or 'maximum context length' in error_message + or 'context length' in error_message + or 'context limit' in error_message + ) + + if is_token_limit_error and not token_limit_retry_attempted: + logger.warning( + "Token limit exceeded error detected. " + "Removing first non-system message and retrying once." + ) + token_limit_retry_attempted = True + + # Find and remove first non-system message + modified_messages = None + for i, msg in enumerate(openai_messages): + msg_role = msg.get('role', '') + # Skip system and developer messages + if msg_role not in {'system'}: + # Found first non-system message, create new list + modified_messages = ( + openai_messages[:i] + openai_messages[i + 1 :] + ) + logger.info( + "Removed first non-system message " + "and retrying." 
+ ) + break + + if modified_messages is not None: + # Retry with modified messages + try: + response = self._get_model_response( + modified_messages, + num_tokens=num_tokens, + current_iteration=iteration_count, + response_format=response_format, + tool_schemas=[] + if disable_tools + else self._get_full_tool_schemas(), + prev_num_openai_messages=prev_num_openai_messages, + ) + # Success! Continue with the response + except Exception: + # Retry also failed, re-raise the original error + logger.warning( + "Retry with reduced context also failed. " + "Re-raising original error." + ) + raise e + else: + # No non-system message to remove + logger.warning( + "No non-system message to remove. " + "Re-raising the error." + ) + raise + else: + raise + prev_num_openai_messages = len(openai_messages) iteration_count += 1 @@ -1877,6 +1936,9 @@ async def _astep_non_streaming_task( step_token_usage = self._create_token_usage_tracker() iteration_count: int = 0 prev_num_openai_messages: int = 0 + # Track if we've already retried due to token limit + token_limit_retry_attempted: bool = False + while True: if self.pause_event is not None and not self.pause_event.is_set(): await self.pause_event.wait() @@ -1887,16 +1949,82 @@ async def _astep_non_streaming_task( return self._step_terminate( e.args[1], tool_call_records, "max_tokens_exceeded" ) - response = await self._aget_model_response( - openai_messages, - num_tokens=num_tokens, - current_iteration=iteration_count, - response_format=response_format, - tool_schemas=[] - if disable_tools - else self._get_full_tool_schemas(), - prev_num_openai_messages=prev_num_openai_messages, - ) + # Get response from model backend with token limit error handling + try: + response = await self._aget_model_response( + openai_messages, + num_tokens=num_tokens, + current_iteration=iteration_count, + response_format=response_format, + tool_schemas=[] + if disable_tools + else self._get_full_tool_schemas(), + prev_num_openai_messages=prev_num_openai_messages, + ) + except (BadRequestError, Exception) as e: + # Check if this is a token limit error + error_message = str(e).lower() + is_token_limit_error = ( + 'context_length_exceeded' in error_message + or 'maximum context length' in error_message + or 'context length' in error_message + or 'context limit' in error_message + ) + + if is_token_limit_error and not token_limit_retry_attempted: + logger.warning( + "Token limit exceeded error detected. " + "Removing first non-system message and retrying once." + ) + token_limit_retry_attempted = True + + # Find and remove first non-system message + modified_messages = None + for i, msg in enumerate(openai_messages): + msg_role = msg.get('role', '') + # Skip system and developer messages + if msg_role not in {'system'}: + # Found first non-system message, create new list + modified_messages = ( + openai_messages[:i] + openai_messages[i + 1 :] + ) + logger.info( + "Removed first non-system message " + "and retrying." + ) + break + + if modified_messages is not None: + # Retry with modified messages + try: + response = await self._aget_model_response( + modified_messages, + num_tokens=num_tokens, + current_iteration=iteration_count, + response_format=response_format, + tool_schemas=[] + if disable_tools + else self._get_full_tool_schemas(), + prev_num_openai_messages=prev_num_openai_messages, + ) + # Success! Continue with the response + except Exception: + # Retry also failed, re-raise the original error + logger.warning( + "Retry with reduced context also failed. 
" + "Re-raising original error." + ) + raise e + else: + # No non-system message to remove + logger.warning( + "No non-system message to remove. " + "Re-raising the error." + ) + raise + else: + raise + prev_num_openai_messages = len(openai_messages) iteration_count += 1 diff --git a/pyproject.toml b/pyproject.toml index 32923e44ec..8ddf647a23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -299,6 +299,8 @@ eigent = [ "onnxruntime<=1.19.2", "slack-sdk>=3.27.2,<4", "google-api-python-client==2.166.0", + "google-auth-httplib2==0.2.0", + "google-auth-oauthlib==1.2.1", ] all = [ "numpy>=1.2,<=2.2", diff --git a/uv.lock b/uv.lock index b546bdf3e6..214b330228 100644 --- a/uv.lock +++ b/uv.lock @@ -888,6 +888,8 @@ eigent = [ { name = "exa-py" }, { name = "ffmpeg-python" }, { name = "google-api-python-client" }, + { name = "google-auth-httplib2" }, + { name = "google-auth-oauthlib" }, { name = "imageio", extra = ["pyav"] }, { name = "markitdown", extra = ["all"] }, { name = "mcp-server-fetch" }, @@ -1156,8 +1158,10 @@ requires-dist = [ { name = "google-api-python-client", marker = "extra == 'eigent'", specifier = "==2.166.0" }, { name = "google-api-python-client", marker = "extra == 'web-tools'", specifier = "==2.166.0" }, { name = "google-auth-httplib2", marker = "extra == 'all'", specifier = "==0.2.0" }, + { name = "google-auth-httplib2", marker = "extra == 'eigent'", specifier = "==0.2.0" }, { name = "google-auth-httplib2", marker = "extra == 'web-tools'", specifier = "==0.2.0" }, { name = "google-auth-oauthlib", marker = "extra == 'all'", specifier = "==1.2.1" }, + { name = "google-auth-oauthlib", marker = "extra == 'eigent'", specifier = "==1.2.1" }, { name = "google-auth-oauthlib", marker = "extra == 'web-tools'", specifier = "==1.2.1" }, { name = "google-cloud-aiplatform", marker = "extra == 'all'", specifier = ">=1.111.0" }, { name = "google-cloud-storage", marker = "extra == 'all'", specifier = ">=2.18.0,<3" }, From b1f32135eba7782575b337fd10c92f32d084c5ab Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 30 Sep 2025 22:08:51 +0800 Subject: [PATCH 02/27] Update chat_agent.py --- camel/agents/chat_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 5a98140d6d..aa90480189 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -919,7 +919,7 @@ def _write_single_record( return # TODO: Research more about how to set a more appropriate chunk size - chunk_size = int(effective_limit * 0.05) + chunk_size = int(effective_limit * 0.2) # Calculate number of chunks needed num_chunks = math.ceil(len(all_token_ids) / chunk_size) From 746a4f53f955d4a55314b996182bf1e12d9b272e Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 30 Sep 2025 23:28:17 +0800 Subject: [PATCH 03/27] Update oceanbase.py --- camel/storages/vectordb_storages/oceanbase.py | 1 - 1 file changed, 1 deletion(-) diff --git a/camel/storages/vectordb_storages/oceanbase.py b/camel/storages/vectordb_storages/oceanbase.py index 60ec1d0ef0..7cfe4b5c16 100644 --- a/camel/storages/vectordb_storages/oceanbase.py +++ b/camel/storages/vectordb_storages/oceanbase.py @@ -74,7 +74,6 @@ def __init__( ObVecClient, ) from pyobvector.client.index_param import ( - IndexParam, IndexParams, ) from pyobvector.schema import VECTOR From cc31148736c18b24afa4b167613e9565be154880 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Wed, 1 Oct 2025 11:01:18 +0800 Subject: [PATCH 04/27] update --- 
camel/storages/vectordb_storages/oceanbase.py | 9 ++--- .../vector_storages/test_oceanbase.py | 33 +++++++++++++++---- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/camel/storages/vectordb_storages/oceanbase.py b/camel/storages/vectordb_storages/oceanbase.py index 7cfe4b5c16..c250e87b0b 100644 --- a/camel/storages/vectordb_storages/oceanbase.py +++ b/camel/storages/vectordb_storages/oceanbase.py @@ -121,10 +121,11 @@ def __init__( ) # Get the first index parameter - first_index_param = next(iter(index_params)) - self._client.create_vidx_with_vec_index_param( - table_name=self.table_name, vidx_param=first_index_param - ) + first_index_param = next(iter(index_params), None) + if first_index_param is not None: + self._client.create_vidx_with_vec_index_param( + table_name=self.table_name, vidx_param=first_index_param + ) logger.info(f"Created table {self.table_name} with vector index") else: diff --git a/test/storages/vector_storages/test_oceanbase.py b/test/storages/vector_storages/test_oceanbase.py index de89175a0d..374c7ca072 100644 --- a/test/storages/vector_storages/test_oceanbase.py +++ b/test/storages/vector_storages/test_oceanbase.py @@ -108,8 +108,15 @@ def mock_ob_client(): def test_oceanbase_storage_initialization(mock_ob_client): with patch('pyobvector.schema.VECTOR'): - with patch('pyobvector.client.index_param.IndexParams'): - with patch('pyobvector.client.index_param.IndexParam'): + with patch('pyobvector.client.index_param.IndexParams') as MockIndexParams: + with patch('pyobvector.client.index_param.IndexParam') as MockIndexParam: + # Setup mock IndexParams to be iterable + mock_index_param_instance = MagicMock() + mock_index_params = MagicMock() + mock_index_params.add_index.return_value = None + mock_index_params.__iter__.return_value = iter([mock_index_param_instance]) + MockIndexParams.return_value = mock_index_params + # Test initialization with default parameters storage = OceanBaseStorage( vector_dim=64, @@ -142,9 +149,16 @@ def test_oceanbase_storage_initialization(mock_ob_client): def test_oceanbase_storage_operations(mock_ob_client): with patch('pyobvector.schema.VECTOR'): - with patch('pyobvector.client.index_param.IndexParams'): - with patch('pyobvector.client.index_param.IndexParam'): + with patch('pyobvector.client.index_param.IndexParams') as MockIndexParams: + with patch('pyobvector.client.index_param.IndexParam') as MockIndexParam: with patch('sqlalchemy.func'): + # Setup mock IndexParams to be iterable + mock_index_param_instance = MagicMock() + mock_index_params = MagicMock() + mock_index_params.add_index.return_value = None + mock_index_params.__iter__.return_value = iter([mock_index_param_instance]) + MockIndexParams.return_value = mock_index_params + # Setup mock for query results mock_result = MagicMock() mock_result._mapping = { @@ -189,8 +203,15 @@ def test_oceanbase_storage_operations(mock_ob_client): def test_distance_to_similarity_conversion(): with patch('pyobvector.client.ObVecClient'): with patch('pyobvector.schema.VECTOR'): - with patch('pyobvector.client.index_param.IndexParams'): - with patch('pyobvector.client.index_param.IndexParam'): + with patch('pyobvector.client.index_param.IndexParams') as MockIndexParams: + with patch('pyobvector.client.index_param.IndexParam') as MockIndexParam: + # Setup mock IndexParams to be iterable + mock_index_param_instance = MagicMock() + mock_index_params = MagicMock() + mock_index_params.add_index.return_value = None + mock_index_params.__iter__.return_value = 
iter([mock_index_param_instance]) + MockIndexParams.return_value = mock_index_params + # Test cosine distance conversion cosine_storage = OceanBaseStorage( vector_dim=4, From 0dc9c12dc50c8c64f3dab8e8a891b2ab9ad4accb Mon Sep 17 00:00:00 2001 From: Wendong-Fan Date: Sun, 5 Oct 2025 16:59:38 +0800 Subject: [PATCH 05/27] minor enhance --- camel/agents/chat_agent.py | 3 ++- test/storages/vector_storages/test_oceanbase.py | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index aa90480189..cce7933c0a 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -919,7 +919,8 @@ def _write_single_record( return # TODO: Research more about how to set a more appropriate chunk size - chunk_size = int(effective_limit * 0.2) + # Ensure chunk_size is at least 50 + chunk_size = max(50, int(effective_limit * 0.2)) # Calculate number of chunks needed num_chunks = math.ceil(len(all_token_ids) / chunk_size) diff --git a/test/storages/vector_storages/test_oceanbase.py b/test/storages/vector_storages/test_oceanbase.py index ed50f713f6..adea7b1527 100644 --- a/test/storages/vector_storages/test_oceanbase.py +++ b/test/storages/vector_storages/test_oceanbase.py @@ -167,13 +167,6 @@ def create_mock_index_params(): ): with patch('pyobvector.client.index_param.IndexParam'): with patch('sqlalchemy.func'): - # Setup mock IndexParams to be iterable - mock_index_param_instance = MagicMock() - mock_index_params = MagicMock() - mock_index_params.add_index.return_value = None - mock_index_params.__iter__.return_value = iter([mock_index_param_instance]) - MockIndexParams.return_value = mock_index_params - # Setup mock for query results mock_result = MagicMock() mock_result._mapping = { From 2df88da0a53698b5610a9bd39cc329c0e6e62da8 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Thu, 9 Oct 2025 16:00:35 +0800 Subject: [PATCH 06/27] Update chat_agent.py --- camel/agents/chat_agent.py | 232 +++++-------------------------------- 1 file changed, 31 insertions(+), 201 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index cce7933c0a..4cc11a77df 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -799,16 +799,6 @@ def update_memory( ) -> None: r"""Updates the agent memory with a new message. - If the single *message* exceeds the model's context window, it will - be **automatically split into multiple smaller chunks** before being - written into memory. This prevents later failures in - `ScoreBasedContextCreator` where an over-sized message cannot fit - into the available token budget at all. - - This slicing logic handles both regular text messages (in the - `content` field) and long tool call results (in the `result` field of - a `FunctionCallingMessage`). - Args: message (BaseMessage): The new message to add to the stored messages. @@ -818,150 +808,19 @@ def update_memory( (default: :obj:`None`) (default: obj:`None`) """ - import math import time - import uuid as _uuid - - # 1. Helper to write a record to memory - def _write_single_record( - message: BaseMessage, role: OpenAIBackendRole, timestamp: float - ): - self.memory.write_record( - MemoryRecord( - message=message, - role_at_backend=role, - timestamp=timestamp, - agent_id=self.agent_id, - ) - ) - - base_ts = ( - timestamp - if timestamp is not None - else time.time_ns() / 1_000_000_000 - ) - - # 2. 
Get token handling utilities, fallback if unavailable - try: - context_creator = self.memory.get_context_creator() - token_counter = context_creator.token_counter - token_limit = context_creator.token_limit - except AttributeError: - _write_single_record(message, role, base_ts) - return - # 3. Check if slicing is necessary - try: - current_tokens = token_counter.count_tokens_from_messages( - [message.to_openai_message(role)] + self.memory.write_record( + MemoryRecord( + message=message, + role_at_backend=role, + timestamp=timestamp + if timestamp is not None + else time.time_ns() / 1_000_000_000, # Nanosecond precision + agent_id=self.agent_id, ) - import warnings - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=EmptyMemoryWarning) - context_messages, _ = self.memory.get_context() - - # Calculate system prompt tokens - sys_prompt_tokens = 0 - for msg in context_messages: - if msg.get('role') in {'system'}: - sys_prompt_tokens += ( - token_counter.count_tokens_from_messages([msg]) - ) - else: - break # Stop after first non-system message - - # Effective limit = total limit - system prompt tokens - effective_limit = token_limit - sys_prompt_tokens - - # Only chunk if message exceeds effective limit - if current_tokens <= effective_limit: - _write_single_record(message, role, base_ts) - return - except Exception as e: - logger.warning( - f"Token calculation failed before chunking, " - f"writing message as-is. Error: {e}" - ) - _write_single_record(message, role, base_ts) - return - - # 4. Perform slicing - logger.warning( - f"Message with {current_tokens} tokens exceeds effective limit " - f"of {effective_limit}, Slicing into smaller chunks." ) - text_to_chunk: Optional[str] = None - is_function_result = False - - if isinstance(message, FunctionCallingMessage) and isinstance( - message.result, str - ): - text_to_chunk = message.result - is_function_result = True - elif isinstance(message.content, str): - text_to_chunk = message.content - - if not text_to_chunk or not text_to_chunk.strip(): - _write_single_record(message, role, base_ts) - return - # Encode the entire text to get a list of all token IDs - try: - all_token_ids = token_counter.encode(text_to_chunk) - except Exception as e: - logger.error(f"Failed to encode text for chunking: {e}") - _write_single_record(message, role, base_ts) # Fallback - return - - if not all_token_ids: - _write_single_record(message, role, base_ts) # Nothing to chunk - return - - # TODO: Research more about how to set a more appropriate chunk size - # Ensure chunk_size is at least 50 - chunk_size = max(50, int(effective_limit * 0.2)) - - # Calculate number of chunks needed - num_chunks = math.ceil(len(all_token_ids) / chunk_size) - group_id = str(_uuid.uuid4()) - - for i in range(num_chunks): - start_idx = i * chunk_size - end_idx = start_idx + chunk_size - chunk_token_ids = all_token_ids[start_idx:end_idx] - - chunk_body = token_counter.decode(chunk_token_ids) - - if is_function_result and isinstance( - message, FunctionCallingMessage - ): - new_msg: BaseMessage = FunctionCallingMessage( - role_name=message.role_name, - role_type=message.role_type, - meta_dict=message.meta_dict, - content=message.content, - func_name=message.func_name, - args=message.args, - result=chunk_body, - tool_call_id=message.tool_call_id, - ) - else: - new_msg = message.create_new_instance(chunk_body) - - meta = (new_msg.meta_dict or {}).copy() - meta.update( - { - "chunk_idx": i + 1, - "chunk_total": num_chunks, - "chunk_group_id": group_id, - } - ) - 
new_msg.meta_dict = meta - - # Increment timestamp slightly to maintain order - _write_single_record(new_msg, role, base_ts + i * 1e-6) - def load_memory(self, memory: AgentMemory) -> None: r"""Load the provided memory into the agent. @@ -1656,7 +1515,6 @@ def _step_impl( iteration_count: int = 0 prev_num_openai_messages: int = 0 # Track if we've already retried due to token limit - token_limit_retry_attempted: bool = False while True: if self.pause_event is not None and not self.pause_event.is_set(): @@ -1682,69 +1540,41 @@ def _step_impl( else self._get_full_tool_schemas(), prev_num_openai_messages=prev_num_openai_messages, ) - except (BadRequestError, Exception) as e: + except Exception as e: # Check if this is a token limit error error_message = str(e).lower() is_token_limit_error = ( 'context_length_exceeded' in error_message - or 'maximum context length' in error_message + or 'exceeded your current quota' in error_message + or 'tokens must be reduced' in error_message or 'context length' in error_message + or 'token count' in error_message or 'context limit' in error_message ) - if is_token_limit_error and not token_limit_retry_attempted: + if is_token_limit_error : logger.warning( "Token limit exceeded error detected. " - "Removing first non-system message and retrying once." + "Summarizing context." ) - token_limit_retry_attempted = True - - # Find and remove first non-system message - modified_messages = None - for i, msg in enumerate(openai_messages): - msg_role = msg.get('role', '') - # Skip system and developer messages - if msg_role not in {'system'}: - # Found first non-system message, create new list - modified_messages = ( - openai_messages[:i] + openai_messages[i + 1 :] - ) - logger.info( - "Removed first non-system message " - "and retrying." - ) - break - - if modified_messages is not None: - # Retry with modified messages - try: - response = self._get_model_response( - modified_messages, - num_tokens=num_tokens, - current_iteration=iteration_count, - response_format=response_format, - tool_schemas=[] - if disable_tools - else self._get_full_tool_schemas(), - prev_num_openai_messages=prev_num_openai_messages, - ) - # Success! Continue with the response - except Exception: - # Retry also failed, re-raise the original error - logger.warning( - "Retry with reduced context also failed. " - "Re-raising original error." - ) - raise e + full_context_file_path = self.working_directory / "full_context.json" + self.save_memory(path=str(full_context_file_path)) + self.memory.clear() + summary= self.summarize() + summary_messages = ( + f"[Context Summary]\n\n" + + summary.get('summary', '') + + "\n\n[Full Context file path is]\n\n" + + str(full_context_file_path) + ) + camel_workdir = os.environ.get("CAMEL_WORKDIR") + if camel_workdir: + self.working_directory = Path(camel_workdir) / "context_files" else: - # No non-system message to remove - logger.warning( - "No non-system message to remove. " - "Re-raising the error." 
- ) - raise - else: - raise + self.working_directory = Path("context_files") + + self.update_memory(summary_messages, OpenAIBackendRole.ASSISTANT) + self._step_impl(input_message,response_format) prev_num_openai_messages = len(openai_messages) iteration_count += 1 From 8c76dc51e327b84c41683f48c1f50c5068e841a0 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Thu, 9 Oct 2025 16:06:48 +0800 Subject: [PATCH 07/27] Update chat_agent.py --- camel/agents/chat_agent.py | 210 ++++--------------------------------- 1 file changed, 21 insertions(+), 189 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index f50fd09c41..ac9210ab4a 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -845,140 +845,6 @@ def update_memory( ) ) - base_ts = ( - timestamp - if timestamp is not None - else time.time_ns() / 1_000_000_000 - ) - - # 2. Get token handling utilities, fallback if unavailable - try: - context_creator = self.memory.get_context_creator() - token_counter = context_creator.token_counter - token_limit = context_creator.token_limit - except AttributeError: - _write_single_record(message, role, base_ts) - return - - # 3. Check if slicing is necessary - try: - current_tokens = token_counter.count_tokens_from_messages( - [message.to_openai_message(role)] - ) - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=EmptyMemoryWarning) - _, ctx_tokens = self.memory.get_context() - - remaining_budget = max(0, token_limit - ctx_tokens) - - if current_tokens <= remaining_budget: - _write_single_record(message, role, base_ts) - return - except Exception as e: - logger.warning( - f"Token calculation failed before chunking, " - f"writing message as-is. Error: {e}" - ) - _write_single_record(message, role, base_ts) - return - - # 4. Perform slicing - logger.warning( - f"Message with {current_tokens} tokens exceeds remaining budget " - f"of {remaining_budget}. Slicing into smaller chunks." - ) - - text_to_chunk: Optional[str] = None - is_function_result = False - - if isinstance(message, FunctionCallingMessage) and isinstance( - message.result, str - ): - text_to_chunk = message.result - is_function_result = True - elif isinstance(message.content, str): - text_to_chunk = message.content - - if not text_to_chunk or not text_to_chunk.strip(): - _write_single_record(message, role, base_ts) - return - # Encode the entire text to get a list of all token IDs - try: - all_token_ids = token_counter.encode(text_to_chunk) - except Exception as e: - logger.error(f"Failed to encode text for chunking: {e}") - _write_single_record(message, role, base_ts) # Fallback - return - - if not all_token_ids: - _write_single_record(message, role, base_ts) # Nothing to chunk - return - - # 1. Base chunk size: one-tenth of the smaller of (a) total token - # limit and (b) current remaining budget. This prevents us from - # creating chunks that are guaranteed to overflow the - # immediate context window. - base_chunk_size = max(1, remaining_budget) // 10 - - # 2. Each chunk gets a textual prefix such as: - # "[chunk 3/12 of a long message]\n" - # The prefix itself consumes tokens, so if we do not subtract its - # length the *total* tokens of the outgoing message (prefix + body) - # can exceed the intended bound. We estimate the prefix length - # with a representative example that is safely long enough for the - # vast majority of cases (three-digit indices). 
- sample_prefix = "[chunk 1/1000 of a long message]\n" - prefix_token_len = len(token_counter.encode(sample_prefix)) - - # 3. The real capacity for the message body is therefore the base - # chunk size minus the prefix length. Fallback to at least one - # token to avoid zero or negative sizes. - chunk_body_limit = max(1, base_chunk_size - prefix_token_len) - - # 4. Calculate how many chunks we will need with this body size. - num_chunks = math.ceil(len(all_token_ids) / chunk_body_limit) - group_id = str(uuid.uuid4()) - - for i in range(num_chunks): - start_idx = i * chunk_body_limit - end_idx = start_idx + chunk_body_limit - chunk_token_ids = all_token_ids[start_idx:end_idx] - - chunk_body = token_counter.decode(chunk_token_ids) - - prefix = f"[chunk {i + 1}/{num_chunks} of a long message]\n" - new_body = prefix + chunk_body - - if is_function_result and isinstance( - message, FunctionCallingMessage - ): - new_msg: BaseMessage = FunctionCallingMessage( - role_name=message.role_name, - role_type=message.role_type, - meta_dict=message.meta_dict, - content=message.content, - func_name=message.func_name, - args=message.args, - result=new_body, - tool_call_id=message.tool_call_id, - ) - else: - new_msg = message.create_new_instance(new_body) - - meta = (new_msg.meta_dict or {}).copy() - meta.update( - { - "chunk_idx": i + 1, - "chunk_total": num_chunks, - "chunk_group_id": group_id, - } - ) - new_msg.meta_dict = meta - - # Increment timestamp slightly to maintain order - _write_single_record(new_msg, role, base_ts + i * 1e-6) - def load_memory(self, memory: AgentMemory) -> None: r"""Load the provided memory into the agent. @@ -1756,7 +1622,6 @@ def _step_impl( step_token_usage = self._create_token_usage_tracker() iteration_count: int = 0 prev_num_openai_messages: int = 0 - # Track if we've already retried due to token limit while True: if self.pause_event is not None and not self.pause_event.is_set(): @@ -2017,9 +1882,7 @@ async def _astep_non_streaming_task( step_token_usage = self._create_token_usage_tracker() iteration_count: int = 0 prev_num_openai_messages: int = 0 - # Track if we've already retried due to token limit - token_limit_retry_attempted: bool = False - + while True: if self.pause_event is not None and not self.pause_event.is_set(): if isinstance(self.pause_event, asyncio.Event): @@ -2047,69 +1910,38 @@ async def _astep_non_streaming_task( else self._get_full_tool_schemas(), prev_num_openai_messages=prev_num_openai_messages, ) - except (BadRequestError, Exception) as e: + except Exception as e: # Check if this is a token limit error error_message = str(e).lower() is_token_limit_error = ( 'context_length_exceeded' in error_message - or 'maximum context length' in error_message + or 'exceeded your current quota' in error_message + or 'tokens must be reduced' in error_message or 'context length' in error_message + or 'token count exceeds' in error_message or 'context limit' in error_message ) - if is_token_limit_error and not token_limit_retry_attempted: + + if is_token_limit_error: logger.warning( "Token limit exceeded error detected. " - "Removing first non-system message and retrying once." + "Summarizing context." 
) - token_limit_retry_attempted = True - - # Find and remove first non-system message - modified_messages = None - for i, msg in enumerate(openai_messages): - msg_role = msg.get('role', '') - # Skip system and developer messages - if msg_role not in {'system'}: - # Found first non-system message, create new list - modified_messages = ( - openai_messages[:i] + openai_messages[i + 1 :] - ) - logger.info( - "Removed first non-system message " - "and retrying." - ) - break - if modified_messages is not None: - # Retry with modified messages - try: - response = await self._aget_model_response( - modified_messages, - num_tokens=num_tokens, - current_iteration=iteration_count, - response_format=response_format, - tool_schemas=[] - if disable_tools - else self._get_full_tool_schemas(), - prev_num_openai_messages=prev_num_openai_messages, - ) - # Success! Continue with the response - except Exception: - # Retry also failed, re-raise the original error - logger.warning( - "Retry with reduced context also failed. " - "Re-raising original error." - ) - raise e - else: - # No non-system message to remove - logger.warning( - "No non-system message to remove. " - "Re-raising the error." - ) - raise - else: - raise + full_context_file_path = self.working_directory / "full_context.json" + self.save_memory(path=str(full_context_file_path)) + self.memory.clear() + summary= self.summarize() + summary_messages = ( + f"[Context Summary]\n\n" + + summary.get('summary', '') + + "\n\n[Full Context file path is]\n\n" + + str(full_context_file_path) + ) + + self.update_memory(summary_messages, OpenAIBackendRole.ASSISTANT) + self._astep_non_streaming_task(input_message, response_format) prev_num_openai_messages = len(openai_messages) iteration_count += 1 From cbdb4efcd499258916ab60ebc50f1386d8246785 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Thu, 9 Oct 2025 23:35:44 +0800 Subject: [PATCH 08/27] Update chat_agent.py --- camel/agents/chat_agent.py | 53 +++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index ac9210ab4a..83e7087279 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -20,7 +20,6 @@ import hashlib import inspect import json -import math import os import random import re @@ -29,7 +28,6 @@ import threading import time import uuid -import warnings from datetime import datetime from pathlib import Path from typing import ( @@ -50,7 +48,6 @@ from openai import ( AsyncStream, - BadRequestError, RateLimitError, Stream, ) @@ -72,7 +69,6 @@ MemoryRecord, ScoreBasedContextCreator, ) -from camel.memories.blocks.chat_history_block import EmptyMemoryWarning from camel.messages import ( BaseMessage, FunctionCallingMessage, @@ -1664,29 +1660,35 @@ def _step_impl( or 'context limit' in error_message ) - if is_token_limit_error : + if is_token_limit_error: logger.warning( "Token limit exceeded error detected. " "Summarizing context." 
) - full_context_file_path = self.working_directory / "full_context.json" + full_context_file_path = ( + self.working_directory / "full_context.json" + ) self.save_memory(path=str(full_context_file_path)) self.memory.clear() - summary= self.summarize() + summary = self.summarize() summary_messages = ( - f"[Context Summary]\n\n" + - summary.get('summary', '') + - "\n\n[Full Context file path is]\n\n" + "[Context Summary]\n\n" + + summary.get('summary', '') + + "\n\n[Full Context file path is]\n\n" + str(full_context_file_path) ) camel_workdir = os.environ.get("CAMEL_WORKDIR") if camel_workdir: - self.working_directory = Path(camel_workdir) / "context_files" + self.working_directory = ( + Path(camel_workdir) / "context_files" + ) else: self.working_directory = Path("context_files") - - self.update_memory(summary_messages, OpenAIBackendRole.ASSISTANT) - self._step_impl(input_message,response_format) + + self.update_memory( + summary_messages, OpenAIBackendRole.ASSISTANT + ) + self._step_impl(input_message, response_format) prev_num_openai_messages = len(openai_messages) iteration_count += 1 @@ -1882,7 +1884,7 @@ async def _astep_non_streaming_task( step_token_usage = self._create_token_usage_tracker() iteration_count: int = 0 prev_num_openai_messages: int = 0 - + while True: if self.pause_event is not None and not self.pause_event.is_set(): if isinstance(self.pause_event, asyncio.Event): @@ -1922,26 +1924,31 @@ async def _astep_non_streaming_task( or 'context limit' in error_message ) - if is_token_limit_error: logger.warning( "Token limit exceeded error detected. " "Summarizing context." ) - full_context_file_path = self.working_directory / "full_context.json" + full_context_file_path = ( + self.working_directory / "full_context.json" + ) self.save_memory(path=str(full_context_file_path)) self.memory.clear() - summary= self.summarize() + summary = self.summarize() summary_messages = ( - f"[Context Summary]\n\n" + - summary.get('summary', '') + - "\n\n[Full Context file path is]\n\n" + "[Context Summary]\n\n" + + summary.get('summary', '') + + "\n\n[Full Context file path is]\n\n" + str(full_context_file_path) ) - self.update_memory(summary_messages, OpenAIBackendRole.ASSISTANT) - self._astep_non_streaming_task(input_message, response_format) + self.update_memory( + summary_messages, OpenAIBackendRole.ASSISTANT + ) + self._astep_non_streaming_task( + input_message, response_format + ) prev_num_openai_messages = len(openai_messages) iteration_count += 1 From b69fd1605885550c19a816a1f3344eff8bdde295 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 14 Oct 2025 16:23:04 +0800 Subject: [PATCH 09/27] update --- camel/agents/chat_agent.py | 369 ++++++++++++++-- camel/memories/agent_memories.py | 14 + camel/memories/base.py | 12 + camel/memories/blocks/chat_history_block.py | 51 +++ .../memories/context_creators/score_based.py | 409 ++---------------- test/memories/test_chat_history_memory.py | 66 +++ 6 files changed, 491 insertions(+), 430 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 83e7087279..e261be55ce 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -66,6 +66,7 @@ from camel.memories import ( AgentMemory, ChatHistoryMemory, + ContextRecord, MemoryRecord, ScoreBasedContextCreator, ) @@ -100,6 +101,19 @@ from camel.utils.commons import dependencies_required from camel.utils.context_utils import ContextUtility +TOKEN_LIMIT_ERROR_MARKERS = ( + "context_length_exceeded", + "prompt is too long", + "exceeded 
your current quota", + "tokens must be reduced", + "context length", + "token count", + "context limit", +) + +SUMMARY_MAX_DEPTH = 3 +SUMMARY_PROGRESS_RECORD_THRESHOLD = 2 + if TYPE_CHECKING: from camel.terminators import ResponseTerminator @@ -352,9 +366,8 @@ class ChatAgent(BaseAgent): message_window_size (int, optional): The maximum number of previous messages to include in the context window. If `None`, no windowing is performed. (default: :obj:`None`) - token_limit (int, optional): The maximum number of tokens in a context. - The context will be automatically pruned to fulfill the limitation. - If `None`, it will be set according to the backend model. + token_limit (int, optional): Deprecated. Configure context limits via + the model configuration instead. Passing a value raises an error. (default: :obj:`None`) output_language (str, optional): The language to be output by the agent. (default: :obj:`None`) @@ -471,10 +484,17 @@ def __init__( # Assign unique ID self.agent_id = agent_id if agent_id else str(uuid.uuid4()) + if token_limit is not None: + logger.warning( + "`token_limit` parameter is deprecated and will be ignored. " + "Configure the model's token limit in the backend settings " + "instead." + ) + # Set up memory context_creator = ScoreBasedContextCreator( self.model_backend.token_counter, - token_limit or self.model_backend.token_limit, + self.model_backend.token_limit, ) self._memory: AgentMemory = memory or ChatHistoryMemory( @@ -501,6 +521,8 @@ def __init__( ) self.init_messages() + self._reset_summary_state() + # Set up role name and role type self.role_name: str = ( getattr(self.system_message, "role_name", None) or "assistant" @@ -548,6 +570,9 @@ def __init__( self._context_utility: Optional[ContextUtility] = None self._context_summary_agent: Optional["ChatAgent"] = None self.stream_accumulate = stream_accumulate + self._last_tool_call_record: Optional[ToolCallingRecord] = None + self._last_tool_call_signature: Optional[str] = None + self._last_token_limit_tool_signature: Optional[str] = None def reset(self): r"""Resets the :obj:`ChatAgent` to its initial state.""" @@ -759,6 +784,137 @@ def _get_full_tool_schemas(self) -> List[Dict[str, Any]]: for func_tool in self._internal_tools.values() ] + @staticmethod + def _is_token_limit_error(error: Exception) -> bool: + r"""Return True when the exception message indicates a token limit.""" + error_message = str(error).lower() + return any( + marker in error_message for marker in TOKEN_LIMIT_ERROR_MARKERS + ) + + @staticmethod + def _is_tool_related_record(record: MemoryRecord) -> bool: + r"""Determine whether the given memory record + belongs to a tool call.""" + if record.role_at_backend in { + OpenAIBackendRole.TOOL, + OpenAIBackendRole.FUNCTION, + }: + return True + + if ( + record.role_at_backend == OpenAIBackendRole.ASSISTANT + and isinstance(record.message, FunctionCallingMessage) + ): + return True + + return False + + @staticmethod + def _serialize_tool_args(args: Dict[str, Any]) -> str: + try: + return json.dumps(args, ensure_ascii=False, sort_keys=True) + except TypeError: + return str(args) + + @classmethod + def _build_tool_signature( + cls, func_name: str, args: Dict[str, Any] + ) -> str: + args_repr = cls._serialize_tool_args(args) + return f"{func_name}:{args_repr}" + + def _describe_tool_call( + self, record: Optional[ToolCallingRecord] + ) -> Optional[str]: + if record is None: + return None + args_repr = self._serialize_tool_args(record.args) + return f"Tool `{record.tool_name}` invoked with arguments 
{args_repr}." + + def _format_tool_limit_notice(self) -> Optional[str]: + description = self._describe_tool_call(self._last_tool_call_record) + if description is None: + return None + return "[Tool Call Causing Token Limit]\n" f"{description}" + + def _reset_summary_state(self) -> None: + self._summary_state = { + "depth": 0, + "last_summary_log_length": 0, + "last_summary_user_signature": None, + "record_count_since_summary": 0, + "user_count_since_summary": 0, + } + + def _register_record_addition(self, role: OpenAIBackendRole) -> None: + state = getattr(self, "_summary_state", None) + if not state: + return + + state["record_count_since_summary"] = ( + state.get("record_count_since_summary", 0) + 1 + ) + + if role == OpenAIBackendRole.USER: + state["user_count_since_summary"] = ( + state.get("user_count_since_summary", 0) + 1 + ) + + def _can_attempt_summary(self) -> Tuple[bool, Optional[str]]: + state = getattr(self, "_summary_state", None) + if not state: + return True, None + + depth = state.get("depth", 0) + + if depth >= SUMMARY_MAX_DEPTH: + return ( + False, + "Maximum number of summaries reached for this session.", + ) + + if depth == 0: + return True, None + + if ( + state.get("record_count_since_summary", 0) + >= SUMMARY_PROGRESS_RECORD_THRESHOLD + ): + return True, None + + return ( + False, + "Context was summarized recently but the conversation did not add " + "enough new exchanges to summarize again.", + ) + + def _on_summary(self, records_before_summary: List[ContextRecord]) -> None: + state = getattr(self, "_summary_state", None) + if state is None: + return + + state["depth"] = state.get("depth", 0) + 1 + state["last_summary_log_length"] = len(records_before_summary) + state["record_count_since_summary"] = 0 + state["user_count_since_summary"] = 0 + state["last_summary_user_signature"] = ( + self._extract_last_user_signature(records_before_summary) + ) + + @staticmethod + def _extract_last_user_signature( + records: List[ContextRecord], + ) -> Optional[str]: + for context_record in reversed(records): + memory_record = context_record.memory_record + if memory_record.role_at_backend == OpenAIBackendRole.USER: + content = getattr(memory_record.message, "content", None) + if isinstance(content, str): + return content + return None + return None + def _get_external_tool_names(self) -> Set[str]: r"""Returns a set of external tool names.""" return set(self._external_tool_schemas.keys()) @@ -840,6 +996,7 @@ def update_memory( agent_id=self.agent_id, ) ) + self._register_record_addition(role) def load_memory(self, memory: AgentMemory) -> None: r"""Load the provided memory into the agent. @@ -1197,6 +1354,7 @@ def init_messages(self) -> None: r"""Initializes the stored messages list with the current system message. """ + self._reset_summary_state() self.memory.clear() # avoid UserWarning: The `ChatHistoryMemory` is empty. 
if self.system_message is not None: @@ -1648,47 +1806,101 @@ def _step_impl( else self._get_full_tool_schemas(), prev_num_openai_messages=prev_num_openai_messages, ) - except Exception as e: - # Check if this is a token limit error - error_message = str(e).lower() - is_token_limit_error = ( - 'context_length_exceeded' in error_message - or 'exceeded your current quota' in error_message - or 'tokens must be reduced' in error_message - or 'context length' in error_message - or 'token count' in error_message - or 'context limit' in error_message - ) + except Exception as exc: + logger.exception("Model error: %s", exc) + + if self._is_token_limit_error(exc): + tool_signature = self._last_tool_call_signature + if ( + tool_signature is not None + and tool_signature + == self._last_token_limit_tool_signature + ): + description = self._describe_tool_call( + self._last_tool_call_record + ) + repeated_msg = ( + "Context exceeded again by the same tool call." + ) + if description: + repeated_msg += f" {description}" + raise RuntimeError(repeated_msg) from exc + + allowed, reason = self._can_attempt_summary() + if not allowed: + error_message = ( + "Context limit exceeded and further summarization " + "is not possible." + ) + if reason: + error_message += f" {reason}" + raise RuntimeError(error_message) from exc - if is_token_limit_error: logger.warning( "Token limit exceeded error detected. " "Summarizing context." ) - full_context_file_path = ( - self.working_directory / "full_context.json" + camel_workdir = os.environ.get("CAMEL_WORKDIR") + context_dir = ( + Path(camel_workdir) / "context_files" + if camel_workdir + else Path("context_files") ) + context_dir.mkdir(parents=True, exist_ok=True) + full_context_file_path = context_dir / "full_context.json" + self.save_memory(path=str(full_context_file_path)) - self.memory.clear() + + recent_records: List[ContextRecord] + try: + recent_records = self.memory.retrieve() + except Exception: # pragma: no cover - defensive guard + recent_records = [] + + pop_count = 1 + if recent_records: + last_record = recent_records[-1].memory_record + if self._is_tool_related_record(last_record): + pop_count = 2 + self.memory.pop_records(pop_count) + summary = self.summarize() + if isinstance(input_message, BaseMessage): + input_message = input_message.content + + tool_notice = self._format_tool_limit_notice() summary_messages = ( + "Due to token limit being exceeded in this request, " + "the conversation has been summarized." + "the content after [Input Message] is the summary " + "of [Input Message]" + + "" + + str(input_message) + + "" + + "\n\n" "[Context Summary]\n\n" - + summary.get('summary', '') + + summary.get("summary", "") + "\n\n[Full Context file path is]\n\n" + str(full_context_file_path) + + "\n\nThe following is the user's original input. " + "Please continue based on existing information." 
+ ) + if tool_notice: + summary_messages += "\n\n" + tool_notice + self.memory.clear() + summary_messages = BaseMessage.make_assistant_message( + role_name="Assistant", content=summary_messages ) - camel_workdir = os.environ.get("CAMEL_WORKDIR") - if camel_workdir: - self.working_directory = ( - Path(camel_workdir) / "context_files" - ) - else: - self.working_directory = Path("context_files") - self.update_memory( summary_messages, OpenAIBackendRole.ASSISTANT ) - self._step_impl(input_message, response_format) + self._on_summary(recent_records) + + self._last_token_limit_tool_signature = tool_signature + + return self._step_impl(input_message, response_format) + + raise prev_num_openai_messages = len(openai_messages) iteration_count += 1 @@ -1912,44 +2124,104 @@ async def _astep_non_streaming_task( else self._get_full_tool_schemas(), prev_num_openai_messages=prev_num_openai_messages, ) - except Exception as e: - # Check if this is a token limit error - error_message = str(e).lower() - is_token_limit_error = ( - 'context_length_exceeded' in error_message - or 'exceeded your current quota' in error_message - or 'tokens must be reduced' in error_message - or 'context length' in error_message - or 'token count exceeds' in error_message - or 'context limit' in error_message - ) + except Exception as exc: + if self._is_token_limit_error(exc): + tool_signature = self._last_tool_call_signature + if ( + tool_signature is not None + and tool_signature + == self._last_token_limit_tool_signature + ): + description = self._describe_tool_call( + self._last_tool_call_record + ) + repeated_msg = ( + "Context exceeded again by the same tool call." + ) + if description: + repeated_msg += f" {description}" + raise RuntimeError(repeated_msg) from exc + + allowed, reason = self._can_attempt_summary() + if not allowed: + error_message = ( + "Context limit exceeded and further summarization " + "is not possible." + ) + if reason: + error_message += f" {reason}" + raise RuntimeError(error_message) from exc - if is_token_limit_error: logger.warning( "Token limit exceeded error detected. " "Summarizing context." ) - full_context_file_path = ( - self.working_directory / "full_context.json" + camel_workdir = os.environ.get("CAMEL_WORKDIR") + context_dir = ( + Path(camel_workdir) / "context_files" + if camel_workdir + else Path("context_files") ) + context_dir.mkdir(parents=True, exist_ok=True) + full_context_file_path = context_dir / "full_context.json" + self.save_memory(path=str(full_context_file_path)) - self.memory.clear() + + recent_records: List[ContextRecord] + try: + recent_records = self.memory.retrieve() + except Exception: # pragma: no cover - defensive guard + recent_records = [] + + pop_count = 1 + if recent_records: + last_record = recent_records[-1].memory_record + if self._is_tool_related_record(last_record): + pop_count = 2 + self.memory.pop_records(pop_count) + summary = self.summarize() + if isinstance(input_message, BaseMessage): + input_message = input_message.content + + tool_notice = self._format_tool_limit_notice() summary_messages = ( + "Due to token limit being exceeded in this request, " + "the conversation has been summarized." + "the content after [Input Message] is the summary" + + "" + + str(input_message) + + "" + + "\n\n" "[Context Summary]\n\n" - + summary.get('summary', '') + + summary.get("summary", "") + "\n\n[Full Context file path is]\n\n" + str(full_context_file_path) + + "\n\nThe following is the user's original input. " + "Please continue based on existing information." 
) + if tool_notice: + summary_messages += "\n\n" + tool_notice + + self.memory.clear() + summary_messages = BaseMessage.make_assistant_message( + role_name="Assistant", content=summary_messages + ) self.update_memory( summary_messages, OpenAIBackendRole.ASSISTANT ) - self._astep_non_streaming_task( + self._on_summary(recent_records) + + self._last_token_limit_tool_signature = tool_signature + + return await self._astep_non_streaming_task( input_message, response_format ) + raise + prev_num_openai_messages = len(openai_messages) iteration_count += 1 @@ -2693,6 +2965,11 @@ def _record_tool_calling( tool_call_id=tool_call_id, ) + self._last_tool_call_record = tool_record + self._last_tool_call_signature = self._build_tool_signature( + func_name, args + ) + return tool_record def _stream( diff --git a/camel/memories/agent_memories.py b/camel/memories/agent_memories.py index 01abee68a1..a85a3f7b44 100644 --- a/camel/memories/agent_memories.py +++ b/camel/memories/agent_memories.py @@ -129,6 +129,10 @@ def clean_tool_calls(self) -> None: # Save the modified records back to storage self._chat_history_block.storage.save(record_dicts) + def pop_records(self, count: int) -> List[MemoryRecord]: + r"""Removes the most recent records from chat history memory.""" + return self._chat_history_block.pop_records(count) + class VectorDBMemory(AgentMemory): r"""An agent memory wrapper of :obj:`VectorDBBlock`. This memory queries @@ -193,6 +197,12 @@ def clear(self) -> None: r"""Removes all records from the vector database memory.""" self._vectordb_block.clear() + def pop_records(self, count: int) -> List[MemoryRecord]: + r"""Rolling back is unsupported for vector database memory.""" + raise NotImplementedError( + "VectorDBMemory does not support removing historical records." + ) + class LongtermAgentMemory(AgentMemory): r"""An implementation of the :obj:`AgentMemory` abstract base class for @@ -277,3 +287,7 @@ def clear(self) -> None: r"""Removes all records from the memory.""" self.chat_history_block.clear() self.vector_db_block.clear() + + def pop_records(self, count: int) -> List[MemoryRecord]: + r"""Removes recent chat history records while leaving vector memory.""" + return self.chat_history_block.pop_records(count) diff --git a/camel/memories/base.py b/camel/memories/base.py index f9d4a0ad83..bedcc1a080 100644 --- a/camel/memories/base.py +++ b/camel/memories/base.py @@ -45,6 +45,18 @@ def write_record(self, record: MemoryRecord) -> None: """ self.write_records([record]) + def pop_records(self, count: int) -> List[MemoryRecord]: + r"""Removes records from the memory and returns the removed records. + + Args: + count (int): Number of records to remove. + + Returns: + List[MemoryRecord]: The records that were removed from the memory + in their original order. + """ + raise NotImplementedError + @abstractmethod def clear(self) -> None: r"""Clears all messages from the memory.""" diff --git a/camel/memories/blocks/chat_history_block.py b/camel/memories/blocks/chat_history_block.py index 502d1dbb63..0a303d1894 100644 --- a/camel/memories/blocks/chat_history_block.py +++ b/camel/memories/blocks/chat_history_block.py @@ -182,3 +182,54 @@ def write_records(self, records: List[MemoryRecord]) -> None: def clear(self) -> None: r"""Clears all chat messages from the memory.""" self.storage.clear() + + def pop_records(self, count: int) -> List[MemoryRecord]: + r"""Removes the most recent records from the memory. + + Args: + count (int): Number of records to remove from the end of the + conversation history. 
A value of 0 results in no changes. + + Returns: + List[MemoryRecord]: The removed records in chronological order. + """ + if not isinstance(count, int): + raise TypeError("`count` must be an integer.") + if count < 0: + raise ValueError("`count` must be non-negative.") + if count == 0: + return [] + + record_dicts = self.storage.load() + if not record_dicts: + return [] + + # Preserve initial system/developer instruction if present. + protected_prefix = ( + 1 + if ( + record_dicts + and record_dicts[0]['role_at_backend'] + in { + OpenAIBackendRole.SYSTEM.value, + OpenAIBackendRole.DEVELOPER.value, + } + ) + else 0 + ) + + removable_count = max(len(record_dicts) - protected_prefix, 0) + if removable_count == 0: + return [] + + pop_count = min(count, removable_count) + split_index = len(record_dicts) - pop_count + + popped_dicts = record_dicts[split_index:] + remaining_dicts = record_dicts[:split_index] + + self.storage.clear() + if remaining_dicts: + self.storage.save(remaining_dicts) + + return [MemoryRecord.from_dict(record) for record in popped_dicts] diff --git a/camel/memories/context_creators/score_based.py b/camel/memories/context_creators/score_based.py index 6d2b9ea349..6733a38f8e 100644 --- a/camel/memories/context_creators/score_based.py +++ b/camel/memories/context_creators/score_based.py @@ -11,41 +11,24 @@ # See the License for the specific language governing permissions and # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -from collections import defaultdict -from typing import Dict, List, Optional, Tuple -from pydantic import BaseModel +from typing import List, Optional, Tuple -from camel.logger import get_logger from camel.memories.base import BaseContextCreator from camel.memories.records import ContextRecord -from camel.messages import FunctionCallingMessage, OpenAIMessage +from camel.messages import OpenAIMessage from camel.types.enums import OpenAIBackendRole from camel.utils import BaseTokenCounter -logger = get_logger(__name__) - - -class _ContextUnit(BaseModel): - idx: int - record: ContextRecord - num_tokens: int - class ScoreBasedContextCreator(BaseContextCreator): - r"""A default implementation of context creation strategy, which inherits - from :obj:`BaseContextCreator`. - - This class provides a strategy to generate a conversational context from - a list of chat history records while ensuring the total token count of - the context does not exceed a specified limit. It prunes messages based - on their score if the total token count exceeds the limit. + r"""A context creation strategy that orders records chronologically. Args: - token_counter (BaseTokenCounter): An instance responsible for counting - tokens in a message. - token_limit (int): The maximum number of tokens allowed in the - generated context. + token_counter (BaseTokenCounter): Token counter instance used to + compute the combined token count of the returned messages. + token_limit (int): Retained for API compatibility. No longer used to + filter records. """ def __init__( @@ -66,376 +49,34 @@ def create_context( self, records: List[ContextRecord], ) -> Tuple[List[OpenAIMessage], int]: - r"""Constructs conversation context from chat history while respecting - token limits. - - Key strategies: - 1. System message is always prioritized and preserved - 2. Truncation removes low-score messages first - 3. Final output maintains chronological order and in history memory, - the score of each message decreases according to keep_rate. 
The - newer the message, the higher the score. - 4. Tool calls and their responses are kept together to maintain - API compatibility - - Args: - records (List[ContextRecord]): List of context records with scores - and timestamps. - - Returns: - Tuple[List[OpenAIMessage], int]: - - Ordered list of OpenAI messages - - Total token count of the final context - - Raises: - RuntimeError: If system message alone exceeds token limit - """ - # ====================== - # 1. System Message Handling - # ====================== - system_unit, regular_units = self._extract_system_message(records) - system_tokens = system_unit.num_tokens if system_unit else 0 + """Returns messages sorted by timestamp and their total token count.""" - # Check early if system message alone exceeds token limit - if system_tokens > self.token_limit: - raise RuntimeError( - f"System message alone exceeds token limit" - f": {system_tokens} > {self.token_limit}", - system_tokens, - ) + system_record: Optional[ContextRecord] = None + remaining_records: List[ContextRecord] = [] - # ====================== - # 2. Deduplication & Initial Processing - # ====================== - seen_uuids = set() - if system_unit: - seen_uuids.add(system_unit.record.memory_record.uuid) - - # Process non-system messages with deduplication - for idx, record in enumerate(records): + for record in records: if ( - record.memory_record.role_at_backend + system_record is None + and record.memory_record.role_at_backend == OpenAIBackendRole.SYSTEM ): + system_record = record continue - if record.memory_record.uuid in seen_uuids: - continue - seen_uuids.add(record.memory_record.uuid) - - token_count = self.token_counter.count_tokens_from_messages( - [record.memory_record.to_openai_message()] - ) - regular_units.append( - _ContextUnit( - idx=idx, - record=record, - num_tokens=token_count, - ) - ) - - # ====================== - # 3. Tool Call Relationship Mapping - # ====================== - tool_call_groups = self._group_tool_calls_and_responses(regular_units) - - # ====================== - # 4. Token Calculation - # ====================== - total_tokens = system_tokens + sum(u.num_tokens for u in regular_units) - - # ====================== - # 5. Early Return if Within Limit - # ====================== - if total_tokens <= self.token_limit: - sorted_units = sorted( - regular_units, key=self._conversation_sort_key - ) - return self._assemble_output(sorted_units, system_unit) - - # ====================== - # 6. Truncation Logic with Tool Call Awareness - # ====================== - remaining_units = self._truncate_with_tool_call_awareness( - regular_units, tool_call_groups, system_tokens - ) - - # Log only after truncation is actually performed so that both - # the original and the final token counts are visible. - tokens_after = system_tokens + sum( - u.num_tokens for u in remaining_units - ) - logger.warning( - "Context truncation performed: " - f"before={total_tokens}, after={tokens_after}, " - f"limit={self.token_limit}" - ) - - # ====================== - # 7. 
Output Assembly - # ====================== - - # In case system message is the only message in memory when sorted - # units are empty, raise an error - if system_unit and len(remaining_units) == 0 and len(records) > 1: - raise RuntimeError( - "System message and current message exceeds token limit ", - total_tokens, - ) - - # Sort remaining units chronologically - final_units = sorted(remaining_units, key=self._conversation_sort_key) - return self._assemble_output(final_units, system_unit) - - def _group_tool_calls_and_responses( - self, units: List[_ContextUnit] - ) -> Dict[str, List[_ContextUnit]]: - r"""Groups tool calls with their corresponding responses based on - `tool_call_id`. - - This improved logic robustly gathers all messages (assistant requests - and tool responses, including chunks) that share a `tool_call_id`. - - Args: - units (List[_ContextUnit]): List of context units to analyze. - - Returns: - Dict[str, List[_ContextUnit]]: Mapping from `tool_call_id` to a - list of related units. - """ - tool_call_groups: Dict[str, List[_ContextUnit]] = defaultdict(list) - - for unit in units: - # FunctionCallingMessage stores tool_call_id. - message = unit.record.memory_record.message - tool_call_id = getattr(message, 'tool_call_id', None) - - if tool_call_id: - tool_call_groups[tool_call_id].append(unit) - - # Filter out empty or incomplete groups if necessary, - # though defaultdict and getattr handle this gracefully. - return dict(tool_call_groups) - - def _truncate_with_tool_call_awareness( - self, - regular_units: List[_ContextUnit], - tool_call_groups: Dict[str, List[_ContextUnit]], - system_tokens: int, - ) -> List[_ContextUnit]: - r"""Truncates messages while preserving tool call-response pairs. - This method implements a more sophisticated truncation strategy: - 1. It treats tool call groups (request + responses) and standalone - messages as individual items to be included. - 2. It sorts all items by score and greedily adds them to the context. - 3. **Partial Truncation**: If a complete tool group is too large to - fit,it attempts to add the request message and as many of the most - recent response chunks as the token budget allows. - - Args: - regular_units (List[_ContextUnit]): All regular message units. - tool_call_groups (Dict[str, List[_ContextUnit]]): Grouped tool - calls. - system_tokens (int): Tokens used by the system message. - - Returns: - List[_ContextUnit]: A list of units that fit within the token - limit. 
- """ - - # Create a set for quick lookup of units belonging to any tool call - tool_call_unit_ids = { - unit.record.memory_record.uuid - for group in tool_call_groups.values() - for unit in group - } - - # Separate standalone units from tool call groups - standalone_units = [ - u - for u in regular_units - if u.record.memory_record.uuid not in tool_call_unit_ids - ] - - # Prepare all items (standalone units and groups) for sorting - all_potential_items: List[Dict] = [] - for unit in standalone_units: - all_potential_items.append( - { - "type": "standalone", - "score": unit.record.score, - "timestamp": unit.record.timestamp, - "tokens": unit.num_tokens, - "item": unit, - } - ) - for group in tool_call_groups.values(): - all_potential_items.append( - { - "type": "group", - "score": max(u.record.score for u in group), - "timestamp": max(u.record.timestamp for u in group), - "tokens": sum(u.num_tokens for u in group), - "item": group, - } - ) - - # Sort all potential items by score (high to low), then timestamp - all_potential_items.sort(key=lambda x: (-x["score"], -x["timestamp"])) - - remaining_units: List[_ContextUnit] = [] - current_tokens = system_tokens - - for item_dict in all_potential_items: - item_type = item_dict["type"] - item = item_dict["item"] - item_tokens = item_dict["tokens"] - - if current_tokens + item_tokens <= self.token_limit: - # The whole item (standalone or group) fits, so add it - if item_type == "standalone": - remaining_units.append(item) - else: # item_type == "group" - remaining_units.extend(item) - current_tokens += item_tokens - - elif item_type == "group": - # The group does not fit completely; try partial inclusion. - request_unit: Optional[_ContextUnit] = None - response_units: List[_ContextUnit] = [] - - for unit in item: - # Assistant msg with `args` is the request - if ( - isinstance( - unit.record.memory_record.message, - FunctionCallingMessage, - ) - and unit.record.memory_record.message.args is not None - ): - request_unit = unit - else: - response_units.append(unit) - - # A group must have a request to be considered for inclusion. - if request_unit is None: - continue - - # Check if we can at least fit the request. - if ( - current_tokens + request_unit.num_tokens - <= self.token_limit - ): - units_to_add = [request_unit] - tokens_to_add = request_unit.num_tokens - - # Sort responses by timestamp to add newest chunks first - response_units.sort( - key=lambda u: u.record.timestamp, reverse=True - ) + remaining_records.append(record) - for resp_unit in response_units: - if ( - current_tokens - + tokens_to_add - + resp_unit.num_tokens - <= self.token_limit - ): - units_to_add.append(resp_unit) - tokens_to_add += resp_unit.num_tokens + remaining_records.sort(key=lambda record: record.timestamp) - # A request must be followed by at least one response - if len(units_to_add) > 1: - remaining_units.extend(units_to_add) - current_tokens += tokens_to_add + messages: List[OpenAIMessage] = [] + if system_record is not None: + messages.append(system_record.memory_record.to_openai_message()) - return remaining_units - - def _extract_system_message( - self, records: List[ContextRecord] - ) -> Tuple[Optional[_ContextUnit], List[_ContextUnit]]: - r"""Extracts the system message from records and validates it. - - Args: - records (List[ContextRecord]): List of context records - representing conversation history. - - Returns: - Tuple[Optional[_ContextUnit], List[_ContextUnit]]: containing: - - The system message as a `_ContextUnit`, if valid; otherwise, - `None`. 
- - An empty list, serving as the initial container for regular - messages. - """ - if not records: - return None, [] - - first_record = records[0] - if ( - first_record.memory_record.role_at_backend - != OpenAIBackendRole.SYSTEM - ): - return None, [] - - message = first_record.memory_record.to_openai_message() - tokens = self.token_counter.count_tokens_from_messages([message]) - system_message_unit = _ContextUnit( - idx=0, - record=first_record, - num_tokens=tokens, + messages.extend( + record.memory_record.to_openai_message() + for record in remaining_records ) - return system_message_unit, [] - - def _conversation_sort_key( - self, unit: _ContextUnit - ) -> Tuple[float, float]: - r"""Defines the sorting key for assembling the final output. - - Sorting priority: - - Primary: Sort by timestamp in ascending order (chronological order). - - Secondary: Sort by score in descending order (higher scores first - when timestamps are equal). - - Args: - unit (_ContextUnit): A `_ContextUnit` representing a conversation - record. - - Returns: - Tuple[float, float]: - - Timestamp for chronological sorting. - - Negative score for descending order sorting. - """ - return (unit.record.timestamp, -unit.record.score) - - def _assemble_output( - self, - context_units: List[_ContextUnit], - system_unit: Optional[_ContextUnit], - ) -> Tuple[List[OpenAIMessage], int]: - r"""Assembles final message list with proper ordering and token count. - - Args: - context_units (List[_ContextUnit]): Sorted list of regular message - units. - system_unit (Optional[_ContextUnit]): System message unit (if - present). - - Returns: - Tuple[List[OpenAIMessage], int]: Tuple of (ordered messages, total - tokens) - """ - messages = [] - total_tokens = 0 - - # Add system message first if present - if system_unit: - messages.append( - system_unit.record.memory_record.to_openai_message() - ) - total_tokens += system_unit.num_tokens - # Add sorted regular messages - for unit in context_units: - messages.append(unit.record.memory_record.to_openai_message()) - total_tokens += unit.num_tokens + if not messages: + return [], 0 + total_tokens = self.token_counter.count_tokens_from_messages(messages) return messages, total_tokens diff --git a/test/memories/test_chat_history_memory.py b/test/memories/test_chat_history_memory.py index 22daecfe84..3eee644402 100644 --- a/test/memories/test_chat_history_memory.py +++ b/test/memories/test_chat_history_memory.py @@ -91,3 +91,69 @@ def test_chat_history_memory(memory: ChatHistoryMemory): assert output_messages[0] == system_msg.to_openai_system_message() assert output_messages[1] == user_msg.to_openai_user_message() assert output_messages[2] == assistant_msg.to_openai_assistant_message() + + +@pytest.mark.parametrize("memory", ["in-memory", "json"], indirect=True) +def test_chat_history_memory_pop_records(memory: ChatHistoryMemory): + system_msg = BaseMessage( + "system", + role_type=RoleType.DEFAULT, + meta_dict=None, + content="System instructions", + ) + user_msgs = [ + BaseMessage( + "AI user", + role_type=RoleType.USER, + meta_dict=None, + content=f"Message {idx}", + ) + for idx in range(3) + ] + + records = [ + MemoryRecord( + message=system_msg, + role_at_backend=OpenAIBackendRole.SYSTEM, + timestamp=datetime.now().timestamp(), + agent_id="system", + ), + *[ + MemoryRecord( + message=msg, + role_at_backend=OpenAIBackendRole.USER, + timestamp=datetime.now().timestamp(), + agent_id="user", + ) + for msg in user_msgs + ], + ] + + memory.write_records(records) + + popped = memory.pop_records(2) 
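+    # Expect the two most recent records back in chronological order;
+    # the system message and "Message 0" stay in memory.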
+ assert [record.message.content for record in popped] == [ + "Message 1", + "Message 2", + ] + + remaining_messages, _ = memory.get_context() + assert [msg['content'] for msg in remaining_messages] == [ + "System instructions", + "Message 0", + ] + + # Attempting to pop more than available should leave system message intact. + popped = memory.pop_records(5) + assert [record.message.content for record in popped] == ["Message 0"] + + remaining_messages, _ = memory.get_context() + assert [msg['content'] for msg in remaining_messages] == [ + "System instructions", + ] + + # Zero pop should be a no-op. + assert memory.pop_records(0) == [] + + with pytest.raises(ValueError): + memory.pop_records(-1) From ce3608e6eb931896d000650760be9fca53f4f553 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 14 Oct 2025 16:36:20 +0800 Subject: [PATCH 10/27] Update chat_agent.py --- camel/agents/chat_agent.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index b621fc1e36..c1edb4871e 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -549,6 +549,29 @@ def __init__( # Assign unique ID self.agent_id = agent_id if agent_id else str(uuid.uuid4()) + self._tool_output_cache_enabled = ( + enable_tool_output_cache and tool_output_cache_threshold > 0 + ) + self._tool_output_cache_threshold = max(0, tool_output_cache_threshold) + self._tool_output_cache_dir: Optional[Path] + self._tool_output_cache_manager: Optional[_ToolOutputCacheManager] + if self._tool_output_cache_enabled: + cache_dir = ( + Path(tool_output_cache_dir).expanduser() + if tool_output_cache_dir is not None + else Path("tool_cache") + ) + self._tool_output_cache_dir = cache_dir + self._tool_output_cache_manager = _ToolOutputCacheManager( + cache_dir + ) + + else: + self._tool_output_cache_dir = None + self._tool_output_cache_manager = None + self._tool_output_history: List[_ToolOutputHistoryEntry] = [] + self._cache_lookup_tool_name = "retrieve_cached_tool_output" + if token_limit is not None: logger.warning( "`token_limit` parameter is deprecated and will be ignored. " From d18494465420ca926dde8dcc2ad0d76237d9b6ad Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 21 Oct 2025 16:17:58 +0800 Subject: [PATCH 11/27] update --- camel/agents/chat_agent.py | 120 +++++++++++++++++--- camel/memories/agent_memories.py | 20 ++++ camel/memories/base.py | 14 +++ camel/memories/blocks/chat_history_block.py | 64 +++++++++++ 4 files changed, 204 insertions(+), 14 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index c1edb4871e..bdf89e148c 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -28,7 +28,6 @@ import threading import time import uuid -import warnings from dataclasses import dataclass from datetime import datetime from pathlib import Path @@ -902,6 +901,96 @@ def _is_tool_related_record(record: MemoryRecord) -> bool: return False + def _find_indices_to_remove_for_last_tool_pair( + self, recent_records: List[ContextRecord] + ) -> List[int]: + """Find indices of records that should be removed to clean up the most + recent incomplete tool interaction pair. + + This method identifies tool call/result pairs by tool_call_id and + returns the exact indices to remove, allowing non-contiguous deletions. 
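+        Illustrative example (hypothetical positions): for a history of
+        [system, user, assistant tool call, unrelated assistant note,
+        tool result], the call/result pair is returned as [2, 4] while
+        the unrelated message stays in place.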
+ + Logic: + - If the last record is a tool result (TOOL/FUNCTION) with a + tool_call_id, find the matching assistant call anywhere in history + and return both indices. + - If the last record is an assistant tool call without a result yet, + return just that index. + - For normal messages (non tool-related): remove just the last one. + - Fallback: If no tool_call_id is available, use heuristic (last 2 if + tool-related, otherwise last 1). + + Returns: + List[int]: Indices to remove (may be non-contiguous). + """ + if not recent_records: + return [] + + last_idx = len(recent_records) - 1 + last_record = recent_records[last_idx].memory_record + + # Case A: Last is an ASSISTANT tool call with no result yet + if ( + last_record.role_at_backend == OpenAIBackendRole.ASSISTANT + and isinstance(last_record.message, FunctionCallingMessage) + and last_record.message.result is None + ): + return [last_idx] + + # Case B: Last is TOOL/FUNCTION result, try id-based pairing + if last_record.role_at_backend in { + OpenAIBackendRole.TOOL, + OpenAIBackendRole.FUNCTION, + }: + tool_id = None + if isinstance(last_record.message, FunctionCallingMessage): + tool_id = last_record.message.tool_call_id + + if tool_id: + for idx in range(len(recent_records) - 2, -1, -1): + rec = recent_records[idx].memory_record + if rec.role_at_backend != OpenAIBackendRole.ASSISTANT: + continue + + # Check if this assistant message contains the tool_call_id + matched = False + + # Case 1: FunctionCallingMessage (single tool call) + if isinstance(rec.message, FunctionCallingMessage): + if rec.message.tool_call_id == tool_id: + matched = True + + # Case 2: BaseMessage with multiple tool_calls in meta_dict + elif ( + hasattr(rec.message, "meta_dict") + and rec.message.meta_dict + ): + tool_calls_list = rec.message.meta_dict.get( + "tool_calls", [] + ) + if isinstance(tool_calls_list, list): + for tc in tool_calls_list: + if ( + isinstance(tc, dict) + and tc.get("id") == tool_id + ): + matched = True + break + + if matched: + # Return both assistant call and tool result indices + return [idx, last_idx] + + # Fallback: no tool_call_id, use heuristic + if self._is_tool_related_record(last_record): + # Remove last 2 (assume they are paired) + return [last_idx - 1, last_idx] if last_idx > 0 else [last_idx] + else: + return [last_idx] + + # Default: non tool-related tail => remove last one + return [last_idx] + @staticmethod def _serialize_tool_args(args: Dict[str, Any]) -> str: try: @@ -2124,12 +2213,12 @@ def _step_impl( except Exception: # pragma: no cover - defensive guard recent_records = [] - pop_count = 1 - if recent_records: - last_record = recent_records[-1].memory_record - if self._is_tool_related_record(last_record): - pop_count = 2 - self.memory.pop_records(pop_count) + indices_to_remove = ( + self._find_indices_to_remove_for_last_tool_pair( + recent_records + ) + ) + self.memory.remove_records_by_indices(indices_to_remove) summary = self.summarize() if isinstance(input_message, BaseMessage): @@ -2441,12 +2530,12 @@ async def _astep_non_streaming_task( except Exception: # pragma: no cover - defensive guard recent_records = [] - pop_count = 1 - if recent_records: - last_record = recent_records[-1].memory_record - if self._is_tool_related_record(last_record): - pop_count = 2 - self.memory.pop_records(pop_count) + indices_to_remove = ( + self._find_indices_to_remove_for_last_tool_pair( + recent_records + ) + ) + self.memory.remove_records_by_indices(indices_to_remove) summary = self.summarize() if isinstance(input_message, 
BaseMessage): @@ -2661,6 +2750,8 @@ def _get_model_response( if response: break except RateLimitError as e: + if self._is_token_limit_error(e): + raise last_error = e if attempt < self.retry_attempts - 1: delay = min(self.retry_delay * (2**attempt), 60.0) @@ -2678,7 +2769,6 @@ def _get_model_response( except Exception: logger.error( f"Model error: {self.model_backend.model_type}", - exc_info=True, ) raise else: @@ -2725,6 +2815,8 @@ async def _aget_model_response( if response: break except RateLimitError as e: + if self._is_token_limit_error(e): + raise last_error = e if attempt < self.retry_attempts - 1: delay = min(self.retry_delay * (2**attempt), 60.0) diff --git a/camel/memories/agent_memories.py b/camel/memories/agent_memories.py index a85a3f7b44..1789cec469 100644 --- a/camel/memories/agent_memories.py +++ b/camel/memories/agent_memories.py @@ -133,6 +133,12 @@ def pop_records(self, count: int) -> List[MemoryRecord]: r"""Removes the most recent records from chat history memory.""" return self._chat_history_block.pop_records(count) + def remove_records_by_indices( + self, indices: List[int] + ) -> List[MemoryRecord]: + r"""Removes records at specified indices from chat history memory.""" + return self._chat_history_block.remove_records_by_indices(indices) + class VectorDBMemory(AgentMemory): r"""An agent memory wrapper of :obj:`VectorDBBlock`. This memory queries @@ -203,6 +209,14 @@ def pop_records(self, count: int) -> List[MemoryRecord]: "VectorDBMemory does not support removing historical records." ) + def remove_records_by_indices( + self, indices: List[int] + ) -> List[MemoryRecord]: + r"""Removing by indices is unsupported for vector database memory.""" + raise NotImplementedError( + "VectorDBMemory does not support removing records by indices." + ) + class LongtermAgentMemory(AgentMemory): r"""An implementation of the :obj:`AgentMemory` abstract base class for @@ -291,3 +305,9 @@ def clear(self) -> None: def pop_records(self, count: int) -> List[MemoryRecord]: r"""Removes recent chat history records while leaving vector memory.""" return self.chat_history_block.pop_records(count) + + def remove_records_by_indices( + self, indices: List[int] + ) -> List[MemoryRecord]: + r"""Removes records at specified indices from chat history.""" + return self.chat_history_block.remove_records_by_indices(indices) diff --git a/camel/memories/base.py b/camel/memories/base.py index bedcc1a080..4cf2ccce15 100644 --- a/camel/memories/base.py +++ b/camel/memories/base.py @@ -57,6 +57,20 @@ def pop_records(self, count: int) -> List[MemoryRecord]: """ raise NotImplementedError + def remove_records_by_indices( + self, indices: List[int] + ) -> List[MemoryRecord]: + r"""Removes records at specified indices from the memory. + + Args: + indices (List[int]): List of indices to remove. Indices should be + valid positions in the current record list. + + Returns: + List[MemoryRecord]: The removed records in their original order. 
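+
+        Example (illustrative; actual behaviour depends on the
+        concrete subclass)::
+
+            removed = memory.remove_records_by_indices([2, 4])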
+ """ + raise NotImplementedError + @abstractmethod def clear(self) -> None: r"""Clears all messages from the memory.""" diff --git a/camel/memories/blocks/chat_history_block.py b/camel/memories/blocks/chat_history_block.py index 0a303d1894..4c4d4ed76c 100644 --- a/camel/memories/blocks/chat_history_block.py +++ b/camel/memories/blocks/chat_history_block.py @@ -233,3 +233,67 @@ def pop_records(self, count: int) -> List[MemoryRecord]: self.storage.save(remaining_dicts) return [MemoryRecord.from_dict(record) for record in popped_dicts] + + def remove_records_by_indices( + self, indices: List[int] + ) -> List[MemoryRecord]: + r"""Removes records at specified indices from the memory. + + Args: + indices (List[int]): List of indices to remove. Indices are + positions in the current record list (0-based). + System/developer messages at index 0 are protected and will + not be removed. + + Returns: + List[MemoryRecord]: The removed records in their original order. + """ + if not indices: + return [] + + record_dicts = self.storage.load() + if not record_dicts: + return [] + + # Preserve initial system/developer instruction if present. + protected_prefix = ( + 1 + if ( + record_dicts + and record_dicts[0]['role_at_backend'] + in { + OpenAIBackendRole.SYSTEM.value, + OpenAIBackendRole.DEVELOPER.value, + } + ) + else 0 + ) + + # Filter out protected indices and invalid ones + valid_indices = sorted( + { + idx + for idx in indices + if idx >= protected_prefix and idx < len(record_dicts) + } + ) + + if not valid_indices: + return [] + + # Extract records to remove (in original order) + removed_records = [record_dicts[idx] for idx in valid_indices] + + # Build remaining records by excluding removed indices + remaining_dicts = [ + record + for idx, record in enumerate(record_dicts) + if idx not in valid_indices + ] + + # Save back to storage + self.storage.clear() + if remaining_dicts: + self.storage.save(remaining_dicts) + + return [MemoryRecord.from_dict(record) for record in removed_records] From bc69be71dfe5ccdabfee41f968dc62cda508d92e Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 21 Oct 2025 16:27:29 +0800 Subject: [PATCH 12/27] Update chat_agent.py --- camel/agents/chat_agent.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index bdf89e148c..e084698eef 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -1326,8 +1326,7 @@ def update_memory( message: BaseMessage, role: OpenAIBackendRole, timestamp: Optional[float] = None, - return_records: bool = False, - ) -> Optional[List[MemoryRecord]]: + ) -> None: r"""Updates the agent memory with a new message. 
Args: @@ -1339,17 +1338,15 @@ def update_memory( (default: :obj:`None`) (default: obj:`None`) """ - - self.memory.write_record( - MemoryRecord( - message=message, - role_at_backend=role, - timestamp=timestamp - if timestamp is not None - else time.time_ns() / 1_000_000_000, # Nanosecond precision - agent_id=self.agent_id, - ) + record = MemoryRecord( + message=message, + role_at_backend=role, + timestamp=timestamp + if timestamp is not None + else time.time_ns() / 1_000_000_000, # Nanosecond precision + agent_id=self.agent_id, ) + self.memory.write_record(record) self._register_record_addition(role) def load_memory(self, memory: AgentMemory) -> None: From 3309c0f1a1ff590768b1d60a30b4e7c4e0712395 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 21 Oct 2025 20:49:39 +0800 Subject: [PATCH 13/27] Update chat_agent.py --- camel/agents/chat_agent.py | 226 +++++++++++++++++++++++++++++++++++-- 1 file changed, 219 insertions(+), 7 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 2c6f18a4d4..546c48a32e 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -367,8 +367,9 @@ class ChatAgent(BaseAgent): message_window_size (int, optional): The maximum number of previous messages to include in the context window. If `None`, no windowing is performed. (default: :obj:`None`) - token_limit (int, optional): Deprecated. Configure context limits via - the model configuration instead. Passing a value raises an error. + summarize_threshold (int, optional): The percentage of the context + window that triggers summarization. If `None`, will trigger + summarization when the context window is full. (default: :obj:`None`) output_language (str, optional): The language to be output by the agent. (default: :obj:`None`) @@ -448,6 +449,7 @@ def __init__( ] = None, memory: Optional[AgentMemory] = None, message_window_size: Optional[int] = None, + summarize_threshold: Optional[int] = None, token_limit: Optional[int] = None, output_language: Optional[str] = None, tools: Optional[List[Union[FunctionTool, Callable]]] = None, @@ -522,6 +524,8 @@ def __init__( ) self.init_messages() + # Set up summarize threshold + self.summarize_threshold = summarize_threshold self._reset_summary_state() # Set up role name and role type @@ -930,14 +934,131 @@ def _format_tool_limit_notice(self) -> Optional[str]: return "[Tool Call Causing Token Limit]\n" f"{description}" def _reset_summary_state(self) -> None: - self._summary_state = { + self._summary_state: Dict[str, Any] = { "depth": 0, "last_summary_log_length": 0, "last_summary_user_signature": None, "record_count_since_summary": 0, "user_count_since_summary": 0, + "summary_token_count": 0, # Total tokens in summary messages + "last_summary_threshold": 0, # Last calculated threshold + "is_summary_message": set(), # Set of message that are summaries } + def _calculate_next_summary_threshold(self) -> int: + r"""Calculate the next token threshold that should trigger + summarization. + + The threshold calculation follows a progressive strategy: + - First time: token_limit * (summarize_threshold / 100) + - Subsequent times: (limit - summary_token) / 2 + summary_token + + This ensures that as summaries accumulate, the threshold adapts + to maintain a reasonable balance between context and summaries. + + Returns: + int: The token count threshold for next summarization. 
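+
+        Illustrative example (numbers assumed for clarity): with
+        ``summarize_threshold=50`` and a 100,000-token limit, the first
+        summarization triggers at 50,000 tokens; after a 10,000-token
+        summary the next threshold becomes
+        (100,000 - 10,000) * 50 / 100 + 10,000 = 55,000 tokens.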
+ """ + if self.summarize_threshold is None: + return 0 + + token_limit = self.model_backend.token_limit + summary_token_count = self._summary_state.get("summary_token_count", 0) + + # First summarization: use the percentage threshold + if summary_token_count == 0: + threshold = int(token_limit * self.summarize_threshold / 100) + else: + # Subsequent summarizations: adaptive threshold + threshold = int( + (token_limit - summary_token_count) + * self.summarize_threshold + / 100 + + summary_token_count + ) + + return threshold + + def _update_memory_with_summary(self, summary: Dict[str, Any]) -> None: + r"""Update memory with summary result. + + This method handles memory clearing and restoration of summaries based + on whether it's a progressive or full compression. + """ + if not summary or self.summarize_threshold is None: + return + + summary_with_prefix = summary.get("summary_with_prefix") + if not summary_with_prefix: + return + + include_summaries = summary.get("include_summaries", False) + + existing_summaries = [] + if not include_summaries: + messages, _ = self.memory.get_context() + for msg in messages: + content = msg.get('content', '') + if isinstance(content, str) and content.startswith( + '[CAMEL_SUMMARY]' + ): + existing_summaries.append(msg) + + # Clear memory + self.memory.clear() + + # Re-add system message if it exists + if self.system_message is not None: + self.memory.write_record( + MemoryRecord( + message=self.system_message, + role_at_backend=OpenAIBackendRole.SYSTEM, + timestamp=time.time_ns() / 1_000_000_000, + agent_id=self.agent_id, + ) + ) + + # Restore old summaries (for progressive compression) + for old_summary in existing_summaries: + content = old_summary.get('content', '') + if not isinstance(content, str): + content = str(content) + summary_msg = BaseMessage.make_assistant_message( + role_name="Assistant", content=content + ) + self.update_memory(summary_msg, OpenAIBackendRole.ASSISTANT) + + # Add new summary + new_summary_msg = BaseMessage.make_assistant_message( + role_name="Assistant", content=summary_with_prefix + ) + self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT) + + # Update token count + try: + summary_tokens = ( + self.model_backend.token_counter.count_tokens_from_messages( + [{"role": "assistant", "content": summary_with_prefix}] + ) + ) + + if include_summaries: # Full compression - reset count + self._summary_state["summary_token_count"] = summary_tokens + logger.info( + f"Full compression: Summary with {summary_tokens} tokens. " + f"Total summary tokens reset to: {summary_tokens}" + ) + else: # Progressive compression - accumulate + self._summary_state["summary_token_count"] += summary_tokens + logger.info( + f"Progressive compression: New summary " + f"with {summary_tokens} tokens. " + f"Total summary tokens: " + f"{self._summary_state['summary_token_count']}" + ) + except Exception as e: + logger.warning(f"Failed to count summary tokens: {e}") + def _register_record_addition(self, role: OpenAIBackendRole) -> None: state = getattr(self, "_summary_state", None) if not state: @@ -1170,6 +1291,7 @@ def summarize( summary_prompt: Optional[str] = None, response_format: Optional[Type[BaseModel]] = None, working_directory: Optional[Union[str, Path]] = None, + include_summaries: bool = False, ) -> Dict[str, Any]: r"""Summarize the agent's current conversation context and persist it to a markdown file. @@ -1189,6 +1311,11 @@ def summarize( defining the expected structure of the response. 
If provided, the summary will be generated as structured output and included in the result. + include_summaries (bool): Whether to include previously generated + summaries in the content to be summarized. If False (default), + only non-summary messages will be summarized. If True, all + messages including previous summaries will be summarized + (full compression). (default: :obj:`False`) working_directory (Optional[str|Path]): Optional directory to save the markdown summary file. If provided, overrides the default directory used by ContextUtility. @@ -1242,6 +1369,12 @@ def summarize( role = message.get('role', 'unknown') content = message.get('content', '') + # Skip summary messages if include_summaries is False + if not include_summaries and isinstance(content, str): + # Check if this is a summary message by looking for marker + if content.startswith('[CAMEL_SUMMARY]'): + continue + # Handle tool call messages (assistant calling tools) tool_calls = message.get('tool_calls') if tool_calls and isinstance(tool_calls, (list, tuple)): @@ -1421,6 +1554,13 @@ def summarize( } result.update(result_dict) + + # Add prefix to summary content in result + if self.summarize_threshold is not None: + summary_with_prefix = f"[CAMEL_SUMMARY] {summary_content}" + result["summary_with_prefix"] = summary_with_prefix + result["include_summaries"] = include_summaries + logger.info("Conversation summary saved to %s", file_path) return result @@ -1436,6 +1576,7 @@ async def asummarize( summary_prompt: Optional[str] = None, response_format: Optional[Type[BaseModel]] = None, working_directory: Optional[Union[str, Path]] = None, + include_summaries: bool = False, ) -> Dict[str, Any]: r"""Asynchronously summarize the agent's current conversation context and persist it to a markdown file. @@ -1458,6 +1599,11 @@ async def asummarize( working_directory (Optional[str|Path]): Optional directory to save the markdown summary file. If provided, overrides the default directory used by ContextUtility. + include_summaries (bool): Whether to include previously generated + summaries in the content to be summarized. If False (default), + only non-summary messages will be summarized. If True, all + messages including previous summaries will be summarized + (full compression). 
(default: :obj:`False`) Returns: Dict[str, Any]: A dictionary containing the summary text, file @@ -1498,6 +1644,12 @@ async def asummarize( role = message.get('role', 'unknown') content = message.get('content', '') + # Skip summary messages if include_summaries is False + if not include_summaries and isinstance(content, str): + # Check if this is a summary message by looking for marker + if content.startswith('[CAMEL_SUMMARY]'): + continue + # Handle tool call messages (assistant calling tools) tool_calls = message.get('tool_calls') if tool_calls and isinstance(tool_calls, (list, tuple)): @@ -1686,6 +1838,13 @@ async def asummarize( } result.update(result_dict) + + # Add prefix to summary content in result + if self.summarize_threshold is not None: + summary_with_prefix = f"[CAMEL_SUMMARY] {summary_content}" + result["summary_with_prefix"] = summary_with_prefix + result["include_summaries"] = include_summaries + logger.info("Conversation summary saved to %s", file_path) return result @@ -2173,6 +2332,32 @@ def _step_impl( try: openai_messages, num_tokens = self.memory.get_context() + if self.summarize_threshold is not None: + threshold = self._calculate_next_summary_threshold() + summary_token_count = self._summary_state.get( + "summary_token_count", 0 + ) + token_limit = self.model_backend.token_limit + + # Check if summary tokens exceed 60% of limit + # If so, perform full compression (including summaries) + if summary_token_count > token_limit * 0.6: + logger.info( + f"Summary tokens ({summary_token_count}) exceed " + f"60% of limit ({token_limit * 0.6}). " + "Performing full compression." + ) + # Summarize everything (including summaries) + summary = self.summarize(include_summaries=True) + self._update_memory_with_summary(summary) + elif num_tokens > threshold: + logger.info( + f"Token count ({num_tokens}) exceeds threshold " + f"({threshold}). Triggering summarization." + ) + # Only summarize non-summary content + summary = self.summarize(include_summaries=False) + self._update_memory_with_summary(summary) accumulated_context_tokens += num_tokens except RuntimeError as e: return self._step_terminate( @@ -2271,7 +2456,7 @@ def _step_impl( ) if tool_notice: summary_messages += "\n\n" + tool_notice - self.memory.clear() + self.clear_memory() summary_messages = BaseMessage.make_assistant_message( role_name="Assistant", content=summary_messages ) @@ -2491,6 +2676,34 @@ async def _astep_non_streaming_task( await loop.run_in_executor(None, self.pause_event.wait) try: openai_messages, num_tokens = self.memory.get_context() + if self.summarize_threshold is not None: + threshold = self._calculate_next_summary_threshold() + summary_token_count = self._summary_state.get( + "summary_token_count", 0 + ) + token_limit = self.model_backend.token_limit + + # Check if summary tokens exceed 60% of limit + # If so, perform full compression (including summaries) + if summary_token_count > token_limit * 0.6: + logger.info( + f"Summary tokens ({summary_token_count}) exceed " + f"60% of limit ({token_limit * 0.6}). " + "Performing full compression." + ) + # Summarize everything (including summaries) + summary = await self.asummarize(include_summaries=True) + self._update_memory_with_summary(summary) + elif num_tokens > threshold: + logger.info( + f"Token count ({num_tokens}) exceeds threshold " + f"({threshold}). Triggering summarization." 
+ ) + # Only summarize non-summary content + summary = await self.asummarize( + include_summaries=False + ) + self._update_memory_with_summary(summary) accumulated_context_tokens += num_tokens except RuntimeError as e: return self._step_terminate( @@ -2565,7 +2778,7 @@ async def _astep_non_streaming_task( ) self.memory.remove_records_by_indices(indices_to_remove) - summary = self.summarize() + summary = await self.asummarize() if isinstance(input_message, BaseMessage): input_message = input_message.content @@ -2588,8 +2801,7 @@ async def _astep_non_streaming_task( if tool_notice: summary_messages += "\n\n" + tool_notice - - self.memory.clear() + self.clear_memory() summary_messages = BaseMessage.make_assistant_message( role_name="Assistant", content=summary_messages ) From 8d66d0d23af57e77b86779a6aa8020df4087b108 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 21 Oct 2025 20:50:40 +0800 Subject: [PATCH 14/27] Update chat_agent.py --- camel/agents/chat_agent.py | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 546c48a32e..046a4a72df 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -1005,18 +1005,7 @@ def _update_memory_with_summary(self, summary: Dict[str, Any]) -> None: existing_summaries.append(msg) # Clear memory - self.memory.clear() - - # Re-add system message if it exists - if self.system_message is not None: - self.memory.write_record( - MemoryRecord( - message=self.system_message, - role_at_backend=OpenAIBackendRole.SYSTEM, - timestamp=time.time_ns() / 1_000_000_000, - agent_id=self.agent_id, - ) - ) + self.clear_memory() # Restore old summaries (for progressive compression) for old_summary in existing_summaries: @@ -1863,7 +1852,14 @@ def clear_memory(self) -> None: self.memory.clear() if self.system_message is not None: - self.update_memory(self.system_message, OpenAIBackendRole.SYSTEM) + self.memory.write_record( + MemoryRecord( + message=self.system_message, + role_at_backend=OpenAIBackendRole.SYSTEM, + timestamp=time.time_ns() / 1_000_000_000, + agent_id=self.agent_id, + ) + ) def _generate_system_message_for_output_language( self, @@ -1898,17 +1894,7 @@ def init_messages(self) -> None: message. """ self._reset_summary_state() - self.memory.clear() - # Write system message to memory if provided - if self.system_message is not None: - self.memory.write_record( - MemoryRecord( - message=self.system_message, - role_at_backend=OpenAIBackendRole.SYSTEM, - timestamp=time.time_ns() / 1_000_000_000, - agent_id=self.agent_id, - ) - ) + self.clear_memory() def reset_to_original_system_message(self) -> None: r"""Reset system message to original, removing any appended context. 
From b0752ef80e75dc6d07440a35a6a23a26cb22ff3c Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 21 Oct 2025 21:18:09 +0800 Subject: [PATCH 15/27] Update chat_agent.py --- camel/agents/chat_agent.py | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 046a4a72df..3e9f71d9d3 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -991,7 +991,7 @@ def _update_memory_with_summary(self, summary: Dict[str, Any]) -> None: summary_with_prefix = summary.get("summary_with_prefix") if not summary_with_prefix: return - + summary_status = summary.get("status", "") include_summaries = summary.get("include_summaries", False) existing_summaries = [] @@ -1019,7 +1019,7 @@ def _update_memory_with_summary(self, summary: Dict[str, Any]) -> None: # Add new summary new_summary_msg = BaseMessage.make_assistant_message( - role_name="Assistant", content=summary_with_prefix + role_name="Assistant", content=summary_with_prefix + " " + summary_status ) self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT) @@ -2395,16 +2395,6 @@ def _step_impl( "Token limit exceeded error detected. " "Summarizing context." ) - camel_workdir = os.environ.get("CAMEL_WORKDIR") - context_dir = ( - Path(camel_workdir) / "context_files" - if camel_workdir - else Path("context_files") - ) - context_dir.mkdir(parents=True, exist_ok=True) - full_context_file_path = context_dir / "full_context.json" - - self.save_memory(path=str(full_context_file_path)) recent_records: List[ContextRecord] try: @@ -2435,8 +2425,8 @@ def _step_impl( + "\n\n" "[Context Summary]\n\n" + summary.get("summary", "") - + "\n\n[Full Context file path is]\n\n" - + str(full_context_file_path) + + "\n\n[Summary status]\n\n" + + str(summary.get("status", "")) + "\n\nThe following is the user's original input. " "Please continue based on existing information." ) @@ -2740,17 +2730,6 @@ async def _astep_non_streaming_task( "Summarizing context." ) - camel_workdir = os.environ.get("CAMEL_WORKDIR") - context_dir = ( - Path(camel_workdir) / "context_files" - if camel_workdir - else Path("context_files") - ) - context_dir.mkdir(parents=True, exist_ok=True) - full_context_file_path = context_dir / "full_context.json" - - self.save_memory(path=str(full_context_file_path)) - recent_records: List[ContextRecord] try: recent_records = self.memory.retrieve() @@ -2779,8 +2758,8 @@ async def _astep_non_streaming_task( + "\n\n" "[Context Summary]\n\n" + summary.get("summary", "") - + "\n\n[Full Context file path is]\n\n" - + str(full_context_file_path) + + "\n\n[Summary status]\n\n" + + str(summary.get("status", "")) + "\n\nThe following is the user's original input. " "Please continue based on existing information." 
) From cc983c93b977dc609e739f40c190e7aaeb44287e Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 21 Oct 2025 21:35:44 +0800 Subject: [PATCH 16/27] Update chat_agent.py --- camel/agents/chat_agent.py | 95 +++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 3e9f71d9d3..5b0fca62f5 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -524,7 +524,13 @@ def __init__( ) self.init_messages() - # Set up summarize threshold + # Set up summarize threshold with validation + if summarize_threshold is not None: + if not (0 < summarize_threshold <= 100): + raise ValueError( + f"summarize_threshold must be between 0 and 100, " + f"got {summarize_threshold}" + ) self.summarize_threshold = summarize_threshold self._reset_summary_state() @@ -1019,7 +1025,8 @@ def _update_memory_with_summary(self, summary: Dict[str, Any]) -> None: # Add new summary new_summary_msg = BaseMessage.make_assistant_message( - role_name="Assistant", content=summary_with_prefix + " " + summary_status + role_name="Assistant", + content=summary_with_prefix + " " + summary_status, ) self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT) @@ -2325,25 +2332,26 @@ def _step_impl( ) token_limit = self.model_backend.token_limit - # Check if summary tokens exceed 60% of limit - # If so, perform full compression (including summaries) - if summary_token_count > token_limit * 0.6: - logger.info( - f"Summary tokens ({summary_token_count}) exceed " - f"60% of limit ({token_limit * 0.6}). " - "Performing full compression." - ) - # Summarize everything (including summaries) - summary = self.summarize(include_summaries=True) - self._update_memory_with_summary(summary) - elif num_tokens > threshold: - logger.info( - f"Token count ({num_tokens}) exceeds threshold " - f"({threshold}). Triggering summarization." - ) - # Only summarize non-summary content - summary = self.summarize(include_summaries=False) - self._update_memory_with_summary(summary) + if num_tokens <= token_limit: + # Check if summary tokens exceed 60% of limit + # If so, perform full compression (including summaries) + if summary_token_count > token_limit * 0.6: + logger.info( + f"Summary tokens ({summary_token_count}) " + f"exceed 60% of limit ({token_limit * 0.6}). " + f"Performing full compression." + ) + # Summarize everything (including summaries) + summary = self.summarize(include_summaries=True) + self._update_memory_with_summary(summary) + elif num_tokens > threshold: + logger.info( + f"Token count ({num_tokens}) exceed threshold " + f"({threshold}). Triggering summarization." + ) + # Only summarize non-summary content + summary = self.summarize(include_summaries=False) + self._update_memory_with_summary(summary) accumulated_context_tokens += num_tokens except RuntimeError as e: return self._step_terminate( @@ -2659,27 +2667,30 @@ async def _astep_non_streaming_task( ) token_limit = self.model_backend.token_limit - # Check if summary tokens exceed 60% of limit - # If so, perform full compression (including summaries) - if summary_token_count > token_limit * 0.6: - logger.info( - f"Summary tokens ({summary_token_count}) exceed " - f"60% of limit ({token_limit * 0.6}). " - "Performing full compression." 
- ) - # Summarize everything (including summaries) - summary = await self.asummarize(include_summaries=True) - self._update_memory_with_summary(summary) - elif num_tokens > threshold: - logger.info( - f"Token count ({num_tokens}) exceeds threshold " - f"({threshold}). Triggering summarization." - ) - # Only summarize non-summary content - summary = await self.asummarize( - include_summaries=False - ) - self._update_memory_with_summary(summary) + if num_tokens <= token_limit: + # Check if summary tokens exceed 60% of limit + # If so, perform full compression (including summaries) + if summary_token_count > token_limit * 0.6: + logger.info( + f"Summary tokens ({summary_token_count}) " + f"exceed 60% of limit ({token_limit * 0.6}). " + f"Full compression." + ) + # Summarize everything (including summaries) + summary = await self.asummarize( + include_summaries=True + ) + self._update_memory_with_summary(summary) + elif num_tokens > threshold: + logger.info( + f"Token count ({num_tokens}) exceed threshold " + "({threshold}). Triggering summarization." + ) + # Only summarize non-summary content + summary = await self.asummarize( + include_summaries=False + ) + self._update_memory_with_summary(summary) accumulated_context_tokens += num_tokens except RuntimeError as e: return self._step_terminate( From 41ed7ee0f75df853ba0b5f12df03e69320068aab Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 21 Oct 2025 22:35:54 +0800 Subject: [PATCH 17/27] Update test_chat_agent.py --- test/agents/test_chat_agent.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/agents/test_chat_agent.py b/test/agents/test_chat_agent.py index ffc8a06da5..3502263cdb 100644 --- a/test/agents/test_chat_agent.py +++ b/test/agents/test_chat_agent.py @@ -560,6 +560,21 @@ def test_chat_agent_step_exceed_token_number(step_call_count=3): system_message=system_msg, token_limit=1, ) + + # Mock memory.get_context() to raise RuntimeError indicating token limit exceeded + original_get_context = assistant.memory.get_context + def mock_get_context(): + messages, _ = original_get_context() + # Raise RuntimeError as if context size exceeded limit + raise RuntimeError( + "Context size exceeded", + { + "status": "error", + "message": "The chat history has exceeded the maximum token limit." + } + ) + + assistant.memory.get_context = mock_get_context assistant.model_backend.run = MagicMock( return_value=model_backend_rsp_base ) From 71d367d6a099db9c38f3d388e07a896ae44c2417 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 21 Oct 2025 23:44:55 +0800 Subject: [PATCH 18/27] Update test_chat_agent.py --- test/agents/test_chat_agent.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/agents/test_chat_agent.py b/test/agents/test_chat_agent.py index 3502263cdb..2e6f7538eb 100644 --- a/test/agents/test_chat_agent.py +++ b/test/agents/test_chat_agent.py @@ -560,9 +560,9 @@ def test_chat_agent_step_exceed_token_number(step_call_count=3): system_message=system_msg, token_limit=1, ) - - # Mock memory.get_context() to raise RuntimeError indicating token limit exceeded + original_get_context = assistant.memory.get_context + def mock_get_context(): messages, _ = original_get_context() # Raise RuntimeError as if context size exceeded limit @@ -570,10 +570,10 @@ def mock_get_context(): "Context size exceeded", { "status": "error", - "message": "The chat history has exceeded the maximum token limit." 
- } + "message": "The context has exceeded the maximum token limit.", + }, ) - + assistant.memory.get_context = mock_get_context assistant.model_backend.run = MagicMock( return_value=model_backend_rsp_base From b45d5584b314c04be70e1a4e8b949172070c787c Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Thu, 23 Oct 2025 22:07:05 +0800 Subject: [PATCH 19/27] Update test_score_based.py --- .../context_creators/test_score_based.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/test/memories/context_creators/test_score_based.py b/test/memories/context_creators/test_score_based.py index a396212d03..4f862f2967 100644 --- a/test/memories/context_creators/test_score_based.py +++ b/test/memories/context_creators/test_score_based.py @@ -70,10 +70,8 @@ def test_score_based_context_creator(): ] expected_output = [ - r.memory_record.to_openai_message() - for r in [ - context_records[1] # Only expect the highest scoring message - ] + record.memory_record.to_openai_message() + for record in sorted(context_records, key=lambda r: r.timestamp) ] output, _ = context_creator.create_context(records=context_records) assert expected_output == output @@ -137,9 +135,16 @@ def test_score_based_context_creator_with_system_message(): score=0.9, ), ] + sorted_records = sorted( + (record for record in context_records[1:]), + key=lambda r: r.timestamp, + ) expected_output = [ - r.memory_record.to_openai_message() - for r in [context_records[0], context_records[2], context_records[3]] + context_records[0].memory_record.to_openai_message(), + *( + record.memory_record.to_openai_message() + for record in sorted_records + ), ] output, _ = context_creator.create_context(records=context_records) assert expected_output == output From b5a21f1f8ecb1e2691fb32d7baa17dc0faf40c64 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Fri, 24 Oct 2025 16:03:21 +0800 Subject: [PATCH 20/27] Update chat_agent.py --- camel/agents/chat_agent.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 5b0fca62f5..19b3e1dfc4 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -427,6 +427,10 @@ class ChatAgent(BaseAgent): updates return accumulated content (current behavior). When False, partial updates return only the incremental delta. (default: :obj:`True`) + summary_window_ratio (float, optional): Maximum fraction of the total + context window that can be occupied by summary information. Used + to limit how much of the model’s context is reserved for + summarization results. 
(default: :obj:`0.6`) """ def __init__( @@ -472,6 +476,7 @@ def __init__( retry_delay: float = 1.0, step_timeout: Optional[float] = None, stream_accumulate: bool = True, + summary_window_ratio: float = 0.6, ) -> None: if isinstance(model, ModelManager): self.model_backend = model @@ -584,7 +589,7 @@ def __init__( self._last_tool_call_record: Optional[ToolCallingRecord] = None self._last_tool_call_signature: Optional[str] = None self._last_token_limit_tool_signature: Optional[str] = None - + self.summary_window_ratio = summary_window_ratio def reset(self): r"""Resets the :obj:`ChatAgent` to its initial state.""" self.terminated = False @@ -1006,7 +1011,7 @@ def _update_memory_with_summary(self, summary: Dict[str, Any]) -> None: for msg in messages: content = msg.get('content', '') if isinstance(content, str) and content.startswith( - '[CAMEL_SUMMARY]' + '[CONTEXT_SUMMARY]' ): existing_summaries.append(msg) @@ -1368,7 +1373,7 @@ def summarize( # Skip summary messages if include_summaries is False if not include_summaries and isinstance(content, str): # Check if this is a summary message by looking for marker - if content.startswith('[CAMEL_SUMMARY]'): + if content.startswith('[CONTEXT_SUMMARY]'): continue # Handle tool call messages (assistant calling tools) @@ -1553,7 +1558,7 @@ def summarize( # Add prefix to summary content in result if self.summarize_threshold is not None: - summary_with_prefix = f"[CAMEL_SUMMARY] {summary_content}" + summary_with_prefix = f"[CONTEXT_SUMMARY] {summary_content}" result["summary_with_prefix"] = summary_with_prefix result["include_summaries"] = include_summaries @@ -1643,7 +1648,7 @@ async def asummarize( # Skip summary messages if include_summaries is False if not include_summaries and isinstance(content, str): # Check if this is a summary message by looking for marker - if content.startswith('[CAMEL_SUMMARY]'): + if content.startswith('[CONTEXT_SUMMARY]'): continue # Handle tool call messages (assistant calling tools) @@ -1837,7 +1842,7 @@ async def asummarize( # Add prefix to summary content in result if self.summarize_threshold is not None: - summary_with_prefix = f"[CAMEL_SUMMARY] {summary_content}" + summary_with_prefix = f"[CONTEXT_SUMMARY] {summary_content}" result["summary_with_prefix"] = summary_with_prefix result["include_summaries"] = include_summaries @@ -2333,12 +2338,11 @@ def _step_impl( token_limit = self.model_backend.token_limit if num_tokens <= token_limit: - # Check if summary tokens exceed 60% of limit - # If so, perform full compression (including summaries) - if summary_token_count > token_limit * 0.6: + if summary_token_count > token_limit * self.summary_window_ratio: logger.info( f"Summary tokens ({summary_token_count}) " - f"exceed 60% of limit ({token_limit * 0.6}). " + f"exceed {self.summary_window_ratio} of " + f"limit ({token_limit * self.summary_window_ratio}). " f"Performing full compression." ) # Summarize everything (including summaries) @@ -2668,12 +2672,11 @@ async def _astep_non_streaming_task( token_limit = self.model_backend.token_limit if num_tokens <= token_limit: - # Check if summary tokens exceed 60% of limit - # If so, perform full compression (including summaries) - if summary_token_count > token_limit * 0.6: + if summary_token_count > token_limit * self.summary_window_ratio: logger.info( f"Summary tokens ({summary_token_count}) " - f"exceed 60% of limit ({token_limit * 0.6}). " + f"exceed {self.summary_window_ratio} of " + f"limit ({token_limit * self.summary_window_ratio}). " f"Full compression." 
) # Summarize everything (including summaries) @@ -2709,6 +2712,8 @@ async def _astep_non_streaming_task( prev_num_openai_messages=prev_num_openai_messages, ) except Exception as exc: + logger.exception("Model error: %s", exc) + if self._is_token_limit_error(exc): tool_signature = self._last_tool_call_signature if ( From 3810c5644af1727c53cc857a0bca813e1f1476c6 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Fri, 24 Oct 2025 16:27:52 +0800 Subject: [PATCH 21/27] Update chat_agent.py --- camel/agents/chat_agent.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 19b3e1dfc4..74bfcd301c 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -1558,7 +1558,10 @@ def summarize( # Add prefix to summary content in result if self.summarize_threshold is not None: - summary_with_prefix = f"[CONTEXT_SUMMARY] {summary_content}" + summary_with_prefix = ( + f"[CONTEXT_SUMMARY] The following is a summary of our " + f"conversation from a previous session: {summary_content}" + ) result["summary_with_prefix"] = summary_with_prefix result["include_summaries"] = include_summaries From 78777edd8f712d77c41d12b75aaaf54c88f74bc7 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 28 Oct 2025 19:16:20 +0800 Subject: [PATCH 22/27] Update chat_agent.py --- camel/agents/chat_agent.py | 398 +++++++++++++++++-------------------- 1 file changed, 179 insertions(+), 219 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 74bfcd301c..be22a24c8b 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -112,9 +112,6 @@ "context limit", ) -SUMMARY_MAX_DEPTH = 3 -SUMMARY_PROGRESS_RECORD_THRESHOLD = 2 - if TYPE_CHECKING: from camel.terminators import ResponseTerminator @@ -427,9 +424,9 @@ class ChatAgent(BaseAgent): updates return accumulated content (current behavior). When False, partial updates return only the incremental delta. (default: :obj:`True`) - summary_window_ratio (float, optional): Maximum fraction of the total - context window that can be occupied by summary information. Used - to limit how much of the model’s context is reserved for + summary_window_ratio (float, optional): Maximum fraction of the total + context window that can be occupied by summary information. Used + to limit how much of the model's context is reserved for summarization results. (default: :obj:`0.6`) """ @@ -492,13 +489,6 @@ def __init__( # Assign unique ID self.agent_id = agent_id if agent_id else str(uuid.uuid4()) - if token_limit is not None: - logger.warning( - "`token_limit` parameter is deprecated and will be ignored. " - "Configure the model's token limit in the backend settings " - "instead." - ) - # Set up memory context_creator = ScoreBasedContextCreator( self.model_backend.token_counter, @@ -590,10 +580,12 @@ def __init__( self._last_tool_call_signature: Optional[str] = None self._last_token_limit_tool_signature: Optional[str] = None self.summary_window_ratio = summary_window_ratio + def reset(self): r"""Resets the :obj:`ChatAgent` to its initial state.""" self.terminated = False self.init_messages() + self._reset_summary_state() for terminator in self.response_terminators: terminator.reset() @@ -938,23 +930,61 @@ def _describe_tool_call( args_repr = self._serialize_tool_args(record.args) return f"Tool `{record.tool_name}` invoked with arguments {args_repr}." 
+ def _update_last_tool_call_state( + self, record: Optional[ToolCallingRecord] + ) -> None: + """Track the most recent tool call and its identifying signature.""" + self._last_tool_call_record = record + if record is None: + self._last_tool_call_signature = None + return + + args = ( + record.args + if isinstance(record.args, dict) + else {"_raw": record.args} + ) + try: + signature = self._build_tool_signature(record.tool_name, args) + except Exception: # pragma: no cover - defensive guard + signature = None + self._last_tool_call_signature = signature + def _format_tool_limit_notice(self) -> Optional[str]: - description = self._describe_tool_call(self._last_tool_call_record) + record = self._last_tool_call_record + description = self._describe_tool_call(record) if description is None: return None - return "[Tool Call Causing Token Limit]\n" f"{description}" + notice_lines = [ + "[Tool Call Causing Token Limit]", + description, + ] + + if record is not None: + result = record.result + if isinstance(result, bytes): + result_repr = result.decode(errors="replace") + elif isinstance(result, str): + result_repr = result + else: + try: + result_repr = json.dumps( + result, ensure_ascii=False, sort_keys=True + ) + except (TypeError, ValueError): + result_repr = str(result) + + result_length = len(result_repr) + notice_lines.append(f"Tool result length: {result_length}") + if self.model_backend.token_limit != 999999999: + notice_lines.append( + f"Token limit: {self.model_backend.token_limit}" + ) + + return "\n".join(notice_lines) def _reset_summary_state(self) -> None: - self._summary_state: Dict[str, Any] = { - "depth": 0, - "last_summary_log_length": 0, - "last_summary_user_signature": None, - "record_count_since_summary": 0, - "user_count_since_summary": 0, - "summary_token_count": 0, # Total tokens in summary messages - "last_summary_threshold": 0, # Last calculated threshold - "is_summary_message": set(), # Set of message that are summaries - } + self._summary_token_count = 0 # Total tokens in summary messages def _calculate_next_summary_threshold(self) -> int: r"""Calculate the next token threshold that should trigger @@ -974,7 +1004,7 @@ def _calculate_next_summary_threshold(self) -> int: return 0 token_limit = self.model_backend.token_limit - summary_token_count = self._summary_state.get("summary_token_count", 0) + summary_token_count = self._summary_token_count # First summarization: use the percentage threshold if summary_token_count == 0: @@ -990,20 +1020,16 @@ def _calculate_next_summary_threshold(self) -> int: return threshold - def _update_memory_with_summary(self, summary: Dict[str, Any]) -> None: + def _update_memory_with_summary( + self, summary: Dict[str, Any], include_summaries: bool = False + ) -> None: r"""Update memory with summary result. This method handles memory clearing and restoration of summaries based on whether it's a progressive or full compression. 
""" - if not summary or self.summarize_threshold is None: - return - summary_with_prefix = summary.get("summary_with_prefix") - if not summary_with_prefix: - return - summary_status = summary.get("status", "") - include_summaries = summary.get("include_summaries", False) + summary_content: str = summary.get("summary_content", "") existing_summaries = [] if not include_summaries: @@ -1023,111 +1049,51 @@ def _update_memory_with_summary(self, summary: Dict[str, Any]) -> None: content = old_summary.get('content', '') if not isinstance(content, str): content = str(content) - summary_msg = BaseMessage.make_assistant_message( - role_name="Assistant", content=content + summary_msg = BaseMessage.make_user_message( + role_name="assistant", content=content ) self.update_memory(summary_msg, OpenAIBackendRole.ASSISTANT) # Add new summary - new_summary_msg = BaseMessage.make_assistant_message( - role_name="Assistant", - content=summary_with_prefix + " " + summary_status, + new_summary_msg = BaseMessage.make_user_message( + role_name="assistant", content=summary_content ) self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT) - + input_message = BaseMessage.make_user_message( + role_name="user", + content=( + "Please continue the conversation from " + "where we left it off without asking the user any further " + "questions. Continue with the last task that you were " + "asked to work on." + ), + ) + self.update_memory(input_message, OpenAIBackendRole.USER) # Update token count try: summary_tokens = ( self.model_backend.token_counter.count_tokens_from_messages( - [{"role": "assistant", "content": summary_with_prefix}] + [{"role": "assistant", "content": summary_content}] ) ) if include_summaries: # Full compression - reset count - self._summary_state["summary_token_count"] = summary_tokens + self._summary_token_count = summary_tokens logger.info( f"Full compression: Summary with {summary_tokens} tokens. " f"Total summary tokens reset to: {summary_tokens}" ) else: # Progressive compression - accumulate - self._summary_state["summary_token_count"] += summary_tokens + self._summary_token_count += summary_tokens logger.info( f"Progressive compression: New summary " f"with {summary_tokens} tokens. 
" f"Total summary tokens: " - f"{self._summary_state['summary_token_count']}" + f"{self._summary_token_count}" ) except Exception as e: logger.warning(f"Failed to count summary tokens: {e}") - def _register_record_addition(self, role: OpenAIBackendRole) -> None: - state = getattr(self, "_summary_state", None) - if not state: - return - - state["record_count_since_summary"] = ( - state.get("record_count_since_summary", 0) + 1 - ) - - if role == OpenAIBackendRole.USER: - state["user_count_since_summary"] = ( - state.get("user_count_since_summary", 0) + 1 - ) - - def _can_attempt_summary(self) -> Tuple[bool, Optional[str]]: - state = getattr(self, "_summary_state", None) - if not state: - return True, None - - depth = state.get("depth", 0) - - if depth >= SUMMARY_MAX_DEPTH: - return ( - False, - "Maximum number of summaries reached for this session.", - ) - - if depth == 0: - return True, None - - if ( - state.get("record_count_since_summary", 0) - >= SUMMARY_PROGRESS_RECORD_THRESHOLD - ): - return True, None - - return ( - False, - "Context was summarized recently but the conversation did not add " - "enough new exchanges to summarize again.", - ) - - def _on_summary(self, records_before_summary: List[ContextRecord]) -> None: - state = getattr(self, "_summary_state", None) - if state is None: - return - - state["depth"] = state.get("depth", 0) + 1 - state["last_summary_log_length"] = len(records_before_summary) - state["record_count_since_summary"] = 0 - state["user_count_since_summary"] = 0 - state["last_summary_user_signature"] = ( - self._extract_last_user_signature(records_before_summary) - ) - - @staticmethod - def _extract_last_user_signature( - records: List[ContextRecord], - ) -> Optional[str]: - for context_record in reversed(records): - memory_record = context_record.memory_record - if memory_record.role_at_backend == OpenAIBackendRole.USER: - content = getattr(memory_record.message, "content", None) - if isinstance(content, str): - return content - return None - return None - def _get_external_tool_names(self) -> Set[str]: r"""Returns a set of external tool names.""" return set(self._external_tool_schemas.keys()) @@ -1207,7 +1173,6 @@ def update_memory( agent_id=self.agent_id, ) self.memory.write_record(record) - self._register_record_addition(role) def load_memory(self, memory: AgentMemory) -> None: r"""Load the provided memory into the agent. 
@@ -1459,8 +1424,9 @@ def summarize( else: prompt_text = ( "Summarize the context information in concise markdown " - "bullet points highlighting key decisions, action items.\n" - f"Context information:\n{conversation_text}" + "bullet points highlighting key decisions, action items, " + "user's intent.\n\nContext information:\n" + f"{conversation_text}" ) try: @@ -1545,7 +1511,10 @@ def summarize( file_path = ( context_util.get_working_directory() / f"{base_filename}.md" ) - + summary_content = ( + f"[CONTEXT_SUMMARY] The following is a summary of our " + f"conversation from a previous session: {summary_content}" + ) # Prepare result dictionary result_dict = { "summary": summary_content, @@ -1555,16 +1524,6 @@ def summarize( } result.update(result_dict) - - # Add prefix to summary content in result - if self.summarize_threshold is not None: - summary_with_prefix = ( - f"[CONTEXT_SUMMARY] The following is a summary of our " - f"conversation from a previous session: {summary_content}" - ) - result["summary_with_prefix"] = summary_with_prefix - result["include_summaries"] = include_summaries - logger.info("Conversation summary saved to %s", file_path) return result @@ -1737,8 +1696,9 @@ async def asummarize( else: prompt_text = ( "Summarize the context information in concise markdown " - "bullet points highlighting key decisions, action items.\n" - f"Context information:\n{conversation_text}" + "bullet points highlighting key decisions, action items, " + "user's intent.\n\nContext information:\n" + f"{conversation_text}" ) try: @@ -1833,6 +1793,11 @@ async def asummarize( context_util.get_working_directory() / f"{base_filename}.md" ) + summary_content = ( + f"[CONTEXT_SUMMARY] The following is a summary of our " + f"conversation from a previous session: {summary_content}" + ) + # Prepare result dictionary result_dict = { "summary": summary_content, @@ -1842,13 +1807,6 @@ async def asummarize( } result.update(result_dict) - - # Add prefix to summary content in result - if self.summarize_threshold is not None: - summary_with_prefix = f"[CONTEXT_SUMMARY] {summary_content}" - result["summary_with_prefix"] = summary_with_prefix - result["include_summaries"] = include_summaries - logger.info("Conversation summary saved to %s", file_path) return result @@ -2335,22 +2293,23 @@ def _step_impl( openai_messages, num_tokens = self.memory.get_context() if self.summarize_threshold is not None: threshold = self._calculate_next_summary_threshold() - summary_token_count = self._summary_state.get( - "summary_token_count", 0 - ) + summary_token_count = self._summary_token_count token_limit = self.model_backend.token_limit if num_tokens <= token_limit: - if summary_token_count > token_limit * self.summary_window_ratio: + if ( + summary_token_count + > token_limit * self.summary_window_ratio + ): logger.info( f"Summary tokens ({summary_token_count}) " - f"exceed {self.summary_window_ratio} of " - f"limit ({token_limit * self.summary_window_ratio}). " - f"Performing full compression." + f"exceed, full compression." 
) # Summarize everything (including summaries) summary = self.summarize(include_summaries=True) - self._update_memory_with_summary(summary) + self._update_memory_with_summary( + summary, include_summaries=True + ) elif num_tokens > threshold: logger.info( f"Token count ({num_tokens}) exceed threshold " @@ -2358,7 +2317,9 @@ def _step_impl( ) # Only summarize non-summary content summary = self.summarize(include_summaries=False) - self._update_memory_with_summary(summary) + self._update_memory_with_summary( + summary, include_summaries=False + ) accumulated_context_tokens += num_tokens except RuntimeError as e: return self._step_terminate( @@ -2396,15 +2357,20 @@ def _step_impl( repeated_msg += f" {description}" raise RuntimeError(repeated_msg) from exc - allowed, reason = self._can_attempt_summary() - if not allowed: - error_message = ( - "Context limit exceeded and further summarization " - "is not possible." - ) - if reason: - error_message += f" {reason}" - raise RuntimeError(error_message) from exc + user_message_count = sum( + 1 + for msg in openai_messages + if getattr(msg, "role", None) == "user" + ) + if ( + user_message_count == 1 + and getattr(openai_messages[-1], "role", None) + == "user" + ): + raise RuntimeError( + "The provided user input alone exceeds the " + "context window. Please shorten the input." + ) from exc logger.warning( "Token limit exceeded error detected. " @@ -2425,39 +2391,26 @@ def _step_impl( self.memory.remove_records_by_indices(indices_to_remove) summary = self.summarize() - if isinstance(input_message, BaseMessage): - input_message = input_message.content - tool_notice = self._format_tool_limit_notice() - summary_messages = ( - "Due to token limit being exceeded in this request, " - "the conversation has been summarized." - "the content after [Input Message] is the summary " - "of [Input Message]" - + "" - + str(input_message) - + "" - + "\n\n" - "[Context Summary]\n\n" - + summary.get("summary", "") - + "\n\n[Summary status]\n\n" - + str(summary.get("status", "")) - + "\n\nThe following is the user's original input. " - "Please continue based on existing information." - ) + summary_messages = summary.get("summary", "") + if tool_notice: summary_messages += "\n\n" + tool_notice + help_message = ( + "Please continue the conversation from " + "where we left it off without asking the user any " + "further questions. Continue with the last task " + "that you were asked to work on." 
+ ) + summary_messages += "\n\n" + help_message self.clear_memory() summary_messages = BaseMessage.make_assistant_message( - role_name="Assistant", content=summary_messages + role_name="assistant", content=summary_messages ) self.update_memory( summary_messages, OpenAIBackendRole.ASSISTANT ) - self._on_summary(recent_records) - self._last_token_limit_tool_signature = tool_signature - return self._step_impl(input_message, response_format) raise @@ -2669,24 +2622,25 @@ async def _astep_non_streaming_task( openai_messages, num_tokens = self.memory.get_context() if self.summarize_threshold is not None: threshold = self._calculate_next_summary_threshold() - summary_token_count = self._summary_state.get( - "summary_token_count", 0 - ) + summary_token_count = self._summary_token_count token_limit = self.model_backend.token_limit if num_tokens <= token_limit: - if summary_token_count > token_limit * self.summary_window_ratio: + if ( + summary_token_count + > token_limit * self.summary_window_ratio + ): logger.info( f"Summary tokens ({summary_token_count}) " - f"exceed {self.summary_window_ratio} of " - f"limit ({token_limit * self.summary_window_ratio}). " - f"Full compression." + f"exceed, full compression." ) # Summarize everything (including summaries) summary = await self.asummarize( include_summaries=True ) - self._update_memory_with_summary(summary) + self._update_memory_with_summary( + summary, include_summaries=True + ) elif num_tokens > threshold: logger.info( f"Token count ({num_tokens}) exceed threshold " @@ -2696,7 +2650,9 @@ async def _astep_non_streaming_task( summary = await self.asummarize( include_summaries=False ) - self._update_memory_with_summary(summary) + self._update_memory_with_summary( + summary, include_summaries=False + ) accumulated_context_tokens += num_tokens except RuntimeError as e: return self._step_terminate( @@ -2734,15 +2690,20 @@ async def _astep_non_streaming_task( repeated_msg += f" {description}" raise RuntimeError(repeated_msg) from exc - allowed, reason = self._can_attempt_summary() - if not allowed: - error_message = ( - "Context limit exceeded and further summarization " - "is not possible." - ) - if reason: - error_message += f" {reason}" - raise RuntimeError(error_message) from exc + user_message_count = sum( + 1 + for msg in openai_messages + if getattr(msg, "role", None) == "user" + ) + if ( + user_message_count == 1 + and getattr(openai_messages[-1], "role", None) + == "user" + ): + raise RuntimeError( + "The provided user input alone exceeds the" + "context window. Please shorten the input." + ) from exc logger.warning( "Token limit exceeded error detected. " @@ -2763,39 +2724,27 @@ async def _astep_non_streaming_task( self.memory.remove_records_by_indices(indices_to_remove) summary = await self.asummarize() - if isinstance(input_message, BaseMessage): - input_message = input_message.content tool_notice = self._format_tool_limit_notice() - summary_messages = ( - "Due to token limit being exceeded in this request, " - "the conversation has been summarized." - "the content after [Input Message] is the summary" - + "" - + str(input_message) - + "" - + "\n\n" - "[Context Summary]\n\n" - + summary.get("summary", "") - + "\n\n[Summary status]\n\n" - + str(summary.get("status", "")) - + "\n\nThe following is the user's original input. " - "Please continue based on existing information." 
- ) + summary_messages = summary.get("summary", "") if tool_notice: summary_messages += "\n\n" + tool_notice + help_message = ( + "Please continue the conversation from " + "where we left it off without asking the user any " + "further questions. Continue with the last task " + "that you were asked to work on." + ) + summary_messages += "\n\n" + help_message self.clear_memory() summary_messages = BaseMessage.make_assistant_message( - role_name="Assistant", content=summary_messages + role_name="assistant", content=summary_messages ) self.update_memory( summary_messages, OpenAIBackendRole.ASSISTANT ) - self._on_summary(recent_records) - self._last_token_limit_tool_signature = tool_signature - return await self._astep_non_streaming_task( input_message, response_format ) @@ -2878,6 +2827,8 @@ async def _astep_non_streaming_task( if self.prune_tool_calls_from_memory and tool_call_records: self.memory.clean_tool_calls() + self._last_token_limit_user_signature = None + return self._convert_to_chatagent_response( response, tool_call_records, @@ -3548,6 +3499,7 @@ def _record_tool_calling( tool_call_id=tool_call_id, ) + self._update_last_tool_call_state(tool_record) return tool_record def _stream( @@ -4109,12 +4061,14 @@ def _execute_tool_from_stream_data( timestamp=base_timestamp + 1e-6, ) - return ToolCallingRecord( + tool_record = ToolCallingRecord( tool_name=function_name, args=args, result=result, tool_call_id=tool_call_id, ) + self._update_last_tool_call_state(tool_record) + return tool_record except Exception as e: error_msg = ( @@ -4136,12 +4090,14 @@ def _execute_tool_from_stream_data( self.update_memory(func_msg, OpenAIBackendRole.FUNCTION) - return ToolCallingRecord( + tool_record = ToolCallingRecord( tool_name=function_name, args=args, result=result, tool_call_id=tool_call_id, ) + self._update_last_tool_call_state(tool_record) + return tool_record else: logger.warning( f"Tool '{function_name}' not found in internal tools" @@ -4232,12 +4188,14 @@ async def _aexecute_tool_from_stream_data( timestamp=base_timestamp + 1e-6, ) - return ToolCallingRecord( + tool_record = ToolCallingRecord( tool_name=function_name, args=args, result=result, tool_call_id=tool_call_id, ) + self._update_last_tool_call_state(tool_record) + return tool_record except Exception as e: error_msg = ( @@ -4259,12 +4217,14 @@ async def _aexecute_tool_from_stream_data( self.update_memory(func_msg, OpenAIBackendRole.FUNCTION) - return ToolCallingRecord( + tool_record = ToolCallingRecord( tool_name=function_name, args=args, result=result, tool_call_id=tool_call_id, ) + self._update_last_tool_call_state(tool_record) + return tool_record else: logger.warning( f"Tool '{function_name}' not found in internal tools" From 331ae1adb4bbaaa313ec3a7af187414de7ec5c94 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 28 Oct 2025 22:37:11 +0800 Subject: [PATCH 23/27] update --- camel/agents/chat_agent.py | 2 +- examples/summarization/handle_token_limit.py | 70 ++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 examples/summarization/handle_token_limit.py diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index be22a24c8b..a8cd45f55f 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -1029,7 +1029,7 @@ def _update_memory_with_summary( on whether it's a progressive or full compression. 
""" - summary_content: str = summary.get("summary_content", "") + summary_content: str = summary.get("summary", "") existing_summaries = [] if not include_summaries: diff --git a/examples/summarization/handle_token_limit.py b/examples/summarization/handle_token_limit.py new file mode 100644 index 0000000000..ca5292729d --- /dev/null +++ b/examples/summarization/handle_token_limit.py @@ -0,0 +1,70 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + +import os + +from camel.agents import ChatAgent +from camel.models import ModelFactory +from camel.toolkits import TerminalToolkit +from camel.types import ModelPlatformType, ModelType + +# Get current script directory +base_dir = os.path.dirname(os.path.abspath(__file__)) +# Define workspace directory for the toolkit +workspace_dir = os.path.join( + os.path.dirname(os.path.dirname(base_dir)), "workspace" +) + +# Define system message +sys_msg = ( + "You are a System Administrator helping with log management tasks. " + "You have access to terminal tools that can help you execute " + "shell commands and search files. " +) + +# Set model config +tools = TerminalToolkit(working_directory=workspace_dir).get_tools() + + +model = ModelFactory.create( + model_platform=ModelPlatformType.OPENAI, + model_type=ModelType.GPT_3_5_TURBO, +) + +# Set agent +camel_agent = ChatAgent( + system_message=sys_msg, + model=model, + tools=tools, + summarize_threshold=1, + summary_window_ratio=0.02 +) +camel_agent.reset() + +# Define a user message for creating logs directory +usr_msg = ( + f"Create a 'logs' directory in '{workspace_dir}' and list its contents" +) + +# simulate a long conversation +for i in range(20): + response = camel_agent.step(usr_msg) + print(camel_agent._summary_token_count) + +#handle a large file +usr_msg = ( + f"../../uv.lock,read this file" +) + +response = camel_agent.step(usr_msg) \ No newline at end of file From 874ab7b4dbe5d7f18a73e42392c1490736b42f01 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Tue, 28 Oct 2025 22:38:01 +0800 Subject: [PATCH 24/27] Update handle_token_limit.py --- examples/summarization/handle_token_limit.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/summarization/handle_token_limit.py b/examples/summarization/handle_token_limit.py index ca5292729d..9f810de855 100644 --- a/examples/summarization/handle_token_limit.py +++ b/examples/summarization/handle_token_limit.py @@ -48,7 +48,7 @@ model=model, tools=tools, summarize_threshold=1, - summary_window_ratio=0.02 + summary_window_ratio=0.02, ) camel_agent.reset() @@ -58,13 +58,11 @@ ) # simulate a long conversation -for i in range(20): +for _i in range(20): response = camel_agent.step(usr_msg) print(camel_agent._summary_token_count) -#handle a large file -usr_msg = ( - f"../../uv.lock,read this file" -) +# handle a large file +usr_msg = "../../uv.lock,read 
this file" -response = camel_agent.step(usr_msg) \ No newline at end of file +response = camel_agent.step(usr_msg) From b02cae98265a07ce12db6b09c4eb7be3faa4553a Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Wed, 29 Oct 2025 14:38:43 +0800 Subject: [PATCH 25/27] update --- camel/agents/_utils.py | 38 +++++++++++ camel/agents/chat_agent.py | 128 ++++++++++++++++++++++--------------- 2 files changed, 114 insertions(+), 52 deletions(-) diff --git a/camel/agents/_utils.py b/camel/agents/_utils.py index edae576813..415ab0a0ee 100644 --- a/camel/agents/_utils.py +++ b/camel/agents/_utils.py @@ -25,6 +25,44 @@ logger = logging.getLogger(__name__) +def build_default_summary_prompt(conversation_text: str) -> str: + r"""Create the default prompt used for conversation summarization. + + Args: + conversation_text (str): The conversation to be summarized. + + Returns: + str: A formatted prompt instructing the model to produce a structured + markdown summary. + """ + template = textwrap.dedent( + """\ + Summarize the conversation below. + Produce markdown that strictly follows this outline and numbering: + + Summary: + 1. **Primary Request and Intent**: + 2. **Key Concepts**: + 3. **Errors and Fixes**: + 4. **Problem Solving**: + 5. **Pending Tasks**: + 6. **Current Work**: + 7. **Optional Next Step**: + + Requirements: + - Use bullet lists under each section (`- item`). If a section has no + information, output `- None noted`. + - Keep the ordering, headings, and formatting as written above. + - Focus on concrete actions, findings, and decisions. + - Do not invent details that are not supported by the conversation. + + Conversation: + {conversation_text} + """ + ) + return template.format(conversation_text=conversation_text) + + def generate_tool_prompt(tool_schema_list: List[Dict[str, Any]]) -> str: r"""Generates a tool prompt based on the provided tool schema list. 
diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index ab445d6ecc..da0a821e65 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -56,6 +56,7 @@ from camel.agents._types import ModelResponse, ToolCallRequest from camel.agents._utils import ( + build_default_summary_prompt, convert_to_function_tool, convert_to_schema, get_info_dict, @@ -981,6 +982,36 @@ def _format_tool_limit_notice(self) -> Optional[str]: return "\n".join(notice_lines) + @staticmethod + def _append_user_messages_section( + summary_content: str, user_messages: List[str] + ) -> str: + section_title = "- **All User Messages**:" + sanitized_messages: List[str] = [] + for msg in user_messages: + if not isinstance(msg, str): + msg = str(msg) + cleaned = " ".join(msg.strip().splitlines()) + if cleaned: + sanitized_messages.append(cleaned) + + bullet_block = ( + "\n".join(f"- {m}" for m in sanitized_messages) + if sanitized_messages + else "- None noted" + ) + user_section = f"{section_title}\n{bullet_block}" + + pattern_existing = re.compile( + r"(?:\n\n)?- \*\*All User Messages\*\*:" + r"(?:\n- .*)*(?=\n\n- \*\*|\Z)", + re.DOTALL, + ) + summary_clean = pattern_existing.sub("", summary_content).rstrip() + + separator = "\n\n" if summary_clean else "" + return f"{summary_clean}{separator}{user_section}" + def _reset_summary_state(self) -> None: self._summary_token_count = 0 # Total tokens in summary messages @@ -1019,7 +1050,7 @@ def _calculate_next_summary_threshold(self) -> int: return threshold def _update_memory_with_summary( - self, summary: Dict[str, Any], include_summaries: bool = False + self, summary: str, include_summaries: bool = False ) -> None: r"""Update memory with summary result. @@ -1027,7 +1058,7 @@ def _update_memory_with_summary( on whether it's a progressive or full compression. """ - summary_content: str = summary.get("summary", "") + summary_content: str = summary existing_summaries = [] if not include_summaries: @@ -1047,18 +1078,18 @@ def _update_memory_with_summary( content = old_summary.get('content', '') if not isinstance(content, str): content = str(content) - summary_msg = BaseMessage.make_user_message( + summary_msg = BaseMessage.make_assistant_message( role_name="assistant", content=content ) self.update_memory(summary_msg, OpenAIBackendRole.ASSISTANT) # Add new summary - new_summary_msg = BaseMessage.make_user_message( + new_summary_msg = BaseMessage.make_assistant_message( role_name="assistant", content=summary_content ) self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT) - input_message = BaseMessage.make_user_message( - role_name="user", + input_message = BaseMessage.make_assistant_message( + role_name="assistant", content=( "Please continue the conversation from " "where we left it off without asking the user any further " @@ -1066,7 +1097,7 @@ def _update_memory_with_summary( "asked to work on." ), ) - self.update_memory(input_message, OpenAIBackendRole.USER) + self.update_memory(input_message, OpenAIBackendRole.ASSISTANT) # Update token count try: summary_tokens = ( @@ -1256,6 +1287,7 @@ def summarize( response_format: Optional[Type[BaseModel]] = None, working_directory: Optional[Union[str, Path]] = None, include_summaries: bool = False, + add_user_messages: bool = True, ) -> Dict[str, Any]: r"""Summarize the agent's current conversation context and persist it to a markdown file. @@ -1283,7 +1315,8 @@ def summarize( working_directory (Optional[str|Path]): Optional directory to save the markdown summary file. 
If provided, overrides the default directory used by ContextUtility. - + add_user_messages (bool): Whether add user messages to summary. + (default: :obj:`True`) Returns: Dict[str, Any]: A dictionary containing the summary text, file path, status message, and optionally structured_summary if @@ -1329,6 +1362,7 @@ def summarize( # Convert messages to conversation text conversation_lines = [] + user_messages: List[str] = [] for message in messages: role = message.get('role', 'unknown') content = message.get('content', '') @@ -1390,6 +1424,9 @@ def summarize( # Handle regular content messages (user/assistant/system) elif content: + content = str(content) + if role == 'user': + user_messages.append(content) conversation_lines.append(f"{role}: {content}") conversation_text = "\n".join(conversation_lines).strip() @@ -1420,12 +1457,7 @@ def summarize( f"{conversation_text}" ) else: - prompt_text = ( - "Summarize the context information in concise markdown " - "bullet points highlighting key decisions, action items, " - "user's intent.\n\nContext information:\n" - f"{conversation_text}" - ) + prompt_text = build_default_summary_prompt(conversation_text) try: # Use structured output if response_format is provided @@ -1495,6 +1527,10 @@ def summarize( summary_content = context_util.structured_output_to_markdown( structured_data=structured_output, metadata=metadata ) + if add_user_messages: + summary_content = self._append_user_messages_section( + summary_content, user_messages + ) # Save the markdown (either custom structured or default) save_status = context_util.save_markdown_file( @@ -1538,6 +1574,7 @@ async def asummarize( response_format: Optional[Type[BaseModel]] = None, working_directory: Optional[Union[str, Path]] = None, include_summaries: bool = False, + add_user_messages: bool = True, ) -> Dict[str, Any]: r"""Asynchronously summarize the agent's current conversation context and persist it to a markdown file. @@ -1565,7 +1602,8 @@ async def asummarize( only non-summary messages will be summarized. If True, all messages including previous summaries will be summarized (full compression). (default: :obj:`False`) - + add_user_messages (bool): Whether add user messages to summary. 
+ (default: :obj:`True`) Returns: Dict[str, Any]: A dictionary containing the summary text, file path, status message, and optionally structured_summary if @@ -1601,6 +1639,7 @@ async def asummarize( # Convert messages to conversation text conversation_lines = [] + user_messages: List[str] = [] for message in messages: role = message.get('role', 'unknown') content = message.get('content', '') @@ -1662,6 +1701,9 @@ async def asummarize( # Handle regular content messages (user/assistant/system) elif content: + content = str(content) + if role == 'user': + user_messages.append(content) conversation_lines.append(f"{role}: {content}") conversation_text = "\n".join(conversation_lines).strip() @@ -1692,12 +1734,7 @@ async def asummarize( f"{conversation_text}" ) else: - prompt_text = ( - "Summarize the context information in concise markdown " - "bullet points highlighting key decisions, action items, " - "user's intent.\n\nContext information:\n" - f"{conversation_text}" - ) + prompt_text = build_default_summary_prompt(conversation_text) try: # Use structured output if response_format is provided @@ -1776,6 +1813,10 @@ async def asummarize( summary_content = context_util.structured_output_to_markdown( structured_data=structured_output, metadata=metadata ) + if add_user_messages: + summary_content = self._append_user_messages_section( + summary_content, user_messages + ) # Save the markdown (either custom structured or default) save_status = context_util.save_markdown_file( @@ -2359,7 +2400,8 @@ def _step_impl( # Summarize everything (including summaries) summary = self.summarize(include_summaries=True) self._update_memory_with_summary( - summary, include_summaries=True + summary.get("summary", ""), + include_summaries=True, ) elif num_tokens > threshold: logger.info( @@ -2369,7 +2411,8 @@ def _step_impl( # Only summarize non-summary content summary = self.summarize(include_summaries=False) self._update_memory_with_summary( - summary, include_summaries=False + summary.get("summary", ""), + include_summaries=False, ) accumulated_context_tokens += num_tokens except RuntimeError as e: @@ -2441,25 +2484,15 @@ def _step_impl( ) self.memory.remove_records_by_indices(indices_to_remove) - summary = self.summarize() + summary = self.summarize(include_summaries=False) tool_notice = self._format_tool_limit_notice() summary_messages = summary.get("summary", "") if tool_notice: summary_messages += "\n\n" + tool_notice - help_message = ( - "Please continue the conversation from " - "where we left it off without asking the user any " - "further questions. Continue with the last task " - "that you were asked to work on." 
- ) - summary_messages += "\n\n" + help_message - self.clear_memory() - summary_messages = BaseMessage.make_assistant_message( - role_name="assistant", content=summary_messages - ) - self.update_memory( - summary_messages, OpenAIBackendRole.ASSISTANT + + self._update_memory_with_summary( + summary_messages, include_summaries=False ) self._last_token_limit_tool_signature = tool_signature return self._step_impl(input_message, response_format) @@ -2690,7 +2723,8 @@ async def _astep_non_streaming_task( include_summaries=True ) self._update_memory_with_summary( - summary, include_summaries=True + summary.get("summary", ""), + include_summaries=True, ) elif num_tokens > threshold: logger.info( @@ -2702,7 +2736,8 @@ async def _astep_non_streaming_task( include_summaries=False ) self._update_memory_with_summary( - summary, include_summaries=False + summary.get("summary", ""), + include_summaries=False, ) accumulated_context_tokens += num_tokens except RuntimeError as e: @@ -2781,19 +2816,8 @@ async def _astep_non_streaming_task( if tool_notice: summary_messages += "\n\n" + tool_notice - help_message = ( - "Please continue the conversation from " - "where we left it off without asking the user any " - "further questions. Continue with the last task " - "that you were asked to work on." - ) - summary_messages += "\n\n" + help_message - self.clear_memory() - summary_messages = BaseMessage.make_assistant_message( - role_name="assistant", content=summary_messages - ) - self.update_memory( - summary_messages, OpenAIBackendRole.ASSISTANT + self._update_memory_with_summary( + summary_messages, include_summaries=False ) self._last_token_limit_tool_signature = tool_signature return await self._astep_non_streaming_task( From 44451e006ec26ab1a3891fb0bda63c01964219ff Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Wed, 29 Oct 2025 14:47:34 +0800 Subject: [PATCH 26/27] Update chat_agent.py --- camel/agents/chat_agent.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index da0a821e65..ff7ddf326e 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -1002,13 +1002,7 @@ def _append_user_messages_section( ) user_section = f"{section_title}\n{bullet_block}" - pattern_existing = re.compile( - r"(?:\n\n)?- \*\*All User Messages\*\*:" - r"(?:\n- .*)*(?=\n\n- \*\*|\Z)", - re.DOTALL, - ) - summary_clean = pattern_existing.sub("", summary_content).rstrip() - + summary_clean = summary_content.rstrip() separator = "\n\n" if summary_clean else "" return f"{summary_clean}{separator}{user_section}" From e054d1994d7c6f782ba3d2a3912f33bae7bbaec6 Mon Sep 17 00:00:00 2001 From: Sun Tao <2605127667@qq.com> Date: Thu, 30 Oct 2025 13:45:48 +0800 Subject: [PATCH 27/27] Update chat_agent.py --- camel/agents/chat_agent.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index ff7ddf326e..132979ada4 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -451,7 +451,7 @@ def __init__( ] = None, memory: Optional[AgentMemory] = None, message_window_size: Optional[int] = None, - summarize_threshold: Optional[int] = None, + summarize_threshold: Optional[int] = 50, token_limit: Optional[int] = None, output_language: Optional[str] = None, tools: Optional[List[Union[FunctionTool, Callable]]] = None, @@ -525,6 +525,11 @@ def __init__( f"summarize_threshold must be between 0 and 100, " f"got {summarize_threshold}" ) + 
logger.info( + f"Automatic context compression is enabled. Will trigger " + f"summarization when context window exceeds " + f"{summarize_threshold}% of the total token limit." + ) self.summarize_threshold = summarize_threshold self._reset_summary_state() @@ -1023,9 +1028,6 @@ def _calculate_next_summary_threshold(self) -> int: Returns: int: The token count threshold for next summarization. """ - if self.summarize_threshold is None: - return 0 - token_limit = self.model_backend.token_limit summary_token_count = self._summary_token_count @@ -2389,7 +2391,7 @@ def _step_impl( ): logger.info( f"Summary tokens ({summary_token_count}) " - f"exceed, full compression." + f"exceed limit, full compression." ) # Summarize everything (including summaries) summary = self.summarize(include_summaries=True) @@ -2710,7 +2712,7 @@ async def _astep_non_streaming_task( ): logger.info( f"Summary tokens ({summary_token_count}) " - f"exceed, full compression." + f"exceed limit, full compression." ) # Summarize everything (including summaries) summary = await self.asummarize(