From f2b97ac88488f9613409e4e1e9f2a7e8d6e83bcc Mon Sep 17 00:00:00 2001
From: Stephen Hu <812791840@qq.com>
Date: Mon, 20 Oct 2025 08:45:15 +0800
Subject: [PATCH] Improve how to get total token count for AnthropicCV

---
 rag/llm/cv_model.py   | 7 +++----
 rag/utils/__init__.py | 6 ++++++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 739374f34b0..6c591123be8 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -688,8 +688,7 @@ def chat_streamly(self, system, history, gen_conf, images=[], **kwargs):
         try:
             response = self._request(self._form_history(system, history, images), gen_conf)
             cnt = response["choices"][0]["message"]["content"]
-            if "usage" in response and "total_tokens" in response["usage"]:
-                total_tokens += total_token_count_from_response(response)
+            total_tokens += total_token_count_from_response(response)
             for resp in cnt:
                 yield resp
         except Exception as e:
@@ -738,7 +737,7 @@ def describe_with_prompt(self, image, prompt=None):
         prompt = self.prompt(b64, prompt if prompt else vision_llm_describe_prompt())

         response = self.client.messages.create(model=self.model_name, max_tokens=self.max_tokens, messages=prompt)
-        return response["content"][0]["text"].strip(), response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
+        return response["content"][0]["text"].strip(), total_token_count_from_response(response)

     def _clean_conf(self, gen_conf):
         if "presence_penalty" in gen_conf:
@@ -765,7 +764,7 @@ def chat(self, system, history, gen_conf, images=[]):
                 ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
             return (
                 ans,
-                response["usage"]["input_tokens"] + response["usage"]["output_tokens"],
+                total_token_count_from_response(response),
             )
         except Exception as e:
             return ans + "\n**ERROR**: " + str(e), 0
diff --git a/rag/utils/__init__.py b/rag/utils/__init__.py
index 798b5bf6049..1a35f1aeaaa 100644
--- a/rag/utils/__init__.py
+++ b/rag/utils/__init__.py
@@ -106,6 +106,12 @@ def total_token_count_from_response(resp):
             return resp["usage"]["total_tokens"]
         except Exception:
             pass
+
+    if "usage" in resp and "input_tokens" in resp["usage"] and "output_tokens" in resp["usage"]:
+        try:
+            return resp["usage"]["input_tokens"] + resp["usage"]["output_tokens"]
+        except Exception:
+            pass
     return 0
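
For reference, a minimal standalone sketch of the fallback behavior this patch adds to
total_token_count_from_response, assuming resp is a plain dict; the usage shapes below only
mirror the hunks above, and the full helper in rag/utils/__init__.py may handle response
shapes not shown in this patch:

    def total_token_count_from_response(resp):
        # Preferred path: providers that report a pre-computed total (OpenAI-style usage).
        if "usage" in resp and "total_tokens" in resp["usage"]:
            try:
                return resp["usage"]["total_tokens"]
            except Exception:
                pass

        # Fallback added by this patch: sum input/output counts (Anthropic-style usage).
        if "usage" in resp and "input_tokens" in resp["usage"] and "output_tokens" in resp["usage"]:
            try:
                return resp["usage"]["input_tokens"] + resp["usage"]["output_tokens"]
            except Exception:
                pass
        return 0

    print(total_token_count_from_response({"usage": {"total_tokens": 42}}))                       # 42
    print(total_token_count_from_response({"usage": {"input_tokens": 30, "output_tokens": 12}}))  # 42
    print(total_token_count_from_response({}))                                                     # 0

With this fallback in place, the AnthropicCV call sites no longer need to add
input_tokens and output_tokens by hand, and the streaming path no longer has to guard on
"total_tokens" before calling the helper.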