From 1e39f0470959ecd997d758f7ff57f61971e0b96c Mon Sep 17 00:00:00 2001 From: chirag gupta <103719146+chiruu12@users.noreply.github.com> Date: Wed, 26 Feb 2025 08:42:14 +0530 Subject: [PATCH 01/10] Adding support for async and streaming output mode --- .../hugegraph_llm/api/models/rag_requests.py | 4 +- .../src/hugegraph_llm/api/rag_api.py | 296 ++++++++++++++---- 2 files changed, 236 insertions(+), 64 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py index de47aa02..8def701d 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py +++ b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py @@ -43,6 +43,7 @@ class RAGRequest(BaseModel): prompt.gremlin_generate_prompt, description="Prompt for the Text2Gremlin query.", ) + stream: bool = Query(False, description="Enable streaming response") # TODO: import the default value of prompt.* dynamically @@ -58,6 +59,7 @@ class GraphRAGRequest(BaseModel): prompt.gremlin_generate_prompt, description="Prompt for the Text2Gremlin query.", ) + stream: bool = Query(False, description="Enable streaming response") class GraphConfigRequest(BaseModel): @@ -94,4 +96,4 @@ class RerankerConfigRequest(BaseModel): class LogStreamRequest(BaseModel): admin_token: Optional[str] = None - log_file: Optional[str] = "llm-server.log" + log_file: Optional[str] = "llm-server.log" \ No newline at end of file diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index d851fd12..9e683f08 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -16,8 +16,11 @@ # under the License. import json +import asyncio +from typing import AsyncGenerator from fastapi import status, APIRouter, HTTPException +from fastapi.responses import StreamingResponse from hugegraph_llm.api.exceptions.rag_exceptions import generate_response from hugegraph_llm.api.models.rag_requests import ( @@ -33,76 +36,243 @@ def rag_http_api( - router: APIRouter, - rag_answer_func, - graph_rag_recall_func, - apply_graph_conf, - apply_llm_conf, - apply_embedding_conf, - apply_reranker_conf, + router: APIRouter, + rag_answer_func, + graph_rag_recall_func, + apply_graph_conf, + apply_llm_conf, + apply_embedding_conf, + apply_reranker_conf, + rag_answer_stream_func=None, + graph_rag_recall_stream_func=None, ): - @router.post("/rag", status_code=status.HTTP_200_OK) - def rag_answer_api(req: RAGRequest): - result = rag_answer_func( - text=req.query, - raw_answer=req.raw_answer, - vector_only_answer=req.vector_only, - graph_only_answer=req.graph_only, - graph_vector_answer=req.graph_vector_answer, - graph_ratio=req.graph_ratio, - rerank_method=req.rerank_method, - near_neighbor_first=req.near_neighbor_first, - custom_related_information=req.custom_priority_info, - answer_prompt=req.answer_prompt or prompt.answer_prompt, - keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt, - gremlin_tmpl_num=req.gremlin_tmpl_num, - gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, - ) - # TODO: we need more info in the response for users to understand the query logic - return { - "query": req.query, - **{ - key: value - for key, value in zip(["raw_answer", "vector_only", "graph_only", "graph_vector_answer"], result) - if getattr(req, key) - }, - } + async def stream_rag_answer( + text, + raw_answer, + vector_only_answer, + graph_only_answer, + 
graph_vector_answer, + graph_ratio, + rerank_method, + near_neighbor_first, + custom_related_information, + answer_prompt, + keywords_extract_prompt, + gremlin_tmpl_num, + gremlin_prompt, + ) -> AsyncGenerator[str, None]: + """ + Stream the RAG answer results + """ + if rag_answer_stream_func: + # If a streaming-specific function exists, use it + async for chunk in rag_answer_stream_func( + text=text, + raw_answer=raw_answer, + vector_only_answer=vector_only_answer, + graph_only_answer=graph_only_answer, + graph_vector_answer=graph_vector_answer, + graph_ratio=graph_ratio, + rerank_method=rerank_method, + near_neighbor_first=near_neighbor_first, + custom_related_information=custom_related_information, + answer_prompt=answer_prompt, + keywords_extract_prompt=keywords_extract_prompt, + gremlin_tmpl_num=gremlin_tmpl_num, + gremlin_prompt=gremlin_prompt, + ): + yield f"data: {json.dumps({'chunk': chunk})}\n\n" + else: + # Otherwise, use the normal function but adapt it for streaming + # by sending the entire result at once + result = rag_answer_func( + text=text, + raw_answer=raw_answer, + vector_only_answer=vector_only_answer, + graph_only_answer=graph_only_answer, + graph_vector_answer=graph_vector_answer, + graph_ratio=graph_ratio, + rerank_method=rerank_method, + near_neighbor_first=near_neighbor_first, + custom_related_information=custom_related_information, + answer_prompt=answer_prompt, + keywords_extract_prompt=keywords_extract_prompt, + gremlin_tmpl_num=gremlin_tmpl_num, + gremlin_prompt=gremlin_prompt, + ) - @router.post("/rag/graph", status_code=status.HTTP_200_OK) - def graph_rag_recall_api(req: GraphRAGRequest): - try: - result = graph_rag_recall_func( - query=req.query, - gremlin_tmpl_num=req.gremlin_tmpl_num, + response_data = { + "query": text, + **{ + key: value + for key, value in zip(["raw_answer", "vector_only", "graph_only", "graph_vector_answer"], result) + if eval(key) # Convert string to boolean + }, + } + + yield f"data: {json.dumps(response_data)}\n\n" + # Signal end of stream + yield "data: [DONE]\n\n" + + async def stream_graph_rag_recall( + query, + gremlin_tmpl_num, + rerank_method, + near_neighbor_first, + custom_related_information, + gremlin_prompt, + ) -> AsyncGenerator[str, None]: + """ + Stream the graph RAG recall results + """ + if graph_rag_recall_stream_func: + # If a streaming-specific function exists, use it + async for chunk in graph_rag_recall_stream_func( + query=query, + gremlin_tmpl_num=gremlin_tmpl_num, + rerank_method=rerank_method, + near_neighbor_first=near_neighbor_first, + custom_related_information=custom_related_information, + gremlin_prompt=gremlin_prompt, + ): + yield f"data: {json.dumps({'chunk': chunk})}\n\n" + else: + # Otherwise, use the normal function but adapt it for streaming + try: + result = graph_rag_recall_func( + query=query, + gremlin_tmpl_num=gremlin_tmpl_num, + rerank_method=rerank_method, + near_neighbor_first=near_neighbor_first, + custom_related_information=custom_related_information, + gremlin_prompt=gremlin_prompt, + ) + + if isinstance(result, dict): + params = [ + "query", + "keywords", + "match_vids", + "graph_result_flag", + "gremlin", + "graph_result", + "vertex_degree_list", + ] + user_result = {key: result[key] for key in params if key in result} + yield f"data: {json.dumps({'graph_recall': user_result})}\n\n" + else: + # Note: Maybe only for qianfan/wenxin + yield f"data: {json.dumps({'graph_recall': json.dumps(result)})}\n\n" + + # Signal end of stream + yield "data: [DONE]\n\n" + + except TypeError as e: + 
log.error("TypeError in stream_graph_rag_recall: %s", e) + yield f"data: {json.dumps({'error': str(e), 'status': 400})}\n\n" + except Exception as e: + log.error("Unexpected error occurred: %s", e) + yield f"data: {json.dumps({'error': 'An unexpected error occurred.', 'status': 500})}\n\n" + + @router.post("/rag", status_code=status.HTTP_200_OK) + async def rag_answer_api(req: RAGRequest): + if req.stream: + # Return a streaming response + return StreamingResponse( + stream_rag_answer( + text=req.query, + raw_answer=req.raw_answer, + vector_only_answer=req.vector_only, + graph_only_answer=req.graph_only, + graph_vector_answer=req.graph_vector_answer, + graph_ratio=req.graph_ratio, + rerank_method=req.rerank_method, + near_neighbor_first=req.near_neighbor_first, + custom_related_information=req.custom_priority_info, + answer_prompt=req.answer_prompt or prompt.answer_prompt, + keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt, + gremlin_tmpl_num=req.gremlin_tmpl_num, + gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, + ), + media_type="text/event-stream", + ) + else: + # Synchronous response (original behavior) + result = rag_answer_func( + text=req.query, + raw_answer=req.raw_answer, + vector_only_answer=req.vector_only, + graph_only_answer=req.graph_only, + graph_vector_answer=req.graph_vector_answer, + graph_ratio=req.graph_ratio, rerank_method=req.rerank_method, near_neighbor_first=req.near_neighbor_first, custom_related_information=req.custom_priority_info, + answer_prompt=req.answer_prompt or prompt.answer_prompt, + keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt, + gremlin_tmpl_num=req.gremlin_tmpl_num, gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, ) + # TODO: we need more info in the response for users to understand the query logic + return { + "query": req.query, + **{ + key: value + for key, value in zip(["raw_answer", "vector_only", "graph_only", "graph_vector_answer"], result) + if getattr(req, key) + }, + } - if isinstance(result, dict): - params = [ - "query", - "keywords", - "match_vids", - "graph_result_flag", - "gremlin", - "graph_result", - "vertex_degree_list", - ] - user_result = {key: result[key] for key in params if key in result} - return {"graph_recall": user_result} - # Note: Maybe only for qianfan/wenxin - return {"graph_recall": json.dumps(result)} - - except TypeError as e: - log.error("TypeError in graph_rag_recall_api: %s", e) - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e - except Exception as e: - log.error("Unexpected error occurred: %s", e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred." 
- ) from e + @router.post("/rag/graph", status_code=status.HTTP_200_OK) + async def graph_rag_recall_api(req: GraphRAGRequest): + if req.stream: + # Return a streaming response + return StreamingResponse( + stream_graph_rag_recall( + query=req.query, + gremlin_tmpl_num=req.gremlin_tmpl_num, + rerank_method=req.rerank_method, + near_neighbor_first=req.near_neighbor_first, + custom_related_information=req.custom_priority_info, + gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, + ), + media_type="text/event-stream", + ) + else: + # Synchronous response (original behavior) + try: + result = graph_rag_recall_func( + query=req.query, + gremlin_tmpl_num=req.gremlin_tmpl_num, + rerank_method=req.rerank_method, + near_neighbor_first=req.near_neighbor_first, + custom_related_information=req.custom_priority_info, + gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, + ) + + if isinstance(result, dict): + params = [ + "query", + "keywords", + "match_vids", + "graph_result_flag", + "gremlin", + "graph_result", + "vertex_degree_list", + ] + user_result = {key: result[key] for key in params if key in result} + return {"graph_recall": user_result} + # Note: Maybe only for qianfan/wenxin + return {"graph_recall": json.dumps(result)} + + except TypeError as e: + log.error("TypeError in graph_rag_recall_api: %s", e) + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e + except Exception as e: + log.error("Unexpected error occurred: %s", e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred." + ) from e @router.post("/config/graph", status_code=status.HTTP_201_CREATED) def graph_config_api(req: GraphConfigRequest): @@ -145,4 +315,4 @@ def rerank_config_api(req: RerankerConfigRequest): res = apply_reranker_conf(req.api_key, req.reranker_model, None, origin_call="http") else: res = status.HTTP_501_NOT_IMPLEMENTED - return generate_response(RAGResponse(status_code=res, message="Missing Value")) + return generate_response(RAGResponse(status_code=res, message="Missing Value")) \ No newline at end of file From 8c7dbafacc1992f98262db2603666db87576f054 Mon Sep 17 00:00:00 2001 From: imbajin Date: Thu, 27 Feb 2025 15:37:47 +0800 Subject: [PATCH 02/10] Update and rename hugegraph-python-client.yml to python-client.yml --- .../{hugegraph-python-client.yml => python-client.yml} | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) rename .github/workflows/{hugegraph-python-client.yml => python-client.yml} (96%) diff --git a/.github/workflows/hugegraph-python-client.yml b/.github/workflows/python-client.yml similarity index 96% rename from .github/workflows/hugegraph-python-client.yml rename to .github/workflows/python-client.yml index c0bdf5c9..dd8e03ab 100644 --- a/.github/workflows/hugegraph-python-client.yml +++ b/.github/workflows/python-client.yml @@ -20,7 +20,8 @@ jobs: - name: Prepare HugeGraph Server Environment run: | docker run -d --name=graph -p 8080:8080 -e PASSWORD=admin hugegraph/hugegraph:1.3.0 - sleep 1 + # wait server init-done (avoid some test error:) + sleep 5 - uses: actions/checkout@v4 From 8c9f0f8a9575b1533a4b8e49d56c292151668cce Mon Sep 17 00:00:00 2001 From: chirag gupta <103719146+chiruu12@users.noreply.github.com> Date: Thu, 27 Feb 2025 13:11:35 +0530 Subject: [PATCH 03/10] Adding support for async and streaming output mode --- .../src/hugegraph_llm/api/config_api.py | 76 ++++ .../src/hugegraph_llm/api/rag_api.py | 337 +++--------------- 
.../src/hugegraph_llm/api/stream_api.py | 134 +++++++ hugegraph-llm/src/tests/api/test_rag_api.py | 135 +++++++ 4 files changed, 403 insertions(+), 279 deletions(-) create mode 100644 hugegraph-llm/src/hugegraph_llm/api/config_api.py create mode 100644 hugegraph-llm/src/hugegraph_llm/api/stream_api.py create mode 100644 hugegraph-llm/src/tests/api/test_rag_api.py diff --git a/hugegraph-llm/src/hugegraph_llm/api/config_api.py b/hugegraph-llm/src/hugegraph_llm/api/config_api.py new file mode 100644 index 00000000..d6f08704 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/api/config_api.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from fastapi import status, APIRouter + +from hugegraph_llm.api.exceptions.rag_exceptions import generate_response +from hugegraph_llm.api.models.rag_requests import ( + GraphConfigRequest, + LLMConfigRequest, + RerankerConfigRequest, +) +from hugegraph_llm.api.models.rag_response import RAGResponse +from hugegraph_llm.config import llm_settings + + +async def config_http_api( + router: APIRouter, + apply_graph_conf, + apply_llm_conf, + apply_embedding_conf, + apply_reranker_conf, +): + @router.post("/config/graph", status_code=status.HTTP_201_CREATED) + async def graph_config_api(req: GraphConfigRequest): + res = await apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") + return generate_response(RAGResponse(status_code=res, message="Missing Value")) + + @router.post("/config/llm", status_code=status.HTTP_201_CREATED) + async def llm_config_api(req: LLMConfigRequest): + llm_settings.llm_type = req.llm_type + + if req.llm_type == "openai": + res = await apply_llm_conf(req.api_key, req.api_base, req.language_model, req.max_tokens, origin_call="http") + elif req.llm_type == "qianfan_wenxin": + res = await apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http") + else: + res = await apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") + return generate_response(RAGResponse(status_code=res, message="Missing Value")) + + @router.post("/config/embedding", status_code=status.HTTP_201_CREATED) + async def embedding_config_api(req: LLMConfigRequest): + llm_settings.embedding_type = req.llm_type + + if req.llm_type == "openai": + res = await apply_embedding_conf(req.api_key, req.api_base, req.language_model, origin_call="http") + elif req.llm_type == "qianfan_wenxin": + res = await apply_embedding_conf(req.api_key, req.api_base, None, origin_call="http") + else: + res = await apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http") + return generate_response(RAGResponse(status_code=res, message="Missing Value")) + + @router.post("/config/rerank", status_code=status.HTTP_201_CREATED) + async 
def rerank_config_api(req: RerankerConfigRequest): + llm_settings.reranker_type = req.reranker_type + + if req.reranker_type == "cohere": + res = await apply_reranker_conf(req.api_key, req.reranker_model, req.cohere_base_url, origin_call="http") + elif req.reranker_type == "siliconflow": + res = await apply_reranker_conf(req.api_key, req.reranker_model, None, origin_call="http") + else: + res = status.HTTP_501_NOT_IMPLEMENTED + return generate_response(RAGResponse(status_code=res, message="Missing Value")) \ No newline at end of file diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index 9e683f08..f878a37f 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -15,304 +15,83 @@ # specific language governing permissions and limitations # under the License. + import json -import asyncio -from typing import AsyncGenerator -from fastapi import status, APIRouter, HTTPException -from fastapi.responses import StreamingResponse -from hugegraph_llm.api.exceptions.rag_exceptions import generate_response +from fastapi import status, APIRouter, HTTPException from hugegraph_llm.api.models.rag_requests import ( RAGRequest, - GraphConfigRequest, - LLMConfigRequest, - RerankerConfigRequest, GraphRAGRequest, ) -from hugegraph_llm.api.models.rag_response import RAGResponse -from hugegraph_llm.config import llm_settings, prompt +from hugegraph_llm.config import prompt from hugegraph_llm.utils.log import log -def rag_http_api( +async def rag_http_api( router: APIRouter, rag_answer_func, graph_rag_recall_func, - apply_graph_conf, - apply_llm_conf, - apply_embedding_conf, - apply_reranker_conf, - rag_answer_stream_func=None, - graph_rag_recall_stream_func=None, ): - async def stream_rag_answer( - text, - raw_answer, - vector_only_answer, - graph_only_answer, - graph_vector_answer, - graph_ratio, - rerank_method, - near_neighbor_first, - custom_related_information, - answer_prompt, - keywords_extract_prompt, - gremlin_tmpl_num, - gremlin_prompt, - ) -> AsyncGenerator[str, None]: - """ - Stream the RAG answer results - """ - if rag_answer_stream_func: - # If a streaming-specific function exists, use it - async for chunk in rag_answer_stream_func( - text=text, - raw_answer=raw_answer, - vector_only_answer=vector_only_answer, - graph_only_answer=graph_only_answer, - graph_vector_answer=graph_vector_answer, - graph_ratio=graph_ratio, - rerank_method=rerank_method, - near_neighbor_first=near_neighbor_first, - custom_related_information=custom_related_information, - answer_prompt=answer_prompt, - keywords_extract_prompt=keywords_extract_prompt, - gremlin_tmpl_num=gremlin_tmpl_num, - gremlin_prompt=gremlin_prompt, - ): - yield f"data: {json.dumps({'chunk': chunk})}\n\n" - else: - # Otherwise, use the normal function but adapt it for streaming - # by sending the entire result at once - result = rag_answer_func( - text=text, - raw_answer=raw_answer, - vector_only_answer=vector_only_answer, - graph_only_answer=graph_only_answer, - graph_vector_answer=graph_vector_answer, - graph_ratio=graph_ratio, - rerank_method=rerank_method, - near_neighbor_first=near_neighbor_first, - custom_related_information=custom_related_information, - answer_prompt=answer_prompt, - keywords_extract_prompt=keywords_extract_prompt, - gremlin_tmpl_num=gremlin_tmpl_num, - gremlin_prompt=gremlin_prompt, - ) - - response_data = { - "query": text, - **{ - key: value - for key, value in zip(["raw_answer", "vector_only", "graph_only", 
"graph_vector_answer"], result) - if eval(key) # Convert string to boolean - }, - } - - yield f"data: {json.dumps(response_data)}\n\n" - # Signal end of stream - yield "data: [DONE]\n\n" - - async def stream_graph_rag_recall( - query, - gremlin_tmpl_num, - rerank_method, - near_neighbor_first, - custom_related_information, - gremlin_prompt, - ) -> AsyncGenerator[str, None]: - """ - Stream the graph RAG recall results - """ - if graph_rag_recall_stream_func: - # If a streaming-specific function exists, use it - async for chunk in graph_rag_recall_stream_func( - query=query, - gremlin_tmpl_num=gremlin_tmpl_num, - rerank_method=rerank_method, - near_neighbor_first=near_neighbor_first, - custom_related_information=custom_related_information, - gremlin_prompt=gremlin_prompt, - ): - yield f"data: {json.dumps({'chunk': chunk})}\n\n" - else: - # Otherwise, use the normal function but adapt it for streaming - try: - result = graph_rag_recall_func( - query=query, - gremlin_tmpl_num=gremlin_tmpl_num, - rerank_method=rerank_method, - near_neighbor_first=near_neighbor_first, - custom_related_information=custom_related_information, - gremlin_prompt=gremlin_prompt, - ) - - if isinstance(result, dict): - params = [ - "query", - "keywords", - "match_vids", - "graph_result_flag", - "gremlin", - "graph_result", - "vertex_degree_list", - ] - user_result = {key: result[key] for key in params if key in result} - yield f"data: {json.dumps({'graph_recall': user_result})}\n\n" - else: - # Note: Maybe only for qianfan/wenxin - yield f"data: {json.dumps({'graph_recall': json.dumps(result)})}\n\n" - - # Signal end of stream - yield "data: [DONE]\n\n" - - except TypeError as e: - log.error("TypeError in stream_graph_rag_recall: %s", e) - yield f"data: {json.dumps({'error': str(e), 'status': 400})}\n\n" - except Exception as e: - log.error("Unexpected error occurred: %s", e) - yield f"data: {json.dumps({'error': 'An unexpected error occurred.', 'status': 500})}\n\n" - @router.post("/rag", status_code=status.HTTP_200_OK) async def rag_answer_api(req: RAGRequest): - if req.stream: - # Return a streaming response - return StreamingResponse( - stream_rag_answer( - text=req.query, - raw_answer=req.raw_answer, - vector_only_answer=req.vector_only, - graph_only_answer=req.graph_only, - graph_vector_answer=req.graph_vector_answer, - graph_ratio=req.graph_ratio, - rerank_method=req.rerank_method, - near_neighbor_first=req.near_neighbor_first, - custom_related_information=req.custom_priority_info, - answer_prompt=req.answer_prompt or prompt.answer_prompt, - keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt, - gremlin_tmpl_num=req.gremlin_tmpl_num, - gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, - ), - media_type="text/event-stream", - ) - else: - # Synchronous response (original behavior) - result = rag_answer_func( - text=req.query, - raw_answer=req.raw_answer, - vector_only_answer=req.vector_only, - graph_only_answer=req.graph_only, - graph_vector_answer=req.graph_vector_answer, - graph_ratio=req.graph_ratio, + result = await rag_answer_func( + text=req.query, + raw_answer=req.raw_answer, + vector_only_answer=req.vector_only, + graph_only_answer=req.graph_only, + graph_vector_answer=req.graph_vector_answer, + graph_ratio=req.graph_ratio, + rerank_method=req.rerank_method, + near_neighbor_first=req.near_neighbor_first, + custom_related_information=req.custom_priority_info, + answer_prompt=req.answer_prompt or prompt.answer_prompt, + 
keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt, + gremlin_tmpl_num=req.gremlin_tmpl_num, + gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, + ) + + return { + "query": req.query, + **{ + key: value + for key, value in zip(["raw_answer", "vector_only", "graph_only", "graph_vector_answer"], result) + if getattr(req, key) + }, + } + + @router.post("/rag/graph", status_code=status.HTTP_200_OK) + async def graph_rag_recall_api(req: GraphRAGRequest): + try: + result = await graph_rag_recall_func( + query=req.query, + gremlin_tmpl_num=req.gremlin_tmpl_num, rerank_method=req.rerank_method, near_neighbor_first=req.near_neighbor_first, custom_related_information=req.custom_priority_info, - answer_prompt=req.answer_prompt or prompt.answer_prompt, - keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt, - gremlin_tmpl_num=req.gremlin_tmpl_num, gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, ) - # TODO: we need more info in the response for users to understand the query logic - return { - "query": req.query, - **{ - key: value - for key, value in zip(["raw_answer", "vector_only", "graph_only", "graph_vector_answer"], result) - if getattr(req, key) - }, - } - - @router.post("/rag/graph", status_code=status.HTTP_200_OK) - async def graph_rag_recall_api(req: GraphRAGRequest): - if req.stream: - # Return a streaming response - return StreamingResponse( - stream_graph_rag_recall( - query=req.query, - gremlin_tmpl_num=req.gremlin_tmpl_num, - rerank_method=req.rerank_method, - near_neighbor_first=req.near_neighbor_first, - custom_related_information=req.custom_priority_info, - gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, - ), - media_type="text/event-stream", - ) - else: - # Synchronous response (original behavior) - try: - result = graph_rag_recall_func( - query=req.query, - gremlin_tmpl_num=req.gremlin_tmpl_num, - rerank_method=req.rerank_method, - near_neighbor_first=req.near_neighbor_first, - custom_related_information=req.custom_priority_info, - gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, - ) - - if isinstance(result, dict): - params = [ - "query", - "keywords", - "match_vids", - "graph_result_flag", - "gremlin", - "graph_result", - "vertex_degree_list", - ] - user_result = {key: result[key] for key in params if key in result} - return {"graph_recall": user_result} - # Note: Maybe only for qianfan/wenxin - return {"graph_recall": json.dumps(result)} - - except TypeError as e: - log.error("TypeError in graph_rag_recall_api: %s", e) - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e - except Exception as e: - log.error("Unexpected error occurred: %s", e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred." 
- ) from e - - @router.post("/config/graph", status_code=status.HTTP_201_CREATED) - def graph_config_api(req: GraphConfigRequest): - # Accept status code - res = apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") - return generate_response(RAGResponse(status_code=res, message="Missing Value")) - - # TODO: restructure the implement of llm to three types, like "/config/chat_llm" - @router.post("/config/llm", status_code=status.HTTP_201_CREATED) - def llm_config_api(req: LLMConfigRequest): - llm_settings.llm_type = req.llm_type - - if req.llm_type == "openai": - res = apply_llm_conf(req.api_key, req.api_base, req.language_model, req.max_tokens, origin_call="http") - elif req.llm_type == "qianfan_wenxin": - res = apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http") - else: - res = apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") - return generate_response(RAGResponse(status_code=res, message="Missing Value")) - - @router.post("/config/embedding", status_code=status.HTTP_201_CREATED) - def embedding_config_api(req: LLMConfigRequest): - llm_settings.embedding_type = req.llm_type - - if req.llm_type == "openai": - res = apply_embedding_conf(req.api_key, req.api_base, req.language_model, origin_call="http") - elif req.llm_type == "qianfan_wenxin": - res = apply_embedding_conf(req.api_key, req.api_base, None, origin_call="http") - else: - res = apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http") - return generate_response(RAGResponse(status_code=res, message="Missing Value")) - - @router.post("/config/rerank", status_code=status.HTTP_201_CREATED) - def rerank_config_api(req: RerankerConfigRequest): - llm_settings.reranker_type = req.reranker_type - if req.reranker_type == "cohere": - res = apply_reranker_conf(req.api_key, req.reranker_model, req.cohere_base_url, origin_call="http") - elif req.reranker_type == "siliconflow": - res = apply_reranker_conf(req.api_key, req.reranker_model, None, origin_call="http") - else: - res = status.HTTP_501_NOT_IMPLEMENTED - return generate_response(RAGResponse(status_code=res, message="Missing Value")) \ No newline at end of file + if isinstance(result, dict): + params = [ + "query", + "keywords", + "match_vids", + "graph_result_flag", + "gremlin", + "graph_result", + "vertex_degree_list", + ] + user_result = {key: result[key] for key in params if key in result} + return {"graph_recall": user_result} + + return {"graph_recall": json.dumps(result)} + + except TypeError as e: + log.error("TypeError in graph_rag_recall_api: %s", e) + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e + except Exception as e: + log.error("Unexpected error occurred: %s", e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred." + ) from e \ No newline at end of file diff --git a/hugegraph-llm/src/hugegraph_llm/api/stream_api.py b/hugegraph-llm/src/hugegraph_llm/api/stream_api.py new file mode 100644 index 00000000..32eda694 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/api/stream_api.py @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +import asyncio + +from fastapi import status, APIRouter, HTTPException +from fastapi.responses import StreamingResponse + +from hugegraph_llm.api.models.rag_requests import ( + RAGRequest, + GraphRAGRequest, +) +from hugegraph_llm.config import prompt +from hugegraph_llm.utils.log import log + + +async def stream_http_api( + router: APIRouter, + rag_answer_stream_func, + graph_rag_recall_stream_func, +): + @router.post("/rag/stream", status_code=status.HTTP_200_OK) + async def rag_answer_stream_api(req: RAGRequest): + if not req.stream: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Stream parameter must be set to True for streaming endpoint" + ) + + async def generate_stream(): + try: + async for chunk in rag_answer_stream_func( + text=req.query, + raw_answer=req.raw_answer, + vector_only_answer=req.vector_only, + graph_only_answer=req.graph_only, + graph_vector_answer=req.graph_vector_answer, + graph_ratio=req.graph_ratio, + rerank_method=req.rerank_method, + near_neighbor_first=req.near_neighbor_first, + custom_related_information=req.custom_priority_info, + answer_prompt=req.answer_prompt or prompt.answer_prompt, + keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt, + gremlin_tmpl_num=req.gremlin_tmpl_num, + gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, + ): + # Format as Server-Sent Events + data = json.dumps({ + "query": req.query, + "chunk": chunk + }) + yield f"data: {data}\n\n" + await asyncio.sleep(0.01) # Small delay to prevent overwhelming + except Exception as e: + log.error(f"Error in streaming RAG response: {e}") + error_data = json.dumps({"error": str(e)}) + yield f"data: {error_data}\n\n" + + return StreamingResponse( + generate_stream(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + } + ) + + @router.post("/rag/graph/stream", status_code=status.HTTP_200_OK) + async def graph_rag_recall_stream_api(req: GraphRAGRequest): + if not req.stream: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Stream parameter must be set to True for streaming endpoint" + ) + + async def generate_graph_stream(): + try: + async for chunk in graph_rag_recall_stream_func( + query=req.query, + gremlin_tmpl_num=req.gremlin_tmpl_num, + rerank_method=req.rerank_method, + near_neighbor_first=req.near_neighbor_first, + custom_related_information=req.custom_priority_info, + gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, + ): + if isinstance(chunk, dict): + params = [ + "query", + "keywords", + "match_vids", + "graph_result_flag", + "gremlin", + "graph_result", + "vertex_degree_list", + ] + user_result = {key: chunk[key] for key in params if key in chunk} + data = json.dumps({"graph_recall": user_result}) + else: + data = json.dumps({"graph_recall": json.dumps(chunk)}) + + yield f"data: {data}\n\n" + await asyncio.sleep(0.01) 
# Small delay + except TypeError as e: + log.error(f"TypeError in streaming graph RAG recall: {e}") + error_data = json.dumps({"error": str(e)}) + yield f"data: {error_data}\n\n" + except Exception as e: + log.error(f"Unexpected error in streaming graph RAG recall: {e}") + error_data = json.dumps({"error": "An unexpected error occurred."}) + yield f"data: {error_data}\n\n" + + return StreamingResponse( + generate_graph_stream(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + } + ) \ No newline at end of file diff --git a/hugegraph-llm/src/tests/api/test_rag_api.py b/hugegraph-llm/src/tests/api/test_rag_api.py new file mode 100644 index 00000000..5770b54c --- /dev/null +++ b/hugegraph-llm/src/tests/api/test_rag_api.py @@ -0,0 +1,135 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +import asyncio +from fastapi import FastAPI, APIRouter +from fastapi.testclient import TestClient + +from hugegraph_llm.api.rag_api import rag_http_api + + +class MockAsyncFunction: + """Helper class to mock async functions""" + + def __init__(self, return_value): + self.return_value = return_value + self.called = False + self.last_args = None + self.last_kwargs = None + + async def __call__(self, *args, **kwargs): + self.called = True + self.last_args = args + self.last_kwargs = kwargs + return self.return_value + + +class TestRagApi(unittest.TestCase): + def setUp(self): + self.app = FastAPI() + self.router = APIRouter() + + # Mock RAG answer function + self.mock_rag_answer = MockAsyncFunction( + ["Test raw answer", "Test vector answer", "Test graph answer", "Test combined answer"] + ) + + # Mock graph RAG recall function + self.mock_graph_rag_recall = MockAsyncFunction({ + "query": "test query", + "keywords": ["test", "keyword"], + "match_vids": ["1", "2"], + "graph_result_flag": True, + "gremlin": "g.V().has('name', 'test')", + "graph_result": ["result1", "result2"], + "vertex_degree_list": [1, 2] + }) + + # Setup the API + loop = asyncio.get_event_loop() + loop.run_until_complete( + rag_http_api( + router=self.router, + rag_answer_func=self.mock_rag_answer, + graph_rag_recall_func=self.mock_graph_rag_recall + ) + ) + + self.app.include_router(self.router) + self.client = TestClient(self.app) + + def test_rag_answer_api(self): + """Test the /rag endpoint""" + # Prepare test request + request_data = { + "query": "test query", + "raw_answer": True, + "vector_only": True, + "graph_only": True, + "graph_vector_answer": True + } + + # Send request + response = self.client.post("/rag", json=request_data) + + # Check response + self.assertEqual(response.status_code, 200) + self.assertTrue(self.mock_rag_answer.called) + self.assertEqual(self.mock_rag_answer.last_kwargs["text"], "test query") + + # Check 
response content + response_data = response.json() + self.assertEqual(response_data["query"], "test query") + self.assertEqual(response_data["raw_answer"], "Test raw answer") + self.assertEqual(response_data["vector_only"], "Test vector answer") + self.assertEqual(response_data["graph_only"], "Test graph answer") + self.assertEqual(response_data["graph_vector_answer"], "Test combined answer") + + def test_graph_rag_recall_api(self): + """Test the /rag/graph endpoint""" + # Prepare test request + request_data = { + "query": "test query", + "gremlin_tmpl_num": 1, + "rerank_method": "bleu", + "near_neighbor_first": False, + "custom_priority_info": "", + "stream": False + } + + # Send request + response = self.client.post("/rag/graph", json=request_data) + + # Check response + self.assertEqual(response.status_code, 200) + self.assertTrue(self.mock_graph_rag_recall.called) + self.assertEqual(self.mock_graph_rag_recall.last_kwargs["query"], "test query") + + # Check response content + response_data = response.json() + self.assertIn("graph_recall", response_data) + graph_recall = response_data["graph_recall"] + self.assertEqual(graph_recall["query"], "test query") + self.assertListEqual(graph_recall["keywords"], ["test", "keyword"]) + self.assertListEqual(graph_recall["match_vids"], ["1", "2"]) + self.assertTrue(graph_recall["graph_result_flag"]) + self.assertEqual(graph_recall["gremlin"], "g.V().has('name', 'test')") + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 66c221ef444a554f1bcd897201f38369de770137 Mon Sep 17 00:00:00 2001 From: chirag gupta <103719146+chiruu12@users.noreply.github.com> Date: Thu, 27 Feb 2025 13:58:34 +0530 Subject: [PATCH 04/10] Adding support for async and streaming output mode --- .../src/hugegraph_llm/api/admin_api.py | 3 +- .../src/hugegraph_llm/api/config_api.py | 29 +++++++---- .../api/exceptions/rag_exceptions.py | 3 +- .../hugegraph_llm/api/models/rag_requests.py | 50 ++++++++++++------- .../src/hugegraph_llm/api/rag_api.py | 11 ++-- .../src/hugegraph_llm/api/stream_api.py | 8 +-- 6 files changed, 68 insertions(+), 36 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/admin_api.py b/hugegraph-llm/src/hugegraph_llm/api/admin_api.py index a234c502..26c04a87 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/admin_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/admin_api.py @@ -30,7 +30,8 @@ def admin_http_api(router: APIRouter, log_stream): @router.post("/logs", status_code=status.HTTP_200_OK) async def log_stream_api(req: LogStreamRequest): if admin_settings.admin_token != req.admin_token: - raise generate_response(RAGResponse(status_code=status.HTTP_403_FORBIDDEN, message="Invalid admin_token")) #pylint: disable=E0702 + raise generate_response(RAGResponse(status_code=status.HTTP_403_FORBIDDEN, + message="Invalid admin_token")) #pylint: disable=E0702 log_path = os.path.join("logs", req.log_file) # Create a StreamingResponse that reads from the log stream generator diff --git a/hugegraph-llm/src/hugegraph_llm/api/config_api.py b/hugegraph-llm/src/hugegraph_llm/api/config_api.py index d6f08704..3c40724a 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/config_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/config_api.py @@ -36,7 +36,8 @@ async def config_http_api( ): @router.post("/config/graph", status_code=status.HTTP_201_CREATED) async def graph_config_api(req: GraphConfigRequest): - res = await apply_graph_conf(req.ip, req.port, req.name, req.user, req.pwd, req.gs, origin_call="http") + res = await 
apply_graph_conf(req.ip, req.port, req.name, + req.user, req.pwd, req.gs, origin_call="http") return generate_response(RAGResponse(status_code=res, message="Missing Value")) @router.post("/config/llm", status_code=status.HTTP_201_CREATED) @@ -44,11 +45,14 @@ async def llm_config_api(req: LLMConfigRequest): llm_settings.llm_type = req.llm_type if req.llm_type == "openai": - res = await apply_llm_conf(req.api_key, req.api_base, req.language_model, req.max_tokens, origin_call="http") + res = await apply_llm_conf(req.api_key, req.api_base, req.language_model, + req.max_tokens, origin_call="http") elif req.llm_type == "qianfan_wenxin": - res = await apply_llm_conf(req.api_key, req.secret_key, req.language_model, None, origin_call="http") + res = await apply_llm_conf(req.api_key, req.secret_key, req.language_model, + None, origin_call="http") else: - res = await apply_llm_conf(req.host, req.port, req.language_model, None, origin_call="http") + res = await apply_llm_conf(req.host, req.port, req.language_model, + None, origin_call="http") return generate_response(RAGResponse(status_code=res, message="Missing Value")) @router.post("/config/embedding", status_code=status.HTTP_201_CREATED) @@ -56,11 +60,14 @@ async def embedding_config_api(req: LLMConfigRequest): llm_settings.embedding_type = req.llm_type if req.llm_type == "openai": - res = await apply_embedding_conf(req.api_key, req.api_base, req.language_model, origin_call="http") + res = await apply_embedding_conf(req.api_key, req.api_base, + req.language_model, origin_call="http") elif req.llm_type == "qianfan_wenxin": - res = await apply_embedding_conf(req.api_key, req.api_base, None, origin_call="http") + res = await apply_embedding_conf(req.api_key, req.api_base, + None, origin_call="http") else: - res = await apply_embedding_conf(req.host, req.port, req.language_model, origin_call="http") + res = await apply_embedding_conf(req.host, req.port, req.language_model, + origin_call="http") return generate_response(RAGResponse(status_code=res, message="Missing Value")) @router.post("/config/rerank", status_code=status.HTTP_201_CREATED) @@ -68,9 +75,11 @@ async def rerank_config_api(req: RerankerConfigRequest): llm_settings.reranker_type = req.reranker_type if req.reranker_type == "cohere": - res = await apply_reranker_conf(req.api_key, req.reranker_model, req.cohere_base_url, origin_call="http") + res = await apply_reranker_conf(req.api_key, req.reranker_model, + req.cohere_base_url, origin_call="http") elif req.reranker_type == "siliconflow": - res = await apply_reranker_conf(req.api_key, req.reranker_model, None, origin_call="http") + res = await apply_reranker_conf(req.api_key, req.reranker_model, + None, origin_call="http") else: res = status.HTTP_501_NOT_IMPLEMENTED - return generate_response(RAGResponse(status_code=res, message="Missing Value")) \ No newline at end of file + return generate_response(RAGResponse(status_code=res, message="Missing Value")) diff --git a/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py b/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py index 75eb14cf..993c495d 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py +++ b/hugegraph-llm/src/hugegraph_llm/api/exceptions/rag_exceptions.py @@ -21,7 +21,8 @@ class ExternalException(HTTPException): def __init__(self): - super().__init__(status_code=400, detail="Connect failed with error code -1, please check the input.") + super().__init__(status_code=400, detail="Connect failed with error code -1, " + "please check 
the input.") class ConnectionFailedException(HTTPException): diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py index 8def701d..ed927e1f 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py +++ b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py @@ -25,41 +25,57 @@ class RAGRequest(BaseModel): query: str = Query("", description="Query you want to ask") - raw_answer: bool = Query(False, description="Use LLM to generate answer directly") - vector_only: bool = Query(False, description="Use LLM to generate answer with vector") - graph_only: bool = Query(True, description="Use LLM to generate answer with graph RAG only") - graph_vector_answer: bool = Query(False, description="Use LLM to generate answer with vector & GraphRAG") - graph_ratio: float = Query(0.5, description="The ratio of GraphRAG ans & vector ans") - rerank_method: Literal["bleu", "reranker"] = Query("bleu", description="Method to rerank the results.") - near_neighbor_first: bool = Query(False, description="Prioritize near neighbors in the search results.") - custom_priority_info: str = Query("", description="Custom information to prioritize certain results.") - answer_prompt: Optional[str] = Query(prompt.answer_prompt, description="Prompt to guide the answer generation.") + raw_answer: bool = Query(False, + description="Use LLM to generate answer directly") + vector_only: bool = Query(False, + description="Use LLM to generate answer with vector") + graph_only: bool = Query(True, + description="Use LLM to generate answer with graph RAG only") + graph_vector_answer: bool = Query(False, + description="Use LLM to generate answer with vector & GraphRAG") + graph_ratio: float = Query(0.5, + description="The ratio of GraphRAG ans & vector ans") + rerank_method: Literal["bleu", "reranker"] = Query("bleu", + description="Method to rerank the results.") + near_neighbor_first: bool = Query(False, + description="Prioritize near neighbors in the search results.") + custom_priority_info: str = Query("", + description="Custom information to prioritize certain results.") + answer_prompt: Optional[str] = Query(prompt.answer_prompt, + description="Prompt to guide the answer generation.") keywords_extract_prompt: Optional[str] = Query( prompt.keywords_extract_prompt, description="Prompt for extracting keywords from query.", ) - gremlin_tmpl_num: int = Query(1, description="Number of Gremlin templates to use.") + gremlin_tmpl_num: int = Query(1, + description="Number of Gremlin templates to use.") gremlin_prompt: Optional[str] = Query( prompt.gremlin_generate_prompt, description="Prompt for the Text2Gremlin query.", ) - stream: bool = Query(False, description="Enable streaming response") + stream: bool = Query(False, + description="Enable streaming response") # TODO: import the default value of prompt.* dynamically class GraphRAGRequest(BaseModel): query: str = Query("", description="Query you want to ask") gremlin_tmpl_num: int = Query( - 1, description="Number of Gremlin templates to use. If num <=0 means template is not provided" + 1, + description="Number of Gremlin templates to use. 
If num <=0 means template is not provided" ) - rerank_method: Literal["bleu", "reranker"] = Query("bleu", description="Method to rerank the results.") - near_neighbor_first: bool = Query(False, description="Prioritize near neighbors in the search results.") - custom_priority_info: str = Query("", description="Custom information to prioritize certain results.") + rerank_method: Literal["bleu", "reranker"] = Query("bleu", + description="Method to rerank the results.") + near_neighbor_first: bool = Query(False, + description="Prioritize near neighbors in the search results.") + custom_priority_info: str = Query("", + description="Custom information to prioritize certain results.") gremlin_prompt: Optional[str] = Query( prompt.gremlin_generate_prompt, description="Prompt for the Text2Gremlin query.", ) - stream: bool = Query(False, description="Enable streaming response") + stream: bool = Query(False, + description="Enable streaming response") class GraphConfigRequest(BaseModel): @@ -96,4 +112,4 @@ class RerankerConfigRequest(BaseModel): class LogStreamRequest(BaseModel): admin_token: Optional[str] = None - log_file: Optional[str] = "llm-server.log" \ No newline at end of file + log_file: Optional[str] = "llm-server.log" diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index f878a37f..5a28bef3 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -46,7 +46,8 @@ async def rag_answer_api(req: RAGRequest): near_neighbor_first=req.near_neighbor_first, custom_related_information=req.custom_priority_info, answer_prompt=req.answer_prompt or prompt.answer_prompt, - keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt, + keywords_extract_prompt=req.keywords_extract_prompt + or prompt.keywords_extract_prompt, gremlin_tmpl_num=req.gremlin_tmpl_num, gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, ) @@ -55,7 +56,8 @@ async def rag_answer_api(req: RAGRequest): "query": req.query, **{ key: value - for key, value in zip(["raw_answer", "vector_only", "graph_only", "graph_vector_answer"], result) + for key, value in zip(["raw_answer", "vector_only", "graph_only", + "graph_vector_answer"], result) if getattr(req, key) }, } @@ -93,5 +95,6 @@ async def graph_rag_recall_api(req: GraphRAGRequest): except Exception as e: log.error("Unexpected error occurred: %s", e) raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred." - ) from e \ No newline at end of file + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="An unexpected error occurred." 
+ ) from e diff --git a/hugegraph-llm/src/hugegraph_llm/api/stream_api.py b/hugegraph-llm/src/hugegraph_llm/api/stream_api.py index 32eda694..5e177323 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/stream_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/stream_api.py @@ -55,7 +55,8 @@ async def generate_stream(): near_neighbor_first=req.near_neighbor_first, custom_related_information=req.custom_priority_info, answer_prompt=req.answer_prompt or prompt.answer_prompt, - keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt, + keywords_extract_prompt=req.keywords_extract_prompt + or prompt.keywords_extract_prompt, gremlin_tmpl_num=req.gremlin_tmpl_num, gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, ): @@ -96,7 +97,8 @@ async def generate_graph_stream(): rerank_method=req.rerank_method, near_neighbor_first=req.near_neighbor_first, custom_related_information=req.custom_priority_info, - gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, + gremlin_prompt=req.gremlin_prompt or + prompt.gremlin_generate_prompt, ): if isinstance(chunk, dict): params = [ @@ -131,4 +133,4 @@ async def generate_graph_stream(): "Cache-Control": "no-cache", "Connection": "keep-alive", } - ) \ No newline at end of file + ) From 06ef02de2826697f2817c9ed7c1ed0f135b5dc2e Mon Sep 17 00:00:00 2001 From: chirag gupta <103719146+chiruu12@users.noreply.github.com> Date: Thu, 6 Mar 2025 08:06:15 +0530 Subject: [PATCH 05/10] Adding support for async and streaming output mode --- .../src/hugegraph_llm/api/rag_api.py | 2 +- .../src/hugegraph_llm/api/stream_api.py | 66 +++++++++++++++---- 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py index 61657938..14e5867b 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py @@ -142,4 +142,4 @@ async def graph_rag_recall_api(req: GraphRAGRequest): await graph_config_route(router, apply_graph_conf) await llm_config_route(router, apply_llm_conf) await embedding_config_route(router, apply_embedding_conf) - await rerank_config_route(router, apply_reranker_conf) \ No newline at end of file + await rerank_config_route(router, apply_reranker_conf) diff --git a/hugegraph-llm/src/hugegraph_llm/api/stream_api.py b/hugegraph-llm/src/hugegraph_llm/api/stream_api.py index 5e177323..90fc8a50 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/stream_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/stream_api.py @@ -25,7 +25,7 @@ RAGRequest, GraphRAGRequest, ) -from hugegraph_llm.config import prompt +from hugegraph_llm.config import prompt, huge_settings from hugegraph_llm.utils.log import log @@ -42,6 +42,14 @@ async def rag_answer_stream_api(req: RAGRequest): detail="Stream parameter must be set to True for streaming endpoint" ) + if req.client_config: + huge_settings.graph_ip = req.client_config.ip + huge_settings.graph_port = req.client_config.port + huge_settings.graph_name = req.client_config.name + huge_settings.graph_user = req.client_config.user + huge_settings.graph_pwd = req.client_config.pwd + huge_settings.graph_space = req.client_config.gs + async def generate_stream(): try: async for chunk in rag_answer_stream_func( @@ -53,11 +61,15 @@ async def generate_stream(): graph_ratio=req.graph_ratio, rerank_method=req.rerank_method, near_neighbor_first=req.near_neighbor_first, + gremlin_tmpl_num=req.gremlin_tmpl_num, + max_graph_items=req.max_graph_items, + 
topk_return_results=req.topk_return_results, + vector_dis_threshold=req.vector_dis_threshold, + topk_per_keyword=req.topk_per_keyword, + # Keep prompt params in the end custom_related_information=req.custom_priority_info, answer_prompt=req.answer_prompt or prompt.answer_prompt, - keywords_extract_prompt=req.keywords_extract_prompt - or prompt.keywords_extract_prompt, - gremlin_tmpl_num=req.gremlin_tmpl_num, + keywords_extract_prompt=req.keywords_extract_prompt or prompt.keywords_extract_prompt, gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, ): # Format as Server-Sent Events @@ -67,10 +79,15 @@ async def generate_stream(): }) yield f"data: {data}\n\n" await asyncio.sleep(0.01) # Small delay to prevent overwhelming - except Exception as e: - log.error(f"Error in streaming RAG response: {e}") + except (ValueError, TypeError) as e: # More specific exceptions + log.error("Error in streaming RAG response: %s", e) error_data = json.dumps({"error": str(e)}) yield f"data: {error_data}\n\n" + except Exception as e: # pylint: disable=broad-exception-caught + # We need to catch all exceptions here to ensure proper error response + log.error("Unexpected error in streaming RAG response: %s", e) + error_data = json.dumps({"error": "An unexpected error occurred"}) + yield f"data: {error_data}\n\n" return StreamingResponse( generate_stream(), @@ -89,17 +106,39 @@ async def graph_rag_recall_stream_api(req: GraphRAGRequest): detail="Stream parameter must be set to True for streaming endpoint" ) + # Set graph config if provided + if req.client_config: + huge_settings.graph_ip = req.client_config.ip + huge_settings.graph_port = req.client_config.port + huge_settings.graph_name = req.client_config.name + huge_settings.graph_user = req.client_config.user + huge_settings.graph_pwd = req.client_config.pwd + huge_settings.graph_space = req.client_config.gs + async def generate_graph_stream(): try: async for chunk in graph_rag_recall_stream_func( query=req.query, + max_graph_items=req.max_graph_items, + topk_return_results=req.topk_return_results, + vector_dis_threshold=req.vector_dis_threshold, + topk_per_keyword=req.topk_per_keyword, gremlin_tmpl_num=req.gremlin_tmpl_num, rerank_method=req.rerank_method, near_neighbor_first=req.near_neighbor_first, custom_related_information=req.custom_priority_info, - gremlin_prompt=req.gremlin_prompt or - prompt.gremlin_generate_prompt, + gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, + get_vertex_only=req.get_vertex_only ): + # Handle vertex details for get_vertex_only flag + if req.get_vertex_only and isinstance(chunk, dict) and "match_vids" in chunk: + from hugegraph_llm.operators.hugegraph_op.graph_rag_query import GraphRAGQuery + graph_rag = GraphRAGQuery() + graph_rag.init_client(chunk) + vertex_details = await graph_rag.get_vertex_details(chunk["match_vids"]) + if vertex_details: + chunk["match_vids"] = vertex_details + if isinstance(chunk, dict): params = [ "query", @@ -118,12 +157,13 @@ async def generate_graph_stream(): yield f"data: {data}\n\n" await asyncio.sleep(0.01) # Small delay except TypeError as e: - log.error(f"TypeError in streaming graph RAG recall: {e}") + log.error("TypeError in streaming graph RAG recall: %s", e) error_data = json.dumps({"error": str(e)}) yield f"data: {error_data}\n\n" - except Exception as e: - log.error(f"Unexpected error in streaming graph RAG recall: {e}") - error_data = json.dumps({"error": "An unexpected error occurred."}) + except Exception as e: # pylint: disable=broad-exception-caught 
+ # We need to catch all exceptions here to ensure proper error response + log.error("Unexpected error in streaming graph RAG recall: %s", e) + error_data = json.dumps({"error": "An unexpected error occurred"}) yield f"data: {error_data}\n\n" return StreamingResponse( @@ -133,4 +173,4 @@ async def generate_graph_stream(): "Cache-Control": "no-cache", "Connection": "keep-alive", } - ) + ) \ No newline at end of file From 6dd0b7f1f32f0c16e8383bd278c3e8afaaf1868c Mon Sep 17 00:00:00 2001 From: chirag gupta <103719146+chiruu12@users.noreply.github.com> Date: Thu, 6 Mar 2025 08:14:10 +0530 Subject: [PATCH 06/10] Adding support for async and streaming output mode --- .github/workflows/python-client.yml | 3 +- README.md | 2 +- hugegraph-llm/.gitignore | 2 + hugegraph-llm/README.md | 1 + hugegraph-llm/poetry.lock | 428 ++++++++++++------ hugegraph-llm/pyproject.toml | 5 +- hugegraph-llm/requirements.txt | 3 +- .../src/hugegraph_llm/config/llm_config.py | 35 +- .../src/hugegraph_llm/config/prompt_config.py | 62 ++- .../demo/rag_demo/configs_block.py | 96 +++- .../hugegraph_llm/demo/rag_demo/rag_block.py | 20 +- .../demo/rag_demo/text2gremlin_block.py | 28 +- .../models/embeddings/init_embedding.py | 7 + .../models/embeddings/litellm.py | 93 ++++ .../src/hugegraph_llm/models/llms/init_llm.py | 22 + .../src/hugegraph_llm/models/llms/litellm.py | 156 +++++++ .../operators/common_op/merge_dedup_rerank.py | 12 +- .../hugegraph_llm/operators/graph_rag_task.py | 9 +- .../operators/hugegraph_op/graph_rag_query.py | 19 +- .../operators/index_op/semantic_id_query.py | 6 +- hugegraph-ml/README.md | 28 +- hugegraph-python-client/README.md | 4 +- 22 files changed, 830 insertions(+), 211 deletions(-) create mode 100644 hugegraph-llm/src/hugegraph_llm/models/embeddings/litellm.py create mode 100644 hugegraph-llm/src/hugegraph_llm/models/llms/litellm.py diff --git a/.github/workflows/python-client.yml b/.github/workflows/python-client.yml index dd8e03ab..c0bdf5c9 100644 --- a/.github/workflows/python-client.yml +++ b/.github/workflows/python-client.yml @@ -20,8 +20,7 @@ jobs: - name: Prepare HugeGraph Server Environment run: | docker run -d --name=graph -p 8080:8080 -e PASSWORD=admin hugegraph/hugegraph:1.3.0 - # wait server init-done (avoid some test error:) - sleep 5 + sleep 1 - uses: actions/checkout@v4 diff --git a/README.md b/README.md index 197161b7..c0d57aec 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ And here are links of other repositories: - Welcome to contribute to HugeGraph, please see [Guidelines](https://hugegraph.apache.org/docs/contribution-guidelines/) for more information. - Note: It's recommended to use [GitHub Desktop](https://desktop.github.com/) to greatly simplify the PR and commit process. -- Code format: Please run [`./style/code_format_and_analysis.sh`](style/code_format_and_analysis.sh) to format your code before submitting a PR. +- Code format: Please run [`./style/code_format_and_analysis.sh`](style/code_format_and_analysis.sh) to format your code before submitting a PR. (Use `pylint` to check code style) - Thank you to all the people who already contributed to HugeGraph! 
[![contributors graph](https://contrib.rocks/image?repo=apache/incubator-hugegraph-ai)](https://github.com/apache/incubator-hugegraph-ai/graphs/contributors) diff --git a/hugegraph-llm/.gitignore b/hugegraph-llm/.gitignore index 1740bd27..2c4cd3e0 100644 --- a/hugegraph-llm/.gitignore +++ b/hugegraph-llm/.gitignore @@ -1,3 +1,5 @@ src/hugegraph_llm/resources/demo/questions_answers.xlsx src/hugegraph_llm/resources/demo/questions.xlsx src/hugegraph_llm/resources/backup-graph-data-4020/ + +uv.lock diff --git a/hugegraph-llm/README.md b/hugegraph-llm/README.md index beaa52b4..c2c3caac 100644 --- a/hugegraph-llm/README.md +++ b/hugegraph-llm/README.md @@ -67,6 +67,7 @@ graph systems and large language models. ```bash python -m hugegraph_llm.config.generate --update ``` + Note: `Litellm` support multi-LLM provider, refer [litellm.ai](https://docs.litellm.ai/docs/providers) to config it 7. (__Optional__) You could use [hugegraph-hubble](https://hugegraph.apache.org/docs/quickstart/hugegraph-hubble/#21-use-docker-convenient-for-testdev) to visit the graph data, could run it via [Docker/Docker-Compose](https://hub.docker.com/r/hugegraph/hubble) diff --git a/hugegraph-llm/poetry.lock b/hugegraph-llm/poetry.lock index 29825aa3..17019b90 100644 --- a/hugegraph-llm/poetry.lock +++ b/hugegraph-llm/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "aiofiles" @@ -7,7 +7,6 @@ description = "File support for asyncio." optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "aiofiles-23.2.1-py3-none-any.whl", hash = "sha256:19297512c647d4b27a2cf7c34caa7e405c0d60b5560618a29a9fe027b18b0107"}, {file = "aiofiles-23.2.1.tar.gz", hash = "sha256:84ec2218d8419404abcb9f0c02df3f34c6e0a68ed41072acfb1cef5cbc29051a"}, @@ -20,7 +19,6 @@ description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "aiohappyeyeballs-2.4.4-py3-none-any.whl", hash = "sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8"}, {file = "aiohappyeyeballs-2.4.4.tar.gz", hash = "sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745"}, @@ -33,7 +31,6 @@ description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "aiohttp-3.11.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a60804bff28662cbcf340a4d61598891f12eea3a66af48ecfdc975ceec21e3c8"}, {file = "aiohttp-3.11.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4b4fa1cb5f270fb3eab079536b764ad740bb749ce69a94d4ec30ceee1b5940d5"}, @@ -124,7 +121,7 @@ propcache = ">=0.2.0" yarl = ">=1.17.0,<2.0" [package.extras] -speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] +speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] [[package]] name = "aiolimiter" @@ -133,7 +130,6 @@ description = "asyncio rate limiter, a leaky bucket implementation" optional = false python-versions = "<4.0,>=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "aiolimiter-1.2.1-py3-none-any.whl", hash = 
"sha256:d3f249e9059a20badcb56b61601a83556133655c11d1eb3dd3e04ff069e5f3c7"}, {file = "aiolimiter-1.2.1.tar.gz", hash = "sha256:e02a37ea1a855d9e832252a105420ad4d15011505512a1a1d814647451b5cca9"}, @@ -146,7 +142,6 @@ description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"}, {file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"}, @@ -162,7 +157,6 @@ description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -175,7 +169,6 @@ description = "High level compatibility layer for multiple asynchronous event lo optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a"}, {file = "anyio-4.8.0.tar.gz", hash = "sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a"}, @@ -189,7 +182,7 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""] trio = ["trio (>=0.26.1)"] [[package]] @@ -199,7 +192,6 @@ description = "In-process task scheduler with Cron-like capabilities" optional = false python-versions = ">=3.6" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "APScheduler-3.10.4-py3-none-any.whl", hash = "sha256:fb91e8a768632a4756a585f79ec834e0e27aad5860bac7eaa523d9ccefd87661"}, {file = "APScheduler-3.10.4.tar.gz", hash = "sha256:e6df071b27d9be898e486bc7940a7be50b4af2e9da7c08f0744a96d4bd4cef4a"}, @@ -242,19 +234,18 @@ description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308"}, {file = "attrs-24.3.0.tar.gz", hash = "sha256:8f5c07333d543103541ba7be0e2ce16eeee8130cb0b3f9238ab904ce1e85baff"}, ] [package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", 
"hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] [[package]] name = "bce-python-sdk" @@ -263,7 +254,6 @@ description = "BCE SDK for python" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,<4,>=2.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "bce_python_sdk-0.9.25-py3-none-any.whl", hash = "sha256:cd1ab4c887e163adba6bfb3cd40465a365e5f4255705a015b0cdbe768e649877"}, {file = "bce_python_sdk-0.9.25.tar.gz", hash = "sha256:93a0623fbb1bf3a58b4f2d7bdbd799a3b342a538f0c72950c77168e431470e86"}, @@ -281,7 +271,6 @@ description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"}, {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"}, @@ -294,7 +283,6 @@ description = "The Real First Universal Charset Detector. 
Open, modern and activ optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -397,7 +385,6 @@ description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -413,7 +400,7 @@ description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["main"] -markers = "python_version <= \"3.11\" and platform_system == \"Windows\"" +markers = "platform_system == \"Windows\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -426,7 +413,6 @@ description = "Decorators for Humans" optional = false python-versions = ">=3.5" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, @@ -439,7 +425,6 @@ description = "serialize all of Python" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "dill-0.3.9-py3-none-any.whl", hash = "sha256:468dff3b89520b474c0397703366b7b95eebe6303f108adf9b19da1f702be87a"}, {file = "dill-0.3.9.tar.gz", hash = "sha256:81aa267dddf68cbfe8029c42ca9ec6a4ab3b22371d1c450abc54422577b4512c"}, @@ -456,7 +441,6 @@ description = "Disk Cache -- Disk and file backed persistent cache." 
optional = false python-versions = ">=3" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19"}, {file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"}, @@ -469,7 +453,6 @@ description = "Distro - an OS platform information API" optional = false python-versions = ">=3.6" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, @@ -482,7 +465,6 @@ description = "An implementation of lxml.xmlfile for the standard library" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, @@ -511,7 +493,6 @@ description = "A library for efficient similarity search and clustering of dense optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "faiss_cpu-1.8.0.post1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:fd84721eb599aa1da19b1b36345bb8705a60bb1d2887bbbc395a29e3d36a1a62"}, {file = "faiss_cpu-1.8.0.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b78ff9079d15fd0f156bf5dd8a2975a8abffac1854a86ece263eec1500a2e836"}, @@ -552,7 +533,6 @@ description = "FastAPI framework, high performance, easy to learn, fast to code, optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "fastapi-0.115.6-py3-none-any.whl", hash = "sha256:e9240b29e36fa8f4bb7290316988e90c381e5092e0cbe84e7818cc3713bcf305"}, {file = "fastapi-0.115.6.tar.gz", hash = "sha256:9ec46f7addc14ea472958a96aae5b5de65f39721a46aaf5705c480d9a8b76654"}, @@ -574,7 +554,6 @@ description = "A simple Python wrapper for FFmpeg" optional = false python-versions = "<4.0,>=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "ffmpy-0.5.0-py3-none-any.whl", hash = "sha256:df3799cf5816daa56d4959a023630ee53c6768b66009dae6d131519ba4b80233"}, {file = "ffmpy-0.5.0.tar.gz", hash = "sha256:277e131f246d18e9dcfee9bb514c50749031c43582ce5ef82c57b51e3d3955c3"}, @@ -587,7 +566,6 @@ description = "A platform independent file lock." 
optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, @@ -596,7 +574,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] -typing = ["typing-extensions (>=4.12.2)"] +typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] [[package]] name = "frozenlist" @@ -605,7 +583,6 @@ description = "A list-like structure which implements collections.abc.MutableSeq optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"}, @@ -708,7 +685,6 @@ description = "File-system specification" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2"}, {file = "fsspec-2024.12.0.tar.gz", hash = "sha256:670700c977ed2fb51e0d9f9253177ed20cbde4a3e5c0283cc5385b5870c8533f"}, @@ -749,7 +725,6 @@ description = "Clean single-source support for Python 3 and 2" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "future-1.0.0-py3-none-any.whl", hash = "sha256:929292d34f5872e70396626ef385ec22355a1fae8ad29e1a734c3e43f9fbc216"}, {file = "future-1.0.0.tar.gz", hash = "sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05"}, @@ -762,7 +737,6 @@ description = "Python library for easily interacting with trained machine learni optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "gradio-5.12.0-py3-none-any.whl", hash = "sha256:b4b79a2c537131a8a5e23046565e64da40156ac24f9082e563e734e89641e160"}, ] @@ -806,7 +780,6 @@ description = "Python library for easily interacting with trained machine learni optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "gradio_client-1.5.4-py3-none-any.whl", hash = "sha256:ad38c9a6f7fc590e822627f5bf5685321a7822b8f1a88b76d00a0621a43162d6"}, {file = "gradio_client-1.5.4.tar.gz", hash = "sha256:281a1b6c4e45210c70b60888bb6f329c27f30645d7aa376e1f20966de82273dc"}, @@ -827,7 +800,6 @@ description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -840,7 +812,6 @@ description = "A minimal low-level HTTP 
client." optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, @@ -863,7 +834,6 @@ description = "The next generation HTTP client." optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, @@ -877,7 +847,7 @@ idna = "*" sniffio = "*" [package.extras] -brotli = ["brotli", "brotlicffi"] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -890,7 +860,6 @@ description = "A Python SDK for Apache HugeGraph" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [] develop = true @@ -912,7 +881,6 @@ description = "Client library to download and publish models, datasets and other optional = false python-versions = ">=3.8.0" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "huggingface_hub-0.27.1-py3-none-any.whl", hash = "sha256:1c5155ca7d60b60c2e2fc38cbb3ffb7f7c3adf48f824015b219af9061771daec"}, {file = "huggingface_hub-0.27.1.tar.gz", hash = "sha256:c004463ca870283909d715d20f066ebd6968c2207dae9393fdffb3c1d4d8f98b"}, @@ -948,7 +916,6 @@ description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -957,6 +924,30 @@ files = [ [package.extras] all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] +[[package]] +name = "importlib-metadata" +version = "8.6.1" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e"}, + {file = "importlib_metadata-8.6.1.tar.gz", hash = "sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580"}, +] + +[package.dependencies] +zipp = ">=3.20" + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +enabler = ["pytest-enabler (>=2.2)"] +perf = ["ipython"] +test = ["flufl.flake8", "importlib_resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] +type = ["pytest-mypy"] + [[package]] name = "jieba" version = "0.42.1" @@ -964,7 +955,6 @@ description = "Chinese Words Segmentation Utilities" optional = false python-versions = "*" groups = 
["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "jieba-0.42.1.tar.gz", hash = "sha256:055ca12f62674fafed09427f176506079bc135638a14e23e25be909131928db2"}, ] @@ -976,7 +966,6 @@ description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"}, {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"}, @@ -995,7 +984,6 @@ description = "Fast iterable JSON parser." optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "jiter-0.8.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ca8577f6a413abe29b079bc30f907894d7eb07a865c4df69475e868d73e71c7b"}, {file = "jiter-0.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b25bd626bde7fb51534190c7e3cb97cee89ee76b76d7585580e22f34f5e3f393"}, @@ -1082,7 +1070,6 @@ description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, @@ -1095,7 +1082,6 @@ description = "Apply JSON-Patches (RFC 6902)" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, @@ -1111,12 +1097,48 @@ description = "Identify specific nodes in a JSON document (RFC 6901)" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"}, ] +[[package]] +name = "jsonschema" +version = "4.23.0" +description = "An implementation of JSON Schema validation for Python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, + {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +jsonschema-specifications = ">=2023.03.6" +referencing = ">=0.28.4" +rpds-py = ">=0.7.1" + +[package.extras] +format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=24.6.0)"] + +[[package]] +name = "jsonschema-specifications" +version = "2024.10.1" +description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +optional = false +python-versions = ">=3.9" 
+groups = ["main"] +files = [ + {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, + {file = "jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272"}, +] + +[package.dependencies] +referencing = ">=0.31.0" + [[package]] name = "langchain-core" version = "0.2.43" @@ -1124,7 +1146,6 @@ description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0,>=3.8.1" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "langchain_core-0.2.43-py3-none-any.whl", hash = "sha256:619601235113298ebf8252a349754b7c28d3cf7166c7c922da24944b78a9363a"}, {file = "langchain_core-0.2.43.tar.gz", hash = "sha256:42c2ef6adedb911f4254068b6adc9eb4c4075f6c8cb3d83590d3539a815695f5"}, @@ -1146,7 +1167,6 @@ description = "LangChain text splitting utilities" optional = false python-versions = "<4.0,>=3.8.1" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "langchain_text_splitters-0.2.4-py3-none-any.whl", hash = "sha256:2702dee5b7cbdd595ccbe43b8d38d01a34aa8583f4d6a5a68ad2305ae3e7b645"}, {file = "langchain_text_splitters-0.2.4.tar.gz", hash = "sha256:f7daa7a3b0aa8309ce248e2e2b6fc8115be01118d336c7f7f7dfacda0e89bf29"}, @@ -1162,7 +1182,6 @@ description = "Client library to connect to the LangSmith LLM Tracing and Evalua optional = false python-versions = "<4.0,>=3.8.1" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "langsmith-0.1.147-py3-none-any.whl", hash = "sha256:7166fc23b965ccf839d64945a78e9f1157757add228b086141eb03a60d699a15"}, {file = "langsmith-0.1.147.tar.gz", hash = "sha256:2e933220318a4e73034657103b3b1a3a6109cc5db3566a7e8e03be8d6d7def7a"}, @@ -1178,6 +1197,35 @@ requests-toolbelt = ">=1.0.0,<2.0.0" [package.extras] langsmith-pyo3 = ["langsmith-pyo3 (>=0.1.0rc2,<0.2.0)"] +[[package]] +name = "litellm" +version = "1.61.16" +description = "Library to easily interface with LLM API providers" +optional = false +python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" +groups = ["main"] +files = [ + {file = "litellm-1.61.16-py3-none-any.whl", hash = "sha256:d241436ac0edf64ec57fb5686f8d84a25998a7e52213d9063adf87df8432701f"}, + {file = "litellm-1.61.16.tar.gz", hash = "sha256:02df5865f98ea9734a4d27ac7c33aad9a45c4015403d5c0797d3292ade3c5cb5"}, +] + +[package.dependencies] +aiohttp = "*" +click = "*" +httpx = ">=0.23.0" +importlib-metadata = ">=6.8.0" +jinja2 = ">=3.1.2,<4.0.0" +jsonschema = ">=4.22.0,<5.0.0" +openai = ">=1.61.0" +pydantic = ">=2.0.0,<3.0.0" +python-dotenv = ">=0.2.0" +tiktoken = ">=0.7.0" +tokenizers = "*" + +[package.extras] +extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "resend (>=0.8.0,<0.9.0)"] +proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=43.0.1,<44.0.0)", "fastapi (>=0.115.5,<0.116.0)", "fastapi-sso (>=0.16.0,<0.17.0)", "gunicorn (>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.18,<0.0.19)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.29.0,<0.30.0)", "uvloop (>=0.21.0,<0.22.0)"] + [[package]] name = "lxml" version = "5.3.0" @@ -1185,7 +1233,6 @@ description = "Powerful and Pythonic XML processing library combining libxml2/li optional = false python-versions = ">=3.6" 
groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:dd36439be765e2dde7660212b5275641edbc813e7b24668831a5c8ac91180656"}, {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ae5fe5c4b525aa82b8076c1a59d642c17b6e8739ecf852522c6321852178119d"}, @@ -1341,7 +1388,6 @@ description = "Python port of markdown-it. Markdown parsing, done right!" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, @@ -1367,7 +1413,6 @@ description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, @@ -1438,7 +1483,6 @@ description = "Markdown URL utilities" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, @@ -1451,7 +1495,6 @@ description = "multidict implementation" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"}, {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"}, @@ -1557,7 +1600,6 @@ description = "better multiprocessing and multithreading in Python" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "multiprocess-0.70.17-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7ddb24e5bcdb64e90ec5543a1f05a39463068b6d3b804aa3f2a4e16ec28562d6"}, {file = "multiprocess-0.70.17-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d729f55198a3579f6879766a6d9b72b42d4b320c0dcb7844afb774d75b573c62"}, @@ -1587,7 +1629,6 @@ description = "Natural Language Toolkit" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"}, {file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"}, @@ -1614,7 +1655,6 @@ description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, @@ -1653,7 +1693,6 @@ description = "The official Python client for Ollama." optional = false python-versions = "<4.0,>=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "ollama-0.2.1-py3-none-any.whl", hash = "sha256:b6e2414921c94f573a903d1069d682ba2fb2607070ea9e19ca4a7872f2a460ec"}, {file = "ollama-0.2.1.tar.gz", hash = "sha256:fa316baa9a81eac3beb4affb0a17deb3008fdd6ed05b123c26306cfbe4c349b6"}, @@ -1664,15 +1703,14 @@ httpx = ">=0.27.0,<0.28.0" [[package]] name = "openai" -version = "1.47.1" +version = "1.61.1" description = "The official Python library for the openai API" optional = false -python-versions = ">=3.7.1" +python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ - {file = "openai-1.47.1-py3-none-any.whl", hash = "sha256:34277583bf268bb2494bc03f48ac123788c5e2a914db1d5a23d5edc29d35c825"}, - {file = "openai-1.47.1.tar.gz", hash = "sha256:62c8f5f478f82ffafc93b33040f8bb16a45948306198bd0cba2da2ecd9cf7323"}, + {file = "openai-1.61.1-py3-none-any.whl", hash = "sha256:72b0826240ce26026ac2cd17951691f046e5be82ad122d20a8e1b30ca18bd11e"}, + {file = "openai-1.61.1.tar.gz", hash = "sha256:ce1851507218209961f89f3520e06726c0aa7d0512386f0f977e3ac3e4f2472e"}, ] [package.dependencies] @@ -1687,6 +1725,7 @@ typing-extensions = ">=4.11,<5" [package.extras] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +realtime = ["websockets (>=13,<15)"] [[package]] name = "openpyxl" @@ -1695,7 +1734,6 @@ description = "A Python library to read/write Excel 2010 xlsx/xlsm files" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, @@ -1711,7 +1749,6 @@ description = "Fast, correct Python JSON library supporting dataclasses, datetim optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "orjson-3.10.14-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:849ea7845a55f09965826e816cdc7689d6cf74fe9223d79d758c714af955bcb6"}, {file = "orjson-3.10.14-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5947b139dfa33f72eecc63f17e45230a97e741942955a6c9e650069305eb73d"}, @@ -1797,7 +1834,6 @@ description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -1810,7 +1846,6 @@ description = "Powerful data structures for data analysis, time series, and stat optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "pandas-2.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8df8612be9cd1c7797c93e1c5df861b2ddda0b48b08f2c3eaa0702cf88fb5f88"}, {file = "pandas-2.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0f573ab277252ed9aaf38240f3b54cfc90fff8e5cab70411ee1d03f5d51f3944"}, @@ -1884,7 +1919,6 @@ description = "Python 
Imaging Library (Fork)" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8"}, {file = "pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192"}, @@ -1964,7 +1998,7 @@ docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] -typing = ["typing-extensions"] +typing = ["typing-extensions ; python_version < \"3.10\""] xmp = ["defusedxml"] [[package]] @@ -1974,7 +2008,6 @@ description = "Library for building powerful interactive command lines in Python optional = false python-versions = ">=3.7.0" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"}, {file = "prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90"}, @@ -1990,7 +2023,6 @@ description = "Accelerated property cache" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "propcache-0.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6b3f39a85d671436ee3d12c017f8fdea38509e4f25b28eb25877293c98c243f6"}, {file = "propcache-0.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d51fbe4285d5db5d92a929e3e21536ea3dd43732c5b177c7ef03f918dff9f2"}, @@ -2083,7 +2115,6 @@ description = "library with cross-python path, ini-parsing, io, code, log facili optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, @@ -2096,7 +2127,6 @@ description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07"}, {file = "pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655"}, @@ -2149,7 +2179,6 @@ description = "Cryptographic library for Python" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "pycryptodome-3.21.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:dad9bf36eda068e89059d1f07408e397856be9511d7113ea4b586642a429a4fd"}, {file = "pycryptodome-3.21.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:a1752eca64c60852f38bb29e2c86fca30d7672c024128ef5d70cc15868fa10f4"}, @@ -2192,7 +2221,6 @@ description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "pydantic-2.10.5-py3-none-any.whl", hash = 
"sha256:4dd4e322dbe55472cb7ca7e73f4b63574eecccf2835ffa2af9021ce113c83c53"}, {file = "pydantic-2.10.5.tar.gz", hash = "sha256:278b38dbbaec562011d659ee05f63346951b3a248a6f3642e1bc68894ea2b4ff"}, @@ -2205,7 +2233,7 @@ typing-extensions = ">=4.12.2" [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata"] +timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] [[package]] name = "pydantic-core" @@ -2214,7 +2242,6 @@ description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"}, {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"}, @@ -2328,7 +2355,6 @@ description = "Settings management using Pydantic" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "pydantic_settings-2.6.1-py3-none-any.whl", hash = "sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87"}, {file = "pydantic_settings-2.6.1.tar.gz", hash = "sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0"}, @@ -2350,7 +2376,6 @@ description = "Manipulate audio with an simple and easy high level interface" optional = false python-versions = "*" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"}, {file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"}, @@ -2363,7 +2388,6 @@ description = "Pygments is a syntax highlighting package written in Python." optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"}, {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"}, @@ -2379,7 +2403,6 @@ description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -2395,7 +2418,6 @@ description = "Create, read, and update Microsoft Word .docx files." 
optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "python_docx-1.1.2-py3-none-any.whl", hash = "sha256:08c20d6058916fb19853fcf080f7f42b6270d89eac9fa5f8c15f691c0017fabe"}, {file = "python_docx-1.1.2.tar.gz", hash = "sha256:0cf1f22e95b9002addca7948e16f2cd7acdfd498047f1941ca5d293db7762efd"}, @@ -2412,7 +2434,6 @@ description = "Read key-value pairs from a .env file and set them as environment optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, @@ -2428,7 +2449,6 @@ description = "A streaming multipart parser for Python" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104"}, {file = "python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13"}, @@ -2441,7 +2461,6 @@ description = "World timezone definitions, modern and historical" optional = false python-versions = "*" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, @@ -2454,7 +2473,6 @@ description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -2518,7 +2536,6 @@ description = "文心千帆大模型平台 Python SDK" optional = false python-versions = "<4,>=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "qianfan-0.3.18-py3-none-any.whl", hash = "sha256:6332cd9341c3d6bbbd8c613774c3d1ac98010264118f45c1abc7aa4559eb3098"}, {file = "qianfan-0.3.18.tar.gz", hash = "sha256:f6d4a463e9bda3e7afbab156be85376975444f2c0aa504de0f7d2a6b3701c263"}, @@ -2541,12 +2558,29 @@ typer = ">=0.9.0" typing-extensions = {version = ">=4.0.0", markers = "python_full_version <= \"3.10.0\""} [package.extras] -all = ["emoji", "fastapi", "filelock", "ijson", "langchain (>=0.1.10)", "langchain-community", "locust", "ltp", "numpy (<1.22.0)", "numpy (>=1.22.0)", "pyarrow (<=12.0.1)", "pyarrow (>=14.0.1)", "python-dateutil (>=2.8.2,<3.0.0)", "sentencepiece", "tabulate", "torch", "torch (<=1.13.1)", "uvicorn"] -dataset-base = ["filelock", "ijson", "locust", "numpy (<1.22.0)", "numpy (>=1.22.0)", "pyarrow (<=12.0.1)", "pyarrow (>=14.0.1)", "python-dateutil (>=2.8.2,<3.0.0)", "tabulate"] -langchain = ["langchain (>=0.1.10)", "langchain-community"] -local-data-clean = ["emoji", "filelock", "ijson", "locust", "ltp", "numpy (<1.22.0)", "numpy (>=1.22.0)", "pyarrow (<=12.0.1)", "pyarrow (>=14.0.1)", "python-dateutil (>=2.8.2,<3.0.0)", "sentencepiece", "tabulate", "torch", "torch (<=1.13.1)"] +all = ["emoji", "fastapi", 
"filelock", "ijson", "langchain (>=0.1.10) ; python_full_version >= \"3.8.1\"", "langchain-community ; python_full_version >= \"3.8.1\"", "locust", "ltp", "numpy (<1.22.0) ; python_version == \"3.7\"", "numpy (>=1.22.0) ; python_version >= \"3.8\"", "pyarrow (<=12.0.1) ; python_version == \"3.7\"", "pyarrow (>=14.0.1) ; python_version >= \"3.8\"", "python-dateutil (>=2.8.2,<3.0.0)", "sentencepiece", "tabulate", "torch (<=1.13.1) ; python_version < \"3.8\"", "torch ; python_version >= \"3.8\"", "uvicorn"] +dataset-base = ["filelock", "ijson", "locust", "numpy (<1.22.0) ; python_version == \"3.7\"", "numpy (>=1.22.0) ; python_version >= \"3.8\"", "pyarrow (<=12.0.1) ; python_version == \"3.7\"", "pyarrow (>=14.0.1) ; python_version >= \"3.8\"", "python-dateutil (>=2.8.2,<3.0.0)", "tabulate"] +langchain = ["langchain (>=0.1.10) ; python_full_version >= \"3.8.1\"", "langchain-community ; python_full_version >= \"3.8.1\""] +local-data-clean = ["emoji", "filelock", "ijson", "locust", "ltp", "numpy (<1.22.0) ; python_version == \"3.7\"", "numpy (>=1.22.0) ; python_version >= \"3.8\"", "pyarrow (<=12.0.1) ; python_version == \"3.7\"", "pyarrow (>=14.0.1) ; python_version >= \"3.8\"", "python-dateutil (>=2.8.2,<3.0.0)", "sentencepiece", "tabulate", "torch (<=1.13.1) ; python_version < \"3.8\"", "torch ; python_version >= \"3.8\""] openai = ["fastapi", "uvicorn"] +[[package]] +name = "referencing" +version = "0.36.2" +description = "JSON Referencing + Python" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"}, + {file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +rpds-py = ">=0.7.0" +typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""} + [[package]] name = "regex" version = "2024.11.6" @@ -2554,7 +2588,6 @@ description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, @@ -2659,7 +2692,6 @@ description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -2682,7 +2714,6 @@ description = "A utility belt for advanced users of python-requests" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, @@ -2698,7 +2729,6 @@ description = "Easy to use retry decorator." 
optional = false python-versions = "*" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "retry-0.9.2-py2.py3-none-any.whl", hash = "sha256:ccddf89761fa2c726ab29391837d4327f819ea14d244c232a1d24c67a2f98606"}, {file = "retry-0.9.2.tar.gz", hash = "sha256:f8bfa8b99b69c4506d6f5bd3b0aabf77f98cdb17f3c9fc3f5ca820033336fba4"}, @@ -2715,7 +2745,6 @@ description = "Render rich text, tables, progress bars, syntax highlighting, mar optional = false python-versions = ">=3.8.0" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90"}, {file = "rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098"}, @@ -2729,6 +2758,119 @@ typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.1 [package.extras] jupyter = ["ipywidgets (>=7.5.1,<9)"] +[[package]] +name = "rpds-py" +version = "0.23.1" +description = "Python bindings to Rust's persistent data structures (rpds)" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "rpds_py-0.23.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2a54027554ce9b129fc3d633c92fa33b30de9f08bc61b32c053dc9b537266fed"}, + {file = "rpds_py-0.23.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b5ef909a37e9738d146519657a1aab4584018746a18f71c692f2f22168ece40c"}, + {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ee9d6f0b38efb22ad94c3b68ffebe4c47865cdf4b17f6806d6c674e1feb4246"}, + {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f7356a6da0562190558c4fcc14f0281db191cdf4cb96e7604c06acfcee96df15"}, + {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9441af1d25aed96901f97ad83d5c3e35e6cd21a25ca5e4916c82d7dd0490a4fa"}, + {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d8abf7896a91fb97e7977d1aadfcc2c80415d6dc2f1d0fca5b8d0df247248f3"}, + {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b08027489ba8fedde72ddd233a5ea411b85a6ed78175f40285bd401bde7466d"}, + {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8"}, + {file = "rpds_py-0.23.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:35d5631ce0af26318dba0ae0ac941c534453e42f569011585cb323b7774502a5"}, + {file = "rpds_py-0.23.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a20cb698c4a59c534c6701b1c24a968ff2768b18ea2991f886bd8985ce17a89f"}, + {file = "rpds_py-0.23.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e9c206a1abc27e0588cf8b7c8246e51f1a16a103734f7750830a1ccb63f557a"}, + {file = "rpds_py-0.23.1-cp310-cp310-win32.whl", hash = "sha256:d9f75a06ecc68f159d5d7603b734e1ff6daa9497a929150f794013aa9f6e3f12"}, + {file = "rpds_py-0.23.1-cp310-cp310-win_amd64.whl", hash = "sha256:f35eff113ad430b5272bbfc18ba111c66ff525828f24898b4e146eb479a2cdda"}, + {file = "rpds_py-0.23.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b79f5ced71efd70414a9a80bbbfaa7160da307723166f09b69773153bf17c590"}, + {file = "rpds_py-0.23.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c9e799dac1ffbe7b10c1fd42fe4cd51371a549c6e108249bde9cd1200e8f59b4"}, + {file = 
"rpds_py-0.23.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721f9c4011b443b6e84505fc00cc7aadc9d1743f1c988e4c89353e19c4a968ee"}, + {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f88626e3f5e57432e6191cd0c5d6d6b319b635e70b40be2ffba713053e5147dd"}, + {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:285019078537949cecd0190f3690a0b0125ff743d6a53dfeb7a4e6787af154f5"}, + {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b92f5654157de1379c509b15acec9d12ecf6e3bc1996571b6cb82a4302060447"}, + {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e768267cbe051dd8d1c5305ba690bb153204a09bf2e3de3ae530de955f5b5580"}, + {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c5334a71f7dc1160382d45997e29f2637c02f8a26af41073189d79b95d3321f1"}, + {file = "rpds_py-0.23.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d6adb81564af0cd428910f83fa7da46ce9ad47c56c0b22b50872bc4515d91966"}, + {file = "rpds_py-0.23.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:cafa48f2133d4daa028473ede7d81cd1b9f9e6925e9e4003ebdf77010ee02f35"}, + {file = "rpds_py-0.23.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522"}, + {file = "rpds_py-0.23.1-cp311-cp311-win32.whl", hash = "sha256:243241c95174b5fb7204c04595852fe3943cc41f47aa14c3828bc18cd9d3b2d6"}, + {file = "rpds_py-0.23.1-cp311-cp311-win_amd64.whl", hash = "sha256:11dd60b2ffddba85715d8a66bb39b95ddbe389ad2cfcf42c833f1bcde0878eaf"}, + {file = "rpds_py-0.23.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3902df19540e9af4cc0c3ae75974c65d2c156b9257e91f5101a51f99136d834c"}, + {file = "rpds_py-0.23.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66f8d2a17e5838dd6fb9be6baaba8e75ae2f5fa6b6b755d597184bfcd3cb0eba"}, + {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:112b8774b0b4ee22368fec42749b94366bd9b536f8f74c3d4175d4395f5cbd31"}, + {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0df046f2266e8586cf09d00588302a32923eb6386ced0ca5c9deade6af9a149"}, + {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c"}, + {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce473a2351c018b06dd8d30d5da8ab5a0831056cc53b2006e2a8028172c37ce5"}, + {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d550d7e9e7d8676b183b37d65b5cd8de13676a738973d330b59dc8312df9c5dc"}, + {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e14f86b871ea74c3fddc9a40e947d6a5d09def5adc2076ee61fb910a9014fb35"}, + {file = "rpds_py-0.23.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1bf5be5ba34e19be579ae873da515a2836a2166d8d7ee43be6ff909eda42b72b"}, + {file = "rpds_py-0.23.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7031d493c4465dbc8d40bd6cafefef4bd472b17db0ab94c53e7909ee781b9ef"}, + {file = "rpds_py-0.23.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:55ff4151cfd4bc635e51cfb1c59ac9f7196b256b12e3a57deb9e5742e65941ad"}, + {file = "rpds_py-0.23.1-cp312-cp312-win32.whl", hash = 
"sha256:a9d3b728f5a5873d84cba997b9d617c6090ca5721caaa691f3b1a78c60adc057"}, + {file = "rpds_py-0.23.1-cp312-cp312-win_amd64.whl", hash = "sha256:b03a8d50b137ee758e4c73638b10747b7c39988eb8e6cd11abb7084266455165"}, + {file = "rpds_py-0.23.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:4caafd1a22e5eaa3732acb7672a497123354bef79a9d7ceed43387d25025e935"}, + {file = "rpds_py-0.23.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:178f8a60fc24511c0eb756af741c476b87b610dba83270fce1e5a430204566a4"}, + {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c632419c3870507ca20a37c8f8f5352317aca097639e524ad129f58c125c61c6"}, + {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:698a79d295626ee292d1730bc2ef6e70a3ab135b1d79ada8fde3ed0047b65a10"}, + {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:271fa2184cf28bdded86bb6217c8e08d3a169fe0bbe9be5e8d96e8476b707122"}, + {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b91cceb5add79ee563bd1f70b30896bd63bc5f78a11c1f00a1e931729ca4f1f4"}, + {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a6cb95074777f1ecda2ca4fa7717caa9ee6e534f42b7575a8f0d4cb0c24013"}, + {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:50fb62f8d8364978478b12d5f03bf028c6bc2af04082479299139dc26edf4c64"}, + {file = "rpds_py-0.23.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c8f7e90b948dc9dcfff8003f1ea3af08b29c062f681c05fd798e36daa3f7e3e8"}, + {file = "rpds_py-0.23.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b98b6c953e5c2bda51ab4d5b4f172617d462eebc7f4bfdc7c7e6b423f6da957"}, + {file = "rpds_py-0.23.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2893d778d4671ee627bac4037a075168b2673c57186fb1a57e993465dbd79a93"}, + {file = "rpds_py-0.23.1-cp313-cp313-win32.whl", hash = "sha256:2cfa07c346a7ad07019c33fb9a63cf3acb1f5363c33bc73014e20d9fe8b01cdd"}, + {file = "rpds_py-0.23.1-cp313-cp313-win_amd64.whl", hash = "sha256:3aaf141d39f45322e44fc2c742e4b8b4098ead5317e5f884770c8df0c332da70"}, + {file = "rpds_py-0.23.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:759462b2d0aa5a04be5b3e37fb8183615f47014ae6b116e17036b131985cb731"}, + {file = "rpds_py-0.23.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3e9212f52074fc9d72cf242a84063787ab8e21e0950d4d6709886fb62bcb91d5"}, + {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e9f3a3ac919406bc0414bbbd76c6af99253c507150191ea79fab42fdb35982a"}, + {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c04ca91dda8a61584165825907f5c967ca09e9c65fe8966ee753a3f2b019fe1e"}, + {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ab923167cfd945abb9b51a407407cf19f5bee35001221f2911dc85ffd35ff4f"}, + {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ed6f011bedca8585787e5082cce081bac3d30f54520097b2411351b3574e1219"}, + {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6959bb9928c5c999aba4a3f5a6799d571ddc2c59ff49917ecf55be2bbb4e3722"}, + {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1ed7de3c86721b4e83ac440751329ec6a1102229aa18163f84c75b06b525ad7e"}, + {file 
= "rpds_py-0.23.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5fb89edee2fa237584e532fbf78f0ddd1e49a47c7c8cfa153ab4849dc72a35e6"}, + {file = "rpds_py-0.23.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7e5413d2e2d86025e73f05510ad23dad5950ab8417b7fc6beaad99be8077138b"}, + {file = "rpds_py-0.23.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d31ed4987d72aabdf521eddfb6a72988703c091cfc0064330b9e5f8d6a042ff5"}, + {file = "rpds_py-0.23.1-cp313-cp313t-win32.whl", hash = "sha256:f3429fb8e15b20961efca8c8b21432623d85db2228cc73fe22756c6637aa39e7"}, + {file = "rpds_py-0.23.1-cp313-cp313t-win_amd64.whl", hash = "sha256:d6f6512a90bd5cd9030a6237f5346f046c6f0e40af98657568fa45695d4de59d"}, + {file = "rpds_py-0.23.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19"}, + {file = "rpds_py-0.23.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c6760211eee3a76316cf328f5a8bd695b47b1626d21c8a27fb3b2473a884d597"}, + {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72e680c1518733b73c994361e4b06441b92e973ef7d9449feec72e8ee4f713da"}, + {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ae28144c1daa61366205d32abd8c90372790ff79fc60c1a8ad7fd3c8553a600e"}, + {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c698d123ce5d8f2d0cd17f73336615f6a2e3bdcedac07a1291bb4d8e7d82a05a"}, + {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98b257ae1e83f81fb947a363a274c4eb66640212516becaff7bef09a5dceacaa"}, + {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c9ff044eb07c8468594d12602291c635da292308c8c619244e30698e7fc455a"}, + {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7938c7b0599a05246d704b3f5e01be91a93b411d0d6cc62275f025293b8a11ce"}, + {file = "rpds_py-0.23.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e9cb79ecedfc156c0692257ac7ed415243b6c35dd969baa461a6888fc79f2f07"}, + {file = "rpds_py-0.23.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:7b77e07233925bd33fc0022b8537774423e4c6680b6436316c5075e79b6384f4"}, + {file = "rpds_py-0.23.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a970bfaf130c29a679b1d0a6e0f867483cea455ab1535fb427566a475078f27f"}, + {file = "rpds_py-0.23.1-cp39-cp39-win32.whl", hash = "sha256:4233df01a250b3984465faed12ad472f035b7cd5240ea3f7c76b7a7016084495"}, + {file = "rpds_py-0.23.1-cp39-cp39-win_amd64.whl", hash = "sha256:c617d7453a80e29d9973b926983b1e700a9377dbe021faa36041c78537d7b08c"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c1f8afa346ccd59e4e5630d5abb67aba6a9812fddf764fd7eb11f382a345f8cc"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5a96fcac2f18e5a0a23a75cd27ce2656c66c11c127b0318e508aab436b77428"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3e77febf227a1dc3220159355dba68faa13f8dca9335d97504abf428469fb18b"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26bb3e8de93443d55e2e748e9fd87deb5f8075ca7bc0502cfc8be8687d69a2ec"}, + {file = 
"rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:db7707dde9143a67b8812c7e66aeb2d843fe33cc8e374170f4d2c50bd8f2472d"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1eedaaccc9bb66581d4ae7c50e15856e335e57ef2734dbc5fd8ba3e2a4ab3cb6"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28358c54fffadf0ae893f6c1050e8f8853e45df22483b7fff2f6ab6152f5d8bf"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:633462ef7e61d839171bf206551d5ab42b30b71cac8f10a64a662536e057fdef"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:a98f510d86f689fcb486dc59e6e363af04151e5260ad1bdddb5625c10f1e95f8"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e0397dd0b3955c61ef9b22838144aa4bef6f0796ba5cc8edfc64d468b93798b4"}, + {file = "rpds_py-0.23.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:75307599f0d25bf6937248e5ac4e3bde5ea72ae6618623b86146ccc7845ed00b"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3614d280bf7aab0d3721b5ce0e73434acb90a2c993121b6e81a1c15c665298ac"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e5963ea87f88bddf7edd59644a35a0feecf75f8985430124c253612d4f7d27ae"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad76f44f70aac3a54ceb1813ca630c53415da3a24fd93c570b2dfb4856591017"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c6ae11e6e93728d86aafc51ced98b1658a0080a7dd9417d24bfb955bb09c3c2"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c76b32eb2ab650a29e423525e84eb197c45504b1c1e6e17b6cc91fcfeb1a4b1d"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4263320ed887ed843f85beba67f8b2d1483b5947f2dc73a8b068924558bfeace"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7f9682a8f71acdf59fd554b82b1c12f517118ee72c0f3944eda461606dfe7eb9"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:754fba3084b70162a6b91efceee8a3f06b19e43dac3f71841662053c0584209a"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:a1c66e71ecfd2a4acf0e4bd75e7a3605afa8f9b28a3b497e4ba962719df2be57"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:8d67beb6002441faef8251c45e24994de32c4c8686f7356a1f601ad7c466f7c3"}, + {file = "rpds_py-0.23.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a1e17d8dc8e57d8e0fd21f8f0f0a5211b3fa258b2e444c2053471ef93fe25a00"}, + {file = "rpds_py-0.23.1.tar.gz", hash = "sha256:7f3240dcfa14d198dba24b8b9cb3b108c06b68d45b7babd9eefc1038fdf7e707"}, +] + [[package]] name = "ruff" version = "0.9.1" @@ -2736,7 +2878,7 @@ description = "An extremely fast Python linter and code formatter, written in Ru optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\" and sys_platform != \"emscripten\"" +markers = "sys_platform != \"emscripten\"" files = [ {file = 
"ruff-0.9.1-py3-none-linux_armv6l.whl", hash = "sha256:84330dda7abcc270e6055551aca93fdde1b0685fc4fd358f26410f9349cf1743"}, {file = "ruff-0.9.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:3cae39ba5d137054b0e5b472aee3b78a7c884e61591b100aeb544bcd1fc38d4f"}, @@ -2765,7 +2907,6 @@ description = "A small Python library created to help developers protect their a optional = false python-versions = ">3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "safehttpx-0.1.6-py3-none-any.whl", hash = "sha256:407cff0b410b071623087c63dd2080c3b44dc076888d8c5823c00d1e58cb381c"}, {file = "safehttpx-0.1.6.tar.gz", hash = "sha256:b356bfc82cee3a24c395b94a2dbeabbed60aff1aa5fa3b5fe97c4f2456ebce42"}, @@ -2784,14 +2925,13 @@ description = "A library implementing the 'SemVer' scheme." optional = false python-versions = ">=2.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177"}, {file = "semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c"}, ] [package.extras] -dev = ["Django (>=1.11)", "check-manifest", "colorama (<=0.4.1)", "coverage", "flake8", "nose2", "readme-renderer (<25.0)", "tox", "wheel", "zest.releaser[recommended]"] +dev = ["Django (>=1.11)", "check-manifest", "colorama (<=0.4.1) ; python_version == \"3.4\"", "coverage", "flake8", "nose2", "readme-renderer (<25.0) ; python_version == \"3.4\"", "tox", "wheel", "zest.releaser[recommended]"] doc = ["Sphinx", "sphinx-rtd-theme"] [[package]] @@ -2801,7 +2941,6 @@ description = "Easily download, build, install, upgrade, and uninstall Python pa optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"}, {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"}, @@ -2809,7 +2948,7 @@ files = [ [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov ; platform_python_implementation != \"PyPy\"", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf ; 
sys_platform != \"cygwin\"", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "shellingham" @@ -2818,7 +2957,6 @@ description = "Tool to Detect Surrounding Shell" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686"}, {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"}, @@ -2831,7 +2969,6 @@ description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -2844,7 +2981,6 @@ description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -2857,7 +2993,6 @@ description = "The little ASGI library that shines." optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "starlette-0.41.3-py3-none-any.whl", hash = "sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7"}, {file = "starlette-0.41.3.tar.gz", hash = "sha256:0e4ab3d16522a255be6b28260b938eae2482f98ce5cc934cb08dce8dc3ba5835"}, @@ -2876,7 +3011,6 @@ description = "Retry code until it succeeds" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"}, {file = "tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78"}, @@ -2893,7 +3027,6 @@ description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "tiktoken-0.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b07e33283463089c81ef1467180e3e00ab00d46c2c4bbcef0acab5f771d6695e"}, {file = "tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9269348cb650726f44dd3bbb3f9110ac19a8dcc8f54949ad3ef652ca22a38e21"}, @@ -2935,6 +3068,39 @@ requests = ">=2.26.0" [package.extras] blobfile = ["blobfile (>=2)"] +[[package]] +name = "tokenizers" +version = "0.21.0" +description = "" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2"}, + {file = "tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04"}, + {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e"}, + {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b"}, + {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74"}, + {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff"}, + {file = "tokenizers-0.21.0-cp39-abi3-win32.whl", hash = "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a"}, + {file = "tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c"}, + {file = "tokenizers-0.21.0.tar.gz", hash = "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4"}, +] + +[package.dependencies] +huggingface-hub = ">=0.16.4,<1.0" + +[package.extras] +dev = ["tokenizers[testing]"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] + [[package]] name = "tomlkit" version = "0.13.2" @@ -2942,7 +3108,6 @@ description = "Style preserving TOML library" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, @@ -2955,7 +3120,6 @@ description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -2978,7 +3142,6 @@ description = "Typer, build great CLIs. Easy to code. 
Based on Python type hints optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "typer-0.15.1-py3-none-any.whl", hash = "sha256:7994fb7b8155b64d3402518560648446072864beefd44aa2dc36972a5972e847"}, {file = "typer-0.15.1.tar.gz", hash = "sha256:a0588c0a7fa68a1978a069818657778f86abe6ff5ea6abf472f940a08bfe4f0a"}, @@ -2997,7 +3160,6 @@ description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, @@ -3010,7 +3172,6 @@ description = "Provider of IANA time zone data" optional = false python-versions = ">=2" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, @@ -3023,7 +3184,6 @@ description = "tzinfo object for the local timezone" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"}, {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"}, @@ -3042,14 +3202,13 @@ description = "HTTP library with thread-safe connection pooling, file post, and optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -3061,7 +3220,7 @@ description = "The lightning-fast ASGI server." 
optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\" and sys_platform != \"emscripten\"" +markers = "sys_platform != \"emscripten\"" files = [ {file = "uvicorn-0.34.0-py3-none-any.whl", hash = "sha256:023dc038422502fa28a09c7a30bf2b6991512da7dcdb8fd35fe57cfc154126f4"}, {file = "uvicorn-0.34.0.tar.gz", hash = "sha256:404051050cd7e905de2c9a7e61790943440b3416f49cb409f965d9dcd0fa73e9"}, @@ -3073,7 +3232,7 @@ h11 = ">=0.8" typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] -standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] [[package]] name = "wcwidth" @@ -3082,7 +3241,6 @@ description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = "*" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -3095,7 +3253,6 @@ description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "websockets-14.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a0adf84bc2e7c86e8a202537b4fd50e6f7f0e4a6b6bf64d7ccb96c4cd3330b29"}, {file = "websockets-14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90b5d9dfbb6d07a84ed3e696012610b6da074d97453bd01e0e30744b472c8179"}, @@ -3175,7 +3332,6 @@ description = "Yet another URL library" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "yarl-1.18.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7df647e8edd71f000a5208fe6ff8c382a1de8edfbccdbbfe649d263de07d8c34"}, {file = "yarl-1.18.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c69697d3adff5aa4f874b19c0e4ed65180ceed6318ec856ebc423aa5850d84f7"}, @@ -3266,7 +3422,27 @@ idna = ">=2.0" multidict = ">=4.0" propcache = ">=0.2.0" +[[package]] +name = "zipp" +version = "3.21.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, + {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +type = ["pytest-mypy"] + 
[metadata] lock-version = "2.1" python-versions = "^3.10,<3.12" -content-hash = "d6c44c529a73d5ddeb8429b23e06bb944c13e3c4de5d1cc76b65f9fedd919108" +content-hash = "b6287d2cfb6e1f429fe13dcb799dec99ddeeff1ee394620dc73c00488ce62751" diff --git a/hugegraph-llm/pyproject.toml b/hugegraph-llm/pyproject.toml index 4829648d..d12054fc 100644 --- a/hugegraph-llm/pyproject.toml +++ b/hugegraph-llm/pyproject.toml @@ -24,7 +24,7 @@ authors = [ ] readme = "README.md" license = "Apache-2.0" -requires-python = "^3.10" +requires-python = ">=3.10,<3.12" maintainers = [ { name = "Apache HugeGraph Contributors", email = "dev@hugegraph.apache.org" }, ] @@ -38,7 +38,7 @@ documentation = "https://hugegraph.apache.org/docs/quickstart/hugegraph-ai/" [tool.poetry.dependencies] python = "^3.10,<3.12" -openai = "~1.47.1" +openai = "~1.61.0" ollama = "~0.2.1" qianfan = "~0.3.18" retry = "~0.9.2" @@ -61,6 +61,7 @@ setuptools = "~70.0.0" urllib3 = "~2.2.2" rich = "~13.9.4" apscheduler= "~3.10.4" +litellm = "~1.61.13" hugegraph-python = { path = "../hugegraph-python-client/", develop = true } [build-system] diff --git a/hugegraph-llm/requirements.txt b/hugegraph-llm/requirements.txt index 5f369f87..64dd3829 100644 --- a/hugegraph-llm/requirements.txt +++ b/hugegraph-llm/requirements.txt @@ -1,4 +1,4 @@ -openai~=1.47.1 +openai~=1.61.0 ollama~=0.2.1 qianfan~=0.3.18 retry~=0.9.2 @@ -16,3 +16,4 @@ pandas~=2.2.2 openpyxl~=3.1.5 pydantic-settings~=2.6.1 apscheduler~=3.10.4 +litellm~=1.61.13 diff --git a/hugegraph-llm/src/hugegraph_llm/config/llm_config.py b/hugegraph-llm/src/hugegraph_llm/config/llm_config.py index 45cf4a6d..5e314bdf 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/llm_config.py +++ b/hugegraph-llm/src/hugegraph_llm/config/llm_config.py @@ -25,10 +25,10 @@ class LLMConfig(BaseConfig): """LLM settings""" - chat_llm_type: Literal["openai", "ollama/local", "qianfan_wenxin", "zhipu"] = "openai" - extract_llm_type: Literal["openai", "ollama/local", "qianfan_wenxin", "zhipu"] = "openai" - text2gql_llm_type: Literal["openai", "ollama/local", "qianfan_wenxin", "zhipu"] = "openai" - embedding_type: Optional[Literal["openai", "ollama/local", "qianfan_wenxin", "zhipu"]] = "openai" + chat_llm_type: Literal["openai", "litellm", "ollama/local", "qianfan_wenxin"] = "openai" + extract_llm_type: Literal["openai", "litellm", "ollama/local", "qianfan_wenxin"] = "openai" + text2gql_llm_type: Literal["openai", "litellm", "ollama/local", "qianfan_wenxin"] = "openai" + embedding_type: Optional[Literal["openai", "litellm", "ollama/local", "qianfan_wenxin"]] = "openai" reranker_type: Optional[Literal["cohere", "siliconflow"]] = None # 1. OpenAI settings openai_chat_api_base: Optional[str] = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1") @@ -84,14 +84,19 @@ class LLMConfig(BaseConfig): qianfan_embed_url: Optional[str] = qianfan_url_prefix + "/embeddings/" # refer https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu to get more details qianfan_embedding_model: Optional[str] = "embedding-v1" - # TODO: To be confirmed, whether to configure - # 5. 
ZhiPu(GLM) settings
-    zhipu_chat_api_key: Optional[str] = None
-    zhipu_chat_language_model: Optional[str] = "glm-4"
-    zhipu_chat_embedding_model: Optional[str] = "embedding-2"
-    zhipu_extract_api_key: Optional[str] = None
-    zhipu_extract_language_model: Optional[str] = "glm-4"
-    zhipu_extract_embedding_model: Optional[str] = "embedding-2"
-    zhipu_text2gql_api_key: Optional[str] = None
-    zhipu_text2gql_language_model: Optional[str] = "glm-4"
-    zhipu_text2gql_embedding_model: Optional[str] = "embedding-2"
+    # 5. LiteLLM settings
+    litellm_chat_api_key: Optional[str] = None
+    litellm_chat_api_base: Optional[str] = None
+    litellm_chat_language_model: Optional[str] = "openai/gpt-4o"
+    litellm_chat_tokens: int = 8192
+    litellm_extract_api_key: Optional[str] = None
+    litellm_extract_api_base: Optional[str] = None
+    litellm_extract_language_model: Optional[str] = "openai/gpt-4o"
+    litellm_extract_tokens: int = 256
+    litellm_text2gql_api_key: Optional[str] = None
+    litellm_text2gql_api_base: Optional[str] = None
+    litellm_text2gql_language_model: Optional[str] = "openai/gpt-4o"
+    litellm_text2gql_tokens: int = 4096
+    litellm_embedding_api_key: Optional[str] = None
+    litellm_embedding_api_base: Optional[str] = None
+    litellm_embedding_model: Optional[str] = "openai/text-embedding-3-small"
diff --git a/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py b/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py
index cdab6be7..ad32bbc0 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py
@@ -40,42 +40,64 @@ class PromptConfig(BasePromptConfig):
     default_question: str = """Tell me about Sarah."""
-    # Data is detached from hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
+    # Note: Users should modify the prompt (examples) according to the real schema and text (property_graph_extract.py)
     extract_graph_prompt: str = """## Main Task
Given the following graph schema and a piece of text, your task is to analyze the text and extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.
-## Basic Rules
-### Schema Format
+## Basic Rules:
+### Schema Format:
Graph Schema:
-- Vertices: [List of vertex labels and their properties]
-- Edges: [List of edge labels, their source and target vertex labels, and properties]
+- "vertices": [List of vertex labels and their properties]
+- "edges": [List of edge labels, their source and target vertex labels, and properties]
-### Content Rule
-Please read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema. For each piece of information that matches a vertex or edge, format it according to the following JSON structures:
+### Content Rule:
+Please read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema.
+You are not allowed to modify the schema constraints. Your task is to format the provided information into the required schema, without missing any keyword.
+For each piece of information that matches a vertex or edge, format it strictly according to the following JSON structures:
#### Vertex Format:
{"id":"vertexLabelID:entityName","label":"vertexLabel","type":"vertex","properties":{"propertyName":"propertyValue", ...}}
+where:
+    - "vertexLabelID": int
+    - "vertexLabel": str
+    - "entityName": str
+    - "type": "vertex"
+    - "properties": dict
+
#### Edge Format:
-{"label":"edgeLabel","type":"edge","outV":"sourceVertexId","outVLabel":"sourceVertexLabel","inV":"targetVertexId","inVLabel":"targetVertexLabel","properties":{"propertyName":"propertyValue",...}}
-Also follow the rules:
-1. Don't extract property fields or labels that doesn't exist in the given schema
-2. Ensure the extracted property set in the same type as the given schema (like 'age' should be a number, 'select' should be a boolean)
-3. If there are multiple primary keys, the strategy for generating VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator)
-4. Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema
-5. Translate the schema fields into Chinese if the given text is Chinese but the schema is in English (Optional)
-
-## Example
+{"id":"vertexlabelID:pk1!pk2!pk3","label":"edgeLabel","type":"edge","outV":"sourceVertexId","outVLabel":"sourceVertexLabel","inV":"targetVertexId","inVLabel":"targetVertexLabel","properties":{"propertyName":"propertyValue",...}}
+
+where:
+    - "id": int or str (conditional) (optional)
+    - "edgeLabel": str
+    - "type": "edge"
+    - "outV": str
+    - "outVLabel": str
+    - "inV": str
+    - "inVLabel": str
+    - "properties": dict
+    - "sourceVertexId": "vertexLabelID:entityName"
+    - "targetVertexId": "vertexLabelID:entityName"
+
+Strictly follow these rules:
+1. Don't extract property fields or labels that don't exist in the given schema. Do not generate new information.
+2. Ensure each extracted property has the same type as defined in the given schema (like 'age' should be a number, 'select' should be a boolean).
+3. If there are multiple primary keys, the strategy for generating VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator). This id must be generated ONLY if there are multiple primary keys. If there is only one primary key, the strategy for generating VID is: int (sequentially increasing).
+4. Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema.
+5. Translate the schema fields into Chinese if the given text input is Chinese (Optional)
+
+Refer to the following baseline example to understand the output generation requirements:
+## Example:
### Input example:
-#### text
+#### text:
Meet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist.
-#### graph schema +#### graph schema example: {"vertices":[{"vertex_label":"person","properties":["name","age","occupation"]}], "edges":[{"edge_label":"roommate", "source_vertex_label":"person","target_vertex_label":"person","properties":["date"]]} ### Output example: -[{"id":"1:Sarah","label":"person","type":"vertex","properties":{"name":"Sarah","age":30,"occupation":"attorney"}},{"id":"1:James","label":"person","type":"vertex","properties":{"name":"James","occupation":"journalist"}},{"label":"roommate","type":"edge","outV":"1:Sarah","outVLabel":"person","inV":"1:James","inVLabel":"person","properties":{"date":"2010"}}] -""" +{"vertices":[{"id":"1:Sarah","label":"person","type":"vertex","properties":{"name":"Sarah","age":30,"occupation":"attorney"}},{"id":"1:James","label":"person","type":"vertex","properties":{"name":"James","occupation":"journalist"}}], "edges":[{"id": 1, "label":"roommate","type":"edge","outV":"1:Sarah","outVLabel":"person","inV":"1:James","inVLabel":"person","properties":{"date":"2010"}}]}""" graph_schema: str = """{ "vertexlabels": [ diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py index bc66ba57..74527d16 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py @@ -24,11 +24,44 @@ from requests.auth import HTTPBasicAuth from hugegraph_llm.config import huge_settings, llm_settings +from hugegraph_llm.models.embeddings.litellm import LiteLLMEmbedding +from hugegraph_llm.models.llms.litellm import LiteLLMClient from hugegraph_llm.utils.log import log current_llm = "chat" +def test_litellm_embedding(api_key, api_base, model_name) -> int: + llm_client = LiteLLMEmbedding( + api_key = api_key, + api_base = api_base, + model_name = model_name, + ) + try: + response = llm_client.get_text_embedding("test") + assert len(response) > 0 + except Exception as e: + raise gr.Error(f"Error in litellm embedding call: {e}") from e + gr.Info("Test connection successful~") + return 200 + + +def test_litellm_chat(api_key, api_base, model_name, max_tokens: int) -> int: + try: + llm_client = LiteLLMClient( + api_key=api_key, + api_base=api_base, + model_name=model_name, + max_tokens=max_tokens, + ) + response = llm_client.generate(messages=[{"role": "user", "content": "hi"}]) + assert len(response) > 0 + except Exception as e: + raise gr.Error(f"Error in litellm chat call: {e}") from e + gr.Info("Test connection successful~") + return 200 + + def test_api_connection(url, method="GET", headers=None, params=None, body=None, auth=None, origin_call=None) -> int: # TODO: use fastapi.request / starlette instead? 
log.debug("Request URL: %s", url) @@ -97,6 +130,11 @@ def apply_embedding_config(arg1, arg2, arg3, origin_call=None) -> int: llm_settings.ollama_embedding_port = int(arg2) llm_settings.ollama_embedding_model = arg3 status_code = test_api_connection(f"http://{arg1}:{arg2}", origin_call=origin_call) + elif embedding_option == "litellm": + llm_settings.litellm_embedding_api_key = arg1 + llm_settings.litellm_embedding_api_base = arg2 + llm_settings.litellm_embedding_model = arg3 + status_code = test_litellm_embedding(arg1, arg2, arg3) llm_settings.update_env() gr.Info("Configured!") return status_code @@ -173,7 +211,6 @@ def apply_llm_config(current_llm_config, arg1, arg2, arg3, arg4, origin_call=Non setattr(llm_settings, f"openai_{current_llm_config}_tokens", int(arg4)) test_url = getattr(llm_settings, f"openai_{current_llm_config}_api_base") + "/chat/completions" - log.debug("Type of OpenAI %s max_token is %s", current_llm_config, type(arg4)) data = { "model": arg3, "temperature": 0.0, @@ -192,6 +229,14 @@ def apply_llm_config(current_llm_config, arg1, arg2, arg3, arg4, origin_call=Non setattr(llm_settings, f"ollama_{current_llm_config}_language_model", arg3) status_code = test_api_connection(f"http://{arg1}:{arg2}", origin_call=origin_call) + elif llm_option == "litellm": + setattr(llm_settings, f"litellm_{current_llm_config}_api_key", arg1) + setattr(llm_settings, f"litellm_{current_llm_config}_api_base", arg2) + setattr(llm_settings, f"litellm_{current_llm_config}_language_model", arg3) + setattr(llm_settings, f"litellm_{current_llm_config}_tokens", int(arg4)) + + status_code = test_litellm_chat(arg1, arg2, arg3, int(arg4)) + gr.Info("Configured!") llm_settings.update_env() return status_code @@ -218,7 +263,7 @@ def create_configs_block() -> list: with gr.Accordion("2. 
Set up the LLM.", open=False): gr.Markdown("> Tips: the openai option also support openai style api from other providers.") with gr.Tab(label='chat'): - chat_llm_dropdown = gr.Dropdown(choices=["openai", "qianfan_wenxin", "ollama/local"], + chat_llm_dropdown = gr.Dropdown(choices=["openai", "litellm", "qianfan_wenxin", "ollama/local"], value=getattr(llm_settings, "chat_llm_type"), label="type") apply_llm_config_with_chat_op = partial(apply_llm_config, "chat") @@ -249,13 +294,23 @@ def chat_llm_settings(llm_type): gr.Textbox(value=getattr(llm_settings, "qianfan_chat_language_model"), label="model_name"), gr.Textbox(value="", visible=False), ] + elif llm_type == "litellm": + llm_config_input = [ + gr.Textbox(value=getattr(llm_settings, "litellm_chat_api_key"), label="api_key", + type="password"), + gr.Textbox(value=getattr(llm_settings, "litellm_chat_api_base"), label="api_base", + info="If you want to use the default api_base, please keep it blank"), + gr.Textbox(value=getattr(llm_settings, "litellm_chat_language_model"), label="model_name", + info="Please refer to https://docs.litellm.ai/docs/providers"), + gr.Textbox(value=getattr(llm_settings, "litellm_chat_tokens"), label="max_token"), + ] else: llm_config_input = [gr.Textbox(value="", visible=False) for _ in range(4)] llm_config_button = gr.Button("Apply configuration") llm_config_button.click(apply_llm_config_with_chat_op, inputs=llm_config_input) with gr.Tab(label='mini_tasks'): - extract_llm_dropdown = gr.Dropdown(choices=["openai", "qianfan_wenxin", "ollama/local"], + extract_llm_dropdown = gr.Dropdown(choices=["openai", "litellm", "qianfan_wenxin", "ollama/local"], value=getattr(llm_settings, "extract_llm_type"), label="type") apply_llm_config_with_extract_op = partial(apply_llm_config, "extract") @@ -286,12 +341,22 @@ def extract_llm_settings(llm_type): gr.Textbox(value=getattr(llm_settings, "qianfan_extract_language_model"), label="model_name"), gr.Textbox(value="", visible=False), ] + elif llm_type == "litellm": + llm_config_input = [ + gr.Textbox(value=getattr(llm_settings, "litellm_extract_api_key"), label="api_key", + type="password"), + gr.Textbox(value=getattr(llm_settings, "litellm_extract_api_base"), label="api_base", + info="If you want to use the default api_base, please keep it blank"), + gr.Textbox(value=getattr(llm_settings, "litellm_extract_language_model"), label="model_name", + info="Please refer to https://docs.litellm.ai/docs/providers"), + gr.Textbox(value=getattr(llm_settings, "litellm_extract_tokens"), label="max_token"), + ] else: llm_config_input = [gr.Textbox(value="", visible=False) for _ in range(4)] llm_config_button = gr.Button("Apply configuration") llm_config_button.click(apply_llm_config_with_extract_op, inputs=llm_config_input) with gr.Tab(label='text2gql'): - text2gql_llm_dropdown = gr.Dropdown(choices=["openai", "qianfan_wenxin", "ollama/local"], + text2gql_llm_dropdown = gr.Dropdown(choices=["openai", "litellm", "qianfan_wenxin", "ollama/local"], value=getattr(llm_settings, "text2gql_llm_type"), label="type") apply_llm_config_with_text2gql_op = partial(apply_llm_config, "text2gql") @@ -322,6 +387,16 @@ def text2gql_llm_settings(llm_type): gr.Textbox(value=getattr(llm_settings, "qianfan_text2gql_language_model"), label="model_name"), gr.Textbox(value="", visible=False), ] + elif llm_type == "litellm": + llm_config_input = [ + gr.Textbox(value=getattr(llm_settings, "litellm_text2gql_api_key"), label="api_key", + type="password"), + gr.Textbox(value=getattr(llm_settings, 
"litellm_text2gql_api_base"), label="api_base", + info="If you want to use the default api_base, please keep it blank"), + gr.Textbox(value=getattr(llm_settings, "litellm_text2gql_language_model"), label="model_name", + info="Please refer to https://docs.litellm.ai/docs/providers"), + gr.Textbox(value=getattr(llm_settings, "litellm_text2gql_tokens"), label="max_token"), + ] else: llm_config_input = [gr.Textbox(value="", visible=False) for _ in range(4)] llm_config_button = gr.Button("Apply configuration") @@ -329,7 +404,8 @@ def text2gql_llm_settings(llm_type): with gr.Accordion("3. Set up the Embedding.", open=False): embedding_dropdown = gr.Dropdown( - choices=["openai", "qianfan_wenxin", "ollama/local"], value=llm_settings.embedding_type, label="Embedding" + choices=["openai", "litellm", "qianfan_wenxin", "ollama/local"], value=llm_settings.embedding_type, + label="Embedding" ) @gr.render(inputs=[embedding_dropdown]) @@ -357,6 +433,16 @@ def embedding_settings(embedding_type): type="password"), gr.Textbox(value=llm_settings.qianfan_embedding_model, label="model_name"), ] + elif embedding_type == "litellm": + with gr.Row(): + embedding_config_input = [ + gr.Textbox(value=getattr(llm_settings, "litellm_embedding_api_key"), label="api_key", + type="password"), + gr.Textbox(value=getattr(llm_settings, "litellm_embedding_api_base"), label="api_base", + info="If you want to use the default api_base, please keep it blank"), + gr.Textbox(value=getattr(llm_settings, "litellm_embedding_model"), label="model_name", + info="Please refer to https://docs.litellm.ai/docs/embedding/supported_embedding"), + ] else: embedding_config_input = [ gr.Textbox(value="", visible=False), diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py index ba2ab7e7..82618872 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py @@ -43,6 +43,10 @@ def rag_answer( keywords_extract_prompt: str, gremlin_tmpl_num: Optional[int] = 2, gremlin_prompt: Optional[str] = None, + max_graph_items=30, + topk_return_results=20, + vector_dis_threshold=0.9, + topk_per_keyword=1, ) -> Tuple: """ Generate an answer using the RAG (Retrieval-Augmented Generation) pipeline. 
@@ -79,22 +83,28 @@ def rag_answer( if vector_search: rag.query_vector_index() if graph_search: - rag.extract_keywords(extract_template=keywords_extract_prompt).keywords_to_vid().import_schema( + rag.extract_keywords(extract_template=keywords_extract_prompt).keywords_to_vid( + vector_dis_threshold=vector_dis_threshold, + topk_per_keyword=topk_per_keyword, + ).import_schema( huge_settings.graph_name ).query_graphdb( num_gremlin_generate_example=gremlin_tmpl_num, gremlin_prompt=gremlin_prompt, + max_graph_items=max_graph_items ) # TODO: add more user-defined search strategies rag.merge_dedup_rerank( - graph_ratio, - rerank_method, - near_neighbor_first, + graph_ratio=graph_ratio, + rerank_method=rerank_method, + near_neighbor_first=near_neighbor_first, + topk_return_results=topk_return_results ) rag.synthesize_answer(raw_answer, vector_only_answer, graph_only_answer, graph_vector_answer, answer_prompt) try: - context = rag.run(verbose=True, query=text, vector_search=vector_search, graph_search=graph_search) + context = rag.run(verbose=True, query=text, vector_search=vector_search, graph_search=graph_search, + max_graph_items=max_graph_items) if context.get("switch_to_bleu"): gr.Warning("Online reranker fails, automatically switches to local bleu rerank.") return ( diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py index 46e2e9e0..0fcc7f7c 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py @@ -188,17 +188,29 @@ def graph_rag_recall( near_neighbor_first: bool, custom_related_information: str, gremlin_prompt: str, + max_graph_items: int, + topk_return_results: int, + vector_dis_threshold: float, + topk_per_keyword: int, + get_vertex_only: bool = False, ) -> dict: store_schema(prompt.text2gql_graph_schema, query, gremlin_prompt) rag = RAGPipeline() + rag.extract_keywords().keywords_to_vid( + vector_dis_threshold=vector_dis_threshold, + topk_per_keyword=topk_per_keyword, + ) - rag.extract_keywords().keywords_to_vid().import_schema(huge_settings.graph_name).query_graphdb( - num_gremlin_generate_example=gremlin_tmpl_num, - gremlin_prompt=gremlin_prompt, - ).merge_dedup_rerank( - rerank_method=rerank_method, - near_neighbor_first=near_neighbor_first, - custom_related_information=custom_related_information, - ) + if not get_vertex_only: + rag.import_schema(huge_settings.graph_name).query_graphdb( + num_gremlin_generate_example=gremlin_tmpl_num, + gremlin_prompt=gremlin_prompt, + max_graph_items=max_graph_items, + ).merge_dedup_rerank( + rerank_method=rerank_method, + near_neighbor_first=near_neighbor_first, + custom_related_information=custom_related_information, + topk_return_results=topk_return_results, + ) context = rag.run(verbose=True, query=query, graph_search=True) return context diff --git a/hugegraph-llm/src/hugegraph_llm/models/embeddings/init_embedding.py b/hugegraph-llm/src/hugegraph_llm/models/embeddings/init_embedding.py index 48b302bb..4d9a14fd 100644 --- a/hugegraph-llm/src/hugegraph_llm/models/embeddings/init_embedding.py +++ b/hugegraph-llm/src/hugegraph_llm/models/embeddings/init_embedding.py @@ -19,6 +19,7 @@ from hugegraph_llm.models.embeddings.openai import OpenAIEmbedding from hugegraph_llm.models.embeddings.ollama import OllamaEmbedding from hugegraph_llm.models.embeddings.qianfan import QianFanEmbedding +from hugegraph_llm.models.embeddings.litellm import LiteLLMEmbedding 
from hugegraph_llm.config import llm_settings @@ -45,5 +46,11 @@ def get_embedding(self): api_key=llm_settings.qianfan_embedding_api_key, secret_key=llm_settings.qianfan_embedding_secret_key ) + if self.embedding_type == "litellm": + return LiteLLMEmbedding( + model_name=llm_settings.litellm_embedding_model, + api_key=llm_settings.litellm_embedding_api_key, + api_base=llm_settings.litellm_embedding_api_base + ) raise Exception("embedding type is not supported !") diff --git a/hugegraph-llm/src/hugegraph_llm/models/embeddings/litellm.py b/hugegraph-llm/src/hugegraph_llm/models/embeddings/litellm.py new file mode 100644 index 00000000..ee808b09 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/models/embeddings/litellm.py @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Optional + +from litellm import embedding, RateLimitError, APIError, APIConnectionError, aembedding +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, +) + +from hugegraph_llm.models.embeddings.base import BaseEmbedding +from hugegraph_llm.utils.log import log + + +class LiteLLMEmbedding(BaseEmbedding): + """Wrapper for LiteLLM Embedding that supports multiple LLM providers.""" + + def __init__( + self, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + model_name: str = "openai/text-embedding-3-small", # Can be any embedding model supported by LiteLLM + ) -> None: + self.api_key = api_key + self.api_base = api_base + self.model = model_name + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=10), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APIError)), + ) + def get_text_embedding(self, text: str) -> List[float]: + """Get embedding for a single text.""" + try: + response = embedding( + model=self.model, + input=text, + api_key=self.api_key, + api_base=self.api_base, + ) + log.info("Token usage: %s", response.usage) + return response.data[0]["embedding"] + except (RateLimitError, APIConnectionError, APIError) as e: + log.error("Error in LiteLLM embedding call: %s", e) + raise + + def get_texts_embeddings(self, texts: List[str]) -> List[List[float]]: + """Get embeddings for multiple texts.""" + try: + response = embedding( + model=self.model, + input=texts, + api_key=self.api_key, + api_base=self.api_base, + ) + log.info("Token usage: %s", response.usage) + return [data["embedding"] for data in response.data] + except (RateLimitError, APIConnectionError, APIError) as e: + log.error("Error in LiteLLM batch embedding call: %s", e) + raise + + async def async_get_text_embedding(self, text: str) -> List[float]: + """Get embedding for a single text asynchronously.""" + try: + response = await aembedding( + model=self.model, 
+ input=text, + api_key=self.api_key, + api_base=self.api_base, + ) + log.info("Token usage: %s", response.usage) + return response.data[0]["embedding"] + except (RateLimitError, APIConnectionError, APIError) as e: + log.error("Error in async LiteLLM embedding call: %s", e) + raise diff --git a/hugegraph-llm/src/hugegraph_llm/models/llms/init_llm.py b/hugegraph-llm/src/hugegraph_llm/models/llms/init_llm.py index c0aeb6c6..58eb4799 100644 --- a/hugegraph-llm/src/hugegraph_llm/models/llms/init_llm.py +++ b/hugegraph-llm/src/hugegraph_llm/models/llms/init_llm.py @@ -19,6 +19,7 @@ from hugegraph_llm.models.llms.ollama import OllamaClient from hugegraph_llm.models.llms.openai import OpenAIClient from hugegraph_llm.models.llms.qianfan import QianfanClient +from hugegraph_llm.models.llms.litellm import LiteLLMClient from hugegraph_llm.config import llm_settings @@ -48,6 +49,13 @@ def get_chat_llm(self): host=llm_settings.ollama_chat_host, port=llm_settings.ollama_chat_port, ) + if self.chat_llm_type == "litellm": + return LiteLLMClient( + api_key=llm_settings.litellm_chat_api_key, + api_base=llm_settings.litellm_chat_api_base, + model_name=llm_settings.litellm_chat_language_model, + max_tokens=llm_settings.litellm_chat_tokens, + ) raise Exception("chat llm type is not supported !") def get_extract_llm(self): @@ -70,6 +78,13 @@ def get_extract_llm(self): host=llm_settings.ollama_extract_host, port=llm_settings.ollama_extract_port, ) + if self.extract_llm_type == "litellm": + return LiteLLMClient( + api_key=llm_settings.litellm_extract_api_key, + api_base=llm_settings.litellm_extract_api_base, + model_name=llm_settings.litellm_extract_language_model, + max_tokens=llm_settings.litellm_extract_tokens, + ) raise Exception("extract llm type is not supported !") def get_text2gql_llm(self): @@ -92,6 +107,13 @@ def get_text2gql_llm(self): host=llm_settings.ollama_text2gql_host, port=llm_settings.ollama_text2gql_port, ) + if self.text2gql_llm_type == "litellm": + return LiteLLMClient( + api_key=llm_settings.litellm_text2gql_api_key, + api_base=llm_settings.litellm_text2gql_api_base, + model_name=llm_settings.litellm_text2gql_language_model, + max_tokens=llm_settings.litellm_text2gql_tokens, + ) raise Exception("text2gql llm type is not supported !") diff --git a/hugegraph-llm/src/hugegraph_llm/models/llms/litellm.py b/hugegraph-llm/src/hugegraph_llm/models/llms/litellm.py new file mode 100644 index 00000000..23a12501 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/models/llms/litellm.py @@ -0,0 +1,156 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Callable, List, Optional, Dict, Any + +import tiktoken +from litellm import completion, acompletion +from litellm.exceptions import RateLimitError, BudgetExceededError, APIError +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, +) + +from hugegraph_llm.models.llms.base import BaseLLM +from hugegraph_llm.utils.log import log + + +class LiteLLMClient(BaseLLM): + """Wrapper for LiteLLM Client that supports multiple LLM providers.""" + + def __init__( + self, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + model_name: str = "openai/gpt-4o", # Can be any model supported by LiteLLM + max_tokens: int = 4096, + temperature: float = 0.0, + ) -> None: + self.api_key = api_key + self.api_base = api_base + self.model = model_name + self.max_tokens = max_tokens + self.temperature = temperature + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=10), + retry=retry_if_exception_type((RateLimitError, BudgetExceededError, APIError)) + ) + def generate( + self, + messages: Optional[List[Dict[str, Any]]] = None, + prompt: Optional[str] = None, + ) -> str: + """Generate a response to the query messages/prompt.""" + if messages is None: + assert prompt is not None, "Messages or prompt must be provided." + messages = [{"role": "user", "content": prompt}] + try: + response = completion( + model=self.model, + messages=messages, + temperature=self.temperature, + max_tokens=self.max_tokens, + api_key=self.api_key, + base_url=self.api_base, + ) + log.info("Token usage: %s", response.usage) + return response.choices[0].message.content + except (RateLimitError, BudgetExceededError, APIError) as e: + log.error("Error in LiteLLM call: %s", e) + return f"Error: {str(e)}" + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=10), + retry=retry_if_exception_type((RateLimitError, BudgetExceededError, APIError)) + ) + async def agenerate( + self, + messages: Optional[List[Dict[str, Any]]] = None, + prompt: Optional[str] = None, + ) -> str: + """Generate a response to the query messages/prompt asynchronously.""" + if messages is None: + assert prompt is not None, "Messages or prompt must be provided." + messages = [{"role": "user", "content": prompt}] + try: + response = await acompletion( + model=self.model, + messages=messages, + temperature=self.temperature, + max_tokens=self.max_tokens, + api_key=self.api_key, + base_url=self.api_base, + ) + log.info("Token usage: %s", response.usage) + return response.choices[0].message.content + except (RateLimitError, BudgetExceededError, APIError) as e: + log.error("Error in async LiteLLM call: %s", e) + return f"Error: {str(e)}" + + def generate_streaming( + self, + messages: Optional[List[Dict[str, Any]]] = None, + prompt: Optional[str] = None, + on_token_callback: Callable = None, + ) -> str: + """Generate a response to the query messages/prompt in streaming mode.""" + if messages is None: + assert prompt is not None, "Messages or prompt must be provided." 
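+            # A bare `prompt` is wrapped into a single OpenAI-style user message so the
+            # streaming completion call below receives the chat format LiteLLM expects.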
+ messages = [{"role": "user", "content": prompt}] + try: + response = completion( + model=self.model, + messages=messages, + temperature=self.temperature, + max_tokens=self.max_tokens, + api_key=self.api_key, + base_url=self.api_base, + stream=True, + ) + result = "" + for chunk in response: + if chunk.choices[0].delta.content: + result += chunk.choices[0].delta.content + if on_token_callback: + on_token_callback(chunk) + return result + except (RateLimitError, BudgetExceededError, APIError) as e: + log.error("Error in streaming LiteLLM call: %s", e) + return f"Error: {str(e)}" + + def num_tokens_from_string(self, string: str) -> int: + """Get token count from string.""" + try: + encoding = tiktoken.encoding_for_model(self.model) + num_tokens = len(encoding.encode(string)) + return num_tokens + except (ValueError, TypeError) as _: # Handle unused variable + # Fallback for models not supported by tiktoken + # Rough estimate: 1 token ≈ 4 characters + return len(string) // 4 + + def max_allowed_token_length(self) -> int: + """Get max-allowed token length based on the model.""" + return 4096 # Default to 4096 if model not found + + def get_llm_type(self) -> str: + return "litellm" diff --git a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py index d9c5e98a..62968fd8 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py @@ -44,7 +44,7 @@ class MergeDedupRerank: def __init__( self, embedding: BaseEmbedding, - topk: int = huge_settings.topk_return_results, + topk_return_results: int = huge_settings.topk_return_results, graph_ratio: float = 0.5, method: Literal["bleu", "reranker"] = "bleu", near_neighbor_first: bool = False, @@ -54,7 +54,7 @@ def __init__( assert method in ["bleu", "reranker"], f"Unimplemented rerank method '{method}'." self.embedding = embedding self.graph_ratio = graph_ratio - self.topk = topk + self.topk_return_results = topk_return_results self.method = method self.near_neighbor_first = near_neighbor_first self.custom_related_information = custom_related_information @@ -70,11 +70,11 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: vector_search = context.get("vector_search", False) graph_search = context.get("graph_search", False) if graph_search and vector_search: - graph_length = int(self.topk * self.graph_ratio) - vector_length = self.topk - graph_length + graph_length = int(self.topk_return_results * self.graph_ratio) + vector_length = self.topk_return_results - graph_length else: - graph_length = self.topk - vector_length = self.topk + graph_length = self.topk_return_results + vector_length = self.topk_return_results vector_result = context.get("vector_result", []) vector_length = min(len(vector_result), vector_length) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py index 7df36d77..9c3dcf98 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py @@ -100,12 +100,14 @@ def keywords_to_vid( by: Literal["query", "keywords"] = "keywords", topk_per_keyword: int = huge_settings.topk_per_keyword, topk_per_query: int = 10, + vector_dis_threshold: float = huge_settings.vector_dis_threshold, ): """ Add a semantic ID query operator to the pipeline. :param by: Match by query or keywords. 
:param topk_per_keyword: Top K results per keyword. :param topk_per_query: Top K results per query. + :param vector_dis_threshold: Vector distance threshold. :return: Self-instance for chaining. """ self._operators.append( @@ -114,6 +116,7 @@ def keywords_to_vid( by=by, topk_per_keyword=topk_per_keyword, topk_per_query=topk_per_query, + vector_dis_threshold=vector_dis_threshold, ) ) return self @@ -174,6 +177,7 @@ def merge_dedup_rerank( rerank_method: Literal["bleu", "reranker"] = "bleu", near_neighbor_first: bool = False, custom_related_information: str = "", + topk_return_results: int = huge_settings.topk_return_results, ): """ Add a merge, deduplication, and rerank operator to the pipeline. @@ -187,6 +191,7 @@ def merge_dedup_rerank( method=rerank_method, near_neighbor_first=near_neighbor_first, custom_related_information=custom_related_information, + topk_return_results=topk_return_results ) ) return self @@ -239,7 +244,9 @@ def run(self, **kwargs) -> Dict[str, Any]: :return: Final context after all operators have been executed. """ if len(self._operators) == 0: - self.extract_keywords().query_graphdb().synthesize_answer() + self.extract_keywords().query_graphdb( + max_graph_items=kwargs.get('max_graph_items') + ).synthesize_answer() context = kwargs diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py index 2ced6185..53aff68c 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py @@ -110,7 +110,7 @@ def __init__( self._gremlin_prompt = gremlin_prompt or prompt.gremlin_generate_prompt def run(self, context: Dict[str, Any]) -> Dict[str, Any]: - self._init_client(context) + self.init_client(context) # initial flag: -1 means no result, 0 means subgraph query, 1 means gremlin query context["graph_result_flag"] = -1 @@ -239,7 +239,9 @@ def _subgraph_query(self, context: Dict[str, Any]) -> Dict[str, Any]: ) return context - def _init_client(self, context): + # TODO: move this method to a util file for reuse (remove self param) + def init_client(self, context): + """Initialize the HugeGraph client from context or default settings.""" # pylint: disable=R0915 (too-many-statements) if self._client is None: if isinstance(context.get("graph_client"), PyHugeClient): @@ -254,6 +256,15 @@ def _init_client(self, context): self._client = PyHugeClient(ip, port, graph, user, pwd, gs) assert self._client is not None, "No valid graph to search." 
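Stepping back to the `LiteLLMClient` added earlier in this patch, here is a minimal usage sketch of its three call paths. The model name, key and prompts are placeholders, and `api_base` only needs to be set when routing through a proxy or non-default provider endpoint:

```python
# Usage sketch for the LiteLLMClient shown earlier in this patch.
# Model name, key and prompts are placeholders; any LiteLLM model string works.
import asyncio

from hugegraph_llm.models.llms.litellm import LiteLLMClient

client = LiteLLMClient(
    api_key="sk-...",                  # provider credential (placeholder)
    api_base=None,                     # set when going through a LiteLLM proxy
    model_name="openai/gpt-4o-mini",
    max_tokens=512,
)

# Blocking call: either `messages` or a bare `prompt` may be passed.
print(client.generate(prompt="Name three graph databases."))

# Async call: same signature, awaited via litellm.acompletion.
print(asyncio.run(client.agenerate(prompt="What is a property graph?")))

# Streaming call: the full text is returned, and the optional callback
# receives each raw chunk as it arrives (e.g. to forward over SSE).
full_text = client.generate_streaming(
    prompt="Explain Gremlin in one sentence.",
    on_token_callback=lambda chunk: None,
)
print(full_text)
```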
+ def get_vertex_details(self, vertex_ids: List[str]) -> List[Dict[str, Any]]: + if not vertex_ids: + return [] + + formatted_ids = ", ".join(f"'{vid}'" for vid in vertex_ids) + gremlin_query = f"g.V({formatted_ids}).limit(20)" + result = self._client.gremlin().exec(gremlin=gremlin_query)["data"] + return result + def _format_graph_from_vertex(self, query_result: List[Any]) -> Set[str]: knowledge = set() for item in query_result: @@ -374,8 +385,8 @@ def _extract_labels_from_schema(self) -> Tuple[List[str], List[str]]: schema = self._get_graph_schema() vertex_props_str, edge_props_str = schema.split("\n")[:2] # TODO: rename to vertex (also need update in the schema) - vertex_props_str = vertex_props_str[len("Vertex properties: ") :].strip("[").strip("]") - edge_props_str = edge_props_str[len("Edge properties: ") :].strip("[").strip("]") + vertex_props_str = vertex_props_str[len("Vertex properties: "):].strip("[").strip("]") + edge_props_str = edge_props_str[len("Edge properties: "):].strip("[").strip("]") vertex_labels = self._extract_label_names(vertex_props_str) edge_labels = self._extract_label_names(edge_props_str) return vertex_labels, edge_labels diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py index 8aa64115..f1d13af4 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py @@ -34,7 +34,8 @@ def __init__( embedding: BaseEmbedding, by: Literal["query", "keywords"] = "keywords", topk_per_query: int = 10, - topk_per_keyword: int = huge_settings.topk_per_keyword + topk_per_keyword: int = huge_settings.topk_per_keyword, + vector_dis_threshold: float = huge_settings.vector_dis_threshold, ): self.index_dir = str(os.path.join(resource_path, huge_settings.graph_name, "graph_vids")) self.vector_index = VectorIndex.from_index_file(self.index_dir) @@ -42,6 +43,7 @@ def __init__( self.by = by self.topk_per_query = topk_per_query self.topk_per_keyword = topk_per_keyword + self.vector_dis_threshold = vector_dis_threshold self._client = PyHugeClient( huge_settings.graph_ip, huge_settings.graph_port, @@ -76,7 +78,7 @@ def _fuzzy_match_vids(self, keywords: List[str]) -> List[str]: for keyword in keywords: keyword_vector = self.embedding.get_text_embedding(keyword) results = self.vector_index.search(keyword_vector, top_k=self.topk_per_keyword, - dis_threshold=float(huge_settings.vector_dis_threshold)) + dis_threshold=float(self.vector_dis_threshold)) if results: fuzzy_match_result.extend(results[:self.topk_per_keyword]) return fuzzy_match_result diff --git a/hugegraph-ml/README.md b/hugegraph-ml/README.md index c6fca7f9..1465db9b 100644 --- a/hugegraph-ml/README.md +++ b/hugegraph-ml/README.md @@ -1,14 +1,16 @@ - # hugegraph-ml +# hugegraph-ml ## Summary -`hugegraph-ml` is a tool that integrates HugeGraph with popular graph learning libraries. -It implements most graph learning algorithms, enabling users to perform end-to-end graph learning workflows directly from HugeGraph using `hugegraph-ml`. -Graph data can be read directly from `HugeGraph` and used for tasks such as node embedding, node classification, and graph classification. +`hugegraph-ml` is a tool that integrates HugeGraph with popular graph learning libraries. +It implements most graph learning algorithms, enabling users to perform end-to-end graph learning workflows directly +from HugeGraph using `hugegraph-ml`. 
+Graph data can be read directly from `HugeGraph` and used for tasks such as node embedding, node classification, and +graph classification. The implemented algorithm models can be found in the [models](./src/hugegraph_ml/models) folder. | model | paper | -| ----------- | -------------------------------------------------- | +|-------------|----------------------------------------------------| | AGNN | https://arxiv.org/abs/1803.03735 | | APPNP | https://arxiv.org/abs/1810.05997 | | ARMA | https://arxiv.org/abs/1901.01343 | @@ -30,13 +32,16 @@ The implemented algorithm models can be found in the [models](./src/hugegraph_ml ## Environment Requirements -- python 3.9+ +- python 3.9+ - hugegraph-server 1.0+ ## Preparation -1. Start the HugeGraph database, you can do it via Docker/[Binary packages](https://hugegraph.apache.org/docs/download/download/). - Refer to [docker-link](https://hub.docker.com/r/hugegraph/hugegraph) & [deploy-doc](https://hugegraph.apache.org/docs/quickstart/hugegraph-server/#31-use-docker-container-convenient-for-testdev) for guidance +1. Start the HugeGraph database, you can do it via + Docker/[Binary packages](https://hugegraph.apache.org/docs/download/download/). + Refer + to [docker-link](https://hub.docker.com/r/hugegraph/hugegraph) & [deploy-doc](https://hugegraph.apache.org/docs/quickstart/hugegraph-server/#31-use-docker-container-convenient-for-testdev) + for guidance 2. Clone this project @@ -63,7 +68,7 @@ The implemented algorithm models can be found in the [models](./src/hugegraph_ml ### Perform node embedding on the `Cora` dataset using the `DGI` model -Make sure that the Cora dataset is already in your HugeGraph database. +Make sure that the Cora dataset is already in your HugeGraph database. If not, you can run the `import_graph_from_dgl` function to import the `Cora` dataset from `DGL` into the `HugeGraph` database. @@ -74,6 +79,7 @@ import_graph_from_dgl("cora") ``` Run [dgi_example.py](./src/hugegraph_ml/examples/dgi_example.py) to view the example. 
+ ```bash python ./hugegraph_ml/examples/dgi_example.py ``` @@ -112,8 +118,8 @@ embedded_graph = node_embed_task.train_and_embed(add_self_loop=True, n_epochs=30 ```python model = MLPClassifier( - n_in_feat=embedded_graph.ndata["feat"].shape[1], - n_out_feat=embedded_graph.ndata["label"].unique().shape[0] + n_in_feat=embedded_graph.ndata["feat"].shape[1], + n_out_feat=embedded_graph.ndata["label"].unique().shape[0] ) node_clf_task = NodeClassify(graph=embedded_graph, model=model) node_clf_task.train(lr=1e-3, n_epochs=400, patience=40) diff --git a/hugegraph-python-client/README.md b/hugegraph-python-client/README.md index 85383d17..31d8df7c 100644 --- a/hugegraph-python-client/README.md +++ b/hugegraph-python-client/README.md @@ -6,10 +6,10 @@ It is used to define graph structures, perform CRUD operations on graph data, ma ## Installation -To install the `hugegraph-python-client`, you can use pip: +To install the `hugegraph-python-client`, you can use pip/poetry/source building: ```bash -pip3 install hugegraph-python +pip install hugegraph-python # Note: may not the latest version, recommend to install from source ``` ### Install from Source (Latest Code) From 1d70b2a0f4ea3488e55cc280b3ccb207e41142f1 Mon Sep 17 00:00:00 2001 From: chirag gupta <103719146+chiruu12@users.noreply.github.com> Date: Thu, 6 Mar 2025 08:16:51 +0530 Subject: [PATCH 07/10] Adding support for async and streaming output mode --- .github/workflows/python-client.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-client.yml b/.github/workflows/python-client.yml index c0bdf5c9..e05708ae 100644 --- a/.github/workflows/python-client.yml +++ b/.github/workflows/python-client.yml @@ -20,7 +20,7 @@ jobs: - name: Prepare HugeGraph Server Environment run: | docker run -d --name=graph -p 8080:8080 -e PASSWORD=admin hugegraph/hugegraph:1.3.0 - sleep 1 + sleep 5 - uses: actions/checkout@v4 From 05e74528fbeb6f7ea5bb0fb08190d17e840ea366 Mon Sep 17 00:00:00 2001 From: chirag gupta <103719146+chiruu12@users.noreply.github.com> Date: Thu, 6 Mar 2025 08:31:59 +0530 Subject: [PATCH 08/10] Adding support for async and streaming output mode --- hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py | 2 +- hugegraph-llm/src/hugegraph_llm/api/stream_api.py | 4 ++-- hugegraph-llm/src/tests/api/test_rag_api.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py index 1713045d..bc5b3a03 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py +++ b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py @@ -117,4 +117,4 @@ class RerankerConfigRequest(BaseModel): class LogStreamRequest(BaseModel): admin_token: Optional[str] = None - log_file: Optional[str] = "llm-server.log" \ No newline at end of file + log_file: Optional[str] = "llm-server.log" diff --git a/hugegraph-llm/src/hugegraph_llm/api/stream_api.py b/hugegraph-llm/src/hugegraph_llm/api/stream_api.py index 90fc8a50..5cb617d7 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/stream_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/stream_api.py @@ -28,7 +28,7 @@ from hugegraph_llm.config import prompt, huge_settings from hugegraph_llm.utils.log import log - +# pylint: disable=too-many-statements async def stream_http_api( router: APIRouter, rag_answer_stream_func, @@ -173,4 +173,4 @@ async def generate_graph_stream(): "Cache-Control": "no-cache", "Connection": "keep-alive", } - ) \ No 
newline at end of file + ) diff --git a/hugegraph-llm/src/tests/api/test_rag_api.py b/hugegraph-llm/src/tests/api/test_rag_api.py index 5770b54c..1c91f191 100644 --- a/hugegraph-llm/src/tests/api/test_rag_api.py +++ b/hugegraph-llm/src/tests/api/test_rag_api.py @@ -132,4 +132,4 @@ def test_graph_rag_recall_api(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From d4cd537d83d70a844c5644b0a232c5460a69cf2a Mon Sep 17 00:00:00 2001 From: imbajin Date: Thu, 6 Mar 2025 18:38:44 +0800 Subject: [PATCH 09/10] fix admin_api.py usage error --- hugegraph-llm/src/hugegraph_llm/api/admin_api.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/admin_api.py b/hugegraph-llm/src/hugegraph_llm/api/admin_api.py index 26c04a87..bc316da9 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/admin_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/admin_api.py @@ -16,22 +16,18 @@ # under the License. import os -from fastapi import status, APIRouter +from fastapi import status, APIRouter, HTTPException from fastapi.responses import StreamingResponse -from hugegraph_llm.api.exceptions.rag_exceptions import generate_response from hugegraph_llm.api.models.rag_requests import LogStreamRequest -from hugegraph_llm.api.models.rag_response import RAGResponse from hugegraph_llm.config import admin_settings -# FIXME: line 31: E0702: Raising dict while only classes or instances are allowed (raising-bad-type) def admin_http_api(router: APIRouter, log_stream): @router.post("/logs", status_code=status.HTTP_200_OK) async def log_stream_api(req: LogStreamRequest): if admin_settings.admin_token != req.admin_token: - raise generate_response(RAGResponse(status_code=status.HTTP_403_FORBIDDEN, - message="Invalid admin_token")) #pylint: disable=E0702 + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid admin_token") log_path = os.path.join("logs", req.log_file) # Create a StreamingResponse that reads from the log stream generator From d37b52e3b9b9716b71383620baadf4d53680e851 Mon Sep 17 00:00:00 2001 From: imbajin Date: Thu, 6 Mar 2025 19:31:01 +0800 Subject: [PATCH 10/10] tiny improve --- hugegraph-llm/src/hugegraph_llm/api/config_api.py | 3 +-- hugegraph-llm/src/hugegraph_llm/api/stream_api.py | 5 +++-- hugegraph-llm/src/tests/api/test_rag_api.py | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/api/config_api.py b/hugegraph-llm/src/hugegraph_llm/api/config_api.py index c6ebaeba..c6b43111 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/config_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/config_api.py @@ -27,7 +27,6 @@ from hugegraph_llm.config import llm_settings - async def graph_config_route(router: APIRouter, apply_graph_conf): @router.post("/config/graph", status_code=status.HTTP_201_CREATED) async def graph_config_api(req: GraphConfigRequest): @@ -37,7 +36,7 @@ async def graph_config_api(req: GraphConfigRequest): return graph_config_api async def llm_config_route(router: APIRouter, apply_llm_conf): - # TODO: restructure the implement of llm to three types, like "/config/chat_llm" + # TODO: restructure the implement of llm to three types, like "/config/chat_llm" + /config/mini_task_llm + .. 
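As an aside on the admin endpoint fixed above: a caller that sends the wrong `admin_token` now receives a plain 403 from `HTTPException`. A rough client sketch follows; the host, port and any route prefix are assumptions, while the `/logs` path, `admin_token` and `log_file` fields mirror the handler and `LogStreamRequest` shown in this patch, and `httpx` is used purely for illustration:

```python
# Hypothetical client for the admin log-streaming endpoint fixed above.
# Host, port and route prefix are assumptions; "/logs", admin_token and
# log_file mirror the handler and LogStreamRequest fields in this patch.
import httpx

payload = {"admin_token": "my-admin-token", "log_file": "llm-server.log"}

with httpx.stream("POST", "http://127.0.0.1:8001/logs", json=payload, timeout=None) as resp:
    if resp.status_code == 403:
        raise SystemExit("Invalid admin_token")    # raised via HTTPException server-side
    for line in resp.iter_lines():
        if line:
            print(line)                            # tail of the log file as it grows
```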
@router.post("/config/llm", status_code=status.HTTP_201_CREATED) async def llm_config_api(req: LLMConfigRequest): llm_settings.llm_type = req.llm_type diff --git a/hugegraph-llm/src/hugegraph_llm/api/stream_api.py b/hugegraph-llm/src/hugegraph_llm/api/stream_api.py index 5cb617d7..0000a717 100644 --- a/hugegraph-llm/src/hugegraph_llm/api/stream_api.py +++ b/hugegraph-llm/src/hugegraph_llm/api/stream_api.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -import json import asyncio +import json from fastapi import status, APIRouter, HTTPException from fastapi.responses import StreamingResponse @@ -28,6 +28,7 @@ from hugegraph_llm.config import prompt, huge_settings from hugegraph_llm.utils.log import log + # pylint: disable=too-many-statements async def stream_http_api( router: APIRouter, @@ -130,7 +131,7 @@ async def generate_graph_stream(): gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt, get_vertex_only=req.get_vertex_only ): - # Handle vertex details for get_vertex_only flag + # Handle vertex details for a get_vertex_only flag if req.get_vertex_only and isinstance(chunk, dict) and "match_vids" in chunk: from hugegraph_llm.operators.hugegraph_op.graph_rag_query import GraphRAGQuery graph_rag = GraphRAGQuery() diff --git a/hugegraph-llm/src/tests/api/test_rag_api.py b/hugegraph-llm/src/tests/api/test_rag_api.py index 1c91f191..b31722cd 100644 --- a/hugegraph-llm/src/tests/api/test_rag_api.py +++ b/hugegraph-llm/src/tests/api/test_rag_api.py @@ -15,8 +15,9 @@ # specific language governing permissions and limitations # under the License. -import unittest import asyncio +import unittest + from fastapi import FastAPI, APIRouter from fastapi.testclient import TestClient @@ -60,7 +61,7 @@ def setUp(self): "vertex_degree_list": [1, 2] }) - # Setup the API + # Set up the API loop = asyncio.get_event_loop() loop.run_until_complete( rag_http_api(
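Finally, a rough sketch of how a client might consume one of the new streaming endpoints. Everything marked as a placeholder is an assumption: the exact route, port and request fields should be checked against `stream_api.py` and `rag_requests.py`, and the parsing below assumes conventional server-sent-events framing with `data:` lines and a `[DONE]` sentinel:

```python
# Hypothetical consumer for a streaming RAG endpoint added in this patch.
# The URL is a placeholder; the "data: ..." / "[DONE]" handling assumes standard
# server-sent-events framing. Extra RAGRequest flags can be added to the payload.
import json
import httpx

payload = {"query": "Tell me about HugeGraph.", "stream": True}

with httpx.stream("POST", "http://127.0.0.1:8001/rag", json=payload, timeout=None) as resp:
    for raw in resp.iter_lines():
        if not raw.startswith("data: "):
            continue                      # skip blank keep-alive lines between events
        data = raw[len("data: "):]
        if data == "[DONE]":              # assumed end-of-stream marker
            break
        print(json.loads(data))           # each event carries a JSON chunk/result
```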