Skip to content

Commit ca28faf

Browse files
authored
refactor(llm): replace vid by full vertexes info (#189)
1 parent 8c1ffbb commit ca28faf

File tree

4 files changed

+30
-9
lines changed

4 files changed

+30
-9
lines changed

hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class GraphRAGRequest(BaseModel):
7676
from the query, by default only the most similar one is returned.")
7777

7878
client_config : Optional[GraphConfigRequest] = Query(None, description="hugegraph server config.")
79-
get_vid_only: bool = Query(False, description="return only keywords & vid (early stop).")
79+
get_vertex_only: bool = Query(False, description="return only keywords & vertex (early stop).")
8080

8181
gremlin_tmpl_num: int = Query(
8282
1, description="Number of Gremlin templates to use. If num <=0 means template is not provided"

hugegraph-llm/src/hugegraph_llm/api/rag_api.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from hugegraph_llm.config import llm_settings, prompt
3333
from hugegraph_llm.utils.log import log
3434

35-
35+
# pylint: disable=too-many-statements
3636
def rag_http_api(
3737
router: APIRouter,
3838
rag_answer_func,
@@ -101,9 +101,18 @@ def graph_rag_recall_api(req: GraphRAGRequest):
101101
near_neighbor_first=req.near_neighbor_first,
102102
custom_related_information=req.custom_priority_info,
103103
gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt,
104-
get_vid_only=req.get_vid_only
104+
get_vertex_only=req.get_vertex_only
105105
)
106106

107+
if req.get_vertex_only:
108+
from hugegraph_llm.operators.hugegraph_op.graph_rag_query import GraphRAGQuery
109+
graph_rag = GraphRAGQuery()
110+
graph_rag.init_client(result)
111+
vertex_details = graph_rag.get_vertex_details(result["match_vids"])
112+
113+
if vertex_details:
114+
result["match_vids"] = vertex_details
115+
107116
if isinstance(result, dict):
108117
params = [
109118
"query",

hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,15 +192,16 @@ def graph_rag_recall(
192192
topk_return_results: int,
193193
vector_dis_threshold: float,
194194
topk_per_keyword: int,
195-
get_vid_only: bool
195+
get_vertex_only: bool = False,
196196
) -> dict:
197197
store_schema(prompt.text2gql_graph_schema, query, gremlin_prompt)
198198
rag = RAGPipeline()
199199
rag.extract_keywords().keywords_to_vid(
200200
vector_dis_threshold=vector_dis_threshold,
201201
topk_per_keyword=topk_per_keyword,
202202
)
203-
if not get_vid_only:
203+
204+
if not get_vertex_only:
204205
rag.import_schema(huge_settings.graph_name).query_graphdb(
205206
num_gremlin_generate_example=gremlin_tmpl_num,
206207
gremlin_prompt=gremlin_prompt,

hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def __init__(
110110
self._gremlin_prompt = gremlin_prompt or prompt.gremlin_generate_prompt
111111

112112
def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
113-
self._init_client(context)
113+
self.init_client(context)
114114

115115
# initial flag: -1 means no result, 0 means subgraph query, 1 means gremlin query
116116
context["graph_result_flag"] = -1
@@ -239,7 +239,9 @@ def _subgraph_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
239239
)
240240
return context
241241

242-
def _init_client(self, context):
242+
# TODO: move this method to a util file for reuse (remove self param)
243+
def init_client(self, context):
244+
"""Initialize the HugeGraph client from context or default settings."""
243245
# pylint: disable=R0915 (too-many-statements)
244246
if self._client is None:
245247
if isinstance(context.get("graph_client"), PyHugeClient):
@@ -254,6 +256,15 @@ def _init_client(self, context):
254256
self._client = PyHugeClient(ip, port, graph, user, pwd, gs)
255257
assert self._client is not None, "No valid graph to search."
256258

259+
def get_vertex_details(self, vertex_ids: List[str]) -> List[Dict[str, Any]]:
    """Fetch the full vertex records for the given vertex ids.

    Builds a single ``g.V(...)`` Gremlin query from the ids and runs it
    through ``self._client``. The query carries ``.limit(20)``, so at most
    20 vertices are returned regardless of how many ids are supplied.

    Args:
        vertex_ids: Ids of the vertices to look up. Each id is sent as a
            single-quoted Gremlin string literal.

    Returns:
        The ``"data"`` entry of the Gremlin response, or an empty list when
        ``vertex_ids`` is empty (no query is issued in that case).
    """
    if not vertex_ids:
        return []

    def _quote(vid: Any) -> str:
        # Escape backslashes first, then single quotes, so an id containing
        # either character cannot terminate the string literal early and
        # corrupt (or inject into) the generated Gremlin script.
        escaped = str(vid).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    formatted_ids = ", ".join(_quote(vid) for vid in vertex_ids)
    gremlin_query = f"g.V({formatted_ids}).limit(20)"
    return self._client.gremlin().exec(gremlin=gremlin_query)["data"]
267+
257268
def _format_graph_from_vertex(self, query_result: List[Any]) -> Set[str]:
258269
knowledge = set()
259270
for item in query_result:
@@ -374,8 +385,8 @@ def _extract_labels_from_schema(self) -> Tuple[List[str], List[str]]:
374385
schema = self._get_graph_schema()
375386
vertex_props_str, edge_props_str = schema.split("\n")[:2]
376387
# TODO: rename to vertex (also need update in the schema)
377-
vertex_props_str = vertex_props_str[len("Vertex properties: ") :].strip("[").strip("]")
378-
edge_props_str = edge_props_str[len("Edge properties: ") :].strip("[").strip("]")
388+
vertex_props_str = vertex_props_str[len("Vertex properties: "):].strip("[").strip("]")
389+
edge_props_str = edge_props_str[len("Edge properties: "):].strip("[").strip("]")
379390
vertex_labels = self._extract_label_names(vertex_props_str)
380391
edge_labels = self._extract_label_names(edge_props_str)
381392
return vertex_labels, edge_labels

0 commit comments

Comments
 (0)