Skip to content

Commit d707409

Browse files
authored
Prettify logs in retrievers and GraphRAG (neo4j#313)
* Prettify logs in retrievers and GraphRAG and add the embedded vector to the returned result
* Update CHANGELOG
* Rebase
1 parent 4db2c45 commit d707409

File tree

6 files changed

+56
-29
lines changed

6 files changed

+56
-29
lines changed

CHANGELOG.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## Next
44

5+
### Changed
6+
7+
- Improved log output readability in Retrievers and GraphRAG, and added the embedded query vector (`query_vector`) to the retriever result metadata for debugging.
8+
59
## 1.6.1
610

711
### Added
@@ -12,14 +16,14 @@
1216

1317
- Added `enforce_schema` parameter to `SimpleKGPipeline` for optional schema enforcement.
1418

19+
1520
## 1.6.0
1621

1722
### Added
1823

1924
- Added optional schema enforcement as a validation layer after entity and relation extraction.
2025
- Introduced a linear hybrid search ranker for HybridRetriever and HybridCypherRetriever, allowing customizable ranking with an `alpha` parameter.
2126
- Introduced SearchQueryParseError for handling invalid Lucene query strings in HybridRetriever and HybridCypherRetriever.
22-
- Components can now be called with the `run_with_context` method that gets an extra `context_` argument containing information about the pipeline it's run from: the `run_id`, `task_name` and a `notify` function that can be used to send `TASK_PROGRESS` events to the same callback as the pipeline events.
2327

2428
### Fixed
2529

src/neo4j_graphrag/generation/graphrag.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from neo4j_graphrag.message_history import MessageHistory
3131
from neo4j_graphrag.retrievers.base import Retriever
3232
from neo4j_graphrag.types import LLMMessage, RetrieverResult
33+
from neo4j_graphrag.utils.logging import prettify
3334

3435
logger = logging.getLogger(__name__)
3536

@@ -138,7 +139,7 @@ def search(
138139
prompt = self.prompt_template.format(
139140
query_text=query_text, context=context, examples=validated_data.examples
140141
)
141-
logger.debug(f"RAG: retriever_result={retriever_result}")
142+
logger.debug(f"RAG: retriever_result={prettify(retriever_result)}")
142143
logger.debug(f"RAG: prompt={prompt}")
143144
answer = self.llm.invoke(
144145
prompt,

src/neo4j_graphrag/retrievers/hybrid.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
# limitations under the License.
1515
from __future__ import annotations
1616

17-
import copy
1817
import logging
1918
from typing import Any, Callable, Optional, Union
2019

@@ -42,6 +41,7 @@
4241
SearchType,
4342
HybridSearchRanker,
4443
)
44+
from neo4j_graphrag.utils.logging import prettify
4545

4646
logger = logging.getLogger(__name__)
4747

@@ -213,10 +213,7 @@ def get_search_results(
213213
if "ranker" in parameters:
214214
del parameters["ranker"]
215215

216-
sanitized_parameters = copy.deepcopy(parameters)
217-
if "query_vector" in sanitized_parameters:
218-
sanitized_parameters["query_vector"] = "..."
219-
logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters)
216+
logger.debug("HybridRetriever Cypher parameters: %s", prettify(parameters))
220217
logger.debug("HybridRetriever Cypher query: %s", search_query)
221218

222219
try:
@@ -234,6 +231,7 @@ def get_search_results(
234231
raise
235232
return RawSearchResult(
236233
records=records,
234+
metadata={"query_vector": query_vector},
237235
)
238236

239237

@@ -397,10 +395,7 @@ def get_search_results(
397395
if "ranker" in parameters:
398396
del parameters["ranker"]
399397

400-
sanitized_parameters = copy.deepcopy(parameters)
401-
if "query_vector" in sanitized_parameters:
402-
sanitized_parameters["query_vector"] = "..."
403-
logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters)
398+
logger.debug("HybridRetriever Cypher parameters: %s", prettify(parameters))
404399
logger.debug("HybridRetriever Cypher query: %s", search_query)
405400

406401
try:
@@ -418,4 +413,5 @@ def get_search_results(
418413
raise
419414
return RawSearchResult(
420415
records=records,
416+
metadata={"query_vector": query_vector},
421417
)

src/neo4j_graphrag/retrievers/vector.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
VectorRetrieverModel,
4040
VectorSearchModel,
4141
)
42+
from neo4j_graphrag.utils.logging import prettify
4243

4344
logger = logging.getLogger(__name__)
4445

@@ -207,7 +208,7 @@ def get_search_results(
207208
)
208209
parameters.update(search_params)
209210

210-
logger.debug("VectorRetriever Cypher parameters: %s", parameters)
211+
logger.debug("VectorRetriever Cypher parameters: %s", prettify(parameters))
211212
logger.debug("VectorRetriever Cypher query: %s", search_query)
212213

213214
records, _, _ = self.driver.execute_query(
@@ -216,7 +217,10 @@ def get_search_results(
216217
database_=self.neo4j_database,
217218
routing_=neo4j.RoutingControl.READ,
218219
)
219-
return RawSearchResult(records=records)
220+
return RawSearchResult(
221+
records=records,
222+
metadata={"query_vector": query_vector},
223+
)
220224

221225

222226
class VectorCypherRetriever(Retriever):
@@ -351,7 +355,8 @@ def get_search_results(
351355
raise EmbeddingRequiredError(
352356
"Embedding method required for text query."
353357
)
354-
parameters["query_vector"] = self.embedder.embed_query(query_text)
358+
query_vector = self.embedder.embed_query(query_text)
359+
parameters["query_vector"] = query_vector
355360
del parameters["query_text"]
356361

357362
if query_params:
@@ -370,7 +375,9 @@ def get_search_results(
370375
)
371376
parameters.update(search_params)
372377

373-
logger.debug("VectorCypherRetriever Cypher parameters: %s", parameters)
378+
logger.debug(
379+
"VectorCypherRetriever Cypher parameters: %s", prettify(parameters)
380+
)
374381
logger.debug("VectorCypherRetriever Cypher query: %s", search_query)
375382

376383
records, _, _ = self.driver.execute_query(
@@ -381,4 +388,5 @@ def get_search_results(
381388
)
382389
return RawSearchResult(
383390
records=records,
391+
metadata={"query_vector": query_vector},
384392
)

tests/unit/retrievers/test_hybrid.py

+16-7
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def test_hybrid_retriever_with_result_format_function(
110110
content="dummy-node", metadata={"score": 1.0, "node_id": 123}
111111
),
112112
],
113-
metadata={"__retriever": "HybridRetriever"},
113+
metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector},
114114
)
115115

116116

@@ -229,7 +229,7 @@ def test_hybrid_search_text_happy_path(
229229
items=[
230230
RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}),
231231
],
232-
metadata={"__retriever": "HybridRetriever"},
232+
metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector},
233233
)
234234

235235

@@ -436,7 +436,7 @@ def test_hybrid_retriever_return_properties(
436436
items=[
437437
RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}),
438438
],
439-
metadata={"__retriever": "HybridRetriever"},
439+
metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector},
440440
)
441441

442442

@@ -511,7 +511,10 @@ def test_hybrid_cypher_retrieval_query_with_params(
511511
metadata=None,
512512
),
513513
],
514-
metadata={"__retriever": "HybridCypherRetriever"},
514+
metadata={
515+
"__retriever": "HybridCypherRetriever",
516+
"query_vector": embed_query_vector,
517+
},
515518
)
516519

517520

@@ -554,7 +557,10 @@ def test_hybrid_cypher_retriever_with_result_format_function(
554557
content="dummy-node", metadata={"score": 1.0, "node_id": 123}
555558
),
556559
],
557-
metadata={"__retriever": "HybridCypherRetriever"},
560+
metadata={
561+
"__retriever": "HybridCypherRetriever",
562+
"query_vector": embed_query_vector,
563+
},
558564
)
559565

560566

@@ -710,7 +716,7 @@ def test_hybrid_search_linear_ranker_happy_path(
710716
items=[
711717
RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}),
712718
],
713-
metadata={"__retriever": "HybridRetriever"},
719+
metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector},
714720
)
715721

716722

@@ -792,7 +798,10 @@ def test_hybrid_cypher_linear_ranker(
792798
metadata=None,
793799
),
794800
],
795-
metadata={"__retriever": "HybridCypherRetriever"},
801+
metadata={
802+
"__retriever": "HybridCypherRetriever",
803+
"query_vector": embed_query_vector,
804+
},
796805
)
797806

798807

tests/unit/retrievers/test_vector.py

+16-7
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def test_similarity_search_vector_happy_path(
155155
metadata={"score": 1.0, "nodeLabels": None, "id": None},
156156
),
157157
],
158-
metadata={"__retriever": "VectorRetriever"},
158+
metadata={"__retriever": "VectorRetriever", "query_vector": query_vector},
159159
)
160160

161161

@@ -208,7 +208,7 @@ def test_similarity_search_text_happy_path(
208208
metadata={"score": 1.0, "nodeLabels": None, "id": None},
209209
),
210210
],
211-
metadata={"__retriever": "VectorRetriever"},
211+
metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector},
212212
)
213213

214214

@@ -270,7 +270,7 @@ def test_similarity_search_text_return_properties(
270270
metadata={"score": 1.0, "nodeLabels": None, "id": None},
271271
),
272272
],
273-
metadata={"__retriever": "VectorRetriever"},
273+
metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector},
274274
)
275275

276276

@@ -344,7 +344,7 @@ def test_vector_retriever_with_result_format_function(
344344
content="dummy-node", metadata={"score": 1.0, "node_id": 123}
345345
),
346346
],
347-
metadata={"__retriever": "VectorRetriever"},
347+
metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector},
348348
)
349349

350350

@@ -439,7 +439,10 @@ def test_retrieval_query_happy_path(
439439
metadata=None,
440440
),
441441
],
442-
metadata={"__retriever": "VectorCypherRetriever"},
442+
metadata={
443+
"__retriever": "VectorCypherRetriever",
444+
"query_vector": embed_query_vector,
445+
},
443446
)
444447

445448

@@ -504,7 +507,10 @@ def test_retrieval_query_with_result_format_function(
504507
content="dummy-node", metadata={"score": 1.0, "node_id": 123}
505508
),
506509
],
507-
metadata={"__retriever": "VectorCypherRetriever"},
510+
metadata={
511+
"__retriever": "VectorCypherRetriever",
512+
"query_vector": embed_query_vector,
513+
},
508514
)
509515

510516

@@ -573,7 +579,10 @@ def test_retrieval_query_with_params(
573579
metadata=None,
574580
),
575581
],
576-
metadata={"__retriever": "VectorCypherRetriever"},
582+
metadata={
583+
"__retriever": "VectorCypherRetriever",
584+
"query_vector": embed_query_vector,
585+
},
577586
)
578587

579588

0 commit comments

Comments
 (0)