Skip to content

Commit d1a5778

Browse files
authored
Raise SearchQueryParseError when HybridRetriever and HybridCypherRetriever encounter an invalid Lucene string (neo4j#286)
* Raise SearchQueryParseError when HybridRetriever and HybridCypherRetriever encounter an invalid Lucene string
* Update documentation for exceptions
* Update CHANGELOG
* Revert docs Makefile version to SNAPSHOT
1 parent c7234d3 commit d1a5778

File tree

5 files changed

+150
-12
lines changed

5 files changed

+150
-12
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
## Next
44

5+
### Added
6+
- Introduced `SearchQueryParseError`, raised by `HybridRetriever` and `HybridCypherRetriever` when the query text cannot be parsed as a valid Lucene query string.
7+
58
## 1.5.0
69

710
### Added

docs/source/api.rst

+45
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,16 @@ Errors
445445

446446
* :class:`neo4j_graphrag.exceptions.LLMGenerationError`
447447

448+
* :class:`neo4j_graphrag.exceptions.SchemaValidationError`
449+
450+
* :class:`neo4j_graphrag.exceptions.PdfLoaderError`
451+
452+
* :class:`neo4j_graphrag.exceptions.PromptMissingPlaceholderError`
453+
454+
* :class:`neo4j_graphrag.exceptions.InvalidHybridSearchRankerError`
455+
456+
* :class:`neo4j_graphrag.exceptions.SearchQueryParseError`
457+
448458
* :class:`neo4j_graphrag.experimental.pipeline.exceptions.PipelineDefinitionError`
449459

450460
* :class:`neo4j_graphrag.experimental.pipeline.exceptions.PipelineMissingDependencyError`
@@ -559,6 +569,41 @@ LLMGenerationError
559569
:show-inheritance:
560570

561571

572+
SchemaValidationError
573+
=====================
574+
575+
.. autoclass:: neo4j_graphrag.exceptions.SchemaValidationError
576+
:show-inheritance:
577+
578+
579+
PdfLoaderError
580+
==============
581+
582+
.. autoclass:: neo4j_graphrag.exceptions.PdfLoaderError
583+
:show-inheritance:
584+
585+
586+
PromptMissingPlaceholderError
587+
=============================
588+
589+
.. autoclass:: neo4j_graphrag.exceptions.PromptMissingPlaceholderError
590+
:show-inheritance:
591+
592+
593+
InvalidHybridSearchRankerError
594+
==============================
595+
596+
.. autoclass:: neo4j_graphrag.exceptions.InvalidHybridSearchRankerError
597+
:show-inheritance:
598+
599+
600+
SearchQueryParseError
601+
=====================
602+
603+
.. autoclass:: neo4j_graphrag.exceptions.SearchQueryParseError
604+
:show-inheritance:
605+
606+
562607
PipelineDefinitionError
563608
=======================
564609

src/neo4j_graphrag/exceptions.py

+4
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,7 @@ class PromptMissingPlaceholderError(Neo4jGraphRagError):
128128

129129
class InvalidHybridSearchRankerError(Neo4jGraphRagError):
130130
"""Exception raised when an invalid ranker type for Hybrid Search is provided."""
131+
132+
133+
class SearchQueryParseError(Neo4jGraphRagError):
134+
"""Exception raised when there is a query parse error in the text search string."""

src/neo4j_graphrag/retrievers/hybrid.py

+27-12
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
EmbeddingRequiredError,
2727
RetrieverInitializationError,
2828
SearchValidationError,
29+
SearchQueryParseError,
2930
)
3031
from neo4j_graphrag.neo4j_queries import get_search_query
3132
from neo4j_graphrag.retrievers.base import Retriever
@@ -218,12 +219,19 @@ def get_search_results(
218219
logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters)
219220
logger.debug("HybridRetriever Cypher query: %s", search_query)
220221

221-
records, _, _ = self.driver.execute_query(
222-
search_query,
223-
parameters,
224-
database_=self.neo4j_database,
225-
routing_=neo4j.RoutingControl.READ,
226-
)
222+
try:
223+
records, _, _ = self.driver.execute_query(
224+
search_query,
225+
parameters,
226+
database_=self.neo4j_database,
227+
routing_=neo4j.RoutingControl.READ,
228+
)
229+
except neo4j.exceptions.ClientError as e:
230+
if "org.apache.lucene.queryparser.classic.ParseException" in str(e):
231+
raise SearchQueryParseError(
232+
f"Invalid Lucene query generated from query_text: {query_text}"
233+
) from e
234+
raise
227235
return RawSearchResult(
228236
records=records,
229237
)
@@ -395,12 +403,19 @@ def get_search_results(
395403
logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters)
396404
logger.debug("HybridRetriever Cypher query: %s", search_query)
397405

398-
records, _, _ = self.driver.execute_query(
399-
search_query,
400-
parameters,
401-
database_=self.neo4j_database,
402-
routing_=neo4j.RoutingControl.READ,
403-
)
406+
try:
407+
records, _, _ = self.driver.execute_query(
408+
search_query,
409+
parameters,
410+
database_=self.neo4j_database,
411+
routing_=neo4j.RoutingControl.READ,
412+
)
413+
except neo4j.exceptions.ClientError as e:
414+
if "org.apache.lucene.queryparser.classic.ParseException" in str(e):
415+
raise SearchQueryParseError(
416+
f"Invalid Lucene query generated from query_text: {query_text}"
417+
) from e
418+
raise
404419
return RawSearchResult(
405420
records=records,
406421
)

tests/unit/retrievers/test_hybrid.py

+71
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
EmbeddingRequiredError,
2222
RetrieverInitializationError,
2323
SearchValidationError,
24+
SearchQueryParseError,
2425
)
2526
from neo4j_graphrag.neo4j_queries import get_search_query
2627
from neo4j_graphrag.retrievers import HybridCypherRetriever, HybridRetriever
@@ -793,3 +794,73 @@ def test_hybrid_cypher_linear_ranker(
793794
],
794795
metadata={"__retriever": "HybridCypherRetriever"},
795796
)
797+
798+
799+
@patch("neo4j_graphrag.retrievers.HybridRetriever._fetch_index_infos")
800+
@patch("neo4j_graphrag.retrievers.base.get_version")
801+
def test_hybrid_retriever_invalid_lucene_query_error(
802+
mock_get_version: MagicMock,
803+
_fetch_index_infos_mock: MagicMock,
804+
driver: MagicMock,
805+
embedder: MagicMock,
806+
) -> None:
807+
mock_get_version.return_value = ((5, 23, 0), False, False)
808+
809+
error_message = (
810+
"Failed to invoke procedure `db.index.fulltext.queryNodes`: "
811+
"Caused by: org.apache.lucene.queryparser.classic.ParseException: "
812+
'Encountered " <FUZZY_SLOP> "~aliens " at line 1, column 0.'
813+
)
814+
client_error = neo4j.exceptions.ClientError(error_message)
815+
driver.execute_query.side_effect = client_error
816+
817+
retriever = HybridRetriever(
818+
driver=driver,
819+
vector_index_name="vector-index",
820+
fulltext_index_name="fulltext-index",
821+
embedder=embedder,
822+
)
823+
retriever.neo4j_version_is_5_23_or_above = True
824+
retriever._embedding_node_property = "embedding"
825+
826+
with pytest.raises(
827+
SearchQueryParseError, match="Invalid Lucene query generated from query_text"
828+
):
829+
retriever.search(query_text="~aliens", top_k=5)
830+
831+
832+
@patch("neo4j_graphrag.retrievers.HybridCypherRetriever._fetch_index_infos")
833+
@patch("neo4j_graphrag.retrievers.base.get_version")
834+
def test_hybrid_cypher_retriever_invalid_lucene_query_error(
835+
mock_get_version: MagicMock,
836+
_fetch_index_infos_mock: MagicMock,
837+
driver: MagicMock,
838+
embedder: MagicMock,
839+
) -> None:
840+
mock_get_version.return_value = ((5, 23, 0), False, False)
841+
retrieval_query = """
842+
RETURN node.id AS node_id, node.text AS text, score, {test: $param} AS metadata
843+
"""
844+
845+
error_message = (
846+
"Failed to invoke procedure `db.index.fulltext.queryNodes`: "
847+
"Caused by: org.apache.lucene.queryparser.classic.ParseException: "
848+
'Encountered " <FUZZY_SLOP> "~aliens " at line 1, column 0.'
849+
)
850+
client_error = neo4j.exceptions.ClientError(error_message)
851+
driver.execute_query.side_effect = client_error
852+
853+
retriever = HybridCypherRetriever(
854+
driver=driver,
855+
vector_index_name="vector-index",
856+
fulltext_index_name="fulltext-index",
857+
embedder=embedder,
858+
retrieval_query=retrieval_query,
859+
)
860+
retriever.neo4j_version_is_5_23_or_above = True
861+
retriever._embedding_node_property = "embedding"
862+
863+
with pytest.raises(
864+
SearchQueryParseError, match="Invalid Lucene query generated from query_text"
865+
):
866+
retriever.search(query_text="~aliens", top_k=5)

0 commit comments

Comments
 (0)