diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 15151a955ef0..86518e6e9b65 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -174,6 +174,8 @@ Improvements
 
 * GITHUB#15184: Refactoring internal HNSWGraphBuilder's APIs and avoid creating new scorer for each call (Patrick Zhai)
 
+* GITHUB#15332: Add PhraseQuery.Builder.setMaxTerms() to cap the number of terms in a phrase query and avoid excessive memory use (linyunanit)
+
 Optimizations
 ---------------------
 * GITHUB#15140: Optimize TopScoreDocCollector with TernaryLongHeap for improved performance over Binary-LongHeap. (Ramakrishna Chilaka)
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
index b762fced2eef..122554747394 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -74,12 +74,14 @@ public class PhraseQuery extends Query {
   public static class Builder {
 
     private int slop;
+    private int maxTerms;
     private final List<Term> terms;
     private final IntArrayList positions;
 
     /** Sole constructor. */
     public Builder() {
       slop = 0;
+      maxTerms = -1;
       terms = new ArrayList<>();
       positions = new IntArrayList();
     }
@@ -94,6 +96,18 @@ public Builder setSlop(int slop) {
       return this;
     }
 
+    /**
+     * Set the maximum number of terms allowed in the phrase query, to guard against excessive
+     * memory usage for very long phrases. A value {@code <= 0} (the default) means no limit.
+     *
+     * <p>If adding a term via {@link #add(Term)} or {@link #add(Term, int)} would exceed this
+     * limit, an {@link IllegalArgumentException} is thrown.
+     */
+    public Builder setMaxTerms(int maxTerms) {
+      this.maxTerms = maxTerms;
+      return this;
+    }
+
     /**
      * Adds a term to the end of the query phrase. The relative position of the term is the one
      * immediately after the last term added.
@@ -128,6 +142,13 @@ public Builder add(Term term, int position) {
                 + " and "
                 + terms.get(0).field());
       }
+      if (maxTerms > 0 && terms.size() >= maxTerms) {
+        throw new IllegalArgumentException(
+            "The current number of terms is "
+                + terms.size()
+                + ", which has already reached the limit of "
+                + maxTerms);
+      }
       terms.add(term);
       positions.add(position);
       return this;
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
index a569fb5c5c91..654b93991db2 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
@@ -743,6 +743,20 @@ public void testBackwardPositions() throws Exception {
         });
   }
 
+  public void testPhraseQueryMaxTerms() throws Exception {
+    PhraseQuery.Builder builder = new PhraseQuery.Builder();
+    int termThreshold = 5;
+    builder.setMaxTerms(termThreshold);
+    for (int i = 0; i < termThreshold; i++) {
+      builder.add(new Term("field", "one" + i), i);
+    }
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          builder.add(new Term("field", "three"), termThreshold);
+        });
+  }
+
   private static final String[] DOCS =
       new String[] {
         "a b c d e f g h",