Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ Improvements

* GITHUB#15184: Refactor internal HNSWGraphBuilder's APIs and avoid creating a new scorer for each call (Patrick Zhai)

* GITHUB#15332: Add PhraseQuery.Builder.setMaxTerms() method to limit the maximum number of terms and prevent excessive memory use (linyunanit)

Optimizations
---------------------
* GITHUB#15140: Optimize TopScoreDocCollector with TernaryLongHeap for improved performance over Binary-LongHeap. (Ramakrishna Chilaka)
Expand Down
21 changes: 21 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,14 @@ public class PhraseQuery extends Query {
public static class Builder {

private int slop;
private int maxTerms;
private final List<Term> terms;
private final IntArrayList positions;

/**
 * Sole constructor. Starts with empty term and position lists, a slop of {@code 0}, and {@code
 * maxTerms} set to {@code -1}.
 */
public Builder() {
  this.terms = new ArrayList<>();
  this.positions = new IntArrayList();
  this.slop = 0;
  this.maxTerms = -1;
}
Expand All @@ -94,6 +96,18 @@ public Builder setSlop(int slop) {
return this;
}

/**
 * Caps how many terms this builder will accept, guarding against the memory cost of extremely
 * long phrases.
 *
 * <p>Once the builder already holds {@code maxTerms} terms, adding another one via {@link
 * #add(Term)} or {@link #add(Term, int)} throws an {@link IllegalArgumentException}. The cap is
 * only enforced when {@code maxTerms} is greater than zero; a new builder starts out with {@code
 * -1}.
 *
 * @param maxTerms the largest number of terms the phrase may contain; zero or a negative value
 *     means the number of terms is not checked
 * @return this builder, so that calls can be chained
 */
public Builder setMaxTerms(int maxTerms) {
  this.maxTerms = maxTerms;
  return this;
}

/**
* Adds a term to the end of the query phrase. The relative position of the term is the one
* immediately after the last term added.
Expand Down Expand Up @@ -128,6 +142,13 @@ public Builder add(Term term, int position) {
+ " and "
+ terms.get(0).field());
}
if (maxTerms > 0 && terms.size() >= maxTerms) {
throw new IllegalArgumentException(
"The current number of terms is "
+ terms.size()
+ ", which exceeds the limit of "
+ maxTerms);
}
terms.add(term);
positions.add(position);
return this;
Expand Down
14 changes: 14 additions & 0 deletions lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,20 @@ public void testBackwardPositions() throws Exception {
});
}

/**
 * Fills a builder up to its configured term limit, then checks that adding one more term is
 * rejected with an {@link IllegalArgumentException}.
 */
public void testPhraseQueryMaxTerms() throws Exception {
  int limit = 5;
  PhraseQuery.Builder builder = new PhraseQuery.Builder().setMaxTerms(limit);

  // Adding exactly `limit` terms must succeed.
  for (int position = 0; position < limit; position++) {
    builder.add(new Term("field", "one" + position), position);
  }

  // The next term would push the phrase past the limit.
  expectThrows(
      IllegalArgumentException.class, () -> builder.add(new Term("field", "three"), limit));
}

private static final String[] DOCS =
new String[] {
"a b c d e f g h",
Expand Down
Loading