Skip to content

Commit e67c5a9

Browse files
committed
Enable creating a PhraseQuery with pre-computed TermStates
Allow pre-computed TermStates to be passed to PhraseQuery.Builder to avoid term statistics lookup during search, improving performance for cases where term statistics are already available.
1 parent e02bdb4 commit e67c5a9

File tree

1 file changed

+43
-5
lines changed

1 file changed

+43
-5
lines changed

lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ public static class Builder {
7777
private int maxTerms;
7878
private final List<Term> terms;
7979
private final IntArrayList positions;
80+
private TermStates[] termStates;
8081

8182
/** Sole constructor. */
8283
public Builder() {
@@ -154,22 +155,52 @@ public Builder add(Term term, int position) {
154155
return this;
155156
}
156157

158+
/**
159+
* Expert: Set pre-computed TermStates for each term to avoid term statistics lookup during
160+
* search. This is an optimization that allows providing term statistics directly instead of
161+
* computing them during search time.
162+
*
163+
* @param termStates array of TermStates, one for each term. The array length must exactly
164+
* match the number of terms added to this builder. The array is kept by reference, not copied.
165+
* @throws NullPointerException if termStates is null. A length that doesn't match the number
166+
* of terms is only detected later: build() then throws IllegalArgumentException.
167+
*/
168+
public Builder setTermStates(TermStates[] termStates) {
169+
this.termStates = Objects.requireNonNull(termStates, "termStates cannot be null");
170+
return this;
171+
}
172+
157173
/** Build a phrase query based on the terms that have been added. If TermStates were set, an IllegalArgumentException is thrown unless there is exactly one per term. */
158174
public PhraseQuery build() {
159175
Term[] terms = this.terms.toArray(new Term[0]);
160-
return new PhraseQuery(slop, terms, positions.toArray());
176+
if (termStates != null) {
177+
if (termStates.length != terms.length) {
178+
throw new IllegalArgumentException("Must have as many termStates as terms");
179+
}
180+
return new PhraseQuery(slop, terms, positions.toArray(), termStates);
181+
} else {
182+
return new PhraseQuery(slop, terms, positions.toArray());
183+
}
161184
}
162185
}
163186

164187
private final int slop;
165188
private final String field;
166189
private final Term[] terms;
167190
private final int[] positions;
191+
private final TermStates[] termStates;
168192

169193
private PhraseQuery(int slop, Term[] terms, int[] positions) {
194+
this(slop, terms, positions, null); // null termStates: getStats builds them from the searcher
195+
}
196+
197+
private PhraseQuery(int slop, Term[] terms, int[] positions, TermStates[] termStates) {
170198
if (terms.length != positions.length) {
171199
throw new IllegalArgumentException("Must have as many terms as positions");
172200
}
201+
if (termStates != null && terms.length != termStates.length) {
202+
throw new IllegalArgumentException("Must have as many terms as termStates");
203+
}
173204
if (slop < 0) {
174205
throw new IllegalArgumentException("Slop must be >= 0, got " + slop);
175206
}
@@ -198,6 +229,7 @@ private PhraseQuery(int slop, Term[] terms, int[] positions) {
198229
this.slop = slop;
199230
this.terms = terms;
200231
this.positions = positions;
232+
this.termStates = termStates;
201233
this.field = terms.length == 0 ? null : terms[0].field();
202234
}
203235

@@ -468,17 +500,23 @@ protected Similarity.SimScorer getStats(IndexSearcher searcher) throws IOExcepti
468500
throw new IllegalStateException(
469501
"PhraseWeight requires that the first position is 0, call rewrite first");
470502
}
471-
states = new TermStates[terms.length];
503+
if (termStates != null) {
504+
states = termStates;
505+
} else {
506+
states = new TermStates[terms.length];
507+
for (int i = 0; i < terms.length; i++) {
508+
final Term term = terms[i];
509+
states[i] = TermStates.build(searcher, term, scoreMode.needsScores());
510+
}
511+
}
472512
TermStatistics[] termStats = new TermStatistics[terms.length];
473513
int termUpTo = 0;
474514
for (int i = 0; i < terms.length; i++) {
475-
final Term term = terms[i];
476-
states[i] = TermStates.build(searcher, term, scoreMode.needsScores());
477515
if (scoreMode.needsScores()) {
478516
TermStates ts = states[i];
479517
if (ts.docFreq() > 0) {
480518
termStats[termUpTo++] =
481-
searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq());
519+
searcher.termStatistics(terms[i], ts.docFreq(), ts.totalTermFreq());
482520
}
483521
}
484522
}

0 commit comments

Comments
 (0)