diff --git a/lucene/core/src/java/module-info.java b/lucene/core/src/java/module-info.java index 108f8bcadc66..f8edee7a8288 100644 --- a/lucene/core/src/java/module-info.java +++ b/lucene/core/src/java/module-info.java @@ -52,6 +52,7 @@ exports org.apache.lucene.util.mutable; exports org.apache.lucene.util.packed; exports org.apache.lucene.util; + exports org.apache.lucene.util.search; // Temporarily export HPPC to all modules (eventually, this // should be restricted to only Lucene modules) diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index c198fecb4b35..625e0dcb338a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -19,8 +19,6 @@ import java.io.IOException; import java.util.Arrays; import java.util.Objects; -import java.util.function.BiFunction; -import java.util.function.Predicate; import org.apache.lucene.document.IntPoint; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; @@ -28,14 +26,9 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; -import org.apache.lucene.index.PointValues.IntersectVisitor; -import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; -import org.apache.lucene.util.BitSetIterator; -import org.apache.lucene.util.DocIdSetBuilder; -import org.apache.lucene.util.FixedBitSet; -import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.search.PointQueryUtils; /** * Abstract class for range queries against single or multidimensional points such as {@link @@ -129,326 +122,7 @@ public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, fl // We don't use RandomAccessWeight here: it's no good 
to approximate with "match all docs". // This is an inverted structure and should be used in the first pass: - return new ConstantScoreWeight(this, boost) { - - private boolean matches(byte[] packedValue) { - int offset = 0; - for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) { - if (comparator.compare(packedValue, offset, lowerPoint, offset) < 0) { - // Doc's value is too low, in this dimension - return false; - } - if (comparator.compare(packedValue, offset, upperPoint, offset) > 0) { - // Doc's value is too high, in this dimension - return false; - } - } - return true; - } - - private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) { - return new IntersectVisitor() { - - DocIdSetBuilder.BulkAdder adder; - - @Override - public void grow(int count) { - adder = result.grow(count); - } - - @Override - public void visit(int docID) { - adder.add(docID); - } - - @Override - public void visit(DocIdSetIterator iterator) throws IOException { - adder.add(iterator); - } - - @Override - public void visit(IntsRef ref) { - adder.add(ref); - } - - @Override - public void visit(int docID, byte[] packedValue) { - if (matches(packedValue)) { - visit(docID); - } - } - - @Override - public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { - if (matches(packedValue)) { - adder.add(iterator); - } - } - - @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - return relate(minPackedValue, maxPackedValue); - } - }; - } - - /** Create a visitor that sets documents that do NOT match the range. 
*/ - private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, long[] cost) { - return new IntersectVisitor() { - - @Override - public void visit(int docID) { - result.set(docID); - cost[0]++; - } - - @Override - public void visit(DocIdSetIterator iterator) throws IOException { - result.or(iterator); - cost[0] += iterator.cost(); - } - - @Override - public void visit(IntsRef ref) { - for (int i = ref.offset, to = ref.offset + ref.length; i < to; i++) { - result.set(ref.ints[i]); - } - cost[0] += ref.length; - } - - @Override - public void visit(int docID, byte[] packedValue) { - if (matches(packedValue) == false) { - visit(docID); - } - } - - @Override - public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { - if (matches(packedValue) == false) { - visit(iterator); - } - } - - @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - Relation relation = relate(minPackedValue, maxPackedValue); - switch (relation) { - case CELL_INSIDE_QUERY: - // all points match, skip this subtree - return Relation.CELL_OUTSIDE_QUERY; - case CELL_OUTSIDE_QUERY: - // none of the points match, clear all documents - return Relation.CELL_INSIDE_QUERY; - case CELL_CROSSES_QUERY: - default: - return relation; - } - } - }; - } - - @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - LeafReader reader = context.reader(); - - PointValues values = reader.getPointValues(field); - if (checkValidPointValues(values) == false) { - return null; - } - - if (values.getDocCount() == 0) { - return null; - } else { - final byte[] fieldPackedLower = values.getMinPackedValue(); - final byte[] fieldPackedUpper = values.getMaxPackedValue(); - for (int i = 0; i < numDims; ++i) { - int offset = i * bytesPerDim; - if (comparator.compare(lowerPoint, offset, fieldPackedUpper, offset) > 0 - || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) { - // If this query is a required 
clause of a boolean query, then returning null here - // will help make sure that we don't call ScorerSupplier#get on other required clauses - // of the same boolean query, which is an expensive operation for some queries (e.g. - // multi-term queries). - return null; - } - } - } - - boolean allDocsMatch; - if (values.getDocCount() == reader.maxDoc()) { - final byte[] fieldPackedLower = values.getMinPackedValue(); - final byte[] fieldPackedUpper = values.getMaxPackedValue(); - allDocsMatch = true; - for (int i = 0; i < numDims; ++i) { - int offset = i * bytesPerDim; - if (comparator.compare(lowerPoint, offset, fieldPackedLower, offset) > 0 - || comparator.compare(upperPoint, offset, fieldPackedUpper, offset) < 0) { - allDocsMatch = false; - break; - } - } - } else { - allDocsMatch = false; - } - - if (allDocsMatch) { - // all docs have a value and all points are within bounds, so everything matches - return ConstantScoreScorerSupplier.matchAll(score(), scoreMode, reader.maxDoc()); - } else { - return new ConstantScoreScorerSupplier(score(), scoreMode, reader.maxDoc()) { - - final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values); - final IntersectVisitor visitor = getIntersectVisitor(result); - long cost = -1; - - @Override - public DocIdSetIterator iterator(long leadCost) throws IOException { - if (values.getDocCount() == reader.maxDoc() - && values.getDocCount() == values.size() - && cost() > reader.maxDoc() / 2) { - // If all docs have exactly one value and the cost is greater - // than half the leaf size then maybe we can make things faster - // by computing the set of documents that do NOT match the range - final FixedBitSet result = new FixedBitSet(reader.maxDoc()); - long[] cost = new long[1]; - values.intersect(getInverseIntersectVisitor(result, cost)); - // Flip the bit set and cost - result.flip(0, reader.maxDoc()); - cost[0] = Math.max(0, reader.maxDoc() - cost[0]); - return new BitSetIterator(result, cost[0]); - } - - 
values.intersect(visitor); - return result.build().iterator(); - } - - @Override - public long cost() { - if (cost == -1) { - // Computing the cost may be expensive, so only do it if necessary - cost = values.estimateDocCount(visitor); - assert cost >= 0; - } - return cost; - } - }; - } - } - - @Override - public int count(LeafReaderContext context) throws IOException { - LeafReader reader = context.reader(); - - PointValues values = reader.getPointValues(field); - if (checkValidPointValues(values) == false) { - return 0; - } - - if (reader.hasDeletions() == false) { - if (relate(values.getMinPackedValue(), values.getMaxPackedValue()) - == Relation.CELL_INSIDE_QUERY) { - return values.getDocCount(); - } - // only 1D: we have the guarantee that it will actually run fast since there are at most 2 - // crossing leaves. - // docCount == size : counting according number of points in leaf node, so must be - // single-valued. - if (numDims == 1 && values.getDocCount() == values.size()) { - return (int) - pointCount(values.getPointTree(), PointRangeQuery.this::relate, this::matches); - } - } - return super.count(context); - } - - /** - * Finds the number of points matching the provided range conditions. Using this method is - * faster than calling {@link PointValues#intersect(IntersectVisitor)} to get the count of - * intersecting points. This method does not enforce live documents, therefore it should only - * be used when there are no deleted documents. 
- * - * @param pointTree start node of the count operation - * @param nodeComparator comparator to be used for checking whether the internal node is - * inside the range - * @param leafComparator comparator to be used for checking whether the leaf node is inside - * the range - * @return count of points that match the range - */ - private long pointCount( - PointValues.PointTree pointTree, - BiFunction nodeComparator, - Predicate leafComparator) - throws IOException { - final long[] matchingNodeCount = {0}; - // create a custom IntersectVisitor that records the number of leafNodes that matched - final IntersectVisitor visitor = - new IntersectVisitor() { - @Override - public void visit(int docID) { - // this branch should be unreachable - throw new UnsupportedOperationException( - "This IntersectVisitor does not perform any actions on a " - + "docID=" - + docID - + " node being visited"); - } - - @Override - public void visit(int docID, byte[] packedValue) { - if (leafComparator.test(packedValue)) { - matchingNodeCount[0]++; - } - } - - @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - return nodeComparator.apply(minPackedValue, maxPackedValue); - } - }; - pointCount(visitor, pointTree, matchingNodeCount); - return matchingNodeCount[0]; - } - - private void pointCount( - IntersectVisitor visitor, PointValues.PointTree pointTree, long[] matchingNodeCount) - throws IOException { - Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); - switch (r) { - case CELL_OUTSIDE_QUERY: - // This cell is fully outside the query shape: return 0 as the count of its nodes - return; - case CELL_INSIDE_QUERY: - // This cell is fully inside the query shape: return the size of the entire node as the - // count - matchingNodeCount[0] += pointTree.size(); - return; - case CELL_CROSSES_QUERY: - /* - The cell crosses the shape boundary, or the cell fully contains the query, so we fall - through and do full counting. 
- */ - if (pointTree.moveToChild()) { - do { - pointCount(visitor, pointTree, matchingNodeCount); - } while (pointTree.moveToSibling()); - pointTree.moveToParent(); - } else { - // we have reached a leaf node here. - pointTree.visitDocValues(visitor); - // leaf node count is saved in the matchingNodeCount array by the visitor - } - return; - default: - throw new IllegalArgumentException("Unreachable code"); - } - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return true; - } - }; + return new PointRangeWeight(this, boost, scoreMode); } public String getField() { @@ -540,7 +214,8 @@ public Query rewrite(IndexSearcher searcher) throws IOException { IndexReader reader = searcher.getIndexReader(); for (LeafReaderContext leaf : reader.leaves()) { - checkValidPointValues(leaf.reader().getPointValues(field)); + PointQueryUtils.checkValidPointValues( + leaf.reader().getPointValues(field), field, numDims, bytesPerDim); } // fetch the global min/max packed values across all segments @@ -551,7 +226,14 @@ public Query rewrite(IndexSearcher searcher) throws IOException { return new MatchNoDocsQuery(); } - return switch (relate(globalMinPacked, globalMaxPacked)) { + return switch (PointQueryUtils.relate( + globalMinPacked, + globalMaxPacked, + lowerPoint, + upperPoint, + numDims, + bytesPerDim, + comparator)) { case CELL_INSIDE_QUERY -> { if (canRewriteToMatchAllQuery(reader)) { yield new MatchAllDocsQuery(); @@ -594,58 +276,4 @@ private boolean canRewriteToFieldExistsQuery(IndexReader reader) { return true; } - - private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { - boolean crosses = false; - int offset = 0; - - for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) { - - if (comparator.compare(minPackedValue, offset, upperPoint, offset) > 0 - || comparator.compare(maxPackedValue, offset, lowerPoint, offset) < 0) { - return Relation.CELL_OUTSIDE_QUERY; - } - - // Evaluate crosses only when false. 
Still need to iterate through - // all the dimensions to ensure, none of them is completely outside - if (crosses == false) { - crosses = - comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0 - || comparator.compare(maxPackedValue, offset, upperPoint, offset) > 0; - } - } - - if (crosses) { - return Relation.CELL_CROSSES_QUERY; - } else { - return Relation.CELL_INSIDE_QUERY; - } - } - - private boolean checkValidPointValues(PointValues values) throws IOException { - if (values == null) { - // No docs in this segment/field indexed any points - return false; - } - - if (values.getNumIndexDimensions() != numDims) { - throw new IllegalArgumentException( - "field=\"" - + field - + "\" was indexed with numIndexDimensions=" - + values.getNumIndexDimensions() - + " but this query has numDims=" - + numDims); - } - if (bytesPerDim != values.getBytesPerDimension()) { - throw new IllegalArgumentException( - "field=\"" - + field - + "\" was indexed with bytesPerDim=" - + values.getBytesPerDimension() - + " but this query has bytesPerDim=" - + bytesPerDim); - } - return true; - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeWeight.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeWeight.java new file mode 100644 index 000000000000..df4a6d5ca544 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeWeight.java @@ -0,0 +1,414 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.function.BiFunction; +import java.util.function.Predicate; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.search.PointQueryUtils; + +/** + * {@code PointRangeWeight} implements scoring and matching logic for {@link PointRangeQuery}. It + * efficiently matches documents whose point values fall within a specified multi-dimensional range. + * + *

+ * <p>This class uses Lucene's point values infrastructure to perform fast range queries, leveraging
+ * tree traversal and custom visitors for intersection and counting. It supports both single- and
+ * multi-dimensional points, and optimizes for cases where all documents match or where all
+ * documents have exactly one value.
+ */
+public class PointRangeWeight extends ConstantScoreWeight {
+  // Query state copied from the owning PointRangeQuery in the constructor.
+  private final String field;
+  private final int numDims;
+  private final int bytesPerDim;
+  private final byte[] lowerPoint;
+  private final byte[] upperPoint;
+  private final ScoreMode scoreMode;
+  private final ArrayUtil.ByteArrayComparator comparator;
+
+  /**
+   * Creates a weight that copies the field name, dimension layout, range bounds and comparator
+   * from {@code query}.
+   *
+   * @param query the range query this weight is created for
+   * @param score the constant score handed to {@link ConstantScoreWeight}
+   * @param scoreMode the score mode passed on to the scorer suppliers built by this weight
+   */
+  protected PointRangeWeight(PointRangeQuery query, float score, ScoreMode scoreMode) {
+    super(query, score);
+    this.field = query.field;
+    this.numDims = query.numDims;
+    this.bytesPerDim = query.bytesPerDim;
+    this.lowerPoint = query.lowerPoint;
+    this.upperPoint = query.upperPoint;
+    this.comparator = query.comparator;
+    this.scoreMode = scoreMode;
+  }
+
+  /**
+   * Returns {@code true} if {@code packedValue} lies between {@code lowerPoint} and {@code
+   * upperPoint} (both inclusive) in every dimension.
+   */
+  private boolean matches(byte[] packedValue) {
+    int offset = 0;
+    for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) {
+      if (comparator.compare(packedValue, offset, lowerPoint, offset) < 0) {
+        // Doc's value is too low, in this dimension
+        return false;
+      }
+      if (comparator.compare(packedValue, offset, upperPoint, offset) > 0) {
+        // Doc's value is too high, in this dimension
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Relates the cell spanned by {@code minPackedValue} and {@code maxPackedValue} to this weight's
+   * query range. Single place that supplies the range state to {@link PointQueryUtils#relate}.
+   */
+  private PointValues.Relation relate(byte[] minPackedValue, byte[] maxPackedValue) {
+    return PointQueryUtils.relate(
+        minPackedValue, maxPackedValue, lowerPoint, upperPoint, numDims, bytesPerDim, comparator);
+  }
+
+  /** Create a visitor that adds the documents that match the range to {@code result}. */
+  private PointValues.IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
+    return new PointValues.IntersectVisitor() {
+
+      // Assigned by grow(int) and used by every visit call.
+      DocIdSetBuilder.BulkAdder adder;
+
+      @Override
+      public void grow(int count) {
+        adder = result.grow(count);
+      }
+
+      @Override
+      public void visit(int docID) {
+        adder.add(docID);
+      }
+
+      @Override
+      public void visit(DocIdSetIterator iterator) throws IOException {
+        adder.add(iterator);
+      }
+
+      @Override
+      public void visit(IntsRef ref) {
+        adder.add(ref);
+      }
+
+      @Override
+      public void visit(int docID, byte[] packedValue) {
+        if (matches(packedValue)) {
+          visit(docID);
+        }
+      }
+
+      @Override
+      public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException {
+        if (matches(packedValue)) {
+          adder.add(iterator);
+        }
+      }
+
+      @Override
+      public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+        return relate(minPackedValue, maxPackedValue);
+      }
+    };
+  }
+
+  /**
+   * Create a visitor that sets documents that do NOT match the range.
+   *
+   * @param result bit set that receives the non-matching documents
+   * @param cost single-element array; {@code cost[0]} is increased for the documents that are set
+   */
+  private PointValues.IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, long[] cost) {
+    return new PointValues.IntersectVisitor() {
+
+      @Override
+      public void visit(int docID) {
+        result.set(docID);
+        cost[0]++;
+      }
+
+      @Override
+      public void visit(DocIdSetIterator iterator) throws IOException {
+        result.or(iterator);
+        cost[0] += iterator.cost();
+      }
+
+      @Override
+      public void visit(IntsRef ref) {
+        for (int i = ref.offset, to = ref.offset + ref.length; i < to; i++) {
+          result.set(ref.ints[i]);
+        }
+        cost[0] += ref.length;
+      }
+
+      @Override
+      public void visit(int docID, byte[] packedValue) {
+        if (matches(packedValue) == false) {
+          visit(docID);
+        }
+      }
+
+      @Override
+      public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException {
+        if (matches(packedValue) == false) {
+          visit(iterator);
+        }
+      }
+
+      @Override
+      public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+        // Inside and outside are swapped because this visitor collects the non-matching docs.
+        PointValues.Relation relation = relate(minPackedValue, maxPackedValue);
+        switch (relation) {
+          case CELL_INSIDE_QUERY:
+            // all points match, skip this subtree
+            return PointValues.Relation.CELL_OUTSIDE_QUERY;
+          case CELL_OUTSIDE_QUERY:
+            // none of the points match, clear all documents
+            return PointValues.Relation.CELL_INSIDE_QUERY;
+          case CELL_CROSSES_QUERY:
+          default:
+            return relation;
+        }
+      }
+    };
+  }
+
+  /**
+   * Builds the scorer supplier for one segment.
+   *
+   * @return {@code null} when the segment has no point values for the field, no document carries a
+   *     value, or the segment's value range is disjoint from the query range in some dimension; a
+   *     match-all supplier when every document has a value and the segment's range is contained
+   *     in the query range; otherwise a supplier that intersects the point tree when its iterator
+   *     is requested
+   * @throws IllegalArgumentException if the field was indexed with a different number of
+   *     dimensions or bytes per dimension than this query uses
+   */
+  @Override
+  public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+    LeafReader reader = context.reader();
+
+    PointValues values = reader.getPointValues(field);
+    if (PointQueryUtils.checkValidPointValues(values, field, numDims, bytesPerDim) == false) {
+      return null;
+    }
+
+    if (values.getDocCount() == 0) {
+      return null;
+    } else {
+      final byte[] fieldPackedLower = values.getMinPackedValue();
+      final byte[] fieldPackedUpper = values.getMaxPackedValue();
+      for (int i = 0; i < numDims; ++i) {
+        int offset = i * bytesPerDim;
+        if (comparator.compare(lowerPoint, offset, fieldPackedUpper, offset) > 0
+            || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) {
+          // If this query is a required clause of a boolean query, then returning null here
+          // will help make sure that we don't call ScorerSupplier#get on other required clauses
+          // of the same boolean query, which is an expensive operation for some queries (e.g.
+          // multi-term queries).
+          return null;
+        }
+      }
+    }
+
+    boolean allDocsMatch;
+    if (values.getDocCount() == reader.maxDoc()) {
+      final byte[] fieldPackedLower = values.getMinPackedValue();
+      final byte[] fieldPackedUpper = values.getMaxPackedValue();
+      allDocsMatch = true;
+      for (int i = 0; i < numDims; ++i) {
+        int offset = i * bytesPerDim;
+        if (comparator.compare(lowerPoint, offset, fieldPackedLower, offset) > 0
+            || comparator.compare(upperPoint, offset, fieldPackedUpper, offset) < 0) {
+          allDocsMatch = false;
+          break;
+        }
+      }
+    } else {
+      allDocsMatch = false;
+    }
+
+    if (allDocsMatch) {
+      // all docs have a value and all points are within bounds, so everything matches
+      return ConstantScoreScorerSupplier.matchAll(score(), scoreMode, reader.maxDoc());
+    } else {
+      return new ConstantScoreScorerSupplier(score(), scoreMode, reader.maxDoc()) {
+
+        final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values);
+        final PointValues.IntersectVisitor visitor = getIntersectVisitor(result);
+        // -1 means "not computed yet", see cost()
+        long cost = -1;
+
+        @Override
+        public DocIdSetIterator iterator(long leadCost) throws IOException {
+          if (values.getDocCount() == reader.maxDoc()
+              && values.getDocCount() == values.size()
+              && cost() > reader.maxDoc() / 2) {
+            // If all docs have exactly one value and the cost is greater
+            // than half the leaf size then maybe we can make things faster
+            // by computing the set of documents that do NOT match the range
+            final FixedBitSet inverse = new FixedBitSet(reader.maxDoc());
+            final long[] inverseCost = new long[1];
+            values.intersect(getInverseIntersectVisitor(inverse, inverseCost));
+            // Flip the bit set and cost
+            inverse.flip(0, reader.maxDoc());
+            inverseCost[0] = Math.max(0, reader.maxDoc() - inverseCost[0]);
+            return new BitSetIterator(inverse, inverseCost[0]);
+          }
+
+          values.intersect(visitor);
+          return result.build().iterator();
+        }
+
+        @Override
+        public long cost() {
+          if (cost == -1) {
+            // Computing the cost may be expensive, so only do it if necessary
+            cost = values.estimateDocCount(visitor);
+            assert cost >= 0;
+          }
+          return cost;
+        }
+      };
+    }
+  }
+
+  /**
+   * Counts the matching documents of one segment, using the point tree directly when the segment
+   * has no deletions and either the whole segment is inside the range or the field is
+   * one-dimensional and single-valued. Falls back to {@code super.count(context)} otherwise.
+   *
+   * @return 0 when the segment has no point values for the field
+   * @throws IllegalArgumentException if the field was indexed with a different number of
+   *     dimensions or bytes per dimension than this query uses
+   */
+  @Override
+  public int count(LeafReaderContext context) throws IOException {
+    LeafReader reader = context.reader();
+
+    PointValues values = reader.getPointValues(field);
+    if (PointQueryUtils.checkValidPointValues(values, field, numDims, bytesPerDim) == false) {
+      return 0;
+    }
+
+    if (reader.hasDeletions() == false) {
+      if (relate(values.getMinPackedValue(), values.getMaxPackedValue())
+          == PointValues.Relation.CELL_INSIDE_QUERY) {
+        return values.getDocCount();
+      }
+      // only 1D: we have the guarantee that it will actually run fast since there are at most 2
+      // crossing leaves.
+      // docCount == size : counting according number of points in leaf node, so must be
+      // single-valued.
+      if (numDims == 1 && values.getDocCount() == values.size()) {
+        return (int) pointCount(values.getPointTree(), this::relate, this::matches);
+      }
+    }
+    return super.count(context);
+  }
+
+  /**
+   * Finds the number of points matching the provided range conditions. Using this method is faster
+   * than calling {@link PointValues#intersect(PointValues.IntersectVisitor)} to get the count of
+   * intersecting points. This method does not enforce live documents, therefore it should only be
+   * used when there are no deleted documents.
+   *
+   * @param pointTree start node of the count operation
+   * @param nodeComparator relates a node's min/max packed values to the range; decides whether a
+   *     node is skipped, counted as a whole, or descended into
+   * @param leafComparator tests a single packed value visited in a leaf against the range
+   * @return count of points that match the range
+   */
+  private long pointCount(
+      PointValues.PointTree pointTree,
+      BiFunction<byte[], byte[], PointValues.Relation> nodeComparator,
+      Predicate<byte[]> leafComparator)
+      throws IOException {
+    final long[] matchingNodeCount = {0};
+    // create a custom IntersectVisitor that records the number of leafNodes that matched
+    final PointValues.IntersectVisitor visitor =
+        new PointValues.IntersectVisitor() {
+          @Override
+          public void visit(int docID) {
+            // this branch should be unreachable
+            throw new UnsupportedOperationException(
+                "This IntersectVisitor does not perform any actions on a "
+                    + "docID="
+                    + docID
+                    + " node being visited");
+          }
+
+          @Override
+          public void visit(int docID, byte[] packedValue) {
+            if (leafComparator.test(packedValue)) {
+              matchingNodeCount[0]++;
+            }
+          }
+
+          @Override
+          public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+            return nodeComparator.apply(minPackedValue, maxPackedValue);
+          }
+        };
+    pointCount(visitor, pointTree, matchingNodeCount);
+    return matchingNodeCount[0];
+  }
+
+  /**
+   * Recursive step of the count: adds the number of matching points below the current position of
+   * {@code pointTree} to {@code matchingNodeCount[0]}.
+   */
+  private void pointCount(
+      PointValues.IntersectVisitor visitor,
+      PointValues.PointTree pointTree,
+      long[] matchingNodeCount)
+      throws IOException {
+    PointValues.Relation r =
+        visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
+    switch (r) {
+      case CELL_OUTSIDE_QUERY:
+        // This cell is fully outside the query shape: return 0 as the count of its nodes
+        return;
+      case CELL_INSIDE_QUERY:
+        // This cell is fully inside the query shape: return the size of the entire node as the
+        // count
+        matchingNodeCount[0] += pointTree.size();
+        return;
+      case CELL_CROSSES_QUERY:
+        /*
+        The cell crosses the shape boundary, or the cell fully contains the query, so we fall
+        through and do full counting.
+        */
+        if (pointTree.moveToChild()) {
+          do {
+            pointCount(visitor, pointTree, matchingNodeCount);
+          } while (pointTree.moveToSibling());
+          pointTree.moveToParent();
+        } else {
+          // we have reached a leaf node here.
+          pointTree.visitDocValues(visitor);
+          // leaf node count is saved in the matchingNodeCount array by the visitor
+        }
+        return;
+      default:
+        throw new IllegalArgumentException("Unreachable code");
+    }
+  }
+
+  /** Always returns {@code true}, for every leaf. */
+  @Override
+  public boolean isCacheable(LeafReaderContext ctx) {
+    return true;
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/search/PointQueryUtils.java b/lucene/core/src/java/org/apache/lucene/util/search/PointQueryUtils.java
new file mode 100644
index 000000000000..516b3f00ef9b
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/search/PointQueryUtils.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.search;
+
+import java.io.IOException;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.PointValues.Relation;
+import org.apache.lucene.util.ArrayUtil;
+
+/**
+ * Utility methods for working with point-based queries in Lucene.
+ *
+ * <p>{@code PointQueryUtils} provides reusable static methods for validating point values and
+ * determining the spatial relation between query ranges and indexed point value ranges. These
+ * methods are used by point range queries and weights to ensure correctness and optimize query
+ * execution.
+ */
+public final class PointQueryUtils {
+
+  private PointQueryUtils() {}
+
+  /**
+   * Classifies a cell, given by its min and max packed values, against a query range by comparing
+   * them one dimension at a time.
+   *
+   * @param minPackedValue minimum packed value of the cell
+   * @param maxPackedValue maximum packed value of the cell
+   * @param lowerPoint lower bound of the query range
+   * @param upperPoint upper bound of the query range
+   * @param numDims number of dimensions
+   * @param bytesPerDim bytes per dimension
+   * @param comparator comparator for byte arrays
+   * @return the {@link Relation}: outside as soon as one dimension of the cell lies entirely
+   *     above or below the range, crossing if the cell is not outside but extends past the range
+   *     in at least one dimension, inside otherwise
+   */
+  public static PointValues.Relation relate(
+      byte[] minPackedValue,
+      byte[] maxPackedValue,
+      byte[] lowerPoint,
+      byte[] upperPoint,
+      int numDims,
+      int bytesPerDim,
+      ArrayUtil.ByteArrayComparator comparator) {
+    boolean extendsPastRange = false;
+
+    for (int dim = 0; dim < numDims; dim++) {
+      final int offset = dim * bytesPerDim;
+
+      if (comparator.compare(minPackedValue, offset, upperPoint, offset) > 0) {
+        // the whole cell is above the range in this dimension
+        return PointValues.Relation.CELL_OUTSIDE_QUERY;
+      }
+      if (comparator.compare(maxPackedValue, offset, lowerPoint, offset) < 0) {
+        // the whole cell is below the range in this dimension
+        return PointValues.Relation.CELL_OUTSIDE_QUERY;
+      }
+
+      // Once set, the flag stays set and the comparisons are short-circuited away. The loop
+      // still has to run over the remaining dimensions because any of them may be outside.
+      extendsPastRange =
+          extendsPastRange
+              || comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0
+              || comparator.compare(maxPackedValue, offset, upperPoint, offset) > 0;
+    }
+
+    return extendsPastRange
+        ? PointValues.Relation.CELL_CROSSES_QUERY
+        : PointValues.Relation.CELL_INSIDE_QUERY;
+  }
+
+  /**
+   * Checks that {@code values} is present and was indexed with the dimension layout the query
+   * expects.
+   *
+   * @param values the {@link PointValues} to validate, may be {@code null}
+   * @param field the field name, used in the exception messages
+   * @param numDims expected number of dimensions
+   * @param bytesPerDim expected bytes per dimension
+   * @return {@code false} if {@code values} is {@code null}, {@code true} if the layout matches
+   * @throws IllegalArgumentException if the dimensions or bytes per dimension do not match
+   */
+  public static boolean checkValidPointValues(
+      PointValues values, String field, int numDims, int bytesPerDim) throws IOException {
+    if (values == null) {
+      // No docs in this segment/field indexed any points
+      return false;
+    }
+
+    if (numDims != values.getNumIndexDimensions()) {
+      final String message =
+          "field=\""
+              + field
+              + "\" was indexed with numIndexDimensions="
+              + values.getNumIndexDimensions()
+              + " but this query has numDims="
+              + numDims;
+      throw new IllegalArgumentException(message);
+    }
+
+    if (values.getBytesPerDimension() != bytesPerDim) {
+      final String message =
+          "field=\""
+              + field
+              + "\" was indexed with bytesPerDim="
+              + values.getBytesPerDimension()
+              + " but this query has bytesPerDim="
+              + bytesPerDim;
+      throw new IllegalArgumentException(message);
+    }
+
+    return true;
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/search/package-info.java b/lucene/core/src/java/org/apache/lucene/util/search/package-info.java
new file mode 100644
index 000000000000..2982c6dbc135
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/search/package-info.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utility classes and methods for advanced search operations in Lucene. + * + *

+ * <p>The {@code org.apache.lucene.util.search} package provides reusable helpers for point-based
+ * queries, including validation and spatial relation logic for multi-dimensional point fields.
+ * These utilities are intended to support efficient and correct implementation of range queries and
+ * related search features in Lucene.
+ *
+ * <p>Classes in this package are typically used by core search components such as {@link
+ * org.apache.lucene.search.PointRangeQuery} and {@link org.apache.lucene.search.PointRangeWeight}.
+ *
+ * @since 9.0
+ */
+package org.apache.lucene.util.search;