From a647161995b215ffd3b9eed9de98c43c35d56935 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Wed, 21 May 2025 15:35:19 -0700 Subject: [PATCH 1/8] Fix approximation regression Signed-off-by: Prudhvi Godithi --- .../ApproximatePointRangeQuery.java | 70 ++++++------------- .../ApproximatePointRangeQueryTests.java | 65 ++++++++++++++--- 2 files changed, 78 insertions(+), 57 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java index c915f77cb7957..7ebd61c45a566 100644 --- a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java +++ b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java @@ -161,11 +161,6 @@ public void grow(int count) { @Override public void visit(int docID) { - // it is possible that size < 1024 and docCount < size but we will continue to count through all the 1024 docs - // and collect less, but it won't hurt performance - if (docCount[0] >= size) { - return; - } adder.add(docID); docCount[0]++; } @@ -177,9 +172,8 @@ public void visit(DocIdSetIterator iterator) throws IOException { @Override public void visit(IntsRef ref) { - for (int i = 0; i < ref.length; i++) { - adder.add(ref.ints[ref.offset + i]); - } + adder.add(ref); + docCount[0] += ref.length; } @Override @@ -248,10 +242,10 @@ private void intersectRight(PointValues.PointTree pointTree, PointValues.Interse // custom intersect visitor to walk the left of the tree public void intersectLeft(PointValues.IntersectVisitor visitor, PointValues.PointTree pointTree, long[] docCount) throws IOException { - PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); if (docCount[0] >= size) { return; } + PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); switch (r) { case CELL_OUTSIDE_QUERY: // This cell is fully outside the query shape: stop recursing @@ -293,63 +287,45 @@ public void intersectLeft(PointValues.IntersectVisitor visitor, PointValues.Poin } } - // custom intersect visitor to walk the right of tree + // custom intersect visitor to walk the right of tree (from rightmost leaf going left) public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.PointTree pointTree, long[] docCount) throws IOException { - PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); if (docCount[0] >= size) { return; } + PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); switch (r) { - case CELL_OUTSIDE_QUERY: - // This cell is fully outside the query shape: stop recursing - break; - case CELL_INSIDE_QUERY: - // If the cell is fully inside, we keep moving right as long as the point tree size is over our size requirement - if (pointTree.size() > size && docCount[0] < size && moveRight(pointTree)) { + case CELL_CROSSES_QUERY: + if (pointTree.moveToChild() && docCount[0] < size) { + while (pointTree.moveToSibling()) { + } + intersectRight(visitor, pointTree, docCount); + pointTree.moveToParent(); - } - // if point tree size is no longer over, we have to go back one level where it still was over and the intersect left - else if (pointTree.size() <= size && docCount[0] < size) { - pointTree.moveToParent(); - intersectLeft(visitor, pointTree, docCount); - } - // if we've reached leaf, it means out size is under the size of the leaf, we can just collect all docIDs - else { - // Leaf node; scan and filter all points in this block: + if (docCount[0] < size) { - pointTree.visitDocIDs(visitor); + pointTree.moveToChild(); + intersectRight(visitor, pointTree, docCount); + pointTree.moveToParent(); } - } - break; - case CELL_CROSSES_QUERY: - // If the cell is fully inside, we keep moving right as long as the point tree size is over our size requirement - if (pointTree.size() > size && docCount[0] < size && moveRight(pointTree)) { - intersectRight(visitor, pointTree, docCount); - pointTree.moveToParent(); - } - // if point tree size is no longer over, we have to go back one level where it still was over and the intersect left - else if (pointTree.size() <= size && docCount[0] < size) { - pointTree.moveToParent(); - intersectLeft(visitor, pointTree, docCount); - } - // if we've reached leaf, it means out size is under the size of the leaf, we can just collect all doc values - else { - // Leaf node; scan and filter all points in this block: + } else { if (docCount[0] < size) { - pointTree.visitDocValues(visitor); + if (r == PointValues.Relation.CELL_INSIDE_QUERY) { + pointTree.visitDocIDs(visitor); + } else { + pointTree.visitDocValues(visitor); + } } } break; + case CELL_OUTSIDE_QUERY: + break; default: throw new IllegalArgumentException("Unreachable code"); } - } - public boolean moveRight(PointValues.PointTree pointTree) throws IOException { - return pointTree.moveToChild() && pointTree.moveToSibling(); } @Override diff --git a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java index 2bb003c45393e..c475076f64238 100644 --- a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java +++ b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java @@ -122,7 +122,7 @@ public void testApproximateRangeWithDefaultSize() throws IOException { } } - public void testApproximateRangeWithSizeUnderDefault() throws IOException { + public void testApproximateRangeWithSizeDefault() throws IOException { try (Directory directory = newDirectory()) { try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { int dims = 1; @@ -154,7 +154,9 @@ public void testApproximateRangeWithSizeUnderDefault() throws IOException { ); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(approximateQuery, 10); - assertEquals(topDocs.totalHits, new TotalHits(10, TotalHits.Relation.EQUAL_TO)); + assertEquals(TotalHits.Relation.EQUAL_TO, topDocs.totalHits.relation()); + assertEquals(10, topDocs.scoreDocs.length); + // assertEquals(topDocs.totalHits, new TotalHits(10, TotalHits.Relation.EQUAL_TO)); } catch (IOException e) { throw new RuntimeException(e); } @@ -249,10 +251,8 @@ public void testApproximateRangeShortCircuit() throws IOException { IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(approximateQuery, 10); TopDocs topDocs1 = searcher.search(query, 10); - - // since we short-circuit from the approx range at the end of size these will not be equal - assertNotEquals(topDocs.totalHits, topDocs1.totalHits); - assertEquals(topDocs.totalHits, new TotalHits(10, TotalHits.Relation.EQUAL_TO)); + assertEquals(10, topDocs.scoreDocs.length); + assertEquals(10, topDocs1.scoreDocs.length); assertEquals(topDocs1.totalHits, new TotalHits(101, TotalHits.Relation.EQUAL_TO)); } catch (IOException e) { throw new RuntimeException(e); @@ -299,10 +299,6 @@ public void testApproximateRangeShortCircuitAscSort() throws IOException { TopDocs topDocs = searcher.search(approximateQuery, 10, sort); TopDocs topDocs1 = searcher.search(query, 10, sort); - // since we short-circuit from the approx range at the end of size these will not be equal - assertNotEquals(topDocs.totalHits, topDocs1.totalHits); - assertEquals(topDocs.totalHits, new TotalHits(10, TotalHits.Relation.EQUAL_TO)); - assertEquals(topDocs1.totalHits, new TotalHits(21, TotalHits.Relation.EQUAL_TO)); assertEquals(topDocs.scoreDocs[0].doc, topDocs1.scoreDocs[0].doc); assertEquals(topDocs.scoreDocs[1].doc, topDocs1.scoreDocs[1].doc); assertEquals(topDocs.scoreDocs[2].doc, topDocs1.scoreDocs[2].doc); @@ -392,4 +388,53 @@ public void testCannotApproximateWithTrackTotalHits() { when(mockContext.request()).thenReturn(null); assertTrue(query.canApproximate(mockContext)); } + + public void testApproximateRangeShortCircuitDescSort() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + + long[] scratch = new long[dims]; + int numPoints = 1000; + for (int i = 0; i < numPoints; i++) { + Document doc = new Document(); + for (int v = 0; v < dims; v++) { + scratch[v] = i; + } + iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); + } + iw.flush(); + iw.forceMerge(1); + try (IndexReader reader = iw.getReader()) { + try { + long lower = 980; + long upper = 999; + Query approximateQuery = new ApproximatePointRangeQuery( + "point", + pack(lower).bytes, + pack(upper).bytes, + dims, + 10, + SortOrder.DESC, + ApproximatePointRangeQuery.LONG_FORMAT + ); + Query query = LongPoint.newRangeQuery("point", lower, upper); + + IndexSearcher searcher = new IndexSearcher(reader); + Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); // true for DESC + TopDocs topDocs = searcher.search(approximateQuery, 10, sort); + TopDocs topDocs1 = searcher.search(query, 10, sort); + + // Verify we got the highest values first (DESC order) + assertEquals(topDocs.scoreDocs[0].doc, topDocs1.scoreDocs[0].doc); + assertEquals(topDocs.scoreDocs[1].doc, topDocs1.scoreDocs[1].doc); + assertEquals(topDocs.scoreDocs[2].doc, topDocs1.scoreDocs[2].doc); + + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + } + } } From f74734c13b88734289eafde868927807d1953e88 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Tue, 27 May 2025 14:18:58 -0700 Subject: [PATCH 2/8] Upstream fetch Signed-off-by: Prudhvi Godithi --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95f95f74d87fa..8ee2764a84d25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Changed - Create generic DocRequest to better categorize ActionRequests ([#18269](https://github.com/opensearch-project/OpenSearch/pull/18269))) - + ### Dependencies - Update Apache Lucene from 10.1.0 to 10.2.1 ([#17961](https://github.com/opensearch-project/OpenSearch/pull/17961)) - Bump `com.google.code.gson:gson` from 2.12.1 to 2.13.1 ([#17923](https://github.com/opensearch-project/OpenSearch/pull/17923), [#18266](https://github.com/opensearch-project/OpenSearch/pull/18266)) @@ -72,6 +72,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Avoid NPE if on SnapshotInfo if 'shallow' boolean not present ([#18187](https://github.com/opensearch-project/OpenSearch/issues/18187)) - Fix 'system call filter not installed' caused when network.host: 0.0.0.0 ([#18309](https://github.com/opensearch-project/OpenSearch/pull/18309)) - Fix MatrixStatsAggregator reuse when mode parameter changes ([#18242](https://github.com/opensearch-project/OpenSearch/issues/18242)) +- Fixed the Approximate Framework regression with Lucene 10.2.1 by updating `intersectRight` BKD walk and `IntRef` visit method ([#18358](https://github.com/opensearch-project/OpenSearch/issues/18358)) ### Security From 6b56848109a0680d3ceca5ba661b99156f28ebd4 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Tue, 27 May 2025 14:20:11 -0700 Subject: [PATCH 3/8] Fix tests Signed-off-by: Prudhvi Godithi --- .../search/approximate/ApproximatePointRangeQueryTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java index c475076f64238..011cda03affaa 100644 --- a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java +++ b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java @@ -156,7 +156,6 @@ public void testApproximateRangeWithSizeDefault() throws IOException { TopDocs topDocs = searcher.search(approximateQuery, 10); assertEquals(TotalHits.Relation.EQUAL_TO, topDocs.totalHits.relation()); assertEquals(10, topDocs.scoreDocs.length); - // assertEquals(topDocs.totalHits, new TotalHits(10, TotalHits.Relation.EQUAL_TO)); } catch (IOException e) { throw new RuntimeException(e); } From 757aebf67fd124950a8a50417c667f045538e51f Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Tue, 27 May 2025 16:44:19 -0700 Subject: [PATCH 4/8] Update code based on the comments Signed-off-by: Prudhvi Godithi --- .../search/approximate/ApproximatePointRangeQuery.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java index 7ebd61c45a566..67d6d9fedb920 100644 --- a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java +++ b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java @@ -298,13 +298,12 @@ public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.Poi case CELL_INSIDE_QUERY: case CELL_CROSSES_QUERY: if (pointTree.moveToChild() && docCount[0] < size) { - while (pointTree.moveToSibling()) { - } - + // BKD is binary today, so one moveToSibling() is enough to land on the right child. + // If PointTree ever becomes n-ary, update the traversal below to visit all siblings or re-enable a full loop. + pointTree.moveToSibling(); + assert pointTree.moveToSibling() == false; intersectRight(visitor, pointTree, docCount); - pointTree.moveToParent(); - if (docCount[0] < size) { pointTree.moveToChild(); intersectRight(visitor, pointTree, docCount); From bbe949c522f100af4dc7f38998d0861e9aed73da Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Wed, 28 May 2025 18:16:40 -0700 Subject: [PATCH 5/8] Use point tree clone and improve the coverage Signed-off-by: Prudhvi Godithi --- .../ApproximatePointRangeQuery.java | 6 +- .../ApproximatePointRangeQueryTests.java | 177 +++++++++++++++++- 2 files changed, 178 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java index 67d6d9fedb920..89d553ad2dc9a 100644 --- a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java +++ b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java @@ -298,6 +298,7 @@ public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.Poi case CELL_INSIDE_QUERY: case CELL_CROSSES_QUERY: if (pointTree.moveToChild() && docCount[0] < size) { + PointValues.PointTree leftChild = pointTree.clone(); // BKD is binary today, so one moveToSibling() is enough to land on the right child. // If PointTree ever becomes n-ary, update the traversal below to visit all siblings or re-enable a full loop. pointTree.moveToSibling(); @@ -305,9 +306,7 @@ public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.Poi intersectRight(visitor, pointTree, docCount); pointTree.moveToParent(); if (docCount[0] < size) { - pointTree.moveToChild(); - intersectRight(visitor, pointTree, docCount); - pointTree.moveToParent(); + intersectRight(visitor, leftChild, docCount); } } else { if (docCount[0] < size) { @@ -324,7 +323,6 @@ public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.Poi default: throw new IllegalArgumentException("Unreachable code"); } - } @Override diff --git a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java index 011cda03affaa..9bac0ada92d3f 100644 --- a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java +++ b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java @@ -392,7 +392,6 @@ public void testApproximateRangeShortCircuitDescSort() throws IOException { try (Directory directory = newDirectory()) { try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { int dims = 1; - long[] scratch = new long[dims]; int numPoints = 1000; for (int i = 0; i < numPoints; i++) { @@ -436,4 +435,180 @@ public void testApproximateRangeShortCircuitDescSort() throws IOException { } } } + + // Test to cover the left child traversal in intersectRight with CELL_INSIDE_QUERY + public void testIntersectRightLeftChildTraversal() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + long[] scratch = new long[dims]; + // Create a dataset that will create a multi-level BKD tree + // We need enough documents to create internal nodes (not just leaves) + int numPoints = 2000; + for (int i = 0; i < numPoints; i++) { + Document doc = new Document(); + scratch[0] = i; + doc.add(new LongPoint("point", scratch[0])); + iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); + if (i % 100 == 0) { + iw.flush(); + } + } + iw.flush(); + iw.forceMerge(1); + + try (IndexReader reader = iw.getReader()) { + // Query that will match many documents and require tree traversal + long lower = 1000; + long upper = 1999; + ApproximatePointRangeQuery query = new ApproximatePointRangeQuery( + "point", + pack(lower).bytes, + pack(upper).bytes, + dims, + 50, // Small size to ensure we hit the left child traversal condition + SortOrder.DESC, + ApproximatePointRangeQuery.LONG_FORMAT + ); + IndexSearcher searcher = new IndexSearcher(reader); + Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); // DESC + TopDocs topDocs = searcher.search(query, 50, sort); + assertEquals("Should return exactly 50 documents", 50, topDocs.scoreDocs.length); + } + } + } + } + + // Test to cover pointTree.visitDocIDs(visitor) in CELL_INSIDE_QUERY leaf case for intersectRight + public void testIntersectRightCellInsideQueryLeaf() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + long[] scratch = new long[dims]; + // Create a smaller dataset that will result in leaf nodes that are completely inside the query range + for (int i = 900; i <= 999; i++) { + Document doc = new Document(); + scratch[0] = i; + iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); + } + iw.flush(); + iw.forceMerge(1); + + try (IndexReader reader = iw.getReader()) { + // Query that completely contains all documents (CELL_INSIDE_QUERY) + long lower = 800; + long upper = 1100; + + ApproximatePointRangeQuery query = new ApproximatePointRangeQuery( + "point", + pack(lower).bytes, + pack(upper).bytes, + dims, + 200, + SortOrder.DESC, + ApproximatePointRangeQuery.LONG_FORMAT + ); + + IndexSearcher searcher = new IndexSearcher(reader); + Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); // DESC + TopDocs topDocs = searcher.search(query, 200, sort); + + assertEquals("Should find all documents", 100, topDocs.totalHits.value()); + // Should return all the indexed point values from 900 to 999 which tests CELL_INSIDE_QUERY + assertEquals("Should return exactly 100 documents", 100, topDocs.scoreDocs.length); + } + } + } + } + + // Test to cover CELL_OUTSIDE_QUERY break case for intersectRight + public void testIntersectRightCellOutsideQuery() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + long[] scratch = new long[dims]; + // Create documents in two separate ranges to ensure some cells are outside query + // Range 1: 0-99 + for (int i = 0; i < 100; i++) { + Document doc = new Document(); + scratch[0] = i; + iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); + } + // Range 2: 500-599 (gap ensures some tree nodes will be completely outside query) + for (int i = 500; i < 600; i++) { + Document doc = new Document(); + scratch[0] = i; + iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); + } + iw.flush(); + iw.forceMerge(1); + + try (IndexReader reader = iw.getReader()) { + // Query only the middle range - this should create CELL_OUTSIDE_QUERY for some nodes + long lower = 200; + long upper = 400; + + ApproximatePointRangeQuery query = new ApproximatePointRangeQuery( + "point", + pack(lower).bytes, + pack(upper).bytes, + dims, + 50, + SortOrder.DESC, + ApproximatePointRangeQuery.LONG_FORMAT + ); + + IndexSearcher searcher = new IndexSearcher(reader); + Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); // DESC + TopDocs topDocs = searcher.search(query, 50, sort); + + // Should find no documents since our query range (200-400) has no documents + assertEquals("Should find no documents in the gap range", 0, topDocs.totalHits.value()); + } + } + } + } + + // Test to cover intersectRight with CELL_CROSSES_QUERY case and ensure comprehensive coverage for intersectRight + public void testIntersectRightCellCrossesQuery() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + long[] scratch = new long[dims]; + // Create documents that will result in cells that cross the query boundary + for (int i = 0; i < 1000; i++) { + Document doc = new Document(); + scratch[0] = i; + iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); + } + iw.flush(); + iw.forceMerge(1); + + try (IndexReader reader = iw.getReader()) { + // Query that will partially overlap with tree nodes (CELL_CROSSES_QUERY) + // This range will intersect with some tree nodes but not completely contain them + long lower = 250; + long upper = 750; + + ApproximatePointRangeQuery query = new ApproximatePointRangeQuery( + "point", + pack(lower).bytes, + pack(upper).bytes, + dims, + 100, + SortOrder.DESC, + ApproximatePointRangeQuery.LONG_FORMAT + ); + + IndexSearcher searcher = new IndexSearcher(reader); + Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); // DESC + TopDocs topDocs = searcher.search(query, 100, sort); + + assertEquals("Should return exactly 100 documents", 100, topDocs.scoreDocs.length); + // For Desc sort the ApproximatePointRangeQuery will slightly over collect to retain the highest matched docs + assertTrue("Should collect at least requested number of documents", topDocs.totalHits.value() >= 100); + } + } + } + } } From 21ab39e4e1a5f9470ff4e29f7b03838e78801f3d Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Thu, 29 May 2025 09:21:12 -0700 Subject: [PATCH 6/8] intersectRight handle single child case Signed-off-by: Prudhvi Godithi --- .../ApproximatePointRangeQuery.java | 16 +++++-- .../ApproximatePointRangeQueryTests.java | 46 +++++++++++++++++-- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java index 89d553ad2dc9a..d3843796810af 100644 --- a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java +++ b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java @@ -301,12 +301,18 @@ public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.Poi PointValues.PointTree leftChild = pointTree.clone(); // BKD is binary today, so one moveToSibling() is enough to land on the right child. // If PointTree ever becomes n-ary, update the traversal below to visit all siblings or re-enable a full loop. - pointTree.moveToSibling(); - assert pointTree.moveToSibling() == false; - intersectRight(visitor, pointTree, docCount); - pointTree.moveToParent(); - if (docCount[0] < size) { + if (pointTree.moveToSibling()) { + // We have two children - visit right first + intersectRight(visitor, pointTree, docCount); + pointTree.moveToParent(); + // Then visit left if we still need more docs + if (docCount[0] < size) { + intersectRight(visitor, leftChild, docCount); + } + } else { + // Only one child - visit it intersectRight(visitor, leftChild, docCount); + pointTree.moveToParent(); } } else { if (docCount[0] < size) { diff --git a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java index 9bac0ada92d3f..dde8936b9f805 100644 --- a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java +++ b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java @@ -395,7 +395,6 @@ public void testApproximateRangeShortCircuitDescSort() throws IOException { long[] scratch = new long[dims]; int numPoints = 1000; for (int i = 0; i < numPoints; i++) { - Document doc = new Document(); for (int v = 0; v < dims; v++) { scratch[v] = i; } @@ -487,7 +486,6 @@ public void testIntersectRightCellInsideQueryLeaf() throws IOException { long[] scratch = new long[dims]; // Create a smaller dataset that will result in leaf nodes that are completely inside the query range for (int i = 900; i <= 999; i++) { - Document doc = new Document(); scratch[0] = i; iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); } @@ -530,13 +528,11 @@ public void testIntersectRightCellOutsideQuery() throws IOException { // Create documents in two separate ranges to ensure some cells are outside query // Range 1: 0-99 for (int i = 0; i < 100; i++) { - Document doc = new Document(); scratch[0] = i; iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); } // Range 2: 500-599 (gap ensures some tree nodes will be completely outside query) for (int i = 500; i < 600; i++) { - Document doc = new Document(); scratch[0] = i; iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); } @@ -577,7 +573,6 @@ public void testIntersectRightCellCrossesQuery() throws IOException { long[] scratch = new long[dims]; // Create documents that will result in cells that cross the query boundary for (int i = 0; i < 1000; i++) { - Document doc = new Document(); scratch[0] = i; iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); } @@ -611,4 +606,45 @@ public void testIntersectRightCellCrossesQuery() throws IOException { } } } + + // Test to specifically cover the single child case in intersectRight + public void testIntersectRightSingleChildNode() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + long[] scratch = new long[dims]; + + for (int i = 0; i < 100; i++) { + scratch[0] = 1000L; + iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); + } + scratch[0] = 987654321L; + iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); + + iw.flush(); + iw.forceMerge(1); + + try (IndexReader reader = iw.getReader()) { + long lower = 500L; + long upper = 999999999L; + + ApproximatePointRangeQuery query = new ApproximatePointRangeQuery( + "point", + pack(lower).bytes, + pack(upper).bytes, + dims, + 50, + SortOrder.DESC, + ApproximatePointRangeQuery.LONG_FORMAT + ); + + IndexSearcher searcher = new IndexSearcher(reader); + Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); + TopDocs topDocs = searcher.search(query, 50, sort); + + assertEquals("Should return exactly 50 documents", 50, topDocs.scoreDocs.length); + } + } + } + } } From 2e25033e631c0b7260c28271e7860851a6e7d4c5 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Thu, 29 May 2025 14:00:44 -0700 Subject: [PATCH 7/8] Address comments Signed-off-by: Prudhvi Godithi --- .../ApproximatePointRangeQuery.java | 4 +- .../ApproximatePointRangeQueryTests.java | 83 ++++++++++--------- 2 files changed, 47 insertions(+), 40 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java index d3843796810af..04b0a94caad8f 100644 --- a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java +++ b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java @@ -161,6 +161,7 @@ public void grow(int count) { @Override public void visit(int docID) { + // it is possible that size < 1024 and docCount < size but we will continue to count through all the 1024 docs adder.add(docID); docCount[0]++; } @@ -304,7 +305,6 @@ public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.Poi if (pointTree.moveToSibling()) { // We have two children - visit right first intersectRight(visitor, pointTree, docCount); - pointTree.moveToParent(); // Then visit left if we still need more docs if (docCount[0] < size) { intersectRight(visitor, leftChild, docCount); @@ -312,8 +312,8 @@ public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.Poi } else { // Only one child - visit it intersectRight(visitor, leftChild, docCount); - pointTree.moveToParent(); } + pointTree.moveToParent(); } else { if (docCount[0] < size) { if (r == PointValues.Relation.CELL_INSIDE_QUERY) { diff --git a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java index dde8936b9f805..365da499477ea 100644 --- a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java +++ b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java @@ -139,6 +139,7 @@ public void testApproximateRangeWithSizeDefault() throws IOException { } iw.flush(); iw.forceMerge(1); + final int size = 10; try (IndexReader reader = iw.getReader()) { try { long lower = 0; @@ -148,14 +149,13 @@ public void testApproximateRangeWithSizeDefault() throws IOException { pack(lower).bytes, pack(upper).bytes, dims, - 10, + size, null, ApproximatePointRangeQuery.LONG_FORMAT ); IndexSearcher searcher = new IndexSearcher(reader); - TopDocs topDocs = searcher.search(approximateQuery, 10); - assertEquals(TotalHits.Relation.EQUAL_TO, topDocs.totalHits.relation()); - assertEquals(10, topDocs.scoreDocs.length); + TopDocs topDocs = searcher.search(approximateQuery, size); + assertEquals(size, topDocs.scoreDocs.length); } catch (IOException e) { throw new RuntimeException(e); } @@ -187,20 +187,21 @@ public void testApproximateRangeWithSizeOverDefault() throws IOException { long lower = 0; long upper = 12000; long maxHits = 12001; + final int size = 11000; Query approximateQuery = new ApproximatePointRangeQuery( "point", pack(lower).bytes, pack(upper).bytes, dims, - 11_000, + size, null, ApproximatePointRangeQuery.LONG_FORMAT ); IndexSearcher searcher = new IndexSearcher(reader); - TopDocs topDocs = searcher.search(approximateQuery, 11000); + TopDocs topDocs = searcher.search(approximateQuery, size); if (topDocs.totalHits.relation() == Relation.EQUAL_TO) { - assertEquals(topDocs.totalHits.value(), 11000); + assertEquals(topDocs.totalHits.value(), size); } else { assertTrue(11000 <= topDocs.totalHits.value()); assertTrue(maxHits >= topDocs.totalHits.value()); @@ -236,23 +237,24 @@ public void testApproximateRangeShortCircuit() throws IOException { try { long lower = 0; long upper = 100; + final int size = 10; Query approximateQuery = new ApproximatePointRangeQuery( "point", pack(lower).bytes, pack(upper).bytes, dims, - 10, + size, null, ApproximatePointRangeQuery.LONG_FORMAT ); Query query = LongPoint.newRangeQuery("point", lower, upper); IndexSearcher searcher = new IndexSearcher(reader); - TopDocs topDocs = searcher.search(approximateQuery, 10); - TopDocs topDocs1 = searcher.search(query, 10); - assertEquals(10, topDocs.scoreDocs.length); - assertEquals(10, topDocs1.scoreDocs.length); - assertEquals(topDocs1.totalHits, new TotalHits(101, TotalHits.Relation.EQUAL_TO)); + TopDocs topDocs = searcher.search(approximateQuery, size); + TopDocs topDocs1 = searcher.search(query, size); + assertEquals(size, topDocs.scoreDocs.length); + assertEquals(size, topDocs1.scoreDocs.length); + assertEquals(topDocs1.totalHits.value(), 101); } catch (IOException e) { throw new RuntimeException(e); } @@ -282,12 +284,13 @@ public void testApproximateRangeShortCircuitAscSort() throws IOException { try { long lower = 0; long upper = 20; + final int size = 10; Query approximateQuery = new ApproximatePointRangeQuery( "point", pack(lower).bytes, pack(upper).bytes, dims, - 10, + size, SortOrder.ASC, ApproximatePointRangeQuery.LONG_FORMAT ); @@ -295,8 +298,8 @@ public void testApproximateRangeShortCircuitAscSort() throws IOException { IndexSearcher searcher = new IndexSearcher(reader); Sort sort = new Sort(new SortField("point", SortField.Type.LONG)); - TopDocs topDocs = searcher.search(approximateQuery, 10, sort); - TopDocs topDocs1 = searcher.search(query, 10, sort); + TopDocs topDocs = searcher.search(approximateQuery, size, sort); + TopDocs topDocs1 = searcher.search(query, size, sort); assertEquals(topDocs.scoreDocs[0].doc, topDocs1.scoreDocs[0].doc); assertEquals(topDocs.scoreDocs[1].doc, topDocs1.scoreDocs[1].doc); @@ -406,12 +409,13 @@ public void testApproximateRangeShortCircuitDescSort() throws IOException { try { long lower = 980; long upper = 999; + final int size = 10; Query approximateQuery = new ApproximatePointRangeQuery( "point", pack(lower).bytes, pack(upper).bytes, dims, - 10, + size, SortOrder.DESC, ApproximatePointRangeQuery.LONG_FORMAT ); @@ -419,13 +423,14 @@ public void testApproximateRangeShortCircuitDescSort() throws IOException { IndexSearcher searcher = new IndexSearcher(reader); Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); // true for DESC - TopDocs topDocs = searcher.search(approximateQuery, 10, sort); - TopDocs topDocs1 = searcher.search(query, 10, sort); + TopDocs topDocs = searcher.search(approximateQuery, size, sort); + TopDocs topDocs1 = searcher.search(query, size, sort); // Verify we got the highest values first (DESC order) - assertEquals(topDocs.scoreDocs[0].doc, topDocs1.scoreDocs[0].doc); - assertEquals(topDocs.scoreDocs[1].doc, topDocs1.scoreDocs[1].doc); - assertEquals(topDocs.scoreDocs[2].doc, topDocs1.scoreDocs[2].doc); + for (int i = 0; i < size; i++) { + assertEquals("Mismatch at doc index " + i, topDocs.scoreDocs[i].doc, topDocs1.scoreDocs[i].doc); + } + } catch (IOException e) { throw new RuntimeException(e); @@ -460,6 +465,7 @@ public void testIntersectRightLeftChildTraversal() throws IOException { // Query that will match many documents and require tree traversal long lower = 1000; long upper = 1999; + final int size = 50; ApproximatePointRangeQuery query = new ApproximatePointRangeQuery( "point", pack(lower).bytes, @@ -471,8 +477,8 @@ public void testIntersectRightLeftChildTraversal() throws IOException { ); IndexSearcher searcher = new IndexSearcher(reader); Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); // DESC - TopDocs topDocs = searcher.search(query, 50, sort); - assertEquals("Should return exactly 50 documents", 50, topDocs.scoreDocs.length); + TopDocs topDocs = searcher.search(query, size, sort); + assertEquals("Should return exactly size value documents", size, topDocs.scoreDocs.length); } } } @@ -496,7 +502,8 @@ public void testIntersectRightCellInsideQueryLeaf() throws IOException { // Query that completely contains all documents (CELL_INSIDE_QUERY) long lower = 800; long upper = 1100; - + final int size = 200; + final int returnSize = 100; ApproximatePointRangeQuery query = new ApproximatePointRangeQuery( "point", pack(lower).bytes, @@ -509,11 +516,11 @@ public void testIntersectRightCellInsideQueryLeaf() throws IOException { IndexSearcher searcher = new IndexSearcher(reader); Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); // DESC - TopDocs topDocs = searcher.search(query, 200, sort); + TopDocs topDocs = searcher.search(query, size, sort); - assertEquals("Should find all documents", 100, topDocs.totalHits.value()); + assertEquals("Should find all documents", returnSize, topDocs.totalHits.value()); // Should return all the indexed point values from 900 to 999 which tests CELL_INSIDE_QUERY - assertEquals("Should return exactly 100 documents", 100, topDocs.scoreDocs.length); + assertEquals("Should return exactly return size value documents", returnSize, topDocs.scoreDocs.length); } } } @@ -543,20 +550,20 @@ public void testIntersectRightCellOutsideQuery() throws IOException { // Query only the middle range - this should create CELL_OUTSIDE_QUERY for some nodes long lower = 200; long upper = 400; - + final int size = 50; ApproximatePointRangeQuery query = new ApproximatePointRangeQuery( "point", pack(lower).bytes, pack(upper).bytes, dims, - 50, + size, SortOrder.DESC, ApproximatePointRangeQuery.LONG_FORMAT ); IndexSearcher searcher = new IndexSearcher(reader); Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); // DESC - TopDocs topDocs = searcher.search(query, 50, sort); + TopDocs topDocs = searcher.search(query, size, sort); // Should find no documents since our query range (200-400) has no documents assertEquals("Should find no documents in the gap range", 0, topDocs.totalHits.value()); @@ -584,7 +591,7 @@ public void testIntersectRightCellCrossesQuery() throws IOException { // This range will intersect with some tree nodes but not completely contain them long lower = 250; long upper = 750; - + final int size = 100; ApproximatePointRangeQuery query = new ApproximatePointRangeQuery( "point", pack(lower).bytes, @@ -597,9 +604,9 @@ public void testIntersectRightCellCrossesQuery() throws IOException { IndexSearcher searcher = new IndexSearcher(reader); Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); // DESC - TopDocs topDocs = searcher.search(query, 100, sort); + TopDocs topDocs = searcher.search(query, size, sort); - assertEquals("Should return exactly 100 documents", 100, topDocs.scoreDocs.length); + assertEquals("Should return exactly size value documents", size, topDocs.scoreDocs.length); // For Desc sort the ApproximatePointRangeQuery will slightly over collect to retain the highest matched docs assertTrue("Should collect at least requested number of documents", topDocs.totalHits.value() >= 100); } @@ -627,22 +634,22 @@ public void testIntersectRightSingleChildNode() throws IOException { try (IndexReader reader = iw.getReader()) { long lower = 500L; long upper = 999999999L; - + final int size = 50; ApproximatePointRangeQuery query = new ApproximatePointRangeQuery( "point", pack(lower).bytes, pack(upper).bytes, dims, - 50, + size, SortOrder.DESC, ApproximatePointRangeQuery.LONG_FORMAT ); IndexSearcher searcher = new IndexSearcher(reader); Sort sort = new Sort(new SortField("point", SortField.Type.LONG, true)); - TopDocs topDocs = searcher.search(query, 50, sort); + TopDocs topDocs = searcher.search(query, size, sort); - assertEquals("Should return exactly 50 documents", 50, topDocs.scoreDocs.length); + assertEquals("Should return exactly size value documents", size, topDocs.scoreDocs.length); } } } From 63e08e7bea53e494566a46851b71df7aafcc39b8 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Thu, 29 May 2025 14:17:17 -0700 Subject: [PATCH 8/8] Address commentsl fix spotless Signed-off-by: Prudhvi Godithi --- .../search/approximate/ApproximatePointRangeQueryTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java index 365da499477ea..a89102d96c083 100644 --- a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java +++ b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java @@ -431,7 +431,6 @@ public void testApproximateRangeShortCircuitDescSort() throws IOException { assertEquals("Mismatch at doc index " + i, topDocs.scoreDocs[i].doc, topDocs1.scoreDocs[i].doc); } - } catch (IOException e) { throw new RuntimeException(e); }