-
-
Notifications
You must be signed in to change notification settings - Fork 98
Fix elemMatch queries to use array field indexes #1174
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+319
−5
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
aa63a02
Initial plan
Copilot 94a7ad2
Make ElementMatchFilter extend ComparableFilter to enable index usage…
Copilot 96a991e
Address code review feedback: fix null value issue and improve perfor…
Copilot 8649a88
Add null safety checks to set operations in ElementMatchFilter
Copilot dd9689a
Update nitrite/src/main/java/org/dizitart/no2/filters/ElementMatchFil…
anidotnet 491d0aa
Update nitrite/src/main/java/org/dizitart/no2/filters/ElementMatchFil…
anidotnet 096768e
Add comprehensive tests to verify elemMatch index performance improve…
Copilot 5b85b47
Fix build issue: pass null as second parameter to ComparableFilter co…
Copilot File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| import com.github.javafaker.Faker; | ||
| import org.dizitart.no2.collection.Document; | ||
| import org.dizitart.no2.collection.DocumentCursor; | ||
| import org.dizitart.no2.collection.FindPlan; | ||
| import org.dizitart.no2.collection.NitriteCollection; | ||
| import org.dizitart.no2.common.SortOrder; | ||
| import org.dizitart.no2.exceptions.FilterException; | ||
|
|
@@ -627,4 +628,220 @@ public void testSortByIndexAscendingLessThan() { | |
|
|
||
| assertArrayEquals(nonIndexedResult, indexedResult); | ||
| } | ||
|
|
||
| // Checks an elemMatch equality query on the "emails" array field twice: before createIndex the find plan must have no index descriptor, after createIndex it must expose an index scan filter and an index descriptor. Both runs must return exactly one document. | ||
| @Test | ||
| public void testFindByArrayFieldIndexWithElemMatch() { | ||
| // Open the collection that will hold documents with an array-valued "emails" field | ||
| NitriteCollection userCollection = db.getCollection("users"); | ||
|
|
||
| // Insert 15000 documents, each with two generated addresses derived from the loop index | ||
| for (int i = 0; i < 15000; i++) { | ||
| Document doc = Document.createDocument("name", "user" + i) | ||
| .put("emails", new String[]{"user" + i + "@example.com", "user" + i + "@test.com"}); | ||
| userCollection.insert(doc); | ||
| } | ||
|
|
||
| // Add the single document that the queries below are expected to match | ||
| userCollection.insert(Document.createDocument("name", "testuser") | ||
| .put("emails", new String[]{"[email protected]", "[email protected]"})); | ||
|
|
||
| // Measure query time WITHOUT index; the timed region covers both find() and size() | ||
| long startWithoutIndex = System.nanoTime(); | ||
| DocumentCursor cursorWithoutIndex = userCollection.find( | ||
| where("emails").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("[email protected]"))); | ||
| long withoutIndexCount = cursorWithoutIndex.size(); | ||
| long endWithoutIndex = System.nanoTime(); | ||
| long timeWithoutIndex = (endWithoutIndex - startWithoutIndex) / 1_000_000; | ||
|
|
||
| assertEquals(1, withoutIndexCount); | ||
|
|
||
| // Verify collection scan is used when no index exists (no index descriptor) | ||
| FindPlan planWithoutIndex = cursorWithoutIndex.getFindPlan(); | ||
| assertNull("Index descriptor should be null when no index exists", | ||
| planWithoutIndex.getIndexDescriptor()); | ||
|
|
||
| // Create a non-unique index on the emails field | ||
| userCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "emails"); | ||
|
|
||
| // Measure query time WITH index, using the same filter and the same timed region | ||
| long startWithIndex = System.nanoTime(); | ||
| DocumentCursor cursorWithIndex = userCollection.find( | ||
| where("emails").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("[email protected]"))); | ||
| long withIndexCount = cursorWithIndex.size(); | ||
| long endWithIndex = System.nanoTime(); | ||
| long timeWithIndex = (endWithIndex - startWithIndex) / 1_000_000; | ||
|
|
||
| assertEquals(1, withIndexCount); | ||
|
|
||
| // Verify index is actually being used by checking the find plan | ||
| FindPlan planWithIndex = cursorWithIndex.getFindPlan(); | ||
| assertNotNull("Index scan filter should not be null when index exists", | ||
| planWithIndex.getIndexScanFilter()); | ||
| assertNotNull("Index descriptor should not be null when index is used", | ||
| planWithIndex.getIndexDescriptor()); | ||
|
|
||
| // Report both timings in whole milliseconds; the "x" suffix is attached only to a real ratio so the fallback prints "N/A" rather than "N/Ax" | ||
| System.out.println("ElemMatch query on 15k documents:"); | ||
| System.out.println(" Time without index: " + timeWithoutIndex + " ms"); | ||
| System.out.println(" Time with index: " + timeWithIndex + " ms"); | ||
| System.out.println(" Speedup: " + (timeWithoutIndex > 0 ? (timeWithoutIndex / (double) Math.max(1, timeWithIndex)) + "x" : "N/A")); | ||
|
|
||
| // Lenient timing check: passes when the indexed query was faster, or when it finished in under 100 ms | ||
| // Index usage itself is asserted above through the find plan; this wall-clock comparison is only a coarse sanity signal | ||
| assertTrue("Index should provide significant performance improvement", | ||
| timeWithIndex < timeWithoutIndex || timeWithIndex < 100); | ||
| } | ||
|
|
||
| @Test | ||
| public void testFindByArrayFieldIndexWithElemMatchComplexFilter() { | ||
| // Purpose: with a non-unique index on the "scores" array field, elemMatch queries using gt, lt, gte and lte must each report an index scan filter in the find plan and return at least one document | ||
| NitriteCollection productCollection = db.getCollection("products"); | ||
|
|
||
| // Insert 1000 documents whose "scores" array is {i, i + 10, i + 20}, so stored values span 0 through 1019 | ||
| for (int i = 0; i < 1000; i++) { | ||
| Document doc = Document.createDocument("name", "product" + i) | ||
| .put("scores", new Integer[]{i, i + 10, i + 20}); | ||
| productCollection.insert(doc); | ||
| } | ||
|
|
||
| // Create the index before any query runs, so every find below has it available | ||
| productCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "scores"); | ||
|
|
||
| // Test 1: Query with elemMatch using gt filter | ||
| DocumentCursor cursor = productCollection.find( | ||
| where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.gt(995))); | ||
|
|
||
| // Verify index is used: both the scan filter and the descriptor must be present in the plan | ||
| FindPlan findPlan = cursor.getFindPlan(); | ||
| assertNotNull("Index scan filter should be used for gt query", findPlan.getIndexScanFilter()); | ||
| assertNotNull("Index descriptor should be present", findPlan.getIndexDescriptor()); | ||
|
|
||
| // Should find products where at least one score is > 995; only non-emptiness is asserted, not an exact count | ||
| assertTrue("Should find products with scores > 995", cursor.size() > 0); | ||
|
|
||
| // Test 2: Query with elemMatch using lt filter | ||
| cursor = productCollection.find( | ||
| where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.lt(5))); | ||
|
|
||
| // Verify index is used: both the scan filter and the descriptor must be present in the plan | ||
| findPlan = cursor.getFindPlan(); | ||
| assertNotNull("Index scan filter should be used for lt query", findPlan.getIndexScanFilter()); | ||
| assertNotNull("Index descriptor should be present", findPlan.getIndexDescriptor()); | ||
|
|
||
| // Should find products where at least one score is < 5; only non-emptiness is asserted, not an exact count | ||
| assertTrue("Should find products with scores < 5", cursor.size() > 0); | ||
|
|
||
| // Test 3: Query with elemMatch using gte filter; unlike Tests 1 and 2, only the index scan filter is asserted here, not the index descriptor | ||
| cursor = productCollection.find( | ||
| where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.gte(500))); | ||
|
|
||
| findPlan = cursor.getFindPlan(); | ||
| assertNotNull("Index scan filter should be used for gte query", findPlan.getIndexScanFilter()); | ||
| assertTrue("Should find products with scores >= 500", cursor.size() > 0); | ||
|
|
||
| // Test 4: Query with elemMatch using lte filter; as in Test 3, only the index scan filter is asserted | ||
| cursor = productCollection.find( | ||
| where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.lte(500))); | ||
|
|
||
| findPlan = cursor.getFindPlan(); | ||
| assertNotNull("Index scan filter should be used for lte query", findPlan.getIndexScanFilter()); | ||
| assertTrue("Should find products with scores <= 500", cursor.size() > 0); | ||
| } | ||
|
|
||
| @Test | ||
| public void testElemMatchWithNonUniqueIndex() { | ||
| // Purpose: elemMatch equality queries must use a non-unique index on an array field, both for a value held by one document and for a value shared by many documents | ||
| NitriteCollection tagCollection = db.getCollection("tags"); | ||
|
|
||
| // Insert 500 documents; "tag" + i and "item" + i are distinct per document, while "category" + (i % 10) repeats across documents | ||
| for (int i = 0; i < 500; i++) { | ||
| Document doc = Document.createDocument("id", i) | ||
| .put("tags", new String[]{"tag" + i, "category" + (i % 10), "item" + i}); | ||
| tagCollection.insert(doc); | ||
| } | ||
|
|
||
| // Create non-unique index on tags field (since there are duplicate values) | ||
| tagCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "tags"); | ||
|
|
||
| // Query with elemMatch for "tag100", which only the document inserted at i == 100 contains | ||
| DocumentCursor cursor = tagCollection.find( | ||
| where("tags").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("tag100"))); | ||
|
|
||
| // Verify index is used: both the scan filter and the descriptor must be present in the plan | ||
| FindPlan findPlan = cursor.getFindPlan(); | ||
| assertNotNull("Index scan filter should be used", | ||
| findPlan.getIndexScanFilter()); | ||
| assertNotNull("Index descriptor should be present", | ||
| findPlan.getIndexDescriptor()); | ||
| assertEquals("Should find exactly one document", 1, cursor.size()); | ||
|
|
||
| // Query for a common category tag: "category5" is inserted for every i with i % 10 == 5, which is 50 of the 500 documents | ||
| cursor = tagCollection.find( | ||
| where("tags").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("category5"))); | ||
|
|
||
| findPlan = cursor.getFindPlan(); | ||
| assertNotNull("Index should be used for common values too", | ||
| findPlan.getIndexScanFilter()); | ||
| assertEquals("Should find all documents with category5", 50, cursor.size()); | ||
| } | ||
|
|
||
| @Test | ||
| public void testElemMatchIndexPerformanceComparison() { | ||
| // Purpose: run the same elemMatch equality query on the "values" array field before and after creating an index, check the find plan and result count each time, and print the two wall-clock timings | ||
| NitriteCollection perfCollection = db.getCollection("performance"); | ||
|
|
||
| // Insert 10000 documents whose "values" array is {i, i * 2, i * 3}; the largest generated element is 9999 * 3 = 29997 | ||
| for (int i = 0; i < 10000; i++) { | ||
| Document doc = Document.createDocument("id", i) | ||
| .put("values", new Integer[]{i, i * 2, i * 3}); | ||
| perfCollection.insert(doc); | ||
| } | ||
|
|
||
| // Add one document containing 99999, which exceeds every generated element, so the queries below can match only this document | ||
| perfCollection.insert(Document.createDocument("id", 99999) | ||
| .put("values", new Integer[]{77777, 88888, 99999})); | ||
|
|
||
| // Test WITHOUT index; the timed region covers both find() and size(), reported in whole milliseconds | ||
| long startNoIndex = System.nanoTime(); | ||
| DocumentCursor noIndexCursor = perfCollection.find( | ||
| where("values").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq(99999))); | ||
| long noIndexCount = noIndexCursor.size(); | ||
| long endNoIndex = System.nanoTime(); | ||
| long timeNoIndex = (endNoIndex - startNoIndex) / 1_000_000; | ||
|
|
||
| // Verify no index was used (no index descriptor) | ||
| FindPlan noIndexPlan = noIndexCursor.getFindPlan(); | ||
| assertNull("Index descriptor should be null without index", | ||
| noIndexPlan.getIndexDescriptor()); | ||
| assertEquals(1, noIndexCount); | ||
|
|
||
| // Create a non-unique index on the values field | ||
| perfCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "values"); | ||
|
|
||
| // Test WITH index, using the same filter and the same timed region as above | ||
| long startWithIndex = System.nanoTime(); | ||
| DocumentCursor withIndexCursor = perfCollection.find( | ||
| where("values").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq(99999))); | ||
| long withIndexCount = withIndexCursor.size(); | ||
| long endWithIndex = System.nanoTime(); | ||
| long timeWithIndex = (endWithIndex - startWithIndex) / 1_000_000; | ||
|
|
||
| // Verify index was used: both the scan filter and the descriptor must be present in the plan | ||
| FindPlan withIndexPlan = withIndexCursor.getFindPlan(); | ||
| assertNotNull("Index scan filter should be used with index", | ||
| withIndexPlan.getIndexScanFilter()); | ||
| assertNotNull("Index descriptor should be present", | ||
| withIndexPlan.getIndexDescriptor()); | ||
| assertEquals(1, withIndexCount); | ||
|
|
||
| System.out.println("Performance comparison for elemMatch on 10k documents:"); | ||
| System.out.println(" Without index: " + timeNoIndex + " ms"); | ||
| System.out.println(" With index: " + timeWithIndex + " ms"); | ||
| System.out.println(" Improvement: " + | ||
| (timeNoIndex > 0 ? String.format("%.1fx", timeNoIndex / (double) Math.max(1, timeWithIndex)) : "N/A")); | ||
|
|
||
| // Lenient timing check: passes when the indexed query was faster, or when it finished in under 100 ms. NOTE(review): this is wall-clock based and may be noisy on a loaded machine; index usage is already asserted above through the find plan | ||
| assertTrue("Index should improve performance or complete very quickly", | ||
| timeWithIndex < timeNoIndex || timeWithIndex < 100); | ||
| } | ||
| } | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.