diff --git a/nitrite/src/main/java/org/dizitart/no2/filters/ElementMatchFilter.java b/nitrite/src/main/java/org/dizitart/no2/filters/ElementMatchFilter.java
index c37f9aabf..aeb6fb71e 100644
--- a/nitrite/src/main/java/org/dizitart/no2/filters/ElementMatchFilter.java
+++ b/nitrite/src/main/java/org/dizitart/no2/filters/ElementMatchFilter.java
@@ -20,9 +20,11 @@
 import org.dizitart.no2.collection.NitriteId;
 import org.dizitart.no2.common.tuples.Pair;
 import org.dizitart.no2.exceptions.FilterException;
+import org.dizitart.no2.index.IndexMap;
 
 import java.lang.reflect.Array;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.regex.Matcher;
@@ -35,13 +37,19 @@
  * @author Anindya Chatterjee
  * @since 1.0
  */
-class ElementMatchFilter extends NitriteFilter {
-    private final String field;
+class ElementMatchFilter extends ComparableFilter {
     private final Filter elementFilter;
 
     ElementMatchFilter(String field, Filter elementFilter) {
+        super(field, null);
         this.elementFilter = elementFilter;
-        this.field = field;
+    }
+
+    @Override
+    public Comparable getComparable() {
+        // ElementMatchFilter doesn't use the comparable value directly
+        // It delegates to the inner filter for index operations
+        return null;
     }
 
     @Override
@@ -56,7 +64,7 @@ public boolean apply(Pair element) {
         }
 
         Document document = element.getSecond();
-        Object fieldValue = document.get(field);
+        Object fieldValue = document.get(getField());
         if (fieldValue == null) {
             return false;
         }
@@ -77,9 +85,119 @@ public boolean apply(Pair element) {
         }
     }
 
+    /**
+     * Resolves this filter against an index built on the array field.
+     * The inner filter is applied to the index directly when it is a
+     * {@link ComparableFilter}; AND and OR inner filters are combined from
+     * the index results of their operands.
+     *
+     * @param indexMap the index to scan
+     * @return the inner filter's index result, or an empty list when the inner
+     *         filter cannot be evaluated on the index
+     */
+    @Override
+    public List applyOnIndex(IndexMap indexMap) {
+        // Assumes the index holds one entry per array element, so the inner
+        // filter can be applied to the index as-is.
+        if (elementFilter instanceof ComparableFilter) {
+            return ((ComparableFilter) elementFilter).applyOnIndex(indexMap);
+        }
+
+        if (elementFilter instanceof AndFilter) {
+            return applyAndFilterOnIndex((AndFilter) elementFilter, indexMap);
+        }
+        if (elementFilter instanceof OrFilter) {
+            return applyOrFilterOnIndex((OrFilter) elementFilter, indexMap);
+        }
+
+        // NOTE(review): an empty list is meant to trigger a collection scan.
+        // Confirm the caller does not read it as "no matches"; otherwise any
+        // other inner filter on an indexed field silently returns nothing.
+        return new ArrayList<>();
+    }
+
+    /**
+     * Intersects the index results of every operand of an AND inner filter.
+     *
+     * NOTE(review): the intersection is taken over whole index results, not per
+     * array element. If those results identify documents, [1, 20] would pass
+     * elemMatch(gt(5) AND lt(10)) here although no single element matches;
+     * confirm the candidates are re-checked with apply().
+     *
+     * @return the intersection, or an empty list when an operand is not a
+     *         {@link ComparableFilter} or nothing matches
+     */
+    private List applyAndFilterOnIndex(AndFilter andFilter, IndexMap indexMap) {
+        List result = null;
+
+        for (Filter filter : andFilter.getFilters()) {
+            if (!(filter instanceof ComparableFilter)) {
+                // One operand that cannot use the index rules out index use
+                return new ArrayList<>();
+            }
+
+            List filterResult = ((ComparableFilter) filter).applyOnIndex(indexMap);
+            if (filterResult == null) {
+                // Same null tolerance as the OR path: null means no matches
+                return new ArrayList<>();
+            }
+
+            result = (result == null) ? filterResult : intersect(result, filterResult);
+            if (result.isEmpty()) {
+                return result; // Short-circuit: an intersection can only shrink
+            }
+        }
+
+        return result != null ? result : new ArrayList<>();
+    }
+
+    /**
+     * Unions the index results of every operand of an OR inner filter.
+     *
+     * @return the de-duplicated union, or an empty list when an operand is
+     *         not a {@link ComparableFilter}
+     */
+    private List applyOrFilterOnIndex(OrFilter orFilter, IndexMap indexMap) {
+        Set<Object> resultSet = new HashSet<>();
+
+        for (Filter filter : orFilter.getFilters()) {
+            if (!(filter instanceof ComparableFilter)) {
+                // One operand that cannot use the index rules out index use
+                return new ArrayList<>();
+            }
+
+            List filterResult = ((ComparableFilter) filter).applyOnIndex(indexMap);
+            if (filterResult != null) {
+                resultSet.addAll(filterResult);
+            }
+        }
+
+        return new ArrayList<>(resultSet);
+    }
+
+    /**
+     * Returns the elements of {@code list1} that also occur in {@code list2},
+     * in {@code list1} order. A null or empty argument yields an empty list.
+     */
+    private List intersect(List list1, List list2) {
+        List<Object> result = new ArrayList<>();
+        if (list1 == null || list1.isEmpty() || list2 == null || list2.isEmpty()) {
+            return result;
+        }
+
+        // HashSet accepts null, so null entries need no separate handling
+        Set<Object> lookup = new HashSet<>(list2);
+        for (Object item : list1) {
+            if (lookup.contains(item)) {
+                result.add(item);
+            }
+        }
+        return result;
+    }
+
     @Override
     public String toString() {
-        return "elemMatch(" + field + " : " + elementFilter.toString() + ")";
+        return "elemMatch(" + getField() + " : " + elementFilter.toString() + ")";
     }
 
     @SuppressWarnings("rawtypes")
diff --git a/nitrite/src/test/java/org/dizitart/no2/integration/collection/CollectionFindBySingleFieldIndexTest.java b/nitrite/src/test/java/org/dizitart/no2/integration/collection/CollectionFindBySingleFieldIndexTest.java
index 722df3d52..9a97a2640 100644
--- a/nitrite/src/test/java/org/dizitart/no2/integration/collection/CollectionFindBySingleFieldIndexTest.java
+++ b/nitrite/src/test/java/org/dizitart/no2/integration/collection/CollectionFindBySingleFieldIndexTest.java
@@ -20,6 +20,7 @@
 import com.github.javafaker.Faker;
 import org.dizitart.no2.collection.Document;
 import org.dizitart.no2.collection.DocumentCursor;
+import org.dizitart.no2.collection.FindPlan;
 import org.dizitart.no2.collection.NitriteCollection;
 import org.dizitart.no2.common.SortOrder;
 import org.dizitart.no2.exceptions.FilterException;
@@ -627,4 +628,161 @@ public void testSortByIndexAscendingLessThan() {
 
         assertArrayEquals(nonIndexedResult, indexedResult);
     }
+
+    @Test
+    public void testFindByArrayFieldIndexWithElemMatch() {
+        NitriteCollection userCollection = db.getCollection("users");
+
+        // Background documents that must not match the query below
+        for (int i = 0; i < 1000; i++) {
+            Document doc = Document.createDocument("name", "user" + i)
+                .put("emails", new String[]{"user" + i + "@example.com", "user" + i + "@test.com"});
+            userCollection.insert(doc);
+        }
+
+        // The only document the query is expected to find
+        userCollection.insert(Document.createDocument("name", "testuser")
+            .put("emails", new String[]{"test@gmail.com", "test@example.com"}));
+
+        // Without an index the plan must not reference an index descriptor
+        DocumentCursor cursorWithoutIndex = userCollection.find(
+            where("emails").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("test@gmail.com")));
+        assertEquals(1, cursorWithoutIndex.size());
+        FindPlan planWithoutIndex = cursorWithoutIndex.getFindPlan();
+        assertNull("Index descriptor should be null when no index exists",
+            planWithoutIndex.getIndexDescriptor());
+
+        userCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "emails");
+
+        // With the index the same query must return the same result via an index scan
+        DocumentCursor cursorWithIndex = userCollection.find(
+            where("emails").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("test@gmail.com")));
+        assertEquals(1, cursorWithIndex.size());
+        FindPlan planWithIndex = cursorWithIndex.getFindPlan();
+        assertNotNull("Index scan filter should not be null when index exists",
+            planWithIndex.getIndexScanFilter());
+        assertNotNull("Index descriptor should not be null when index is used",
+            planWithIndex.getIndexDescriptor());
+
+        // Elapsed time is deliberately not asserted: it varies with JIT warm-up and
+        // machine load, so the find plan is the stable evidence that the index is used.
+    }
+
+    @Test
+    public void testFindByArrayFieldIndexWithElemMatchComplexFilter() {
+        NitriteCollection productCollection = db.getCollection("products");
+
+        // Document i carries the scores {i, i + 10, i + 20}
+        for (int i = 0; i < 1000; i++) {
+            Document doc = Document.createDocument("name", "product" + i)
+                .put("scores", new Integer[]{i, i + 10, i + 20});
+            productCollection.insert(doc);
+        }
+
+        productCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "scores");
+
+        // gt: the largest score i + 20 exceeds 995 for i = 976..999
+        DocumentCursor cursor = productCollection.find(
+            where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.gt(995)));
+        FindPlan findPlan = cursor.getFindPlan();
+        assertNotNull("Index scan filter should be used for gt query", findPlan.getIndexScanFilter());
+        assertNotNull("Index descriptor should be present", findPlan.getIndexDescriptor());
+        assertEquals("Should find products with scores > 995", 24, cursor.size());
+
+        // lt: the smallest score i is below 5 for i = 0..4
+        cursor = productCollection.find(
+            where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.lt(5)));
+        findPlan = cursor.getFindPlan();
+        assertNotNull("Index scan filter should be used for lt query", findPlan.getIndexScanFilter());
+        assertNotNull("Index descriptor should be present", findPlan.getIndexDescriptor());
+        assertEquals("Should find products with scores < 5", 5, cursor.size());
+
+        // gte: i + 20 >= 500 holds for i = 480..999
+        cursor = productCollection.find(
+            where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.gte(500)));
+        findPlan = cursor.getFindPlan();
+        assertNotNull("Index scan filter should be used for gte query", findPlan.getIndexScanFilter());
+        assertEquals("Should find products with scores >= 500", 520, cursor.size());
+
+        // lte: i <= 500 holds for i = 0..500
+        cursor = productCollection.find(
+            where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.lte(500)));
+        findPlan = cursor.getFindPlan();
+        assertNotNull("Index scan filter should be used for lte query", findPlan.getIndexScanFilter());
+        assertEquals("Should find products with scores <= 500", 501, cursor.size());
+    }
+
+    @Test
+    public void testElemMatchWithNonUniqueIndex() {
+        NitriteCollection tagCollection = db.getCollection("tags");
+
+        // "tag<i>" and "item<i>" are unique per document; "category<i % 10>" is shared
+        for (int i = 0; i < 500; i++) {
+            Document doc = Document.createDocument("id", i)
+                .put("tags", new String[]{"tag" + i, "category" + (i % 10), "item" + i});
+            tagCollection.insert(doc);
+        }
+
+        // Non-unique index, because the category tags repeat across documents
+        tagCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "tags");
+
+        // A tag carried by exactly one document
+        DocumentCursor cursor = tagCollection.find(
+            where("tags").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("tag100")));
+        FindPlan findPlan = cursor.getFindPlan();
+        assertNotNull("Index scan filter should be used",
+            findPlan.getIndexScanFilter());
+        assertNotNull("Index descriptor should be present",
+            findPlan.getIndexDescriptor());
+        assertEquals("Should find exactly one document", 1, cursor.size());
+
+        // A category tag shared by every tenth document: 500 / 10 = 50 matches
+        cursor = tagCollection.find(
+            where("tags").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("category5")));
+        findPlan = cursor.getFindPlan();
+        assertNotNull("Index should be used for common values too",
+            findPlan.getIndexScanFilter());
+        assertEquals("Should find all documents with category5", 50, cursor.size());
+    }
+
+    @Test
+    public void testElemMatchIndexPerformanceComparison() {
+        NitriteCollection perfCollection = db.getCollection("performance");
+
+        // Background documents; none of their values reaches 99999
+        for (int i = 0; i < 1000; i++) {
+            Document doc = Document.createDocument("id", i)
+                .put("values", new Integer[]{i, i * 2, i * 3});
+            perfCollection.insert(doc);
+        }
+
+        // The only document containing the queried value
+        perfCollection.insert(Document.createDocument("id", 99999)
+            .put("values", new Integer[]{77777, 88888, 99999}));
+
+        // Collection scan: no index descriptor in the plan
+        DocumentCursor noIndexCursor = perfCollection.find(
+            where("values").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq(99999)));
+        long noIndexCount = noIndexCursor.size();
+        FindPlan noIndexPlan = noIndexCursor.getFindPlan();
+        assertNull("Index descriptor should be null without index",
+            noIndexPlan.getIndexDescriptor());
+        assertEquals(1, noIndexCount);
+
+        perfCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "values");
+
+        // Index scan: same query, same result, index present in the plan
+        DocumentCursor withIndexCursor = perfCollection.find(
+            where("values").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq(99999)));
+        long withIndexCount = withIndexCursor.size();
+        FindPlan withIndexPlan = withIndexCursor.getFindPlan();
+        assertNotNull("Index scan filter should be used with index",
+            withIndexPlan.getIndexScanFilter());
+        assertNotNull("Index descriptor should be present",
+            withIndexPlan.getIndexDescriptor());
+        assertEquals("Index scan and collection scan should agree", noIndexCount, withIndexCount);
+
+        // Elapsed time is deliberately not asserted: a wall-clock comparison inside
+        // a unit test is non-deterministic and fails spuriously on a loaded machine.
+    }
 }