Skip to content
107 changes: 102 additions & 5 deletions nitrite/src/main/java/org/dizitart/no2/filters/ElementMatchFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@
import org.dizitart.no2.collection.NitriteId;
import org.dizitart.no2.common.tuples.Pair;
import org.dizitart.no2.exceptions.FilterException;
import org.dizitart.no2.index.IndexMap;

import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
Expand All @@ -35,13 +37,19 @@
* @author Anindya Chatterjee
* @since 1.0
*/
class ElementMatchFilter extends NitriteFilter {
private final String field;
class ElementMatchFilter extends ComparableFilter {
private final Filter elementFilter;

/**
 * Creates a filter that matches documents by applying {@code elementFilter}
 * to the value stored under {@code field}.
 *
 * @param field         name of the document field to inspect
 * @param elementFilter filter this instance delegates to
 */
ElementMatchFilter(String field, Filter elementFilter) {
// The field name goes to the superclass; no comparable value is supplied.
super(field, null);
this.elementFilter = elementFilter;
// NOTE(review): assigning a local `field` in addition to passing it to
// super(...) looks like a leftover from the pre-ComparableFilter version — confirm.
this.field = field;
}

/**
 * Returns the comparable value of this filter.
 *
 * @return always {@code null}; this filter carries no comparable value of its own
 */
@Override
public Comparable<?> getComparable() {
// ElementMatchFilter does not use a comparable value directly;
// index operations are delegated to the inner element filter.
return null;
}

@Override
Expand All @@ -56,7 +64,7 @@ public boolean apply(Pair<NitriteId, Document> element) {
}

Document document = element.getSecond();
Object fieldValue = document.get(field);
Object fieldValue = document.get(getField());
if (fieldValue == null) {
return false;
}
Expand All @@ -77,9 +85,98 @@ public boolean apply(Pair<NitriteId, Document> element) {
}
}

/**
 * Evaluates this filter against an index by delegating to the inner element filter.
 *
 * <p>A {@link ComparableFilter} is applied to the index directly; an {@link AndFilter}
 * or {@link OrFilter} is routed to the matching helper. Any other filter type yields
 * an empty list.
 *
 * @param indexMap the index to evaluate against
 * @return the entries produced by the inner filter(s), or an empty list when the
 *         inner filter type is not supported here
 */
@Override
public List<?> applyOnIndex(IndexMap indexMap) {
    // Simple case: the inner filter can be evaluated on the index as-is.
    // (Per the original author's note, array fields are indexed element by element.)
    if (elementFilter instanceof ComparableFilter) {
        ComparableFilter comparable = (ComparableFilter) elementFilter;
        return comparable.applyOnIndex(indexMap);
    }

    // Logical combinations of filters are handled by dedicated helpers.
    if (elementFilter instanceof AndFilter) {
        return applyAndFilterOnIndex((AndFilter) elementFilter, indexMap);
    }
    if (elementFilter instanceof OrFilter) {
        return applyOrFilterOnIndex((OrFilter) elementFilter, indexMap);
    }

    // Unsupported inner filter: nothing can be read from the index.
    // NOTE(review): the original comment says an empty list triggers a collection
    // scan — confirm callers do not interpret it as "no documents match".
    return new ArrayList<>();
}

/**
 * Evaluates an AND filter against the index by intersecting the results of each
 * of its sub-filters.
 *
 * <p>Every sub-filter must be a {@link ComparableFilter}; as soon as one is not,
 * an empty list is returned. Evaluation stops early once the running intersection
 * becomes empty.
 *
 * <p>NOTE(review): if the index results identify documents rather than individual
 * array elements, the intersection may include documents in which different
 * elements satisfy different conjuncts — confirm this matches the intended
 * elemMatch semantics.
 *
 * @param andFilter the AND filter whose sub-filters are evaluated
 * @param indexMap  the index to evaluate against
 * @return the entries common to all sub-filter results; never {@code null}
 */
private List<?> applyAndFilterOnIndex(AndFilter andFilter, IndexMap indexMap) {
    List<?> matches = null;

    for (Filter filter : andFilter.getFilters()) {
        if (!(filter instanceof ComparableFilter)) {
            // A non-comparable sub-filter cannot be evaluated on the index.
            return new ArrayList<>();
        }

        List<?> current = ((ComparableFilter) filter).applyOnIndex(indexMap);
        if (current == null) {
            // Treat a null result as "no matches", consistent with intersect() and
            // the OR path. Previously a null result from the first sub-filter was
            // dereferenced below and raised a NullPointerException.
            return new ArrayList<>();
        }

        matches = (matches == null) ? current : intersect(matches, current);
        if (matches.isEmpty()) {
            // Short-circuit: an empty intersection can never grow again.
            return matches;
        }
    }

    // matches is still null only when the AND filter had no sub-filters.
    return matches != null ? matches : new ArrayList<>();
}

/**
 * Evaluates an OR filter against the index by taking the union of the results of
 * each of its sub-filters.
 *
 * <p>Every sub-filter must be a {@link ComparableFilter}; as soon as one is not,
 * an empty list is returned. Duplicates are removed by collecting into a set, so
 * the order of the returned entries is unspecified.
 *
 * @param orFilter the OR filter whose sub-filters are evaluated
 * @param indexMap the index to evaluate against
 * @return the distinct entries produced by any sub-filter
 */
private List<?> applyOrFilterOnIndex(OrFilter orFilter, IndexMap indexMap) {
    Set<Object> union = new HashSet<>();

    for (Filter candidate : orFilter.getFilters()) {
        if (!(candidate instanceof ComparableFilter)) {
            // A non-comparable sub-filter cannot be evaluated on the index.
            return new ArrayList<>();
        }

        List<?> partial = ((ComparableFilter) candidate).applyOnIndex(indexMap);
        if (partial == null || partial.isEmpty()) {
            continue;
        }
        union.addAll(partial);
    }

    return new ArrayList<>(union);
}

/**
 * Computes the elements of {@code list1} that also occur in {@code list2}.
 *
 * <p>Non-null matches keep their order and multiplicity from {@code list1}.
 * A {@code null} element is reported at most once, appended at the end, and only
 * when both lists contain {@code null}.
 *
 * @param list1 the list whose elements are filtered; may be {@code null}
 * @param list2 the list to test membership against; may be {@code null}
 * @return a new list with the common elements; empty if either input is
 *         {@code null} or empty
 */
private List<?> intersect(List<?> list1, List<?> list2) {
    if (list1 == null || list1.isEmpty()) {
        return new ArrayList<>();
    }
    if (list2 == null || list2.isEmpty()) {
        return new ArrayList<>();
    }

    // Hash-based membership test keeps the filtering pass linear.
    Set<Object> lookup = new HashSet<>(list2);

    // Start from a copy of list1 and drop everything that is null or unmatched.
    List<Object> common = new ArrayList<>(list1);
    common.removeIf(candidate -> candidate == null || !lookup.contains(candidate));

    // null is handled separately: it is added once if present on both sides.
    boolean nullOnBothSides = list1.contains(null) && list2.contains(null);
    if (nullOnBothSides) {
        common.add(null);
    }
    return common;
}

/**
 * Returns a textual form of this filter: {@code elemMatch(<field> : <elementFilter>)}.
 */
@Override
public String toString() {
return "elemMatch(" + field + " : " + elementFilter.toString() + ")";
// NOTE(review): the statement below follows an unconditional return and is
// therefore unreachable; only one of the two returns should remain — confirm which.
return "elemMatch(" + getField() + " : " + elementFilter.toString() + ")";
}

@SuppressWarnings("rawtypes")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.github.javafaker.Faker;
import org.dizitart.no2.collection.Document;
import org.dizitart.no2.collection.DocumentCursor;
import org.dizitart.no2.collection.FindPlan;
import org.dizitart.no2.collection.NitriteCollection;
import org.dizitart.no2.common.SortOrder;
import org.dizitart.no2.exceptions.FilterException;
Expand Down Expand Up @@ -627,4 +628,220 @@ public void testSortByIndexAscendingLessThan() {

assertArrayEquals(nonIndexedResult, indexedResult);
}

/**
 * Runs the same elemMatch equality query on an array field before and after an
 * index is created, checking the result count and the find plan in both cases,
 * and printing the elapsed time of each run.
 */
@Test
public void testFindByArrayFieldIndexWithElemMatch() {
// Create a collection with array field
NitriteCollection userCollection = db.getCollection("users");

// Insert a larger dataset (15k documents as mentioned in the issue)
for (int i = 0; i < 15000; i++) {
Document doc = Document.createDocument("name", "user" + i)
.put("emails", new String[]{"user" + i + "@example.com", "user" + i + "@test.com"});
userCollection.insert(doc);
}

// Add the single document the queries below are expected to find
userCollection.insert(Document.createDocument("name", "testuser")
.put("emails", new String[]{"[email protected]", "[email protected]"}));

// Measure query time WITHOUT index (find + size are both inside the timed span)
long startWithoutIndex = System.nanoTime();
DocumentCursor cursorWithoutIndex = userCollection.find(
where("emails").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("[email protected]")));
long withoutIndexCount = cursorWithoutIndex.size();
long endWithoutIndex = System.nanoTime();
// Convert nanoseconds to whole milliseconds
long timeWithoutIndex = (endWithoutIndex - startWithoutIndex) / 1_000_000;

assertEquals(1, withoutIndexCount);

// Verify collection scan is used when no index exists (no index descriptor)
FindPlan planWithoutIndex = cursorWithoutIndex.getFindPlan();
assertNull("Index descriptor should be null when no index exists",
planWithoutIndex.getIndexDescriptor());

// Create index on emails field
userCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "emails");

// Measure query time WITH index (same query, same timed span as above)
long startWithIndex = System.nanoTime();
DocumentCursor cursorWithIndex = userCollection.find(
where("emails").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("[email protected]")));
long withIndexCount = cursorWithIndex.size();
long endWithIndex = System.nanoTime();
long timeWithIndex = (endWithIndex - startWithIndex) / 1_000_000;

assertEquals(1, withIndexCount);

// Verify index is actually being used by checking the find plan
FindPlan planWithIndex = cursorWithIndex.getFindPlan();
assertNotNull("Index scan filter should not be null when index exists",
planWithIndex.getIndexScanFilter());
assertNotNull("Index descriptor should not be null when index is used",
planWithIndex.getIndexDescriptor());

// Report the measured timings; Math.max(1, ...) avoids division by zero
System.out.println("ElemMatch query on 15k documents:");
System.out.println(" Time without index: " + timeWithoutIndex + " ms");
System.out.println(" Time with index: " + timeWithIndex + " ms");
System.out.println(" Speedup: " + (timeWithoutIndex > 0 ? (timeWithoutIndex / (double) Math.max(1, timeWithIndex)) : "N/A") + "x");

// Lenient timing check: passes when the indexed run is faster than the
// non-indexed run OR finishes in under 100 ms. It does not enforce any
// particular speedup factor.
// NOTE(review): wall-clock assertions can be flaky on loaded machines; the
// find-plan assertions above are the deterministic proof of index usage.
assertTrue("Index should provide significant performance improvement",
timeWithIndex < timeWithoutIndex || timeWithIndex < 100);
}

/**
 * Checks that elemMatch queries using range operators (gt, lt, gte, lte) on an
 * indexed array field report an index scan in their find plan and return at
 * least one document.
 */
@Test
public void testFindByArrayFieldIndexWithElemMatchComplexFilter() {
    NitriteCollection productCollection = db.getCollection("products");

    // Each product carries three scores: n, n + 10 and n + 20.
    for (int n = 0; n < 1000; n++) {
        Document product = Document.createDocument("name", "product" + n)
            .put("scores", new Integer[]{n, n + 10, n + 20});
        productCollection.insert(product);
    }

    productCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "scores");

    // Case 1: strictly greater than — at least one score above 995.
    DocumentCursor gtCursor = productCollection.find(
        where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.gt(995)));
    FindPlan gtPlan = gtCursor.getFindPlan();
    assertNotNull("Index scan filter should be used for gt query", gtPlan.getIndexScanFilter());
    assertNotNull("Index descriptor should be present", gtPlan.getIndexDescriptor());
    assertTrue("Should find products with scores > 995", gtCursor.size() > 0);

    // Case 2: strictly less than — at least one score below 5.
    DocumentCursor ltCursor = productCollection.find(
        where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.lt(5)));
    FindPlan ltPlan = ltCursor.getFindPlan();
    assertNotNull("Index scan filter should be used for lt query", ltPlan.getIndexScanFilter());
    assertNotNull("Index descriptor should be present", ltPlan.getIndexDescriptor());
    assertTrue("Should find products with scores < 5", ltCursor.size() > 0);

    // Case 3: greater than or equal — only the scan filter is checked here.
    DocumentCursor gteCursor = productCollection.find(
        where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.gte(500)));
    FindPlan gtePlan = gteCursor.getFindPlan();
    assertNotNull("Index scan filter should be used for gte query", gtePlan.getIndexScanFilter());
    assertTrue("Should find products with scores >= 500", gteCursor.size() > 0);

    // Case 4: less than or equal — only the scan filter is checked here.
    DocumentCursor lteCursor = productCollection.find(
        where("scores").elemMatch(org.dizitart.no2.filters.FluentFilter.$.lte(500)));
    FindPlan ltePlan = lteCursor.getFindPlan();
    assertNotNull("Index scan filter should be used for lte query", ltePlan.getIndexScanFilter());
    assertTrue("Should find products with scores <= 500", lteCursor.size() > 0);
}

/**
 * Checks elemMatch equality queries against a non-unique index on an array field,
 * for a value held by a single document and for a value shared by many.
 */
@Test
public void testElemMatchWithNonUniqueIndex() {
    NitriteCollection tagCollection = db.getCollection("tags");

    // 500 documents; the "category" tag cycles through 10 values, so each
    // category value is shared by 50 documents.
    for (int n = 0; n < 500; n++) {
        String[] tags = new String[]{"tag" + n, "category" + (n % 10), "item" + n};
        Document tagged = Document.createDocument("id", n).put("tags", tags);
        tagCollection.insert(tagged);
    }

    // Non-unique, because the category tags repeat across documents.
    tagCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "tags");

    // A tag held by exactly one document.
    DocumentCursor singleMatch = tagCollection.find(
        where("tags").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("tag100")));
    FindPlan singlePlan = singleMatch.getFindPlan();
    assertNotNull("Index scan filter should be used",
        singlePlan.getIndexScanFilter());
    assertNotNull("Index descriptor should be present",
        singlePlan.getIndexDescriptor());
    assertEquals("Should find exactly one document", 1, singleMatch.size());

    // A tag shared by many documents.
    DocumentCursor sharedMatch = tagCollection.find(
        where("tags").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq("category5")));
    FindPlan sharedPlan = sharedMatch.getFindPlan();
    assertNotNull("Index should be used for common values too",
        sharedPlan.getIndexScanFilter());
    assertEquals("Should find all documents with category5", 50, sharedMatch.size());
}

/**
 * Runs one elemMatch equality query on an integer array field before and after
 * index creation, verifying the result count and find plan each time and
 * printing the elapsed time of both runs.
 */
@Test
public void testElemMatchIndexPerformanceComparison() {
// This test explicitly measures and compares performance
NitriteCollection perfCollection = db.getCollection("performance");

// Insert 10k documents whose values are i, 2i and 3i
for (int i = 0; i < 10000; i++) {
Document doc = Document.createDocument("id", i)
.put("values", new Integer[]{i, i * 2, i * 3});
perfCollection.insert(doc);
}

// Add one more document holding 99999, the value queried below.
// The loop's largest generated value is 3 * 9999 = 29997, so 99999 is unique.
perfCollection.insert(Document.createDocument("id", 99999)
.put("values", new Integer[]{77777, 88888, 99999}));

// Test WITHOUT index (find + size are both inside the timed span)
long startNoIndex = System.nanoTime();
DocumentCursor noIndexCursor = perfCollection.find(
where("values").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq(99999)));
long noIndexCount = noIndexCursor.size();
long endNoIndex = System.nanoTime();
// Convert nanoseconds to whole milliseconds
long timeNoIndex = (endNoIndex - startNoIndex) / 1_000_000;

// Verify no index was used (no index descriptor)
FindPlan noIndexPlan = noIndexCursor.getFindPlan();
assertNull("Index descriptor should be null without index",
noIndexPlan.getIndexDescriptor());
assertEquals(1, noIndexCount);

// Create index
perfCollection.createIndex(IndexOptions.indexOptions(IndexType.NON_UNIQUE), "values");

// Test WITH index (same query, same timed span as above)
long startWithIndex = System.nanoTime();
DocumentCursor withIndexCursor = perfCollection.find(
where("values").elemMatch(org.dizitart.no2.filters.FluentFilter.$.eq(99999)));
long withIndexCount = withIndexCursor.size();
long endWithIndex = System.nanoTime();
long timeWithIndex = (endWithIndex - startWithIndex) / 1_000_000;

// Verify index was used
FindPlan withIndexPlan = withIndexCursor.getFindPlan();
assertNotNull("Index scan filter should be used with index",
withIndexPlan.getIndexScanFilter());
assertNotNull("Index descriptor should be present",
withIndexPlan.getIndexDescriptor());
assertEquals(1, withIndexCount);

// Report the measured timings; Math.max(1, ...) avoids division by zero
System.out.println("Performance comparison for elemMatch on 10k documents:");
System.out.println(" Without index: " + timeNoIndex + " ms");
System.out.println(" With index: " + timeWithIndex + " ms");
System.out.println(" Improvement: " +
(timeNoIndex > 0 ? String.format("%.1fx", timeNoIndex / (double) Math.max(1, timeWithIndex)) : "N/A"));

// Lenient timing check: passes when the indexed run is faster than the
// non-indexed run OR finishes in under 100 ms.
// NOTE(review): wall-clock assertions can be flaky on loaded machines; the
// find-plan assertions above are the deterministic proof of index usage.
assertTrue("Index should improve performance or complete very quickly",
timeWithIndex < timeNoIndex || timeWithIndex < 100);
}
}