diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java index c1624b6687d4d..c7d6ca7360c76 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java @@ -27,7 +27,53 @@ protected String functionName() { @Override protected QueryBuilder createQueryBuilder(ConversionUtils.RelevanceOperands operands) { - return new WildcardQueryBuilder(operands.fieldName(), operands.query()); + String convertedPattern = convertSqlWildcardToLucene(operands.query()); + return new WildcardQueryBuilder(operands.fieldName(), convertedPattern); + } + + /** + * Converts SQL wildcard characters (% and _) to Lucene wildcard characters (* and ?). + * Escaped wildcards (\% and \_) are treated as literal characters. + * An escaped backslash (\\) is emitted as \\ because backslash is also Lucene's wildcard escape character, so it matches a literal backslash. 
+ */ + private static String convertSqlWildcardToLucene(String text) { + final char ESCAPE = '\\'; + StringBuilder result = new StringBuilder(text.length()); + boolean escaped = false; + + for (char c : text.toCharArray()) { + if (escaped) { + switch (c) { + case '%': + result.append('%'); + break; + case '_': + result.append('_'); + break; + case ESCAPE: + result.append(ESCAPE).append(ESCAPE); + break; + default: + result.append(ESCAPE); + result.append(c); + break; + } + escaped = false; + } else if (c == ESCAPE) { + escaped = true; + } else if (c == '%') { + result.append('*'); + } else if (c == '_') { + result.append('?'); + } else { + result.append(c); + } + } + // Trailing backslash with nothing to escape — preserve it + if (escaped) { + result.append(ESCAPE); + } + return result.toString(); } @Override diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java index 9c54327bbdac9..0cc7191c7fb71 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java +++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java @@ -391,6 +391,157 @@ public void testWildcardQuerySerializerWithBoost() throws IOException { } } + // --- SQL-to-Lucene wildcard conversion tests --- + + /** + * Tests that SQL '%' wildcard is converted to Lucene '*'. 
+ */ + public void testWildcardQueryConvertsPercentToStar() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + RexCall call = buildSingleFieldRexCallWithParams("title", "test%", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("test*", wildcardQb.value()); + } + } + + /** + * Tests that SQL '_' wildcard is converted to Lucene '?'. + */ + public void testWildcardQueryConvertsUnderscoreToQuestionMark() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + RexCall call = buildSingleFieldRexCallWithParams("title", "te_t", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("te?t", wildcardQb.value()); + } + } + + /** + * Tests that escaped SQL wildcards (\% and \_) are treated as literal characters. 
+ */ + public void testWildcardQueryEscapedWildcardsRemainLiteral() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + RexCall call = buildSingleFieldRexCallWithParams("title", "100\\%\\_done", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("100%_done", wildcardQb.value()); + } + } + + /** + * Tests mixed SQL wildcards and escaped wildcards in a single pattern. + */ + public void testWildcardQueryMixedEscapedAndUnescaped() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + RexCall call = buildSingleFieldRexCallWithParams("title", "%foo\\_bar_", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("*foo_bar?", wildcardQb.value()); + } + } + + /** + * Tests that a pattern with no SQL wildcards passes through unchanged. 
+ */ + public void testWildcardQueryNoSqlWildcardsPassesThrough() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + RexCall call = buildSingleFieldRexCallWithParams("title", "hello*world?", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("hello*world?", wildcardQb.value()); + } + } + + /** + * Tests that an escaped backslash (\\) followed by a wildcard correctly produces + * a Lucene-escaped literal backslash (\\) plus the converted wildcard. + */ + public void testWildcardQueryEscapedBackslashFollowedByWildcard() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + // Java string "\\\\%" is runtime chars: \, \, % + // Expected: first \ escapes second \ → literal \ (emitted as \\ for Lucene), then % is unescaped → * + RexCall call = buildSingleFieldRexCallWithParams("title", "\\\\%", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("\\\\*", wildcardQb.value()); + } + } + + /** + * Tests that a backslash before a non-wildcard character preserves both characters. 
+ */ + public void testWildcardQueryBackslashBeforeNonWildcard() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + // Java string "\\n" is runtime chars: \, n + RexCall call = buildSingleFieldRexCallWithParams("title", "test\\n", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("test\\n", wildcardQb.value()); + } + } + + /** + * Tests that a trailing backslash is preserved in the output. + */ + public void testWildcardQueryTrailingBackslash() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + // Java string "test\\" is runtime chars: t, e, s, t, \ + RexCall call = buildSingleFieldRexCallWithParams("title", "test\\", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("test\\", wildcardQb.value()); + } + } + // --- QuerySerializer (no-field) tests --- /**