From b8da55a170666ec91e076f16a9fe18749a571a70 Mon Sep 17 00:00:00 2001 From: Suresh N S Date: Tue, 19 May 2026 20:23:55 +0530 Subject: [PATCH 1/2] Fix for converting SQL wildcard to Lucene syntax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding convertSqlWildcardToLucene method that converts SQL wildcards (% → *, _ → ?) with escape handling. Signed-off-by: Suresh N S --- .../serializers/WildcardQuerySerializer.java | 44 ++++++++- .../lucene/QuerySerializerRegistryTests.java | 92 +++++++++++++++++++ 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java index c1624b6687d4d..36cfb47c9df88 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java @@ -27,7 +27,49 @@ protected String functionName() { @Override protected QueryBuilder createQueryBuilder(ConversionUtils.RelevanceOperands operands) { - return new WildcardQueryBuilder(operands.fieldName(), operands.query()); + String convertedPattern = convertSqlWildcardToLucene(operands.query()); + return new WildcardQueryBuilder(operands.fieldName(), convertedPattern); + } + + /** + * Converts SQL wildcard characters (% and _) to Lucene wildcard characters (* and ?). + * Escaped wildcards (\\% and \\_) are treated as literal characters. 
+ */ + private static String convertSqlWildcardToLucene(String text) { + final char ESCAPE = '\\'; + StringBuilder result = new StringBuilder(text.length()); + boolean escaped = false; + + for (char c : text.toCharArray()) { + switch (c) { + case ESCAPE: + escaped = true; + result.append(c); + break; + case '%': + if (escaped) { + result.deleteCharAt(result.length() - 1); + result.append('%'); + } else { + result.append('*'); + } + escaped = false; + break; + case '_': + if (escaped) { + result.deleteCharAt(result.length() - 1); + result.append('_'); + } else { + result.append('?'); + } + escaped = false; + break; + default: + result.append(c); + escaped = false; + } + } + return result.toString(); } @Override diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java index 9c54327bbdac9..6eecf21432eae 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java +++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java @@ -391,6 +391,98 @@ public void testWildcardQuerySerializerWithBoost() throws IOException { } } + // --- SQL-to-Lucene wildcard conversion tests --- + + /** + * Tests that SQL '%' wildcard is converted to Lucene '*'. 
+ */ + public void testWildcardQueryConvertsPercentToStar() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + RexCall call = buildSingleFieldRexCallWithParams("title", "test%", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("test*", wildcardQb.value()); + } + } + + /** + * Tests that SQL '_' wildcard is converted to Lucene '?'. + */ + public void testWildcardQueryConvertsUnderscoreToQuestionMark() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + RexCall call = buildSingleFieldRexCallWithParams("title", "te_t", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("te?t", wildcardQb.value()); + } + } + + /** + * Tests that escaped SQL wildcards (\% and \_) are treated as literal characters. 
+ */ + public void testWildcardQueryEscapedWildcardsRemainLiteral() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + RexCall call = buildSingleFieldRexCallWithParams("title", "100\\%\\_done", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("100%_done", wildcardQb.value()); + } + } + + /** + * Tests mixed SQL wildcards and escaped wildcards in a single pattern. + */ + public void testWildcardQueryMixedEscapedAndUnescaped() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + RexCall call = buildSingleFieldRexCallWithParams("title", "%foo\\_bar_", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("*foo_bar?", wildcardQb.value()); + } + } + + /** + * Tests that a pattern with no SQL wildcards passes through unchanged. 
+ */ + public void testWildcardQueryNoSqlWildcardsPassesThrough() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + RexCall call = buildSingleFieldRexCallWithParams("title", "hello*world?", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("hello*world?", wildcardQb.value()); + } + } + // --- QuerySerializer (no-field) tests --- /** From 221ef48cf202e024972dad38644ff391f9d7c51a Mon Sep 17 00:00:00 2001 From: Suresh N S Date: Tue, 19 May 2026 21:03:56 +0530 Subject: [PATCH 2/2] Fixing an issue identified with backslash escape Signed-off-by: Suresh N S --- .../serializers/WildcardQuerySerializer.java | 54 +++++++++-------- .../lucene/QuerySerializerRegistryTests.java | 59 +++++++++++++++++++ 2 files changed, 88 insertions(+), 25 deletions(-) diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java index 36cfb47c9df88..c7d6ca7360c76 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/serializers/WildcardQuerySerializer.java @@ -33,7 +33,8 @@ protected QueryBuilder createQueryBuilder(ConversionUtils.RelevanceOperands oper /** * Converts SQL wildcard characters (% and _) to Lucene wildcard characters (* and ?). 
-     * Escaped wildcards (\\% and \\_) are treated as literal characters.
+     * Escaped wildcards (\% and \_) are treated as literal characters.
+     * An escaped backslash (\\) is emitted as \\ because Lucene also uses \ as its escape character.
      */
     private static String convertSqlWildcardToLucene(String text) {
         final char ESCAPE = '\\';
@@ -41,34 +42,37 @@ private static String convertSqlWildcardToLucene(String text) {
         boolean escaped = false;
 
         for (char c : text.toCharArray()) {
-            switch (c) {
-                case ESCAPE:
-                    escaped = true;
-                    result.append(c);
-                    break;
-                case '%':
-                    if (escaped) {
-                        result.deleteCharAt(result.length() - 1);
+            if (escaped) {
+                switch (c) {
+                    case '%':
                         result.append('%');
-                    } else {
-                        result.append('*');
-                    }
-                    escaped = false;
-                    break;
-                case '_':
-                    if (escaped) {
-                        result.deleteCharAt(result.length() - 1);
+                        break;
+                    case '_':
                         result.append('_');
-                    } else {
-                        result.append('?');
-                    }
-                    escaped = false;
-                    break;
-                default:
-                    result.append(c);
-                    escaped = false;
+                        break;
+                    case ESCAPE:
+                        result.append(ESCAPE).append(ESCAPE); // Lucene also escapes with '\', so a literal backslash must stay doubled
+                        break;
+                    default:
+                        result.append(ESCAPE);
+                        result.append(c);
+                        break;
+                }
+                escaped = false;
+            } else if (c == ESCAPE) {
+                escaped = true;
+            } else if (c == '%') {
+                result.append('*');
+            } else if (c == '_') {
+                result.append('?');
+            } else {
+                result.append(c);
             }
         }
+        // Trailing backslash with nothing to escape — preserve it
+        if (escaped) {
+            result.append(ESCAPE);
+        }
         return result.toString();
     }
 
diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java
index 6eecf21432eae..0cc7191c7fb71 100644
--- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java
+++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/QuerySerializerRegistryTests.java
@@ -483,6 +483,65 @@ public void testWildcardQueryNoSqlWildcardsPassesThrough() throws IOException {
         }
     }
 
+    /**
+     * Tests that an escaped backslash (\\) followed by a wildcard produces a
+     * Lucene-escaped literal backslash (\\) plus the converted wildcard.
+     */
+    public void testWildcardQueryEscapedBackslashFollowedByWildcard() throws IOException {
+        DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY);
+        // Java string "\\\\%" is runtime chars: \, \, %
+        // Expected runtime chars: \, \, * (Lucene-escaped literal \, then % is unescaped → *)
+        RexCall call = buildSingleFieldRexCallWithParams("title", "\\\\%", "WILDCARD_QUERY", Map.of());
+        List fieldStorage = List.of(
+            new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false)
+        );
+
+        byte[] serialized = serializer.serialize(call, fieldStorage);
+
+        try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) {
+            WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class);
+            assertEquals("\\\\*", wildcardQb.value());
+        }
+    }
+
+    /**
+     * Tests that a backslash before a non-wildcard character preserves both characters.
+ */ + public void testWildcardQueryBackslashBeforeNonWildcard() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + // Java string "\\n" is runtime chars: \, n + RexCall call = buildSingleFieldRexCallWithParams("title", "test\\n", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("test\\n", wildcardQb.value()); + } + } + + /** + * Tests that a trailing backslash is preserved in the output. + */ + public void testWildcardQueryTrailingBackslash() throws IOException { + DelegatedPredicateSerializer serializer = serializers.get(ScalarFunction.WILDCARD_QUERY); + // Java string "test\\" is runtime chars: t, e, s, t, \ + RexCall call = buildSingleFieldRexCallWithParams("title", "test\\", "WILDCARD_QUERY", Map.of()); + List fieldStorage = List.of( + new FieldStorageInfo("title", "text", FieldType.TEXT, List.of(), List.of("lucene"), List.of(), false) + ); + + byte[] serialized = serializer.serialize(call, fieldStorage); + + try (StreamInput input = new NamedWriteableAwareStreamInput(StreamInput.wrap(serialized), WRITEABLE_REGISTRY)) { + WildcardQueryBuilder wildcardQb = (WildcardQueryBuilder) input.readNamedWriteable(QueryBuilder.class); + assertEquals("test\\", wildcardQb.value()); + } + } + // --- QuerySerializer (no-field) tests --- /**