Skip to content

Commit 2af7321

Browse files
author
Guian Gumpac
authored
Add support for wildcard_query function to the new engine (#156) (#1108)
Signed-off-by: Guian Gumpac <[email protected]>
1 parent 64a3794 commit 2af7321

File tree

29 files changed

+1015
-38
lines changed

29 files changed

+1015
-38
lines changed

core/src/main/java/org/opensearch/sql/expression/DSL.java

+4
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,10 @@ public static FunctionExpression match_bool_prefix(Expression... args) {
715715
return compile(FunctionProperties.None, BuiltinFunctionName.MATCH_BOOL_PREFIX, args);
716716
}
717717

718+
public static FunctionExpression wildcard_query(Expression... args) {
719+
return compile(FunctionProperties.None,BuiltinFunctionName.WILDCARD_QUERY, args);
720+
}
721+
718722
public static FunctionExpression now(FunctionProperties functionProperties,
719723
Expression... args) {
720724
return compile(functionProperties, BuiltinFunctionName.NOW, args);

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,9 @@ public enum BuiltinFunctionName {
228228
MATCHQUERY(FunctionName.of("matchquery")),
229229
MULTI_MATCH(FunctionName.of("multi_match")),
230230
MULTIMATCH(FunctionName.of("multimatch")),
231-
MULTIMATCHQUERY(FunctionName.of("multimatchquery"));
231+
MULTIMATCHQUERY(FunctionName.of("multimatchquery")),
232+
WILDCARDQUERY(FunctionName.of("wildcardquery")),
233+
WILDCARD_QUERY(FunctionName.of("wildcard_query"));
232234

233235
private final FunctionName name;
234236

core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java

+7
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ public void register(BuiltinFunctionRepository repository) {
4242
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
4343
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASEQUERY));
4444
repository.register(match_phrase_prefix());
45+
repository.register(wildcard_query(BuiltinFunctionName.WILDCARD_QUERY));
46+
repository.register(wildcard_query(BuiltinFunctionName.WILDCARDQUERY));
4547
}
4648

4749
private static FunctionResolver match_bool_prefix() {
@@ -83,6 +85,11 @@ private static FunctionResolver query_string() {
8385
return new RelevanceFunctionResolver(funcName, STRUCT);
8486
}
8587

88+
private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery) {
89+
FunctionName funcName = wildcardQuery.getName();
90+
return new RelevanceFunctionResolver(funcName, STRING);
91+
}
92+
8693
public static class OpenSearchFunction extends FunctionExpression {
8794
private final FunctionName functionName;
8895
private final List<Expression> arguments;

core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java

+28
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,34 @@ void query_string_expression_two_fields() {
540540
AstDSL.unresolvedArg("query", stringLiteral("query_value"))));
541541
}
542542

543+
@Test
544+
void wildcard_query_expression() {
545+
assertAnalyzeEqual(
546+
DSL.wildcard_query(
547+
DSL.namedArgument("field", DSL.literal("test")),
548+
DSL.namedArgument("query", DSL.literal("query_value*"))),
549+
AstDSL.function("wildcard_query",
550+
unresolvedArg("field", stringLiteral("test")),
551+
unresolvedArg("query", stringLiteral("query_value*"))));
552+
}
553+
554+
@Test
555+
void wildcard_query_expression_all_params() {
556+
assertAnalyzeEqual(
557+
DSL.wildcard_query(
558+
DSL.namedArgument("field", DSL.literal("test")),
559+
DSL.namedArgument("query", DSL.literal("query_value*")),
560+
DSL.namedArgument("boost", DSL.literal("1.5")),
561+
DSL.namedArgument("case_insensitive", DSL.literal("true")),
562+
DSL.namedArgument("rewrite", DSL.literal("scoring_boolean"))),
563+
AstDSL.function("wildcard_query",
564+
unresolvedArg("field", stringLiteral("test")),
565+
unresolvedArg("query", stringLiteral("query_value*")),
566+
unresolvedArg("boost", stringLiteral("1.5")),
567+
unresolvedArg("case_insensitive", stringLiteral("true")),
568+
unresolvedArg("rewrite", stringLiteral("scoring_boolean"))));
569+
}
570+
543571
@Test
544572
public void match_phrase_prefix_all_params() {
545573
assertAnalyzeEqual(

core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java

+8
Original file line numberDiff line numberDiff line change
@@ -197,4 +197,12 @@ void query_string() {
197197
fields.getValue(), query.getValue()),
198198
expr.toString());
199199
}
200+
201+
@Test
202+
void wildcard_query() {
203+
FunctionExpression expr = DSL.wildcard_query(field, query);
204+
assertEquals(String.format("wildcard_query(field=%s, query=%s)",
205+
field.getValue(), query.getValue()),
206+
expr.toString());
207+
}
200208
}

docs/user/dql/functions.rst

+55
Original file line numberDiff line numberDiff line change
@@ -3299,6 +3299,59 @@ Example searching for field Tags::
32993299
| [Winnie-the-<em>Pooh</em>] |
33003300
+----------------------------------------------+
33013301

3302+
WILDCARD_QUERY
3303+
--------------
3304+
3305+
Description
3306+
>>>>>>>>>>>
3307+
3308+
``wildcard_query(field_expression, query_expression[, option=<option_value>]*)``
3309+
3310+
The ``wildcard_query`` function maps to the ``wildcard`` query used in the search engine. It returns documents that match the provided text in the specified field.
3311+
OpenSearch supports wildcard characters ``*`` and ``?``. See the full description here: https://opensearch.org/docs/latest/opensearch/query-dsl/term/#wildcards.
3312+
You may use a backslash ``\`` to escape the SQL wildcard characters ``%`` and ``_`` (written as ``\%`` and ``\_``) so that they are matched literally.
3313+
3314+
Available parameters include:
3315+
3316+
- boost
3317+
- case_insensitive
3318+
- rewrite
3319+
3320+
For backward compatibility, ``wildcardquery`` is also supported and maps to the same query.
3321+
3322+
Example with only the ``field`` and ``query`` expressions, with all other parameters set to their default values::
3323+
3324+
os> select Body from wildcard where wildcard_query(Body, 'test wildcard*');
3325+
fetched rows / total rows = 7/7
3326+
+-------------------------------------------+
3327+
| Body |
3328+
|-------------------------------------------|
3329+
| test wildcard |
3330+
| test wildcard in the end of the text% |
3331+
| test wildcard in % the middle of the text |
3332+
| test wildcard %% beside each other |
3333+
| test wildcard in the end of the text_ |
3334+
| test wildcard in _ the middle of the text |
3335+
| test wildcard __ beside each other |
3336+
+-------------------------------------------+
3337+
3338+
Another example to show how to set custom values for the optional parameters::
3339+
3340+
os> select Body from wildcard where wildcard_query(Body, 'test wildcard*', boost=0.7, case_insensitive=true, rewrite='constant_score');
3341+
fetched rows / total rows = 8/8
3342+
+-------------------------------------------+
3343+
| Body |
3344+
|-------------------------------------------|
3345+
| test wildcard |
3346+
| test wildcard in the end of the text% |
3347+
| test wildcard in % the middle of the text |
3348+
| test wildcard %% beside each other |
3349+
| test wildcard in the end of the text_ |
3350+
| test wildcard in _ the middle of the text |
3351+
| test wildcard __ beside each other |
3352+
| tEsT wIlDcArD sensitive cases |
3353+
+-------------------------------------------+
3354+
33023355
System Functions
33033356
================
33043357

@@ -3323,3 +3376,5 @@ Example::
33233376
|----------------+---------------+-----------------+------------------|
33243377
| DATE | INTEGER | DATETIME | STRUCT |
33253378
+----------------+---------------+-----------------+------------------+
3379+
3380+

doctest/test_data/wildcard.json

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{"index":{"_id":"0"}}
2+
{"Body":"test wildcard"}
3+
{"index":{"_id":"1"}}
4+
{"Body":"test wildcard in the end of the text%"}
5+
{"index":{"_id":"2"}}
6+
{"Body":"%test wildcard in the beginning of the text"}
7+
{"index":{"_id":"3"}}
8+
{"Body":"test wildcard in % the middle of the text"}
9+
{"index":{"_id":"4"}}
10+
{"Body":"test wildcard %% beside each other"}
11+
{"index":{"_id":"5"}}
12+
{"Body":"test wildcard in the end of the text_"}
13+
{"index":{"_id":"6"}}
14+
{"Body":"_test wildcard in the beginning of the text"}
15+
{"index":{"_id":"7"}}
16+
{"Body":"test wildcard in _ the middle of the text"}
17+
{"index":{"_id":"8"}}
18+
{"Body":"test wildcard __ beside each other"}
19+
{"index":{"_id":"9"}}
20+
{"Body":"test backslash wildcard \\_"}
21+
{"index":{"_id":"10"}}
22+
{"Body":"tEsT wIlDcArD sensitive cases"}

doctest/test_docs.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
NYC_TAXI = "nyc_taxi"
2727
BOOKS = "books"
2828
APACHE = "apache"
29+
WILDCARD = "wildcard"
2930

3031

3132
class DocTestConnection(OpenSearchConnection):
@@ -92,6 +93,7 @@ def set_up_test_indices(test):
9293
load_file("nyc_taxi.json", index_name=NYC_TAXI)
9394
load_file("books.json", index_name=BOOKS)
9495
load_file("apache.json", index_name=APACHE)
96+
load_file("wildcard.json", index_name=WILDCARD)
9597

9698

9799
def load_file(filename, index_name):
@@ -120,7 +122,7 @@ def set_up(test):
120122

121123
def tear_down(test):
122124
# drop leftover tables after each test
123-
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE], ignore_unavailable=True)
125+
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD], ignore_unavailable=True)
124126

125127

126128
docsuite = partial(doctest.DocFileSuite,

doctest/test_mapping/wildcard.json

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"mappings" : {
3+
"properties" : {
4+
"Body" : {
5+
"type" : "keyword"
6+
}
7+
}
8+
}
9+
}

integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,11 @@ public enum Index {
584584
CALCS(TestsConstants.TEST_INDEX_CALCS,
585585
"calcs",
586586
getMappingFile("calcs_index_mappings.json"),
587-
"src/test/resources/calcs.json"),;
587+
"src/test/resources/calcs.json"),
588+
WILDCARD(TestsConstants.TEST_INDEX_WILDCARD,
589+
"wildcard",
590+
getMappingFile("wildcard_index_mappings.json"),
591+
"src/test/resources/wildcard.json"),;
588592

589593
private final String name;
590594
private final String type;

integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public class TestsConstants {
5353
public final static String TEST_INDEX_BEER = TEST_INDEX + "_beer";
5454
public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing";
5555
public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs";
56+
public final static String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard";
5657

5758
public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
5859
public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
7+
package org.opensearch.sql.ppl;
8+
9+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WILDCARD;
10+
import static org.opensearch.sql.util.MatcherUtils.rows;
11+
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
12+
13+
import java.io.IOException;
14+
import org.json.JSONObject;
15+
import org.junit.Test;
16+
17+
public class LikeQueryIT extends PPLIntegTestCase {
18+
19+
@Override
20+
public void init() throws IOException {
21+
loadIndex(Index.WILDCARD);
22+
}
23+
24+
@Test
25+
public void test_like_with_percent() throws IOException {
26+
String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(KeywordBody, 'test wildcard%') | fields KeywordBody";
27+
JSONObject result = executeQuery(query);
28+
verifyDataRows(result,
29+
rows("test wildcard"),
30+
rows("test wildcard in the end of the text%"),
31+
rows("test wildcard in % the middle of the text"),
32+
rows("test wildcard %% beside each other"),
33+
rows("test wildcard in the end of the text_"),
34+
rows("test wildcard in _ the middle of the text"),
35+
rows("test wildcard __ beside each other"));
36+
}
37+
38+
@Test
39+
public void test_like_with_escaped_percent() throws IOException {
40+
String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(KeywordBody, '\\\\%test wildcard%') | fields KeywordBody";
41+
JSONObject result = executeQuery(query);
42+
verifyDataRows(result,
43+
rows("%test wildcard in the beginning of the text"));
44+
}
45+
46+
@Test
47+
public void test_like_in_where_with_escaped_underscore() throws IOException {
48+
String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(KeywordBody, '\\\\_test wildcard%') | fields KeywordBody";
49+
JSONObject result = executeQuery(query);
50+
verifyDataRows(result,
51+
rows("_test wildcard in the beginning of the text"));
52+
}
53+
54+
@Test
55+
public void test_like_on_text_field_with_one_word() throws IOException {
56+
String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test*') | fields TextBody";
57+
JSONObject result = executeQuery(query);
58+
assertEquals(9, result.getInt("total"));
59+
}
60+
61+
@Test
62+
public void test_like_on_text_keyword_field_with_one_word() throws IOException {
63+
String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextKeywordBody, 'test*') | fields TextKeywordBody";
64+
JSONObject result = executeQuery(query);
65+
assertEquals(8, result.getInt("total"));
66+
}
67+
68+
@Test
69+
public void test_like_on_text_keyword_field_with_greater_than_one_word() throws IOException {
70+
String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextKeywordBody, 'test wild*') | fields TextKeywordBody";
71+
JSONObject result = executeQuery(query);
72+
assertEquals(7, result.getInt("total"));
73+
}
74+
75+
@Test
76+
public void test_like_on_text_field_with_greater_than_one_word() throws IOException {
77+
String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild*') | fields TextBody";
78+
JSONObject result = executeQuery(query);
79+
assertEquals(0, result.getInt("total"));
80+
}
81+
82+
@Test
83+
public void test_convert_field_text_to_keyword() throws IOException {
84+
String query = "source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextKeywordBody, '*') | fields TextKeywordBody";
85+
String result = explainQueryToString(query);
86+
assertTrue(result.contains("TextKeywordBody.keyword"));
87+
}
88+
}

0 commit comments

Comments
 (0)