Skip to content

Commit d5806cc

Browse files
kenrickyap14yapkc1acarbonetto
authored
json-valid PPL function (opensearch-project#3230)
Add json-valid PPL function (opensearch-project#3230) --------- Signed-off-by: Kenrick Yap <[email protected]> Signed-off-by: Kenrick Yap <[email protected]> Signed-off-by: kenrickyap <[email protected]> Signed-off-by: Andrew Carbonetto <[email protected]> Co-authored-by: Kenrick Yap <[email protected]> Co-authored-by: Andrew Carbonetto <[email protected]>
1 parent e7be8ca commit d5806cc

File tree

19 files changed

+279
-3
lines changed

19 files changed

+279
-3
lines changed

core/src/main/java/org/opensearch/sql/expression/DSL.java

+4
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,10 @@ public static FunctionExpression notLike(Expression... expressions) {
683683
return compile(FunctionProperties.None, BuiltinFunctionName.NOT_LIKE, expressions);
684684
}
685685

686+
public static FunctionExpression jsonValid(Expression... expressions) {
687+
return compile(FunctionProperties.None, BuiltinFunctionName.JSON_VALID, expressions);
688+
}
689+
686690
public static Aggregator avg(Expression... expressions) {
687691
return aggregate(BuiltinFunctionName.AVG, expressions);
688692
}

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

+3
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,9 @@ public enum BuiltinFunctionName {
204204
TRIM(FunctionName.of("trim")),
205205
UPPER(FunctionName.of("upper")),
206206

207+
/** Json Functions. */
208+
JSON_VALID(FunctionName.of("json_valid")),
209+
207210
/** NULL Test. */
208211
IS_NULL(FunctionName.of("is null")),
209212
IS_NOT_NULL(FunctionName.of("is not null")),

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.opensearch.sql.expression.datetime.DateTimeFunctions;
2929
import org.opensearch.sql.expression.datetime.IntervalClause;
3030
import org.opensearch.sql.expression.ip.IPFunctions;
31+
import org.opensearch.sql.expression.json.JsonFunctions;
3132
import org.opensearch.sql.expression.operator.arthmetic.ArithmeticFunctions;
3233
import org.opensearch.sql.expression.operator.arthmetic.MathematicalFunctions;
3334
import org.opensearch.sql.expression.operator.convert.TypeCastOperators;
@@ -83,6 +84,7 @@ public static synchronized BuiltinFunctionRepository getInstance() {
8384
SystemFunctions.register(instance);
8485
OpenSearchFunctions.register(instance);
8586
IPFunctions.register(instance);
87+
JsonFunctions.register(instance);
8688
}
8789
return instance;
8890
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.expression.json;
7+
8+
import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;
9+
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
10+
import static org.opensearch.sql.expression.function.FunctionDSL.define;
11+
import static org.opensearch.sql.expression.function.FunctionDSL.impl;
12+
13+
import lombok.experimental.UtilityClass;
14+
import org.opensearch.sql.expression.function.BuiltinFunctionName;
15+
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
16+
import org.opensearch.sql.expression.function.DefaultFunctionResolver;
17+
import org.opensearch.sql.utils.JsonUtils;
18+
19+
@UtilityClass
20+
public class JsonFunctions {
21+
public void register(BuiltinFunctionRepository repository) {
22+
repository.register(jsonValid());
23+
}
24+
25+
private DefaultFunctionResolver jsonValid() {
26+
return define(
27+
BuiltinFunctionName.JSON_VALID.getName(), impl(JsonUtils::isValidJson, BOOLEAN, STRING));
28+
}
29+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package org.opensearch.sql.utils;
2+
3+
import com.fasterxml.jackson.core.JsonProcessingException;
4+
import com.fasterxml.jackson.databind.ObjectMapper;
5+
import lombok.experimental.UtilityClass;
6+
import org.opensearch.sql.data.model.ExprValue;
7+
import org.opensearch.sql.data.model.ExprValueUtils;
8+
9+
@UtilityClass
10+
public class JsonUtils {
11+
/**
12+
* Checks if given JSON string can be parsed as valid JSON.
13+
*
14+
* @param jsonExprValue JSON string (e.g. "{\"hello\": \"world\"}").
15+
* @return true if the string can be parsed as valid JSON, else false.
16+
*/
17+
public static ExprValue isValidJson(ExprValue jsonExprValue) {
18+
ObjectMapper objectMapper = new ObjectMapper();
19+
20+
if (jsonExprValue.isNull() || jsonExprValue.isMissing()) {
21+
return ExprValueUtils.LITERAL_FALSE;
22+
}
23+
24+
try {
25+
objectMapper.readTree(jsonExprValue.stringValue());
26+
return ExprValueUtils.LITERAL_TRUE;
27+
} catch (JsonProcessingException e) {
28+
return ExprValueUtils.LITERAL_FALSE;
29+
}
30+
}
31+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.expression.json;
7+
8+
import static org.junit.jupiter.api.Assertions.assertEquals;
9+
import static org.junit.jupiter.api.Assertions.assertThrows;
10+
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE;
11+
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_MISSING;
12+
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL;
13+
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE;
14+
15+
import org.junit.jupiter.api.Test;
16+
import org.junit.jupiter.api.extension.ExtendWith;
17+
import org.mockito.junit.jupiter.MockitoExtension;
18+
import org.opensearch.sql.data.model.ExprValue;
19+
import org.opensearch.sql.data.model.ExprValueUtils;
20+
import org.opensearch.sql.exception.ExpressionEvaluationException;
21+
import org.opensearch.sql.expression.DSL;
22+
import org.opensearch.sql.expression.FunctionExpression;
23+
24+
@ExtendWith(MockitoExtension.class)
25+
public class JsonFunctionsTest {
26+
private static final ExprValue JsonNestedObject =
27+
ExprValueUtils.stringValue("{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}");
28+
private static final ExprValue JsonObject =
29+
ExprValueUtils.stringValue("{\"a\":\"1\",\"b\":\"2\"}");
30+
private static final ExprValue JsonArray = ExprValueUtils.stringValue("[1, 2, 3, 4]");
31+
private static final ExprValue JsonScalarString = ExprValueUtils.stringValue("\"abc\"");
32+
private static final ExprValue JsonEmptyString = ExprValueUtils.stringValue("");
33+
private static final ExprValue JsonInvalidObject =
34+
ExprValueUtils.stringValue("{\"invalid\":\"json\", \"string\"}");
35+
private static final ExprValue JsonInvalidScalar = ExprValueUtils.stringValue("abc");
36+
37+
@Test
38+
public void json_valid_returns_false() {
39+
assertEquals(LITERAL_FALSE, execute(JsonInvalidObject));
40+
assertEquals(LITERAL_FALSE, execute(JsonInvalidScalar));
41+
assertEquals(LITERAL_FALSE, execute(LITERAL_NULL));
42+
assertEquals(LITERAL_FALSE, execute(LITERAL_MISSING));
43+
}
44+
45+
@Test
46+
public void json_valid_throws_ExpressionEvaluationException() {
47+
assertThrows(
48+
ExpressionEvaluationException.class, () -> execute(ExprValueUtils.booleanValue(true)));
49+
}
50+
51+
@Test
52+
public void json_valid_returns_true() {
53+
assertEquals(LITERAL_TRUE, execute(JsonNestedObject));
54+
assertEquals(LITERAL_TRUE, execute(JsonObject));
55+
assertEquals(LITERAL_TRUE, execute(JsonArray));
56+
assertEquals(LITERAL_TRUE, execute(JsonScalarString));
57+
assertEquals(LITERAL_TRUE, execute(JsonEmptyString));
58+
}
59+
60+
private ExprValue execute(ExprValue jsonString) {
61+
FunctionExpression exp = DSL.jsonValid(DSL.literal(jsonString));
62+
return exp.valueOf();
63+
}
64+
}

docs/category.json

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"user/ppl/functions/datetime.rst",
3535
"user/ppl/functions/expressions.rst",
3636
"user/ppl/functions/ip.rst",
37+
"user/ppl/functions/json.rst",
3738
"user/ppl/functions/math.rst",
3839
"user/ppl/functions/relevance.rst",
3940
"user/ppl/functions/string.rst"

docs/user/dql/metadata.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ Example 1: Show All Indices Information
3535
SQL query::
3636

3737
os> SHOW TABLES LIKE '%'
38-
fetched rows / total rows = 10/10
38+
fetched rows / total rows = 11/11
3939
+----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------+
4040
| TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION |
4141
|----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------|
@@ -44,6 +44,7 @@ SQL query::
4444
| docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null |
4545
| docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null |
4646
| docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null |
47+
| docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null |
4748
| docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null |
4849
| docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null |
4950
| docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null |

docs/user/ppl/functions/json.rst

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
====================
2+
JSON Functions
3+
====================
4+
5+
.. rubric:: Table of contents
6+
7+
.. contents::
8+
:local:
9+
:depth: 1
10+
11+
JSON_VALID
12+
----------
13+
14+
Description
15+
>>>>>>>>>>>
16+
17+
Usage: `json_valid(json_string)` checks if `json_string` is a valid JSON-encoded string. Returns false if `json_string` is null or missing.
18+
19+
Argument type: STRING
20+
21+
Return type: BOOLEAN
22+
23+
Example::
24+
25+
> source=json_test | eval is_valid = json_valid(json_string) | fields test_name, json_string, is_valid
26+
fetched rows / total rows = 6/6
27+
+---------------------+---------------------------------+----------+
28+
| test_name | json_string | is_valid |
29+
|---------------------+---------------------------------+----------|
30+
| json nested object | {"a":"1","b":{"c":"2","d":"3"}} | True |
31+
| json object | {"a":"1","b":"2"} | True |
32+
| json array | [1, 2, 3, 4] | True |
33+
| json scalar string | "abc" | True |
34+
| json empty string | | True |
35+
| json invalid object | {"invalid":"json", "string"} | False |
36+
+---------------------+---------------------------------+----------+

doctest/test_data/json_test.json

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"}
2+
{"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"}
3+
{"test_name":"json array", "json_string":"[1, 2, 3, 4]"}
4+
{"test_name":"json scalar string", "json_string":"\"abc\""}
5+
{"test_name":"json empty string","json_string":""}
6+
{"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"}

doctest/test_docs.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
NESTED = "nested"
3131
DATASOURCES = ".ql-datasources"
3232
WEBLOGS = "weblogs"
33+
JSON_TEST = "json_test"
3334

3435
class DocTestConnection(OpenSearchConnection):
3536

@@ -123,6 +124,7 @@ def set_up_test_indices(test):
123124
load_file("nested_objects.json", index_name=NESTED)
124125
load_file("datasources.json", index_name=DATASOURCES)
125126
load_file("weblogs.json", index_name=WEBLOGS)
127+
load_file("json_test.json", index_name=JSON_TEST)
126128

127129

128130
def load_file(filename, index_name):
@@ -151,7 +153,7 @@ def set_up(test):
151153

152154
def tear_down(test):
153155
# drop leftover tables after each test
154-
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS], ignore_unavailable=True)
156+
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST], ignore_unavailable=True)
155157

156158

157159
docsuite = partial(doctest.DocFileSuite,

integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping;
2323
import static org.opensearch.sql.legacy.TestUtils.getGeopointIndexMapping;
2424
import static org.opensearch.sql.legacy.TestUtils.getJoinTypeIndexMapping;
25+
import static org.opensearch.sql.legacy.TestUtils.getJsonTestIndexMapping;
2526
import static org.opensearch.sql.legacy.TestUtils.getLocationIndexMapping;
2627
import static org.opensearch.sql.legacy.TestUtils.getMappingFile;
2728
import static org.opensearch.sql.legacy.TestUtils.getNestedSimpleIndexMapping;
@@ -745,7 +746,12 @@ public enum Index {
745746
TestsConstants.TEST_INDEX_GEOPOINT,
746747
"dates",
747748
getGeopointIndexMapping(),
748-
"src/test/resources/geopoints.json");
749+
"src/test/resources/geopoints.json"),
750+
JSON_TEST(
751+
TestsConstants.TEST_INDEX_JSON_TEST,
752+
"json",
753+
getJsonTestIndexMapping(),
754+
"src/test/resources/json_test.json");
749755

750756
private final String name;
751757
private final String type;

integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java

+5
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,11 @@ public static String getGeopointIndexMapping() {
250250
return getMappingFile(mappingFile);
251251
}
252252

253+
public static String getJsonTestIndexMapping() {
254+
String mappingFile = "json_test_index_mapping.json";
255+
return getMappingFile(mappingFile);
256+
}
257+
253258
public static void loadBulk(Client client, String jsonPath, String defaultIndex)
254259
throws Exception {
255260
System.out.println(String.format("Loading file %s into opensearch cluster", jsonPath));

integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public class TestsConstants {
5858
public static final String TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested";
5959
public static final String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls";
6060
public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint";
61+
public static final String TEST_INDEX_JSON_TEST = TEST_INDEX + "_json_test";
6162
public static final String DATASOURCES = ".ql-datasources";
6263

6364
public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ppl;
7+
8+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_JSON_TEST;
9+
import static org.opensearch.sql.util.MatcherUtils.rows;
10+
import static org.opensearch.sql.util.MatcherUtils.schema;
11+
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
12+
import static org.opensearch.sql.util.MatcherUtils.verifySchema;
13+
14+
import java.io.IOException;
15+
import org.json.JSONObject;
16+
import org.junit.jupiter.api.Test;
17+
18+
public class JsonFunctionsIT extends PPLIntegTestCase {
19+
@Override
20+
public void init() throws IOException {
21+
loadIndex(Index.JSON_TEST);
22+
}
23+
24+
@Test
25+
public void test_json_valid() throws IOException {
26+
JSONObject result;
27+
28+
result =
29+
executeQuery(
30+
String.format(
31+
"source=%s | where json_valid(json_string) | fields test_name",
32+
TEST_INDEX_JSON_TEST));
33+
verifySchema(result, schema("test_name", null, "string"));
34+
verifyDataRows(
35+
result,
36+
rows("json nested object"),
37+
rows("json object"),
38+
rows("json array"),
39+
rows("json scalar string"),
40+
rows("json empty string"));
41+
}
42+
43+
@Test
44+
public void test_not_json_valid() throws IOException {
45+
JSONObject result;
46+
47+
result =
48+
executeQuery(
49+
String.format(
50+
"source=%s | where not json_valid(json_string) | fields test_name",
51+
TEST_INDEX_JSON_TEST));
52+
verifySchema(result, schema("test_name", null, "string"));
53+
verifyDataRows(result, rows("json invalid object"), rows("json null"));
54+
}
55+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"mappings": {
3+
"properties": {
4+
"test_name": {
5+
"type": "keyword"
6+
},
7+
"json_string": {
8+
"type": "keyword"
9+
}
10+
}
11+
}
12+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{"index":{"_id":"0"}}
2+
{"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"}
3+
{"index":{"_id":"1"}}
4+
{"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"}
5+
{"index":{"_id":"2"}}
6+
{"test_name":"json array", "json_string":"[1, 2, 3, 4]"}
7+
{"index":{"_id":"3"}}
8+
{"test_name":"json scalar string", "json_string":"\"abc\""}
9+
{"index":{"_id":"4"}}
10+
{"test_name":"json empty string","json_string":""}
11+
{"index":{"_id":"5"}}
12+
{"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"}
13+
{"index":{"_id":"6"}}
14+
{"test_name":"json null", "json_string":null}

ppl/src/main/antlr/OpenSearchPPLLexer.g4

+3
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,9 @@ ISNULL: 'ISNULL';
332332
ISNOTNULL: 'ISNOTNULL';
333333
CIDRMATCH: 'CIDRMATCH';
334334

335+
// JSON FUNCTIONS
336+
JSON_VALID: 'JSON_VALID';
337+
335338
// FLOWCONTROL FUNCTIONS
336339
IFNULL: 'IFNULL';
337340
NULLIF: 'NULLIF';

0 commit comments

Comments
 (0)