Skip to content

Commit c70a069

Browse files
committed
Unified PPL data type
Signed-off-by: Peng Huo <[email protected]>
1 parent 28275b8 commit c70a069

File tree

14 files changed

+366
-22
lines changed

14 files changed

+366
-22
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.lang;
7+
8+
import static org.opensearch.sql.lang.LangSpec.LangType.PPL;
9+
10+
import org.opensearch.sql.data.type.ExprType;
11+
12+
/**
13+
* Represents a language specification for query processing.
14+
*
15+
* <p>This interface defines basic methods for language-specific behaviors, such as determining the
16+
* language type and mapping expression types to type names. Two language specifications are
17+
* provided: SQL and PPL.
18+
*/
19+
public interface LangSpec {
20+
/** Enumerates the supported language types. */
21+
enum LangType {
22+
/** SQL language specification. */
23+
SQL,
24+
/** PPL (Piped Processing Language) language specification. */
25+
PPL
26+
}
27+
28+
/** The default SQL language specification instance. */
29+
LangSpec SQL_SPEC = new LangSpec() {};
30+
31+
/**
32+
* Returns a language specification instance based on the provided language name.
33+
*
34+
* @param language the name of the language, case-insensitive.
35+
* @return the PPL language specification if the language is PPL (ignoring case); otherwise, the
36+
* SQL language specification.
37+
*/
38+
static LangSpec fromLanguage(String language) {
39+
if (PPL.name().equalsIgnoreCase(language)) {
40+
return PPLLangSpec.PPL_SPEC;
41+
} else {
42+
return SQL_SPEC;
43+
}
44+
}
45+
46+
/**
47+
* Returns the language type of this specification.
48+
*
49+
* <p>By default, the language is considered SQL.
50+
*
51+
* @return the language type, SQL by default.
52+
*/
53+
default LangType language() {
54+
return LangType.SQL;
55+
}
56+
57+
/**
58+
* Returns the type name for the given expression type.
59+
*
60+
* <p>This default implementation returns the result of {@code exprType.typeName()}.
61+
*
62+
* @param exprType the expression type.
63+
* @return the type name of the expression.
64+
*/
65+
default String typeName(ExprType exprType) {
66+
return exprType.typeName();
67+
}
68+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.lang;
7+
8+
import java.util.HashMap;
9+
import java.util.Map;
10+
import org.opensearch.sql.data.type.ExprCoreType;
11+
import org.opensearch.sql.data.type.ExprType;
12+
13+
/**
14+
* PPL language specification implementation.
15+
*
16+
* <p>This class provides a singleton implementation of {@link LangSpec} for PPL. It defines a
17+
* custom mapping from expression types to PPL type names.
18+
*/
19+
public class PPLLangSpec implements LangSpec {
20+
21+
public static final PPLLangSpec PPL_SPEC = new PPLLangSpec();
22+
23+
private static Map<ExprType, String> exprTypeToPPLType = new HashMap<>();
24+
25+
static {
26+
exprTypeToPPLType.put(ExprCoreType.BYTE, "tinyint");
27+
exprTypeToPPLType.put(ExprCoreType.SHORT, "smallint");
28+
exprTypeToPPLType.put(ExprCoreType.INTEGER, "int");
29+
exprTypeToPPLType.put(ExprCoreType.LONG, "bigint");
30+
}
31+
32+
private PPLLangSpec() {}
33+
34+
@Override
35+
public LangType language() {
36+
return LangType.PPL;
37+
}
38+
39+
/**
40+
* Returns the corresponding PPL type name for the given expression type. If the expression type
41+
* is not mapped, it returns the default type name.
42+
*
43+
* @param exprType the expression type.
44+
* @return the PPL type name associated with the expression type, or the default type name.
45+
*/
46+
@Override
47+
public String typeName(ExprType exprType) {
48+
return exprTypeToPPLType.getOrDefault(exprType, exprType.typeName());
49+
}
50+
}

core/src/main/java/org/opensearch/sql/utils/SystemIndexUtils.java

+50-4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import lombok.Getter;
99
import lombok.RequiredArgsConstructor;
1010
import lombok.experimental.UtilityClass;
11+
import org.opensearch.sql.lang.LangSpec;
1112

1213
/** System Index Utils. Todo. Find the better name for this class. */
1314
@UtilityClass
@@ -39,7 +40,47 @@ public static Boolean isSystemIndex(String indexName) {
3940
* @return system mapping table.
4041
*/
4142
public static String mappingTable(String indexName) {
42-
return String.join(".", indexName, SYS_MAPPINGS_SUFFIX);
43+
return mappingTable(indexName, LangSpec.SQL_SPEC);
44+
}
45+
46+
public static String mappingTable(String indexName, LangSpec langSpec) {
47+
48+
return String.join(".", indexName, encodeLangSpec(langSpec));
49+
}
50+
51+
/**
52+
* Encodes the language specification into a system mappings suffix.
53+
*
54+
* <p>The returned suffix is composed of the language name (e.g., "SQL" or "PPL") concatenated
55+
* with an underscore and the system mappings suffix constant. For example:
56+
* "SQL_MAPPINGS_ODFE_SYS_TABLE".
57+
*
58+
* @param spec the language specification.
59+
* @return the encoded system mappings suffix.
60+
*/
61+
public static String encodeLangSpec(LangSpec spec) {
62+
return spec.language().name() + "_" + SYS_MAPPINGS_SUFFIX;
63+
}
64+
65+
/**
66+
* Extracts the language specification from a given system mappings suffix.
67+
*
68+
* <p>This method expects the suffix to start with the language name followed by an underscore.
69+
* For example, given "SQL_MAPPINGS_ODFE_SYS_TABLE", it extracts "SQL" and returns the
70+
* corresponding language specification via {@link LangSpec#fromLanguage(String)}. If the expected
71+
* format is not met, the default SQL specification is returned.
72+
*
73+
* @param systemMappingsSuffix the system mappings suffix.
74+
* @return the language specification extracted from the suffix, or {@link LangSpec#SQL_SPEC} if
75+
* the format is invalid.
76+
*/
77+
public static LangSpec extractLangSpec(String systemMappingsSuffix) {
78+
int underscoreIndex = systemMappingsSuffix.indexOf('_');
79+
if (underscoreIndex <= 0) {
80+
return LangSpec.SQL_SPEC;
81+
}
82+
String langName = systemMappingsSuffix.substring(0, underscoreIndex);
83+
return LangSpec.fromLanguage(langName);
4384
}
4485

4586
/**
@@ -52,10 +93,10 @@ public static SystemTable systemTable(String indexName) {
5293
String suffix = indexName.substring(lastDot + 1);
5394
String tableName = indexName.substring(0, lastDot).replace("%", "*");
5495

55-
if (suffix.equalsIgnoreCase(SYS_META_SUFFIX)) {
96+
if (suffix.endsWith(SYS_META_SUFFIX)) {
5697
return new SystemInfoTable(tableName);
57-
} else if (suffix.equalsIgnoreCase(SYS_MAPPINGS_SUFFIX)) {
58-
return new MetaInfoTable(tableName);
98+
} else if (suffix.endsWith(SYS_MAPPINGS_SUFFIX)) {
99+
return new MetaInfoTable(tableName, extractLangSpec(suffix));
59100
} else {
60101
throw new IllegalStateException("Invalid system index name: " + indexName);
61102
}
@@ -66,6 +107,10 @@ public interface SystemTable {
66107

67108
String getTableName();
68109

110+
default LangSpec getLangSpec() {
111+
return LangSpec.SQL_SPEC;
112+
}
113+
69114
default boolean isSystemInfoTable() {
70115
return false;
71116
}
@@ -93,6 +138,7 @@ public boolean isSystemInfoTable() {
93138
public static class MetaInfoTable implements SystemTable {
94139

95140
private final String tableName;
141+
private final LangSpec langSpec;
96142

97143
public boolean isMetaInfoTable() {
98144
return true;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.lang;
7+
8+
import static org.junit.jupiter.api.Assertions.*;
9+
10+
import org.junit.jupiter.api.Test;
11+
import org.opensearch.sql.data.type.ExprCoreType;
12+
13+
class LangSpecTest {
14+
@Test
15+
public void testFromLanguageSQL() {
16+
LangSpec spec = LangSpec.fromLanguage("SQL");
17+
assertEquals(LangSpec.LangType.SQL, spec.language(), "Expected language type to be SQL");
18+
assertSame(LangSpec.SQL_SPEC, spec, "Expected SQL_SPEC instance for SQL language.");
19+
}
20+
21+
@Test
22+
public void testSQLSpecDefaultTypeName() {
23+
String result = LangSpec.SQL_SPEC.typeName(ExprCoreType.BYTE);
24+
assertEquals("BYTE", result, "SQL_SPEC should return the expression type's default type name.");
25+
}
26+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.lang;
7+
8+
import static org.junit.jupiter.api.Assertions.*;
9+
import static org.opensearch.sql.lang.PPLLangSpec.PPL_SPEC;
10+
11+
import org.junit.jupiter.api.Test;
12+
import org.opensearch.sql.data.type.ExprCoreType;
13+
14+
class PPLLangSpecTest {
15+
/** Tests that the language type of the PPL specification is PPL. */
16+
@Test
17+
public void testPPLSpecLanguage() {
18+
PPLLangSpec spec = PPL_SPEC;
19+
assertEquals(LangSpec.LangType.PPL, spec.language(), "Expected language to be PPL.");
20+
}
21+
22+
/**
23+
* Tests that the PPL specification returns the correct type name mapping for known expression
24+
* types. Assumes that ExprCoreType constants are available.
25+
*/
26+
@Test
27+
public void testPPLSpecTypeNameMapping() {
28+
PPLLangSpec spec = PPL_SPEC;
29+
assertEquals("tinyint", spec.typeName(ExprCoreType.BYTE), "BYTE should map to tinyint.");
30+
assertEquals("smallint", spec.typeName(ExprCoreType.SHORT), "SHORT should map to smallint.");
31+
assertEquals("int", spec.typeName(ExprCoreType.INTEGER), "INTEGER should map to int.");
32+
assertEquals("bigint", spec.typeName(ExprCoreType.LONG), "LONG should map to bigint.");
33+
}
34+
35+
/**
36+
* Tests that an unmapped expression type returns its default type name in the PPL specification.
37+
*/
38+
@Test
39+
public void testPPLSpecDefaultTypeName() {
40+
String result = PPL_SPEC.typeName(ExprCoreType.STRING);
41+
assertEquals("STRING", result, "Unmapped expression type should return its default type name.");
42+
}
43+
}

core/src/test/java/org/opensearch/sql/utils/SystemIndexUtilsTest.java

+49-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
import static org.opensearch.sql.utils.SystemIndexUtils.systemTable;
1515

1616
import org.junit.jupiter.api.Test;
17+
import org.opensearch.sql.lang.LangSpec;
18+
import org.opensearch.sql.lang.PPLLangSpec;
1719

1820
class SystemIndexUtilsTest {
1921

@@ -25,7 +27,7 @@ void test_system_index() {
2527

2628
@Test
2729
void test_compose_mapping_table() {
28-
assertEquals("employee.MAPPINGS_ODFE_SYS_TABLE", mappingTable("employee"));
30+
assertEquals("employee.SQL_MAPPINGS_ODFE_SYS_TABLE", mappingTable("employee"));
2931
}
3032

3133
@Test
@@ -61,4 +63,50 @@ void throw_exception_for_invalid_index() {
6163
assertThrows(IllegalStateException.class, () -> systemTable("employee._ODFE_SYS_TABLE"));
6264
assertEquals("Invalid system index name: employee._ODFE_SYS_TABLE", exception.getMessage());
6365
}
66+
67+
/** Tests that encoding the SQL specification produces the expected suffix. */
68+
@Test
69+
public void testEncodeLangSpecSQL() {
70+
String expected = "SQL_MAPPINGS_ODFE_SYS_TABLE";
71+
String encoded = SystemIndexUtils.encodeLangSpec(LangSpec.SQL_SPEC);
72+
assertEquals(expected, encoded, "Encoded SQL lang spec should match expected suffix.");
73+
}
74+
75+
/** Tests that encoding the PPL specification produces the expected suffix. */
76+
@Test
77+
public void testEncodeLangSpecPPL() {
78+
String expected = "PPL_MAPPINGS_ODFE_SYS_TABLE";
79+
String encoded = SystemIndexUtils.encodeLangSpec(PPLLangSpec.PPL_SPEC);
80+
assertEquals(expected, encoded, "Encoded PPL lang spec should match expected suffix.");
81+
}
82+
83+
/** Tests that extracting the language specification from a valid SQL suffix returns SQL_SPEC. */
84+
@Test
85+
public void testExtractLangSpecValidSQL() {
86+
LangSpec spec = SystemIndexUtils.extractLangSpec("SQL_MAPPINGS_ODFE_SYS_TABLE");
87+
assertEquals(LangSpec.SQL_SPEC, spec, "Extracting from SQL suffix should return SQL_SPEC.");
88+
}
89+
90+
/** Tests that extracting the language specification from a valid PPL suffix returns PPL_SPEC. */
91+
@Test
92+
public void testExtractLangSpecValidPPL() {
93+
LangSpec spec = SystemIndexUtils.extractLangSpec("PPL_MAPPINGS_ODFE_SYS_TABLE");
94+
assertEquals(PPLLangSpec.PPL_SPEC, spec, "Extracting from PPL suffix should return PPL_SPEC.");
95+
}
96+
97+
/** Tests that an improperly formatted suffix defaults to SQL_SPEC. */
98+
@Test
99+
public void testExtractLangSpecInvalidFormat() {
100+
assertEquals(
101+
LangSpec.SQL_SPEC,
102+
SystemIndexUtils.extractLangSpec("FORMAT"),
103+
"Invalid format should default to SQL_SPEC.");
104+
}
105+
106+
@Test
107+
public void testGetLangSpec() {
108+
assertEquals(
109+
LangSpec.SQL_SPEC,
110+
new SystemIndexUtils.SystemInfoTable("employee.MAPPINGS_ODFE_SYS_TABLE").getLangSpec());
111+
}
64112
}

integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java

+9-9
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ public void test_numeric_data_types() throws IOException {
3232
JSONObject result = executeQuery(String.format("source=%s", TEST_INDEX_DATATYPE_NUMERIC));
3333
verifySchema(
3434
result,
35-
schema("long_number", "long"),
36-
schema("integer_number", "integer"),
37-
schema("short_number", "short"),
38-
schema("byte_number", "byte"),
35+
schema("long_number", "bigint"),
36+
schema("integer_number", "int"),
37+
schema("short_number", "smallint"),
38+
schema("byte_number", "tinyint"),
3939
schema("double_number", "double"),
4040
schema("float_number", "float"),
4141
schema("half_float_number", "float"),
@@ -73,10 +73,10 @@ public void test_long_integer_data_type() throws IOException {
7373
TEST_INDEX_DATATYPE_NUMERIC));
7474
verifySchema(
7575
result,
76-
schema("int1", "integer"),
77-
schema("int2", "integer"),
78-
schema("long1", "long"),
79-
schema("long2", "long"));
76+
schema("int1", "int"),
77+
schema("int2", "int"),
78+
schema("long1", "bigint"),
79+
schema("long2", "bigint"));
8080
}
8181

8282
@Test
@@ -86,6 +86,6 @@ public void test_alias_data_type() throws IOException {
8686
String.format(
8787
"source=%s | where alias_col > 1 " + "| fields original_col, alias_col ",
8888
TEST_INDEX_ALIAS));
89-
verifySchema(result, schema("original_col", "integer"), schema("alias_col", "integer"));
89+
verifySchema(result, schema("original_col", "int"), schema("alias_col", "int"));
9090
}
9191
}

0 commit comments

Comments
 (0)