Skip to content

Commit 22f3aae

Browse files
committed
Fix for Multisearch and Append command
Signed-off-by: Kai Huang <[email protected]>
1 parent a57796f commit 22f3aae

File tree

7 files changed

+87
-156
lines changed

7 files changed

+87
-156
lines changed

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,18 +1741,16 @@ public RelNode visitMultisearch(Multisearch node, CalcitePlanContext context) {
17411741
}
17421742

17431743
/**
1744-
* Finds the timestamp field for multisearch ordering.
1744+
* Finds the @timestamp field for multisearch ordering. Only @timestamp field is used for
1745+
* timestamp interleaving. Other timestamp-like fields are ignored.
17451746
*
1746-
* @param rowType The row type to search for timestamp fields
1747-
* @return The name of the timestamp field, or null if not found
1747+
* @param rowType The row type to search for @timestamp field
1748+
* @return "@timestamp" if the field exists, or null if not found
17481749
*/
17491750
private String findTimestampField(RelDataType rowType) {
1750-
String[] candidates = {"@timestamp", "_time", "timestamp", "time"};
1751-
for (String fieldName : candidates) {
1752-
RelDataTypeField field = rowType.getField(fieldName, false, false);
1753-
if (field != null) {
1754-
return fieldName;
1755-
}
1751+
RelDataTypeField field = rowType.getField("@timestamp", false, false);
1752+
if (field != null) {
1753+
return "@timestamp";
17561754
}
17571755
return null;
17581756
}

core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,20 @@
1616
import org.apache.calcite.rel.type.RelDataType;
1717
import org.apache.calcite.rel.type.RelDataTypeField;
1818
import org.apache.calcite.rex.RexNode;
19-
import org.apache.calcite.sql.validate.SqlValidatorUtil;
2019

2120
/**
22-
* Utility class for unifying schemas across multiple RelNodes with type conflict resolution. Uses
23-
* the same strategy as append command - renames conflicting fields to avoid type conflicts.
21+
* Utility class for unifying schemas across multiple RelNodes. Throws an exception when type
22+
* conflicts are detected.
2423
*/
2524
public class SchemaUnifier {
2625

2726
/**
28-
* Builds a unified schema for multiple nodes with type conflict resolution.
27+
* Builds a unified schema for multiple nodes. Throws an exception if type conflicts are detected.
2928
*
3029
* @param nodes List of RelNodes to unify schemas for
3130
* @param context Calcite plan context
3231
* @return List of projected RelNodes with unified schema
32+
* @throws IllegalArgumentException if type conflicts are detected
3333
*/
3434
public static List<RelNode> buildUnifiedSchemaWithConflictResolution(
3535
List<RelNode> nodes, CalcitePlanContext context) {
@@ -55,30 +55,27 @@ public static List<RelNode> buildUnifiedSchemaWithConflictResolution(
5555
projectedNodes.add(projectedNode);
5656
}
5757

58-
// Step 3: Unify names to handle type conflicts (this creates age0, age1, etc.)
59-
List<String> uniqueNames =
60-
SqlValidatorUtil.uniquify(fieldNames, SqlValidatorUtil.EXPR_SUGGESTER, true);
61-
62-
// Step 4: Re-project with unique names if needed
63-
if (!uniqueNames.equals(fieldNames)) {
64-
List<RelNode> renamedNodes = new ArrayList<>();
65-
for (RelNode node : projectedNodes) {
66-
RelNode renamedNode =
67-
context.relBuilder.push(node).project(context.relBuilder.fields(), uniqueNames).build();
68-
renamedNodes.add(renamedNode);
58+
// Step 3: Check for type conflicts and throw exception if found
59+
Set<String> uniqueFieldNames = new HashSet<>();
60+
for (String fieldName : fieldNames) {
61+
if (!uniqueFieldNames.add(fieldName)) {
62+
throw new IllegalArgumentException(
63+
String.format(
64+
"Schema unification failed: field '%s' has conflicting types across subsearches",
65+
fieldName));
6966
}
70-
return renamedNodes;
7167
}
7268

7369
return projectedNodes;
7470
}
7571

7672
/**
7773
* Builds a unified schema by merging fields from all nodes. Fields with the same name but
78-
* different types are added as separate entries (which will be renamed during uniquification).
74+
* different types are added as separate entries (will cause an exception to be thrown).
7975
*
8076
* @param nodes List of RelNodes to merge schemas from
81-
* @return List of SchemaField representing the unified schema (may contain duplicate names)
77+
* @return List of SchemaField representing the unified schema (may contain duplicate names if
78+
* there are type conflicts)
8279
*/
8380
private static List<SchemaField> buildUnifiedSchema(List<RelNode> nodes) {
8481
List<SchemaField> schema = new ArrayList<>();

docs/category.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"user/ppl/cmd/rare.rst",
4141
"user/ppl/cmd/regex.rst",
4242
"user/ppl/cmd/rename.rst",
43+
"user/ppl/cmd/multisearch.rst",
4344
"user/ppl/cmd/rex.rst",
4445
"user/ppl/cmd/search.rst",
4546
"user/ppl/cmd/showdatasources.rst",

docs/user/ppl/cmd/append.rst

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ append <sub-search>
2424

2525
* sub-search: mandatory. Executes PPL commands as a secondary search.
2626

27+
Limitations
28+
===========
29+
30+
* **Schema Compatibility**: When fields with the same name exist between the main search and sub-search but have incompatible types, the query will fail with an error. To avoid type conflicts, ensure that fields with the same name have the same data type, or use different field names (e.g., by renaming with ``eval`` or using ``fields`` to select non-conflicting columns).
31+
2732
Example 1: Append rows from a count aggregation to existing search result
2833
===============================================================
2934

@@ -64,23 +69,3 @@ PPL query::
6469
| 101 | M | null |
6570
+-----+--------+-------+
6671

67-
Example 3: Append rows with column type conflict
68-
=============================================
69-
70-
This example shows how column type conflicts are handled when appending results. Same name columns with different types will generate two different columns in appended result.
71-
72-
PPL query::
73-
74-
os> source=accounts | stats sum(age) as sum by gender, state | sort -sum | head 5 | append [ source=accounts | stats sum(age) as sum by gender | eval sum = cast(sum as double) ];
75-
fetched rows / total rows = 6/6
76-
+------+--------+-------+-------+
77-
| sum | gender | state | sum0 |
78-
|------+--------+-------+-------|
79-
| 36 | M | TN | null |
80-
| 33 | M | MD | null |
81-
| 32 | M | IL | null |
82-
| 28 | F | VA | null |
83-
| null | F | null | 28.0 |
84-
| null | M | null | 101.0 |
85-
+------+--------+-------+-------+
86-

docs/user/ppl/cmd/multisearch.rst

Lines changed: 23 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,6 @@ Description
3030
* **A/B Testing Analysis**: Combine results from different test groups for comparison
3131
* **Time-series Data Merging**: Interleave events from multiple sources based on timestamps
3232

33-
Version
34-
=======
35-
3.3.0
36-
3733
Syntax
3834
======
3935
| multisearch <subsearch1> <subsearch2> <subsearch3> ...
@@ -59,7 +55,7 @@ Limitations
5955
===========
6056

6157
* **Minimum Subsearches**: At least two subsearches must be specified
62-
* **Schema Compatibility**: When fields with the same name exist across subsearches but have incompatible types, the system automatically resolves conflicts by renaming the conflicting fields. The first occurrence retains the original name, while subsequent conflicting fields are renamed with a numeric suffix (e.g., ``age`` becomes ``age0``, ``age1``, etc.). This ensures all data is preserved while maintaining schema consistency.
58+
* **Schema Compatibility**: When fields with the same name exist across subsearches but have incompatible types, the query will fail with an error. To avoid type conflicts, ensure that fields with the same name have the same data type across all subsearches, or use different field names (e.g., by renaming with ``eval`` or using ``fields`` to select non-conflicting columns).
6359

6460
Usage
6561
=====
@@ -84,8 +80,8 @@ PPL query::
8480
|-----------+-----+-----------|
8581
| Nanette | 28 | young |
8682
| Amber | 32 | adult |
83+
| Dale | 33 | adult |
8784
| Hattie | 36 | adult |
88-
| Dale | 37 | adult |
8985
+-----------+-----+-----------+
9086

9187
Example 2: Success Rate Pattern
@@ -97,14 +93,14 @@ PPL query::
9793

9894
os> | multisearch [search source=accounts | where balance > 20000 | eval query_type = "high_balance" | fields firstname, balance, query_type] [search source=accounts | where balance > 0 AND balance <= 20000 | eval query_type = "regular" | fields firstname, balance, query_type] | sort balance desc;
9995
fetched rows / total rows = 4/4
100-
+-----------+---------+-------------+
101-
| firstname | balance | query_type |
102-
|-----------+---------+-------------|
103-
| Amber | 39225 | high_balance|
104-
| Nanette | 32838 | high_balance|
105-
| Hattie | 5686 | regular |
106-
| Dale | 4180 | regular |
107-
+-----------+---------+-------------+
96+
+-----------+---------+--------------+
97+
| firstname | balance | query_type |
98+
|-----------+---------+--------------|
99+
| Amber | 39225 | high_balance |
100+
| Nanette | 32838 | high_balance |
101+
| Hattie | 5686 | regular |
102+
| Dale | 4180 | regular |
103+
+-----------+---------+--------------+
108104

109105
Example 3: Timestamp Interleaving
110106
==================================
@@ -113,37 +109,19 @@ Combine time-series data from multiple sources with automatic timestamp-based or
113109

114110
PPL query::
115111

116-
os> | multisearch [search source=time_data | where category IN ("A", "B")] [search source=time_data2 | where category IN ("E", "F")] | head 5;
112+
os> | multisearch [search source=time_data | where category IN ("A", "B")] [search source=time_data2 | where category IN ("E", "F")] | fields @timestamp, category, value, timestamp | head 5;
117113
fetched rows / total rows = 5/5
118-
+-------+---------------------+----------+-------+---------------------+
119-
| index | @timestamp | category | value | timestamp |
120-
|-------+---------------------+----------+-------+---------------------|
121-
| null | 2025-08-01 04:00:00 | E | 2001 | 2025-08-01 04:00:00 |
122-
| null | 2025-08-01 03:47:41 | A | 8762 | 2025-08-01 03:47:41 |
123-
| null | 2025-08-01 02:30:00 | F | 2002 | 2025-08-01 02:30:00 |
124-
| null | 2025-08-01 01:14:11 | B | 9015 | 2025-08-01 01:14:11 |
125-
| null | 2025-08-01 01:00:00 | E | 2003 | 2025-08-01 01:00:00 |
126-
+-------+---------------------+----------+-------+---------------------+
127-
128-
Example 4: Handling Empty Results
129-
==================================
130-
131-
Multisearch gracefully handles cases where some subsearches return no results.
132-
133-
PPL query::
134-
135-
os> | multisearch [search source=accounts | where age > 25 | fields firstname, age] [search source=accounts | where age > 200 | eval impossible = "yes" | fields firstname, age, impossible] | head 5;
136-
fetched rows / total rows = 4/4
137-
+-----------+-----+------------+
138-
| firstname | age | impossible |
139-
|-----------+-----+------------|
140-
| Nanette | 28 | null |
141-
| Amber | 32 | null |
142-
| Hattie | 36 | null |
143-
| Dale | 37 | null |
144-
+-----------+-----+------------+
145-
146-
Example 5: Type Compatibility - Missing Fields
114+
+---------------------+----------+-------+---------------------+
115+
| @timestamp | category | value | timestamp |
116+
|---------------------+----------+-------+---------------------|
117+
| 2025-08-01 04:00:00 | E | 2001 | 2025-08-01 04:00:00 |
118+
| 2025-08-01 03:47:41 | A | 8762 | 2025-08-01 03:47:41 |
119+
| 2025-08-01 02:30:00 | F | 2002 | 2025-08-01 02:30:00 |
120+
| 2025-08-01 01:14:11 | B | 9015 | 2025-08-01 01:14:11 |
121+
| 2025-08-01 01:00:00 | E | 2003 | 2025-08-01 01:00:00 |
122+
+---------------------+----------+-------+---------------------+
123+
124+
Example 4: Type Compatibility - Missing Fields
147125
=================================================
148126

149127
Demonstrate how missing fields are handled with NULL insertion.
@@ -157,26 +135,7 @@ PPL query::
157135
|-----------+-----+------------|
158136
| Nanette | 28 | yes |
159137
| Amber | 32 | null |
138+
| Dale | 33 | null |
160139
| Hattie | 36 | null |
161-
| Dale | 37 | null |
162140
+-----------+-----+------------+
163141

164-
Example 6: Type Conflict Resolution - Automatic Renaming
165-
===========================================================
166-
167-
When the same field name has incompatible types across subsearches, the system automatically renames conflicting fields with numeric suffixes.
168-
169-
PPL query::
170-
171-
os> | multisearch [search source=accounts | fields firstname, age, balance | head 2] [search source=locations | fields description, age, place_id | head 2];
172-
fetched rows / total rows = 4/4
173-
+-----------+-----+---------+------------------+------+----------+
174-
| firstname | age | balance | description | age0 | place_id |
175-
|-----------+-----+---------+------------------+------+----------|
176-
| Amber | 32 | 39225 | null | null | null |
177-
| Hattie | 36 | 5686 | null | null | null |
178-
| null | null| null | Central Park | old | 1001 |
179-
| null | null| null | Times Square | modern| 1002 |
180-
+-----------+-----+---------+------------------+------+----------+
181-
182-
In this example, the ``age`` field has type ``bigint`` in accounts but type ``string`` in locations. The system keeps the first occurrence as ``age`` (bigint) and renames the second occurrence to ``age0`` (string), preserving all data while avoiding type conflicts.

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -300,30 +300,23 @@ public void testMultisearchNullFillingAcrossIndices() throws IOException {
300300
}
301301

302302
@Test
303-
public void testMultisearchWithDirectTypeConflict() throws IOException {
304-
JSONObject result =
305-
executeQuery(
306-
String.format(
307-
"| multisearch "
308-
+ "[search source=%s | fields firstname, age, balance | head 2] "
309-
+ "[search source=%s | fields description, age, place_id | head 2]",
310-
TEST_INDEX_ACCOUNT, TEST_INDEX_LOCATIONS_TYPE_CONFLICT));
311-
312-
verifySchema(
313-
result,
314-
schema("firstname", null, "string"),
315-
schema("age", null, "bigint"),
316-
schema("balance", null, "bigint"),
317-
schema("description", null, "string"),
318-
schema("age0", null, "string"),
319-
schema("place_id", null, "int"));
303+
public void testMultisearchWithDirectTypeConflict() {
304+
Exception exception =
305+
assertThrows(
306+
ResponseException.class,
307+
() ->
308+
executeQuery(
309+
String.format(
310+
"| multisearch "
311+
+ "[search source=%s | fields firstname, age, balance | head 2] "
312+
+ "[search source=%s | fields description, age, place_id | head 2]",
313+
TEST_INDEX_ACCOUNT, TEST_INDEX_LOCATIONS_TYPE_CONFLICT)));
320314

321-
verifyDataRows(
322-
result,
323-
rows("Amber", 32L, 39225L, null, null, null),
324-
rows("Hattie", 36L, 5686L, null, null, null),
325-
rows(null, null, null, "Central Park", "old", 1001),
326-
rows(null, null, null, "Times Square", "modern", 1002));
315+
assertTrue(
316+
"Error message should indicate type conflict",
317+
exception
318+
.getMessage()
319+
.contains("Schema unification failed: field 'age' has conflicting types"));
327320
}
328321

329322
@Test

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import java.util.Locale;
1919
import org.json.JSONObject;
2020
import org.junit.Test;
21+
import org.opensearch.client.ResponseException;
2122
import org.opensearch.sql.common.setting.Settings;
2223
import org.opensearch.sql.ppl.PPLIntegTestCase;
2324

@@ -213,27 +214,24 @@ public void testAppendWithMergedColumn() throws IOException {
213214
}
214215

215216
@Test
216-
public void testAppendWithConflictTypeColumn() throws IOException {
217-
JSONObject actual =
218-
executeQuery(
219-
String.format(
220-
Locale.ROOT,
221-
"source=%s | stats sum(age) as sum by gender | append [ source=%s | stats sum(age)"
222-
+ " as sum by state | sort sum | eval sum = cast(sum as double) ] | head 5",
223-
TEST_INDEX_ACCOUNT,
224-
TEST_INDEX_ACCOUNT));
225-
verifySchemaInOrder(
226-
actual,
227-
schema("sum", "bigint"),
228-
schema("gender", "string"),
229-
schema("state", "string"),
230-
schema("sum0", "double"));
231-
verifyDataRows(
232-
actual,
233-
rows(14947, "F", null, null),
234-
rows(15224, "M", null, null),
235-
rows(null, null, "NV", 369d),
236-
rows(null, null, "NM", 412d),
237-
rows(null, null, "AZ", 414d));
217+
public void testAppendWithConflictTypeColumn() {
218+
Exception exception =
219+
assertThrows(
220+
ResponseException.class,
221+
() ->
222+
executeQuery(
223+
String.format(
224+
Locale.ROOT,
225+
"source=%s | stats sum(age) as sum by gender | append [ source=%s | stats"
226+
+ " sum(age) as sum by state | sort sum | eval sum = cast(sum as"
227+
+ " double) ] | head 5",
228+
TEST_INDEX_ACCOUNT,
229+
TEST_INDEX_ACCOUNT)));
230+
231+
assertTrue(
232+
"Error message should indicate type conflict",
233+
exception
234+
.getMessage()
235+
.contains("Schema unification failed: field 'sum' has conflicting types"));
238236
}
239237
}

0 commit comments

Comments
 (0)