Skip to content

Commit 8bab753

Browse files
authored
Support Hive Writing data into the filesystem from queries statement parse (#36371)
* support Writing data into the filesystem from queries * update release-notes
1 parent 3c4fc4e commit 8bab753

File tree

7 files changed

+286
-46
lines changed

7 files changed

+286
-46
lines changed

RELEASE-NOTES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
1. SQL Parser: Support Hive SHOW TRANSACTIONS & SHOW COMPACTIONS statement parse - [#36301](https://github.com/apache/shardingsphere/pull/36301)
8383
1. SQL Parser: Support Hive DESCRIBE statement parse - [#36350](https://github.com/apache/shardingsphere/pull/36350)
8484
1. SQL Parser: Support Hive Inserting data into Hive Tables from queries statement parse - [#36320](https://github.com/apache/shardingsphere/pull/36320)
85+
1. SQL Parser: Support Hive Writing data into the filesystem from queries statement parse - [#36371](https://github.com/apache/shardingsphere/pull/36371)
8586
1. SQL Parser: Support SQL Server xml methods parse - [#35911](https://github.com/apache/shardingsphere/pull/35911)
8687
1. SQL Parser: Support SQL Server CHANGETABLE function parse - [#35920](https://github.com/apache/shardingsphere/pull/35920)
8788
1. SQL Parser: Support SQL Server AI_GENERATE_EMBEDDINGS function parse - [#35922](https://github.com/apache/shardingsphere/pull/35922)

parser/sql/dialect/hive/src/main/antlr4/imports/hive/BaseRule.g4

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,3 +1336,48 @@ noWriteToBinLog
13361336
channelOption
13371337
: FOR CHANNEL string_
13381338
;
1339+
1340+
propertyListCommonClause
1341+
: LP_ propertyList RP_
1342+
;
1343+
1344+
propertyList
1345+
: property (COMMA_ property)*
1346+
;
1347+
1348+
property
1349+
: string_ EQ_ string_
1350+
;
1351+
1352+
rowFormat
1353+
: ROW FORMAT rowFormatType
1354+
;
1355+
1356+
rowFormatType
1357+
: DELIMITED rowFormatDelimited
1358+
| SERDE string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
1359+
;
1360+
1361+
rowFormatDelimited
1362+
: (COLUMNS TERMINATED BY string_ (ESCAPED BY string_)?)?
1363+
(COLLECTION ITEMS TERMINATED BY string_)?
1364+
(MAP KEYS TERMINATED BY string_)?
1365+
(LINES TERMINATED BY string_)?
1366+
(NULL DEFINED AS string_)?
1367+
;
1368+
1369+
storedClause
1370+
: STORED AS fileFormat
1371+
| STORED BY string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
1372+
;
1373+
1374+
fileFormat
1375+
: SEQUENCEFILE
1376+
| TEXTFILE
1377+
| RCFILE
1378+
| ORC
1379+
| PARQUET
1380+
| AVRO
1381+
| JSONFILE
1382+
| INPUTFORMAT string_ OUTPUTFORMAT string_
1383+
;

parser/sql/dialect/hive/src/main/antlr4/imports/hive/DDLStatement.g4

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -338,39 +338,6 @@ skewedValue
338338
| literals
339339
;
340340

341-
rowFormat
342-
: ROW FORMAT rowFormatType
343-
;
344-
345-
rowFormatType
346-
: DELIMITED rowFormatDelimited
347-
| SERDE string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
348-
;
349-
350-
rowFormatDelimited
351-
: (COLUMNS TERMINATED BY string_ (ESCAPED BY string_)?)?
352-
(COLLECTION ITEMS TERMINATED BY string_)?
353-
(MAP KEYS TERMINATED BY string_)?
354-
(LINES TERMINATED BY string_)?
355-
(NULL DEFINED AS string_)?
356-
;
357-
358-
storedClause
359-
: STORED AS fileFormat
360-
| STORED BY string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
361-
;
362-
363-
fileFormat
364-
: SEQUENCEFILE
365-
| TEXTFILE
366-
| RCFILE
367-
| ORC
368-
| PARQUET
369-
| AVRO
370-
| JSONFILE
371-
| INPUTFORMAT string_ OUTPUTFORMAT string_
372-
;
373-
374341
storageLocation
375342
: LOCATION string_
376343
;
@@ -387,14 +354,6 @@ tblProperties
387354
: TBLPROPERTIES propertyListCommonClause
388355
;
389356

390-
propertyList
391-
: property (COMMA_ property)*
392-
;
393-
394-
property
395-
: string_ EQ_ string_
396-
;
397-
398357
addConstraint
399358
: ADD CONSTRAINT constraintName
400359
;
@@ -495,10 +454,6 @@ columnNamesCommonClause
495454
: LP_ columnNames RP_
496455
;
497456

498-
propertyListCommonClause
499-
: LP_ propertyList RP_
500-
;
501-
502457
macroParameterList
503458
: macroParameter (COMMA_ macroParameter)*
504459
;

parser/sql/dialect/hive/src/main/antlr4/imports/hive/DMLStatement.g4

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import BaseRule;
2222
insert
2323
: INSERT insertSpecification INTO? tableName partitionNames? (insertValuesClause | setAssignmentsClause | insertSelectClause) onDuplicateKeyClause?
2424
| insertDataIntoTablesFromQueries
25+
| writingDataIntoFileSystem
2526
;
2627

2728
insertSpecification
@@ -368,7 +369,7 @@ dynamicPartitionInserts
368369
;
369370

370371
hiveMultipleInserts
371-
: hiveInsertStatement (hiveInsertStatement)*
372+
: hiveInsertStatement+
372373
;
373374

374375
hiveInsertStatement
@@ -387,3 +388,12 @@ dynamicPartitionKey
387388
partitionClause
388389
: partitionSpec ifNotExists?
389390
;
391+
392+
writingDataIntoFileSystem
393+
: insertOverwriteStandardSyntax
394+
| fromClause insertOverwriteStandardSyntax+
395+
;
396+
397+
insertOverwriteStandardSyntax
398+
: INSERT OVERWRITE LOCAL? DIRECTORY string_ rowFormat? storedClause? select
399+
;

parser/sql/dialect/hive/src/main/java/org/apache/shardingsphere/sql/parser/hive/visitor/statement/type/HiveDMLStatementVisitor.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@
123123
import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.DynamicPartitionInsertsContext;
124124
import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.HiveMultipleInsertsContext;
125125
import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.HiveInsertStatementContext;
126+
import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.WritingDataIntoFileSystemContext;
127+
import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.InsertOverwriteStandardSyntaxContext;
126128
import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.DynamicPartitionClauseContext;
127129
import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.DynamicPartitionKeyContext;
128130
import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.TableNameContext;
@@ -873,6 +875,9 @@ public ASTNode visitInsert(final InsertContext ctx) {
873875
if (null != ctx.insertDataIntoTablesFromQueries()) {
874876
return visit(ctx.insertDataIntoTablesFromQueries());
875877
}
878+
if (null != ctx.writingDataIntoFileSystem()) {
879+
return visit(ctx.writingDataIntoFileSystem());
880+
}
876881
InsertStatement result;
877882
if (null != ctx.insertValuesClause()) {
878883
result = (InsertStatement) visit(ctx.insertValuesClause());
@@ -990,6 +995,51 @@ public ASTNode visitDynamicPartitionKey(final DynamicPartitionKeyContext ctx) {
990995
return new PartitionSegment(ctx.getStart().getStartIndex(), ctx.getStop().getStopIndex(), new IdentifierValue(ctx.identifier().getText()));
991996
}
992997

998+
@Override
999+
public ASTNode visitWritingDataIntoFileSystem(final WritingDataIntoFileSystemContext ctx) {
1000+
List<InsertOverwriteStandardSyntaxContext> statements = ctx.insertOverwriteStandardSyntax();
1001+
if (1 == statements.size() && null == ctx.fromClause()) {
1002+
return visit(statements.get(0));
1003+
}
1004+
final TableSegment sourceTable = null != ctx.fromClause() ? (TableSegment) visit(ctx.fromClause()) : null;
1005+
if (1 == statements.size()) {
1006+
InsertStatement single = (InsertStatement) visit(statements.get(0));
1007+
if (null != sourceTable) {
1008+
single.getInsertSelect().ifPresent(subquery -> setFromForSelect(subquery, sourceTable));
1009+
}
1010+
single.addParameterMarkers(getParameterMarkerSegments());
1011+
return single;
1012+
}
1013+
InsertStatement result = new InsertStatement(getDatabaseType());
1014+
result.setMultiTableInsertType(MultiTableInsertType.ALL);
1015+
MultiTableInsertIntoSegment multiTableInsertInto = new MultiTableInsertIntoSegment(
1016+
ctx.getStart().getStartIndex(), ctx.getStop().getStopIndex());
1017+
for (InsertOverwriteStandardSyntaxContext each : statements) {
1018+
InsertStatement insertStmt = (InsertStatement) visit(each);
1019+
if (null != sourceTable) {
1020+
insertStmt.getInsertSelect().ifPresent(subquery -> setFromForSelect(subquery, sourceTable));
1021+
}
1022+
insertStmt.addParameterMarkers(getParameterMarkerSegments());
1023+
multiTableInsertInto.getInsertStatements().add(insertStmt);
1024+
}
1025+
result.setMultiTableInsertInto(multiTableInsertInto);
1026+
result.addParameterMarkers(getParameterMarkerSegments());
1027+
return result;
1028+
}
1029+
1030+
@Override
1031+
public ASTNode visitInsertOverwriteStandardSyntax(final InsertOverwriteStandardSyntaxContext ctx) {
1032+
return createHiveInsertStatementForDirectory(ctx.select(), ctx.start.getStartIndex());
1033+
}
1034+
1035+
private InsertStatement createHiveInsertStatementForDirectory(final SelectContext select, final int startIndex) {
1036+
InsertStatement result = new InsertStatement(getDatabaseType());
1037+
result.setInsertColumns(new InsertColumnsSegment(startIndex, startIndex, Collections.emptyList()));
1038+
result.setInsertSelect(createInsertSelectSegment(select));
1039+
result.addParameterMarkers(getParameterMarkerSegments());
1040+
return result;
1041+
}
1042+
9931043
private SubquerySegment createInsertSelectSegment(final SelectContext ctx) {
9941044
SelectStatement selectStatement = (SelectStatement) visit(ctx);
9951045
return new SubquerySegment(ctx.start.getStartIndex(), ctx.stop.getStopIndex(), selectStatement, getOriginalText(ctx));

test/it/parser/src/main/resources/case/dml/insert.xml

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5123,4 +5123,174 @@
51235123
</where>
51245124
</select>
51255125
</insert>
5126+
5127+
<insert sql-case-id="insert_overwrite_directory_basic">
5128+
<columns start-index="0" stop-index="0" />
5129+
<select>
5130+
<from>
5131+
<simple-table name="employee" start-index="68" stop-index="75" />
5132+
</from>
5133+
<projections start-index="54" stop-index="61">
5134+
<column-projection name="id" start-index="54" stop-index="55" />
5135+
<column-projection name="name" start-index="58" stop-index="61" />
5136+
</projections>
5137+
</select>
5138+
</insert>
5139+
5140+
<insert sql-case-id="insert_overwrite_local_directory">
5141+
<columns start-index="0" stop-index="0" />
5142+
<select>
5143+
<from>
5144+
<simple-table name="sales" start-index="74" stop-index="78" />
5145+
</from>
5146+
<projections start-index="60" stop-index="67">
5147+
<column-projection name="order_id" start-index="60" stop-index="67" />
5148+
</projections>
5149+
</select>
5150+
</insert>
5151+
5152+
<insert sql-case-id="insert_overwrite_directory_with_row_format">
5153+
<columns start-index="0" stop-index="0" />
5154+
<select>
5155+
<from>
5156+
<simple-table name="products" start-index="121" stop-index="128" />
5157+
</from>
5158+
<projections start-index="95" stop-index="114">
5159+
<column-projection name="product_id" start-index="95" stop-index="104" />
5160+
<column-projection name="category" start-index="107" stop-index="114" />
5161+
</projections>
5162+
</select>
5163+
</insert>
5164+
5165+
<insert sql-case-id="insert_overwrite_directory_with_file_format">
5166+
<columns start-index="0" stop-index="0" />
5167+
<select>
5168+
<from>
5169+
<simple-table name="system_logs" start-index="91" stop-index="101" />
5170+
</from>
5171+
<projections start-index="68" stop-index="84">
5172+
<column-projection name="log_date" start-index="68" stop-index="75" />
5173+
<column-projection name="message" start-index="78" stop-index="84" />
5174+
</projections>
5175+
</select>
5176+
</insert>
5177+
5178+
<insert sql-case-id="insert_overwrite_local_with_row_format_and_file_format">
5179+
<columns start-index="0" stop-index="0" />
5180+
<select>
5181+
<from>
5182+
<simple-table name="user_activity" start-index="135" stop-index="147" />
5183+
</from>
5184+
<projections start-index="119" stop-index="128">
5185+
<column-projection name="login_time" start-index="119" stop-index="128" />
5186+
</projections>
5187+
</select>
5188+
</insert>
5189+
5190+
<insert sql-case-id="single_insert_overwrite_directory">
5191+
<columns start-index="14" stop-index="14" />
5192+
<select>
5193+
<from>
5194+
<simple-table name="employee" start-index="5" stop-index="12" />
5195+
</from>
5196+
<projections start-index="72" stop-index="73">
5197+
<column-projection name="id" start-index="72" stop-index="73" />
5198+
</projections>
5199+
</select>
5200+
</insert>
5201+
5202+
<insert sql-case-id="two_insert_overwrite_directory">
5203+
<multi-table-insert-type value="ALL" />
5204+
<multi-table-insert-into start-index="0" stop-index="136" literal-start-index="0" literal-stop-index="136">
5205+
<insert-statement>
5206+
<columns start-index="14" stop-index="14" />
5207+
<select>
5208+
<from>
5209+
<simple-table name="employee" start-index="5" stop-index="12" />
5210+
</from>
5211+
<projections start-index="72" stop-index="73">
5212+
<column-projection name="id" start-index="72" stop-index="73" />
5213+
</projections>
5214+
</select>
5215+
</insert-statement>
5216+
<insert-statement>
5217+
<columns start-index="75" stop-index="75" />
5218+
<select>
5219+
<from>
5220+
<simple-table name="employee" start-index="5" stop-index="12" />
5221+
</from>
5222+
<projections start-index="133" stop-index="136">
5223+
<column-projection name="name" start-index="133" stop-index="136" />
5224+
</projections>
5225+
</select>
5226+
</insert-statement>
5227+
</multi-table-insert-into>
5228+
</insert>
5229+
5230+
<insert sql-case-id="two_insert_overwrite_directory_with_local">
5231+
<multi-table-insert-type value="ALL" />
5232+
<multi-table-insert-into start-index="0" stop-index="148" literal-start-index="0" literal-stop-index="148">
5233+
<insert-statement>
5234+
<columns start-index="14" stop-index="14" />
5235+
<select>
5236+
<from>
5237+
<simple-table name="employee" start-index="5" stop-index="12" />
5238+
</from>
5239+
<projections start-index="78" stop-index="79">
5240+
<column-projection name="id" start-index="78" stop-index="79" />
5241+
</projections>
5242+
</select>
5243+
</insert-statement>
5244+
<insert-statement>
5245+
<columns start-index="81" stop-index="81" />
5246+
<select>
5247+
<from>
5248+
<simple-table name="employee" start-index="5" stop-index="12" />
5249+
</from>
5250+
<projections start-index="145" stop-index="148">
5251+
<column-projection name="name" start-index="145" stop-index="148" />
5252+
</projections>
5253+
</select>
5254+
</insert-statement>
5255+
</multi-table-insert-into>
5256+
</insert>
5257+
5258+
<insert sql-case-id="three_insert_overwrite_directory">
5259+
<multi-table-insert-type value="ALL" />
5260+
<multi-table-insert-into start-index="0" stop-index="199" literal-start-index="0" literal-stop-index="199">
5261+
<insert-statement>
5262+
<columns start-index="14" stop-index="14" />
5263+
<select>
5264+
<from>
5265+
<simple-table name="employee" start-index="5" stop-index="12" />
5266+
</from>
5267+
<projections start-index="72" stop-index="73">
5268+
<column-projection name="id" start-index="72" stop-index="73" />
5269+
</projections>
5270+
</select>
5271+
</insert-statement>
5272+
<insert-statement>
5273+
<columns start-index="75" stop-index="75" />
5274+
<select>
5275+
<from>
5276+
<simple-table name="employee" start-index="5" stop-index="12" />
5277+
</from>
5278+
<projections start-index="133" stop-index="136">
5279+
<column-projection name="name" start-index="133" stop-index="136" />
5280+
</projections>
5281+
</select>
5282+
</insert-statement>
5283+
<insert-statement>
5284+
<columns start-index="138" stop-index="138" />
5285+
<select>
5286+
<from>
5287+
<simple-table name="employee" start-index="5" stop-index="12" />
5288+
</from>
5289+
<projections start-index="196" stop-index="199">
5290+
<column-projection name="user" start-index="196" stop-index="199" />
5291+
</projections>
5292+
</select>
5293+
</insert-statement>
5294+
</multi-table-insert-into>
5295+
</insert>
51265296
</sql-parser-test-cases>

0 commit comments

Comments
 (0)