
Commit f8fdb75

W1thOut authored and committed

delete segment expect remaining number

Revise code; add an SI test and a test for a table without partitions.
1 parent d8f7df9 commit f8fdb75

File tree

4 files changed: +261 −1 lines changed
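
At a glance, this commit adds a new segment-management statement, DELETE FROM TABLE ... EXPECT SEGMENT.REMAIN_NUMBER = n, which keeps the n newest successful segments of a table and marks all older ones for delete. A usage example, taken verbatim from the new test case below:

delete from table deleteSegmentTable expect segment.remain_number = 1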

Diff for: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala

+4

@@ -180,6 +180,8 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
   protected val REGISTER = carbonKeyWord("REGISTER")
   protected val PROPERTIES = carbonKeyWord("PROPERTIES")
   protected val REFRESH = carbonKeyWord("REFRESH")
+  protected val EXPECT = carbonKeyWord("EXPECT")
+  protected val REMAIN_NUMBER = carbonKeyWord("REMAIN_NUMBER")
 
   // For materialized view
   // Keywords used in this parser
@@ -353,4 +355,6 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
       p.getClass.getSimpleName.equals("FloatLit") ||
       p.getClass.getSimpleName.equals("DecimalLit")
     }) ^^ (_.chars)
+
+  protected lazy val number: Parser[Int] = numericLit ^^ (_.toInt)
 }
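
The new number rule is worth a note: numericLit yields the numeric token's string, and ^^ (_.toInt) converts it. A minimal, self-contained sketch of the same idea (NumberParserSketch is a made-up object built on StandardTokenParsers from scala-parser-combinators, not CarbonData's parser hierarchy):

import scala.util.parsing.combinator.syntactical.StandardTokenParsers

// Illustration only: same shape as the `number` rule added above.
object NumberParserSketch extends StandardTokenParsers {
  lazy val number: Parser[Int] = numericLit ^^ (_.toInt)

  def main(args: Array[String]): Unit = {
    // Succeeds: prints "[1.2] parsed: 3"
    println(phrase(number)(new lexical.Scanner("3")))
    // Note: `_.toInt` throws NumberFormatException for literals outside Int
    // range, so a value like 2147483648 is rejected while parsing, consistent
    // with the out-of-range case in the new test suite below.
  }
}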
Diff for: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonDeleteLoadByRemainNumberCommand.scala

+82

@@ -0,0 +1,82 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.command.management

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.execution.command.{Checker, DataCommand}

import org.apache.carbondata.api.CarbonStore
import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.core.exception.ConcurrentOperationException
import org.apache.carbondata.core.statusmanager.{SegmentStatus, SegmentStatusManager}
import org.apache.carbondata.events.{withEvents, DeleteSegmentByIdPostEvent, DeleteSegmentByIdPreEvent}

/**
 * A command that deletes segments so that only the expected number of the
 * newest segments remains.
 *
 * @param remaining expected number of segments remaining after deletion
 */
case class CarbonDeleteLoadByRemainNumberCommand(
    remaining: Int,
    databaseNameOp: Option[String],
    tableName: String)
  extends DataCommand {

  override def processData(sparkSession: SparkSession): Seq[Row] = {
    Checker.validateTableExists(databaseNameOp, tableName, sparkSession)
    val carbonTable = CarbonEnv.getCarbonTable(databaseNameOp, tableName)(sparkSession)
    setAuditTable(carbonTable)
    setAuditInfo(Map("remaining number" -> remaining.toString))
    if (!carbonTable.getTableInfo.isTransactionalTable) {
      throw new MalformedCarbonCommandException("Unsupported operation on non transactional table")
    }

    // if insert overwrite is in progress, do not allow delete segment
    if (SegmentStatusManager.isOverwriteInProgressInTable(carbonTable)) {
      throw new ConcurrentOperationException(carbonTable, "insert overwrite", "delete segment")
    }

    val segments = CarbonStore.readSegments(carbonTable.getTablePath, showHistory = false, None)

    // Derive the ids to delete from the expected remaining number: keep the
    // `remaining` newest SUCCESS/COMPACTED segments and delete the rest
    val deleteSegmentIds = segments
      .filter(segment => segment.getSegmentStatus == SegmentStatus.SUCCESS ||
        segment.getSegmentStatus == SegmentStatus.COMPACTED)
      .sortBy(_.getLoadStartTime)
      .map(_.getLoadName)
      .reverse
      .drop(remaining)

    if (deleteSegmentIds.isEmpty) {
      return Seq.empty
    }

    withEvents(DeleteSegmentByIdPreEvent(carbonTable, deleteSegmentIds, sparkSession),
      DeleteSegmentByIdPostEvent(carbonTable, deleteSegmentIds, sparkSession)) {
      CarbonStore.deleteLoadById(
        deleteSegmentIds,
        CarbonEnv.getDatabaseName(databaseNameOp)(sparkSession),
        tableName,
        carbonTable
      )
    }
    Seq.empty
  }

  override protected def opName: String = "DELETE SEGMENT BY REMAIN_NUMBER"
}
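
The heart of the command is the filter/sort/reverse/drop pipeline above. A self-contained sketch of the same selection on toy data (Seg and idsToDelete are hypothetical stand-ins for illustration; the real code works on the segment metadata returned by CarbonStore.readSegments):

// Toy model of segment metadata, not a CarbonData type.
case class Seg(loadName: String, loadStartTime: Long, status: String)

object RemainNumberSketch {
  // Keep the `remaining` newest Success/Compacted segments; return the ids to delete.
  def idsToDelete(segments: Seq[Seg], remaining: Int): Seq[String] =
    segments
      .filter(s => s.status == "Success" || s.status == "Compacted")
      .sortBy(_.loadStartTime)  // oldest first
      .map(_.loadName)
      .reverse                  // newest first
      .drop(remaining)          // everything past the first `remaining` gets deleted

  def main(args: Array[String]): Unit = {
    val segs = Seq(Seg("0", 100L, "Success"), Seg("1", 200L, "Success"), Seg("2", 300L, "Success"))
    println(idsToDelete(segs, 1)) // List(1, 0): only segment 2, the newest, survives
  }
}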

Diff for: integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala

+9 −1

@@ -79,7 +79,7 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {
 
   protected lazy val segmentManagement: Parser[LogicalPlan] =
     deleteSegmentByID | deleteSegmentByLoadDate | deleteStage | cleanFiles | addSegment |
-    showSegments
+    showSegments | deleteSegmentByRemainNumber
 
   protected lazy val restructure: Parser[LogicalPlan] = alterTableDropColumn
 
@@ -508,6 +508,14 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {
     CarbonDeleteLoadByIdCommand(loadIds, dbName, tableName.toLowerCase())
   }
 
+  protected lazy val deleteSegmentByRemainNumber: Parser[LogicalPlan] =
+    DELETE ~> FROM ~ TABLE ~> (ident <~ ".").? ~ ident ~
+    (EXPECT ~> (SEGMENT ~ "." ~ REMAIN_NUMBER) ~> "=" ~> number) <~
+    opt(";") ^^ {
+      case dbName ~ tableName ~ remaining =>
+        CarbonDeleteLoadByRemainNumberCommand(remaining, dbName, tableName.toLowerCase())
+    }
+
   protected lazy val deleteSegmentByLoadDate: Parser[LogicalPlan] =
     DELETE ~> FROM ~> TABLE ~> (ident <~ ".").? ~ ident ~
     (WHERE ~> (SEGMENT ~ "." ~ STARTTIME ~> BEFORE) ~ stringLit) <~
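
Reading the new rule back as SQL: the database qualifier is optional ((ident <~ ".").?), as is the trailing semicolon (opt(";")), so both of the statement shapes below should parse. The first appears verbatim in the tests; the second, with a hypothetical database name mydb, is inferred from the grammar:

delete from table deleteSegmentTable expect segment.remain_number = 1
delete from table mydb.deleteSegmentTable expect segment.remain_number = 2;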
Diff for: integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/deletesegment/DeleteSegmentByRemainNumberTestCase.scala

+166

@@ -0,0 +1,166 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.carbondata.spark.testsuite.deletesegment

import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties

/**
 * Test cases for deleting segments by expected remaining number.
 */
class DeleteSegmentByRemainNumberTestCase extends QueryTest
  with BeforeAndAfterAll with BeforeAndAfterEach {
  val DELETED_STATUS = "Marked for Delete"

  val SUCCESSFUL_STATUS = "Success"

  override def beforeAll(): Unit = {
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
      .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "dd-MM-yyyy")
  }

  override def beforeEach(): Unit = {
    sql("drop table if exists deleteSegmentPartitionTable")
    sql("drop table if exists deleteSegmentTable")
    sql("drop table if exists indexTable")
    sql(
      "CREATE table deleteSegmentPartitionTable (ID int, date String, country String, name " +
      "String, phonetype String, serialname String, salary String) STORED AS carbondata " +
      "PARTITIONED by(age int)"
    )
    sql(
      s"""LOAD DATA local inpath '$resourcesPath/dataretention1.csv'
         | INTO TABLE deleteSegmentPartitionTable PARTITION (age='20')
         | OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""".stripMargin)
    sql(
      s"""LOAD DATA local inpath '$resourcesPath/dataretention2.csv'
         | INTO TABLE deleteSegmentPartitionTable PARTITION (age='30')
         | OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""".stripMargin)
    sql(
      s"""LOAD DATA local inpath '$resourcesPath/dataretention3.csv'
         | INTO TABLE deleteSegmentPartitionTable PARTITION (age='40')
         | OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""".stripMargin)

    sql(
      "CREATE table deleteSegmentTable (ID int, date String, country String, name " +
      "String, phonetype String, serialname String, salary String) STORED AS carbondata"
    )
    sql(
      s"""LOAD DATA local inpath '$resourcesPath/dataretention1.csv'
         | INTO TABLE deleteSegmentTable OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""".stripMargin)
    sql(
      s"""LOAD DATA local inpath '$resourcesPath/dataretention2.csv'
         | INTO TABLE deleteSegmentTable OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""".stripMargin)
    sql(
      s"""LOAD DATA local inpath '$resourcesPath/dataretention3.csv'
         | INTO TABLE deleteSegmentTable OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""".stripMargin)

    sql("create index indexTable on table deleteSegmentTable(country) as 'carbondata' " +
      "properties('sort_scope'='global_sort', 'Global_sort_partitions'='3')")
  }

  override def afterAll(): Unit = {
    sql("drop table if exists deleteSegmentTable")
    sql("drop table if exists deleteSegmentPartitionTable")
    sql("drop table if exists indexTable")
  }

  test("delete segment, remain_number = 1") {
    sql("delete from table deleteSegmentTable expect segment.remain_number = 1")
    val segments1 = sql("show segments on deleteSegmentTable").collect()
    assertResult(SUCCESSFUL_STATUS)(segments1(0).get(1))
    assertResult(DELETED_STATUS)(segments1(1).get(1))
    assertResult(DELETED_STATUS)(segments1(2).get(1))
    assertResult(10)(sql("select * from indexTable").count())

    sql("delete from table deleteSegmentPartitionTable expect segment.remain_number = 1")
    val segments2 = sql("show segments on deleteSegmentPartitionTable").collect()
    assertResult(SUCCESSFUL_STATUS)(segments2(0).get(1))
    assertResult(DELETED_STATUS)(segments2(1).get(1))
    assertResult(DELETED_STATUS)(segments2(2).get(1))
  }

  test("delete segment, remain nothing") {
    sql("delete from table deleteSegmentTable expect segment.remain_number = 0")
    val segments1 = sql("show segments on deleteSegmentTable").collect()
    segments1.foreach(row => assertResult(DELETED_STATUS)(row.get(1)))
    assertResult(0)(sql("select * from indexTable").count())

    sql("delete from table deleteSegmentPartitionTable expect segment.remain_number = 0")
    val segments2 = sql("show segments on deleteSegmentPartitionTable").collect()
    segments2.foreach(row => assertResult(DELETED_STATUS)(row.get(1)))
  }

  test("delete segment, remain all") {
    sql("delete from table deleteSegmentTable expect segment.remain_number = 3")
    val segments1 = sql("show segments on deleteSegmentTable").collect()
    segments1.foreach(row => assertResult(SUCCESSFUL_STATUS)(row.get(1)))
    assertResult(30)(sql("select * from indexTable").count())

    sql("delete from table deleteSegmentPartitionTable expect segment.remain_number = 3")
    val segments2 = sql("show segments on deleteSegmentPartitionTable").collect()
    segments2.foreach(row => assertResult(SUCCESSFUL_STATUS)(row.get(1)))
  }

  test("delete segment, remain_number is invalid") {
    val ex1 = intercept[Exception] {
      sql("delete from table deleteSegmentTable expect segment.remain_number = -1")
    }
    assert(ex1.getMessage.contains("not found"))
    val ex2 = intercept[Exception] {
      sql("delete from table deleteSegmentTable expect segment.remain_number = 2147483648")
    }
    assert(ex2.getMessage.contains("SqlParse"))
    assertResult(30)(sql("select * from indexTable").count())

    val ex3 = intercept[Exception] {
      sql("delete from table deleteSegmentPartitionTable expect segment.remain_number = -1")
    }
    assert(ex3.getMessage.contains("not found"))
    val ex4 = intercept[Exception] {
      sql("delete from table deleteSegmentPartitionTable expect segment.remain_number = 2147483648")
    }
    assert(ex4.getMessage.contains("SqlParse"))
  }

  test("delete segment after update") {
    sql("update deleteSegmentPartitionTable d set (d.country) = ('fr') where d.country = 'aus'")
    sql("delete from table deleteSegmentPartitionTable expect segment.remain_number = 1")
    val rows = sql("select * from deleteSegmentPartitionTable").collect()
    rows.foreach(row => assertResult("fr")(row(2)))
  }

  test("delete segment after delete newest segment by segmentId") {
    sql("delete from table deleteSegmentTable where segment.id in (2)")
    sql("delete from table deleteSegmentTable expect segment.remain_number = 1")
    val segments1 = sql("show segments on deleteSegmentTable").collect()
    assertResult(DELETED_STATUS)(segments1(0).get(1))
    assertResult(SUCCESSFUL_STATUS)(segments1(1).get(1))
    assertResult(DELETED_STATUS)(segments1(2).get(1))
    assertResult(10)(sql("select * from indexTable").count())

    sql("delete from table deleteSegmentPartitionTable where segment.id in (2)")
    sql("delete from table deleteSegmentPartitionTable expect segment.remain_number = 1")
    val segments2 = sql("show segments on deleteSegmentPartitionTable").collect()
    assertResult(DELETED_STATUS)(segments2(0).get(1))
    assertResult(SUCCESSFUL_STATUS)(segments2(1).get(1))
    assertResult(DELETED_STATUS)(segments2(2).get(1))
  }
}
