@@ -22,7 +22,7 @@ import java.math.BigDecimal
22
22
23
23
import org .apache .spark .sql .test .util .QueryTest
24
24
import org .apache .spark .sql .types ._
25
- import org .apache .spark .sql .{AnalysisException , DataFrame , Row , SaveMode }
25
+ import org .apache .spark .sql .{AnalysisException , DataFrame , DataFrameWriter , Row , SaveMode }
26
26
import org .scalatest .BeforeAndAfterAll
27
27
28
28
class TestLoadDataFrame extends QueryTest with BeforeAndAfterAll {
@@ -73,7 +73,9 @@ class TestLoadDataFrame extends QueryTest with BeforeAndAfterAll {
73
73
sql(" DROP TABLE IF EXISTS carbon8" )
74
74
sql(" DROP TABLE IF EXISTS carbon9" )
75
75
sql(" DROP TABLE IF EXISTS carbon10" )
76
-
76
+ sql(" DROP TABLE IF EXISTS df_write_sort_column_not_specified" )
77
+ sql(" DROP TABLE IF EXISTS df_write_specify_sort_column" )
78
+ sql(" DROP TABLE IF EXISTS df_write_empty_sort_column" )
77
79
}
78
80
79
81
@@ -236,13 +238,68 @@ test("test the boolean data type"){
236
238
sql(" select count(*) from carbon10 where c3 > 500" ), Row (500 )
237
239
)
238
240
sql(" drop table carbon10" )
239
- assert(! new File (path).exists())
241
+ assert(! new File (path).exists())
240
242
assert(intercept[AnalysisException ](
241
243
sql(" select count(*) from carbon10 where c3 > 500" ))
242
244
.message
243
245
.contains(" not found" ))
244
246
}
245
247
248
/**
 * Reads the SORT_COLUMNS entry from the `desc formatted` output of a table.
 *
 * Fails the calling test with a descriptive message when no SORT_COLUMNS row
 * is present.
 *
 * @param tableName name of the table to describe
 * @return the comma-separated sort column names, trimmed and lower-cased;
 *         an empty array when the SORT_COLUMNS value is blank or null
 */
private def getSortColumnValue(tableName: String): Array[String] = {
  val describedRows = sql(s"desc formatted $tableName").collect
  // Option(...) keeps a null cell from reaching trim/split and throwing an NPE.
  val sortColumns = describedRows.collectFirst {
    case row if Option(row.get(0)).exists(_.toString.trim.equalsIgnoreCase("SORT_COLUMNS")) =>
      Option(row.get(1)).fold("")(_.toString)
  }
  assert(sortColumns.isDefined, s"SORT_COLUMNS row not found in 'desc formatted $tableName'")
  // Blank entries are dropped so an empty SORT_COLUMNS value yields an empty array.
  sortColumns.getOrElse("").split(",").map(_.trim.toLowerCase).filter(_.nonEmpty)
}
257
+
258
/**
 * Builds a writer over `df2` with the options shared by the sort_columns tests.
 *
 * The writer targets the "carbondata" format in Overwrite mode; callers may
 * chain further options (for example "sort_columns") before calling `save()`.
 *
 * @param tableName value passed as the "tableName" option
 * @return the configured, not yet executed, writer
 */
private def getDefaultWriter(tableName: String): DataFrameWriter[Row] = {
  // Keys and values carry no surrounding whitespace so they are matched as written.
  df2.write
    .format("carbondata")
    .option("tableName", tableName)
    .option("tempCSV", "false")
    .option("single_pass", "false")
    .option("table_blocksize", "256")
    .option("compress", "false")
    .mode(SaveMode.Overwrite)
}
268
+
269
test("test load dataframe with sort_columns not specified," +
     " by default all string columns will be sort_columns") {
  val tableName = "df_write_sort_column_not_specified"
  // No sort_columns option is given: all string columns are expected to be
  // used as sort_columns by default.
  getDefaultWriter(tableName).save()
  checkAnswer(
    sql(s"select count(*) from $tableName where c3 > 500"), Row(500)
  )

  // Seq equality lets the assertion report expected and actual columns on failure.
  val sortColumnValue = getSortColumnValue(tableName)
  assert(sortColumnValue.toSeq == Seq("c1", "c2"))
}
280
+
281
test("test load dataframe with sort_columns specified") {
  val tableName = "df_write_specify_sort_column"
  // Only c1 is specified as sort_columns.
  getDefaultWriter(tableName).option("sort_columns", "c1").save()
  checkAnswer(
    sql(s"select count(*) from $tableName where c3 > 500"), Row(500)
  )

  // Seq equality lets the assertion report expected and actual columns on failure.
  val sortColumnValue = getSortColumnValue(tableName)
  assert(sortColumnValue.toSeq == Seq("c1"))
}
291
+
292
test("test load dataframe with sort_columns specified empty") {
  val tableName = "df_write_empty_sort_column"
  // An empty (not blank) sort_columns value is specified.
  getDefaultWriter(tableName).option("sort_columns", "").save()
  checkAnswer(
    sql(s"select count(*) from $tableName where c3 > 500"), Row(500)
  )

  val sortColumnValue = getSortColumnValue(tableName)
  assert(sortColumnValue.isEmpty,
    s"expected no sort columns but found: ${sortColumnValue.mkString(",")}")
}
302
+
246
303
/** Suite-level teardown hook: invokes `dropTable`. */
override def afterAll: Unit = {
  dropTable
}
0 commit comments