@@ -22,6 +22,7 @@ use parquet::arrow::ArrowWriter;
2222use parquet:: basic:: Encoding ;
2323use parquet:: file:: properties:: EnabledStatistics ;
2424use parquet:: file:: properties:: WriterProperties ;
25+ use parquet:: file:: properties:: WriterVersion ;
2526use parquet:: format:: FileMetaData ;
2627
2728/// Serialize data blocks to parquet format.
@@ -30,17 +31,41 @@ pub fn blocks_to_parquet(
3031 blocks : Vec < DataBlock > ,
3132 write_buffer : & mut Vec < u8 > ,
3233 compression : TableCompression ,
34+ enable_encoding : bool ,
3335) -> Result < FileMetaData > {
3436 assert ! ( !blocks. is_empty( ) ) ;
35- let props = WriterProperties :: builder ( )
37+ let builder = WriterProperties :: builder ( )
3638 . set_compression ( compression. into ( ) )
3739 // use `usize::MAX` to effectively limit the number of row groups to 1
3840 . set_max_row_group_size ( usize:: MAX )
39- . set_encoding ( Encoding :: PLAIN )
40- . set_dictionary_enabled ( false )
4141 . set_statistics_enabled ( EnabledStatistics :: None )
42- . set_bloom_filter_enabled ( false )
43- . build ( ) ;
42+ . set_bloom_filter_enabled ( false ) ;
43+
44+ let builder = if enable_encoding {
45+ // Enable dictionary encoding and fallback encodings.
46+ //
47+ // Memo for quick lookup:
48+ // The fallback encoding "strategy" used by parquet-54.2.1 is:
49+ //
50+ // ~~~
51+ // (Type::BOOLEAN, WriterVersion::PARQUET_2_0) => Encoding::RLE,
52+ // (Type::INT32, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BINARY_PACKED,
53+ // (Type::INT64, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BINARY_PACKED,
54+ // (Type::BYTE_ARRAY, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BYTE_ARRAY,
55+ // (Type::FIXED_LEN_BYTE_ARRAY, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BYTE_ARRAY,
56+ // _ => Encoding::PLAIN,
57+ // ~~~
58+ //
59+ builder
60+ . set_writer_version ( WriterVersion :: PARQUET_2_0 )
61+ . set_dictionary_enabled ( true )
62+ } else {
63+ builder
64+ . set_dictionary_enabled ( false )
65+ . set_encoding ( Encoding :: PLAIN )
66+ } ;
67+
68+ let props = builder. build ( ) ;
4469 let batches = blocks
4570 . into_iter ( )
4671 . map ( |block| block. to_record_batch ( table_schema) )
0 commit comments