19
19
#include < gtest/gtest.h>
20
20
#include < unordered_set>
21
21
22
+ #include " velox/dwio/parquet/RegisterParquetWriter.h"
22
23
#include " velox/exec/fuzzer/AggregationFuzzerOptions.h"
23
24
#include " velox/exec/fuzzer/AggregationFuzzerRunner.h"
24
- #include " velox/exec/fuzzer/DuckQueryRunner.h"
25
25
#include " velox/exec/fuzzer/TransformResultVerifier.h"
26
26
#include " velox/functions/prestosql/registration/RegistrationFunctions.h"
27
27
#include " velox/functions/sparksql/aggregates/Register.h"
28
+ #include " velox/functions/sparksql/fuzzer/SparkQueryRunner.h"
28
29
#include " velox/serializers/CompactRowSerializer.h"
29
30
#include " velox/serializers/PrestoSerializer.h"
30
31
#include " velox/serializers/UnsafeRowSerializer.h"
@@ -45,6 +46,7 @@ DEFINE_string(
45
46
int main (int argc, char ** argv) {
46
47
facebook::velox::functions::aggregate::sparksql::registerAggregateFunctions (
47
48
" " , false );
49
+ facebook::velox::parquet::registerParquetWriterFactory ();
48
50
49
51
::testing::InitGoogleTest (&argc, argv);
50
52
@@ -71,10 +73,13 @@ int main(int argc, char** argv) {
71
73
}
72
74
facebook::velox::memory::MemoryManager::initialize ({});
73
75
74
- // TODO: List of the functions that at some point crash or fail and need to
75
- // be fixed before we can enable. Constant argument of bloom_filter_agg cause
76
- // fuzzer test fail.
77
- std::unordered_set<std::string> skipFunctions = {" bloom_filter_agg" };
76
+ // Spark does not provide user-accessible aggregate functions with the
77
+ // following names.
78
+ std::unordered_set<std::string> skipFunctions = {
79
+ " bloom_filter_agg" ,
80
+ " first_ignore_null" ,
81
+ " last_ignore_null" ,
82
+ " regr_replacement" };
78
83
79
84
using facebook::velox::exec::test::TransformResultVerifier;
80
85
@@ -113,21 +118,9 @@ int main(int argc, char** argv) {
113
118
size_t initialSeed = FLAGS_seed == 0 ? std::time (nullptr ) : FLAGS_seed;
114
119
std::shared_ptr<facebook::velox::memory::MemoryPool> rootPool{
115
120
facebook::velox::memory::memoryManager ()->addRootPool ()};
116
- auto duckQueryRunner =
117
- std::make_unique<facebook::velox::exec::test::DuckQueryRunner>(
118
- rootPool.get ());
119
- duckQueryRunner->disableAggregateFunctions (
120
- {// https://github.com/facebookincubator/velox/issues/7677
121
- " max_by" ,
122
- " min_by" ,
123
- // The skewness functions of Velox and DuckDB use different
124
- // algorithms.
125
- // https://github.com/facebookincubator/velox/issues/4845
126
- " skewness" ,
127
- // Spark's kurtosis uses Pearson's formula for calculating the kurtosis
128
- // coefficient. Meanwhile, DuckDB employs the sample kurtosis calculation
129
- // formula. The results from the two methods are completely different.
130
- " kurtosis" });
121
+ auto sparkQueryRunner = std::make_unique<
122
+ facebook::velox::functions::sparksql::fuzzer::SparkQueryRunner>(
123
+ rootPool.get (), " localhost:15002" , " fuzzer" , " aggregate" );
131
124
132
125
using Runner = facebook::velox::exec::test::AggregationFuzzerRunner;
133
126
using Options = facebook::velox::exec::test::AggregationFuzzerOptions;
@@ -137,5 +130,9 @@ int main(int argc, char** argv) {
137
130
options.skipFunctions = skipFunctions;
138
131
options.customVerificationFunctions = customVerificationFunctions;
139
132
options.orderableGroupKeys = true ;
140
- return Runner::run (initialSeed, std::move (duckQueryRunner), options);
133
+ options.timestampPrecision =
134
+ facebook::velox::VectorFuzzer::Options::TimestampPrecision::kMicroSeconds ;
135
+ options.hiveConfigs = {
136
+ {facebook::velox::connector::hive::HiveConfig::kReadTimestampUnit , " 6" }};
137
+ return Runner::run (initialSeed, std::move (sparkQueryRunner), options);
141
138
}
0 commit comments