Skip to content

Commit c6cf32d

Browse files
committed
Merge branch 'main' of github.com:facebookincubator/velox into joinFuzzer
2 parents 91eb882 + eb3cc2d commit c6cf32d

File tree

163 files changed

+3722
-2056
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

163 files changed

+3722
-2056
lines changed

.github/workflows/scheduled.yml

+10-8
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ jobs:
318318
uses: actions/upload-artifact@v4
319319
with:
320320
name: cache_fuzzer
321-
path: velox/_build/debug/velox/exec/tests/velox_cache_fuzzer_test
321+
path: velox/_build/debug/velox/exec/fuzzer/velox_cache_fuzzer
322322
retention-days: "${{ env.RETENTION }}"
323323

324324
- name: Upload table evolution fuzzer
@@ -751,6 +751,8 @@ jobs:
751751
container: ghcr.io/facebookincubator/velox-dev:centos9
752752
needs: compile
753753
timeout-minutes: 120
754+
# Temporarily disable on PRs till flakiness is fixed #12167
755+
if: ${{ github.event_name != 'pull_request' }}
754756
steps:
755757

756758
- name: Download cache fuzzer
@@ -760,24 +762,24 @@ jobs:
760762

761763
- name: Run Cache Fuzzer
762764
run: |
763-
mkdir -p /tmp/cache_fuzzer_test/logs/
764-
chmod -R 777 /tmp/cache_fuzzer_test
765-
chmod +x velox_cache_fuzzer_test
766-
./velox_cache_fuzzer_test \
765+
mkdir -p /tmp/cache_fuzzer/logs/
766+
chmod -R 777 /tmp/cache_fuzzer
767+
chmod +x velox_cache_fuzzer
768+
./velox_cache_fuzzer \
767769
--seed ${RANDOM} \
768770
--duration_sec $DURATION \
769771
--minloglevel=0 \
770772
--stderrthreshold=2 \
771-
--log_dir=/tmp/cache_fuzzer_test/logs \
773+
--log_dir=/tmp/cache_fuzzer/logs \
772774
&& echo -e "\n\Cache fuzzer run finished successfully."
773775
774776
- name: Archive Cache production artifacts
775777
if: ${{ !cancelled() }}
776778
uses: actions/upload-artifact@v4
777779
with:
778-
name: cache-fuzzer-test-logs
780+
name: cache-fuzzer-logs
779781
path: |
780-
/tmp/cache_fuzzer_test
782+
/tmp/cache_fuzzer
781783
782784
table-evolution-fuzzer-run:
783785
name: Table Evolution Fuzzer

CONTRIBUTING.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ with a benchmark.
287287
line arguments.
288288

289289
```
290-
# Test the new function in isolation. Use --only flag to restrict the set of functions
290+
# Test the new function in isolation. Use --only flag to restrict the set of functions
291291
# and run for 60 seconds or longer.
292292
velox_expression_fuzzer_test --only <my-new-function-name> --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse
293293

velox/common/base/CMakeLists.txt

+1-2
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,7 @@ velox_link_libraries(
5656
velox_process
5757
glog::glog
5858
Folly::folly
59-
fmt::fmt
60-
gflags::gflags)
59+
fmt::fmt)
6160

6261
velox_add_library(velox_status Status.cpp)
6362
velox_link_libraries(

velox/common/caching/CMakeLists.txt

-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ velox_link_libraries(
3232
velox_time
3333
Folly::folly
3434
fmt::fmt
35-
gflags::gflags
3635
PRIVATE velox_time)
3736

3837
if(${VELOX_BUILD_TESTING})

velox/common/caching/SsdFile.cpp

+3-5
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "velox/common/base/SuccinctPrinter.h"
2323
#include "velox/common/caching/FileIds.h"
2424
#include "velox/common/caching/SsdCache.h"
25+
#include "velox/common/config/GlobalConfig.h"
2526
#include "velox/common/memory/Memory.h"
2627
#include "velox/common/process/TraceContext.h"
2728

@@ -34,9 +35,6 @@
3435
#include <sys/types.h>
3536
#include <numeric>
3637

37-
DEFINE_bool(ssd_odirect, true, "Use O_DIRECT for SSD cache IO");
38-
DEFINE_bool(ssd_verify_write, false, "Read back data after writing to SSD");
39-
4038
namespace facebook::velox::cache {
4139

4240
namespace {
@@ -118,7 +116,7 @@ SsdFile::SsdFile(const Config& config)
118116
process::TraceContext trace("SsdFile::SsdFile");
119117
filesystems::FileOptions fileOptions;
120118
fileOptions.shouldThrowOnFileAlreadyExists = false;
121-
fileOptions.bufferIo = !FLAGS_ssd_odirect;
119+
fileOptions.bufferIo = !config::globalConfig().useSsdODirect;
122120
writeFile_ = fs_->openFileForWrite(fileName_, fileOptions);
123121
readFile_ = fs_->openFileForRead(fileName_, fileOptions);
124122

@@ -417,7 +415,7 @@ void SsdFile::write(std::vector<CachePin>& pins) {
417415
checksum = checksumEntry(*entry);
418416
}
419417
entries_[std::move(key)] = SsdRun(offset, size, checksum);
420-
if (FLAGS_ssd_verify_write) {
418+
if (config::globalConfig().verifySsdWrite) {
421419
verifyWrite(*entry, SsdRun(offset, size, checksum));
422420
}
423421
offset += size;

velox/common/caching/SsdFile.h

-3
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,6 @@
2525
#include "velox/common/file/FileInputStream.h"
2626
#include "velox/common/file/FileSystems.h"
2727

28-
DECLARE_bool(ssd_odirect);
29-
DECLARE_bool(ssd_verify_write);
30-
3128
namespace facebook::velox::cache {
3229

3330
namespace test {

velox/common/caching/tests/AsyncDataCacheTest.cpp

+7-3
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "velox/common/testutil/ScopedTestTime.h"
2828
#include "velox/common/testutil/TestValue.h"
2929
#include "velox/exec/tests/utils/TempDirectoryPath.h"
30+
#include "velox/flag_definitions/flags.h"
3031

3132
#include <folly/executors/IOThreadPoolExecutor.h>
3233
#include <folly/executors/QueuedImmediateExecutor.h>
@@ -136,7 +137,8 @@ class AsyncDataCacheTest : public ::testing::TestWithParam<TestParam> {
136137
std::unique_ptr<SsdCache> ssdCache;
137138
if (ssdBytes > 0) {
138139
// tmpfs does not support O_DIRECT, so turn this off for testing.
139-
FLAGS_ssd_odirect = false;
140+
FLAGS_velox_ssd_odirect = false;
141+
translateFlagsToGlobalConfig();
140142
// Make a new tempDirectory only if one is not already set. The
141143
// second creation of cache must find the checkpoint of the
142144
// previous one.
@@ -846,7 +848,8 @@ TEST_P(AsyncDataCacheTest, DISABLED_ssd) {
846848

847849
// Read back all writes. This increases the chance of writes falling behind
848850
// new entry creation.
849-
FLAGS_ssd_verify_write = true;
851+
FLAGS_velox_ssd_verify_write = true;
852+
translateFlagsToGlobalConfig();
850853

851854
// We read kSsdBytes worth of data on 16 threads. The same data will be hit by
852855
// all threads. The expectation is that most of the data ends up on SSD. All
@@ -861,7 +864,8 @@ TEST_P(AsyncDataCacheTest, DISABLED_ssd) {
861864
ASSERT_LE(kRamBytes, ssdStats.bytesWritten);
862865

863866
// We allow writes to proceed faster.
864-
FLAGS_ssd_verify_write = false;
867+
FLAGS_velox_ssd_verify_write = false;
868+
translateFlagsToGlobalConfig();
865869
// We read the data back. The verify hook checks correct values. Error every
866870
// 13 batch loads.
867871
runThreads(16, [&](int32_t /*i*/) { loadLoop(0, kSsdBytes, 13); });

velox/common/caching/tests/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ target_link_libraries(
3939
velox_file_test_utils
4040
velox_memory
4141
velox_temp_path
42+
velox_flag_definitions
4243
Folly::folly
4344
glog::glog
4445
GTest::gtest

velox/common/caching/tests/SsdFileTest.cpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "velox/common/base/tests/GTestUtils.h"
1818
#include "velox/common/caching/FileIds.h"
1919
#include "velox/common/caching/tests/CacheTestUtil.h"
20+
#include "velox/common/config/GlobalConfig.h"
2021
#include "velox/common/file/FileSystems.h"
2122
#include "velox/common/file/tests/FaultyFileSystem.h"
2223
#include "velox/common/memory/Memory.h"
@@ -73,7 +74,7 @@ class SsdFileTest : public testing::Test {
7374
bool disableFileCow = false,
7475
bool enableFaultInjection = false) {
7576
// tmpfs does not support O_DIRECT, so turn this off for testing.
76-
FLAGS_ssd_odirect = false;
77+
config::globalConfig().useSsdODirect = false;
7778
cache_ = AsyncDataCache::create(memory::memoryManager()->allocator());
7879
cacheHelper_ =
7980
std::make_unique<test::AsyncDataCacheTestHelper>(cache_.get());
@@ -327,7 +328,7 @@ TEST_F(SsdFileTest, writeAndRead) {
327328
constexpr int64_t kSsdSize = 16 * SsdFile::kRegionSize;
328329
std::vector<TestEntry> allEntries;
329330
initializeCache(kSsdSize);
330-
FLAGS_ssd_verify_write = true;
331+
config::globalConfig().verifySsdWrite = true;
331332
for (auto startOffset = 0; startOffset <= kSsdSize - SsdFile::kRegionSize;
332333
startOffset += SsdFile::kRegionSize) {
333334
auto pins =
@@ -402,7 +403,7 @@ TEST_F(SsdFileTest, checkpoint) {
402403
constexpr int64_t kSsdSize = 16 * SsdFile::kRegionSize;
403404
const uint64_t checkpointIntervalBytes = 5 * SsdFile::kRegionSize;
404405
const auto fileNameAlt = StringIdLease(fileIds(), "fileInStorageAlt");
405-
FLAGS_ssd_verify_write = true;
406+
config::globalConfig().verifySsdWrite = true;
406407
initializeCache(kSsdSize, checkpointIntervalBytes);
407408

408409
std::vector<TestEntry> allEntries;
@@ -498,7 +499,7 @@ TEST_F(SsdFileTest, checkpoint) {
498499
TEST_F(SsdFileTest, fileCorruption) {
499500
constexpr int64_t kSsdSize = 16 * SsdFile::kRegionSize;
500501
const uint64_t checkpointIntervalBytes = 5 * SsdFile::kRegionSize;
501-
FLAGS_ssd_verify_write = true;
502+
config::globalConfig().verifySsdWrite = true;
502503

503504
const auto populateCache = [&](std::vector<TestEntry>& entries) {
504505
entries.clear();
@@ -570,7 +571,7 @@ TEST_F(SsdFileTest, fileCorruption) {
570571
TEST_F(SsdFileTest, recoverFromCheckpointWithChecksum) {
571572
constexpr int64_t kSsdSize = 4 * SsdFile::kRegionSize;
572573
const uint64_t checkpointIntervalBytes = 3 * SsdFile::kRegionSize;
573-
FLAGS_ssd_verify_write = true;
574+
config::globalConfig().verifySsdWrite = true;
574575

575576
// Test if cache data can be recovered with different settings.
576577
struct {

velox/common/compression/Compression.cpp

+13-13
Original file line numberDiff line numberDiff line change
@@ -25,36 +25,36 @@ std::unique_ptr<folly::compression::Codec> compressionKindToCodec(
2525
CompressionKind kind) {
2626
switch (static_cast<int32_t>(kind)) {
2727
case CompressionKind_NONE:
28-
return getCodec(folly::io::CodecType::NO_COMPRESSION);
28+
return getCodec(folly::compression::CodecType::NO_COMPRESSION);
2929
case CompressionKind_ZLIB:
30-
return getCodec(folly::io::CodecType::ZLIB);
30+
return getCodec(folly::compression::CodecType::ZLIB);
3131
case CompressionKind_SNAPPY:
32-
return getCodec(folly::io::CodecType::SNAPPY);
32+
return getCodec(folly::compression::CodecType::SNAPPY);
3333
case CompressionKind_ZSTD:
34-
return getCodec(folly::io::CodecType::ZSTD);
34+
return getCodec(folly::compression::CodecType::ZSTD);
3535
case CompressionKind_LZ4:
36-
return getCodec(folly::io::CodecType::LZ4);
36+
return getCodec(folly::compression::CodecType::LZ4);
3737
case CompressionKind_GZIP:
38-
return getCodec(folly::io::CodecType::GZIP);
38+
return getCodec(folly::compression::CodecType::GZIP);
3939
default:
4040
VELOX_UNSUPPORTED(
4141
"Not support {} in folly", compressionKindToString(kind));
4242
}
4343
}
4444

45-
CompressionKind codecTypeToCompressionKind(folly::io::CodecType type) {
45+
CompressionKind codecTypeToCompressionKind(folly::compression::CodecType type) {
4646
switch (type) {
47-
case folly::io::CodecType::NO_COMPRESSION:
47+
case folly::compression::CodecType::NO_COMPRESSION:
4848
return CompressionKind_NONE;
49-
case folly::io::CodecType::ZLIB:
49+
case folly::compression::CodecType::ZLIB:
5050
return CompressionKind_ZLIB;
51-
case folly::io::CodecType::SNAPPY:
51+
case folly::compression::CodecType::SNAPPY:
5252
return CompressionKind_SNAPPY;
53-
case folly::io::CodecType::ZSTD:
53+
case folly::compression::CodecType::ZSTD:
5454
return CompressionKind_ZSTD;
55-
case folly::io::CodecType::LZ4:
55+
case folly::compression::CodecType::LZ4:
5656
return CompressionKind_LZ4;
57-
case folly::io::CodecType::GZIP:
57+
case folly::compression::CodecType::GZIP:
5858
return CompressionKind_GZIP;
5959
default:
6060
VELOX_UNSUPPORTED(

velox/common/compression/Compression.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ enum CompressionKind {
3636
std::unique_ptr<folly::compression::Codec> compressionKindToCodec(
3737
CompressionKind kind);
3838

39-
CompressionKind codecTypeToCompressionKind(folly::io::CodecType type);
39+
CompressionKind codecTypeToCompressionKind(folly::compression::CodecType type);
4040

4141
/// Get the name of the CompressionKind.
4242
std::string compressionKindToString(CompressionKind kind);

velox/common/compression/tests/CompressionTest.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ TEST_F(CompressionTest, testCompressionNames) {
3838

3939
TEST_F(CompressionTest, compressionKindToCodec) {
4040
ASSERT_EQ(
41-
folly::io::CodecType::NO_COMPRESSION,
41+
folly::compression::CodecType::NO_COMPRESSION,
4242
compressionKindToCodec(CompressionKind::CompressionKind_NONE)->type());
4343
ASSERT_EQ(
44-
folly::io::CodecType::LZ4,
44+
folly::compression::CodecType::LZ4,
4545
compressionKindToCodec(CompressionKind::CompressionKind_LZ4)->type());
4646
EXPECT_THROW(
4747
compressionKindToCodec(CompressionKind::CompressionKind_LZO),

velox/common/config/GlobalConfig.h

+8
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ struct GlobalConfiguration {
7777
/// and be writable. This flag is ignored if
7878
/// saveInputOnExpressionAnyFailurePath flag is set.
7979
std::string saveInputOnExpressionSystemFailurePath;
80+
/// Use O_DIRECT for SSD cache I/O. This allows to bypass Linux Kernel's page
81+
/// cache and can improve performance on some filesystems. Disable if the
82+
/// filesystem does not support it.
83+
bool useSsdODirect{true};
84+
/// Verify the data written to SSD. Once an entry is written, it is
85+
/// immediately read back and is compared against the entry written. This is
86+
/// helpful to protect against SSD write corruption.
87+
bool verifySsdWrite{false};
8088
};
8189

8290
GlobalConfiguration& globalConfig();

velox/common/encode/Coding.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ class Varint {
264264
// Zig-zag encoding that maps signed integers with a small absolute value
265265
// to unsigned integers with a small (positive) value.
266266
// if x >= 0, ZigZag::encode(x) == 2*x
267-
// if x < 0, ZigZag::encode(x) == -2*x + 1
267+
// if x < 0, ZigZag::encode(x) == -2*x + 1
268268
class ZigZag {
269269
public:
270270
static uint64_t encode(int64_t val) {
@@ -273,6 +273,10 @@ class ZigZag {
273273
return (static_cast<uint64_t>(val) << 1) ^ (val >> 63);
274274
}
275275

276+
static __uint128_t encodeInt128(__int128_t val) {
277+
return (static_cast<__uint128_t>(val) << 1) ^ (val >> 127);
278+
}
279+
276280
template <typename U, typename T = typename std::make_signed<U>::type>
277281
static T decode(U val) {
278282
return static_cast<T>((val >> 1) ^ -(val & 1));

velox/common/encode/tests/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
add_executable(velox_common_encode_test Base64Test.cpp)
15+
add_executable(velox_common_encode_test Base64Test.cpp ZigZagTest.cpp)
1616
add_test(velox_common_encode_test velox_common_encode_test)
1717
target_link_libraries(
1818
velox_common_encode_test
+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <gtest/gtest.h>
18+
#include "velox/common/base/tests/GTestUtils.h"
19+
#include "velox/common/encode/Coding.h"
20+
#include "velox/type/HugeInt.h"
21+
22+
namespace facebook::velox::encode::test {
23+
24+
class ZigZagTest : public ::testing::Test {};
25+
26+
TEST_F(ZigZagTest, hugeInt) {
27+
auto assertZigZag = [](int128_t value) {
28+
auto encoded = ZigZag::encodeInt128(value);
29+
auto decoded = ZigZag::decode(encoded);
30+
EXPECT_EQ(value, decoded);
31+
};
32+
33+
assertZigZag(0);
34+
assertZigZag(HugeInt::parse("1234567890123456789"));
35+
assertZigZag(HugeInt::parse("-1234567890123456789"));
36+
assertZigZag(HugeInt::parse(std::string(38, '9')));
37+
assertZigZag(std::numeric_limits<__int128_t>::max());
38+
assertZigZag(std::numeric_limits<__int128_t>::min());
39+
}
40+
41+
} // namespace facebook::velox::encode::test

velox/common/fuzzer/tests/ConstrainedGeneratorsTest.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
#include <gtest/gtest.h>
2020

21+
#include "velox/common/memory/Memory.h"
2122
#include "velox/functions/prestosql/types/JsonType.h"
2223
#include "velox/type/Variant.h"
2324

0 commit comments

Comments
 (0)