facebookincubator · ccat3z · Mar 5, 2025 · Mar 7, 2025 · Mar 10, 2025 · Mar 10, 2025
diff --git a/velox/common/base/CountBits.h b/velox/common/base/CountBits.h
@@ -19,7 +19,8 @@
 namespace facebook::velox {
 
 // Copied from format.h of fmt.
-FOLLY_ALWAYS_INLINE int countDigits(__uint128_t n) {
+template <typename T>
+FOLLY_ALWAYS_INLINE int countDigits(T n) {
   int count = 1;
   for (;;) {
     if (n < 10) {

@@ -13,6 +13,7 @@
 # limitations under the License.
 if(${VELOX_BUILD_TESTING})
   add_subdirectory(tests)
+  add_subdirectory(fuzzer)
 endif()
 
 add_subdirectory(parser)

diff --git a/velox/type/Conversions.h b/velox/type/Conversions.h
@@ -21,6 +21,7 @@
 #include <cctype>
 #include <string>
 #include <type_traits>
+#include "velox/common/base/CountBits.h"
 #include "velox/common/base/Exceptions.h"
 #include "velox/common/base/Status.h"
 #include "velox/type/CppToType.h"
@@ -578,12 +579,7 @@ struct Converter<TypeKind::VARCHAR, void, TPolicy> {
         normalizeStandardNotation(str);
         return str;
       }
-      // Precision of float is at most 8 significant decimal digits. Precision
-      // of double is at most 17 significant decimal digits.
-      auto str =
-          fmt::format(std::is_same_v<T, float> ? "{:.7E}" : "{:.16E}", val);
-      normalizeScientificNotation(str);
-      return str;
+      return castScientificNotation(val);
     }
 
     return folly::to<std::string>(val);
@@ -603,6 +599,62 @@ struct Converter<TypeKind::VARCHAR, void, TPolicy> {
     return val ? "true" : "false";
   }
 
+  /// Converts a floating-point number to scientific notation of the form
+  /// "[-]d.d...E[-]x", built from the decimal significand/exponent pair
+  /// returned by fmt::detail::dragonbox::to_decimal.
+  ///
+  /// Only accepts values whose absolute value is less than or equal to 10E-3
+  /// or greater than or equal to 10E7. Otherwise, the result is undefined.
+  ///
+  /// @tparam T Floating-point type of the input.
+  /// @param value The number to convert.
+  /// @return The scientific-notation string.
+  template <typename T>
+  static std::string castScientificNotation(T value) {
+    static_assert(std::is_floating_point_v<T>);
+    static constexpr char kDigits[] = "0123456789";
+
+    auto dec = fmt::detail::dragonbox::to_decimal(value);
+
+    // Ensure the significand part contains at least two digits, so that there
+    // is always a digit after the decimal point (e.g. "1.0E10").
+    if (dec.significand < 10) {
+      dec.significand *= 10;
+      dec.exponent -= 1;
+    }
+
+    // Compute the sizes of both parts. Placing the decimal point right after
+    // the first digit raises the exponent by significandSize - 1.
+    const auto significandSize = countDigits(dec.significand);
+    dec.exponent += significandSize - 1;
+    const auto exponentSize = countDigits(std::abs(dec.exponent));
+
+    // The trailing 2 accounts for the '.' and 'E' characters.
+    std::string buf;
+    buf.resize(
+        significandSize + exponentSize + (value < 0) + (dec.exponent < 0) + 2);
+
+    int pos = 0;
+
+    // Push sign
+    if (value < 0) {
+      buf[pos++] = '-';
+    }
+
+    // Push significand part right-to-left: trailing digits first, then the
+    // decimal point, then the leading digit.
+    pos += significandSize;
+    for (; dec.significand > 9; dec.significand /= 10) {
+      buf[pos--] = kDigits[dec.significand % 10];
+    }
+    buf[pos--] = '.';
+    buf[pos--] = kDigits[dec.significand % 10];
+    // Jump past the digits and the '.' that were just written.
+    pos += significandSize + 2;
+
+    // Push exponent part: 'E', optional '-', then the digits right-to-left.
+    buf[pos++] = 'E';
+    if (dec.exponent < 0) {
+      buf[pos++] = '-';
+      dec.exponent = -dec.exponent;
+    }
+    pos += exponentSize - 1;
+    // do-while so that an exponent of 0 still emits a '0' digit instead of
+    // leaving a NUL character in the buffer.
+    do {
+      buf[pos--] = kDigits[dec.exponent % 10];
+      dec.exponent /= 10;
+    } while (dec.exponent > 0);
+
+    return buf;
+  }
+
   /// Normalize the given floating-point standard notation string in place, by
   /// appending '.0' if it has only the integer part but no fractional part. For
   /// example, for the given string '12345', replace it with '12345.0'.

@@ -0,0 +1,39 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# FloatToStringFuzzer starts `java FloatGenerator ...` at run time, so the
+# fuzzer is only built when a JDK is available to compile FloatGenerator.java.
+find_package(Java COMPONENTS Development)
+
+if(Java_Development_FOUND)
+  # Compile the generator; javac writes the class file into the current binary
+  # directory.
+  add_custom_command(
+    OUTPUT FloatGenerator.class
+    COMMAND ${Java_JAVAC_EXECUTABLE} -d ${CMAKE_CURRENT_BINARY_DIR}
+            ${CMAKE_CURRENT_SOURCE_DIR}/FloatGenerator.java
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/FloatGenerator.java)
+  add_custom_target(java_float_generator DEPENDS FloatGenerator.class)
+
+  add_executable(velox_float_to_string_fuzzer FloatToStringFuzzer.cpp)
+  # The test command must be the executable target defined above; the
+  # NAME/COMMAND form lets CMake resolve the target to its binary location.
+  add_test(NAME float_to_string_fuzzer COMMAND velox_float_to_string_fuzzer)
+
+  target_link_libraries(
+    velox_float_to_string_fuzzer
+    Boost::headers
+    fmt::fmt
+    velox_type
+    GTest::gtest
+    GTest::gtest_main
+    gflags::gflags
+    glog::glog)
+
+  # Make sure the Java class is compiled whenever the fuzzer is built.
+  add_dependencies(velox_float_to_string_fuzzer java_float_generator)
+endif()
diff --git a/velox/type/fuzzer/FloatGenerator.java b/velox/type/fuzzer/FloatGenerator.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Random;
+
+/**
+ * Prints random floating-point test cases to standard output.
+ *
+ * <p>The first line is "true" or "false": whether Double.toString(1.0E23) yields
+ * "9.999999999999999E22" on this JVM (see JDK-4511638). After that, every
+ * generated value is printed on one line, followed on the next line by the
+ * result of Float.floatToIntBits / Double.doubleToLongBits for that value.
+ */
+public class FloatGenerator {
+  private static final Random random = new Random();
+
+  /** Expects two arguments: the type ("float", anything else means double) and the count. */
+  public static void main(String[] args) {
+    if (args.length != 2) {
+      System.out.println("usage: java FloatGenerator double|float count");
+      System.exit(1);
+    }
+    final boolean wantFloat = args[0].equals("float");
+    final int total = Integer.parseInt(args[1]);
+
+    // Prints true when this JVM still shows the JDK-4511638 behavior
+    // (https://bugs.openjdk.org/browse/JDK-4511638), i.e. 1.0E23 is rendered
+    // as 9.999999999999999E22.
+    final boolean affected = "9.999999999999999E22".equals(Double.toString(1.0E23));
+    System.out.println(affected);
+
+    for (int produced = 0; produced < total; produced++) {
+      if (wantFloat) {
+        printRandomFloat();
+      } else {
+        printRandomDouble();
+      }
+    }
+  }
+
+  /** Prints one random float and its bit pattern. */
+  private static void printRandomFloat() {
+    final float sample = Float.parseFloat(generateRandom(1, 10, -37, 38));
+    System.out.println(sample);
+    System.out.println(Float.floatToIntBits(sample));
+  }
+
+  /** Prints one random double and its bit pattern. */
+  private static void printRandomDouble() {
+    final double sample = Double.parseDouble(generateRandom(1, 18, -307, 308));
+    System.out.println(sample);
+    System.out.println(Double.doubleToLongBits(sample));
+  }
+
+  /**
+   * Builds a random decimal literal "[-]d.ddd...E<exponent>" whose digit count and exponent
+   * are drawn uniformly from the given inclusive ranges. The leading digit is never zero.
+   */
+  private static String generateRandom(int minMantissaDigits, int maxMantissaDigits, int minExponent, int maxExponent) {
+    final int digitCount = minMantissaDigits + random.nextInt(maxMantissaDigits - minMantissaDigits + 1);
+    final StringBuilder text = new StringBuilder();
+
+    final boolean negative = random.nextBoolean();
+    if (negative) {
+      text.append('-');
+    }
+
+    // Leading digit in 1..9, then the decimal point.
+    text.append(random.nextInt(9) + 1).append('.');
+
+    // The remaining digitCount - 1 digits may be any of 0..9.
+    int written = 1;
+    while (written < digitCount) {
+      text.append(random.nextInt(10));
+      written++;
+    }
+
+    final int exponent = minExponent + random.nextInt(maxExponent - minExponent + 1);
+    return text.append('E').append(exponent).toString();
+  }
+}
diff --git a/velox/type/fuzzer/FloatToStringFuzzer.cpp b/velox/type/fuzzer/FloatToStringFuzzer.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "velox/type/Conversions.h"
+
+#include <cstddef>
+#include <cstring>
+#include <filesystem>
+#include <iostream>
+#include <tuple>
+#include <utility>
+#include <vector>
+#include "boost/process.hpp"
+#include "fmt/core.h"
+#include "gtest/gtest.h"
+
+namespace bp = boost::process;
+
+namespace {
+/// Runs the Java FloatGenerator helper and collects the test cases it prints.
+///
+/// The helper is started as `java FloatGenerator <double|float> <count>` with
+/// the directory of the current executable as its working directory. Its
+/// standard output is read as one boolean token followed by pairs of
+/// (string, integer bit pattern).
+///
+/// @tparam T Floating-point type of the generated values (float or double).
+/// @param count Number of test cases to request from the helper.
+/// @return Tuple of (whether the helper reported the JDK-4511638 behavior, the
+/// generated values, the strings Java printed for those values).
+/// @throws std::runtime_error if the helper exits with a non-zero code.
+template <typename T>
+std::tuple<bool, std::vector<T>, std::vector<std::string>>
+generateFloatTestCases(int count) {
+  const auto workDir =
+      std::filesystem::canonical("/proc/self/exe").parent_path();
+  bp::ipstream pipeStream;
+  bp::child c(
+      fmt::format(
+          "java FloatGenerator {} {}",
+          std::is_same_v<T, double> ? "double" : "float",
+          count),
+      bp::start_dir(workDir.string()),
+      bp::std_out > pipeStream);
+
+  std::string buggyJavaVersion;
+  std::vector<T> values;
+  std::vector<std::string> expects;
+  values.reserve(count);
+  expects.reserve(count);
+
+  // The first token tells whether the JVM shows the old formatting behavior.
+  pipeStream >> buggyJavaVersion;
+
+  // Integer type of the same width as T that carries the raw bit pattern.
+  using CarrierInt =
+      std::conditional_t<std::is_same_v<T, double>, int64_t, int32_t>;
+  static_assert(sizeof(CarrierInt) == sizeof(T));
+  CarrierInt carrierInt;
+  std::string expect;
+
+  while (pipeStream >> expect >> carrierInt) {
+    // Copy the bits with memcpy: reading an integer object through a
+    // floating-point reference violates the strict aliasing rule.
+    T value;
+    std::memcpy(&value, &carrierInt, sizeof(T));
+    values.push_back(value);
+    expects.push_back(expect);
+  }
+
+  // The pipe is drained before waiting so the child cannot block on a full
+  // pipe.
+  c.wait();
+  const int result = c.exit_code();
+  if (result != 0) {
+    throw std::runtime_error(
+        fmt::format("Process exited with code: {}", result));
+  }
+
+  // Move the vectors into the tuple instead of copying them.
+  return {buggyJavaVersion == "true", std::move(values), std::move(expects)};
+}
+
+/// Casts every value to VARCHAR with the Velox converter and compares the
+/// result with the string produced by Java.
+///
+/// @param values Floating-point inputs.
+/// @param expects expects[i] is the Java string for values[i]; must have the
+/// same size as values.
+/// @param buggyJavaVersion When true, the expected strings may be longer or
+/// incorrect (JDK-4511638), so a differing result is still accepted if it is
+/// not longer than the expected string and parses back to the same value.
+template <typename T>
+void testCastToVarchar(
+    const std::vector<T>& values,
+    const std::vector<std::string>& expects,
+    bool buggyJavaVersion) {
+  using namespace facebook::velox;
+  util::Converter<TypeKind::VARCHAR> convertor;
+
+  ASSERT_EQ(values.size(), expects.size());
+
+  for (size_t i = 0; i < values.size(); ++i) {
+    const auto& value = values[i];
+    const auto& expect = expects[i];
+    // A failed cast is mapped to an empty string, which never matches.
+    const auto actual = convertor.tryCast(value).value_or("");
+
+    if (!buggyJavaVersion) {
+      EXPECT_EQ(actual, expect);
+      continue;
+    }
+
+    // Old java (< 19) may produce longer or incorrect decimal.
+    // See https://bugs.openjdk.org/browse/JDK-4511638.
+    // e.g.
+    // Actual       | JDK <= 18            | JDK 19
+    // 7.5371334E25 | 7.5371335E25         | 7.5371334E25  # incorrect
+    // 1.0E23       | 9.999999999999999E22 | 1.0E23        # longer
+    //
+    // The empty check keeps std::stod/std::stof from throwing
+    // std::invalid_argument when the cast itself failed.
+    EXPECT_TRUE(
+        actual == expect ||
+        // Shorter but same decimal
+        (!actual.empty() && actual.size() <= expect.size() &&
+         (std::is_same_v<T, double> ? std::stod(actual)
+                                    : std::stof(actual)) == value));
+
+    if (actual != expect) {
+      std::cerr << "Warning: " << actual << " != " << expect << '\n';
+    }
+  }
+}
+} // namespace
+
+// Checks float -> VARCHAR casting against 10'000 Java-generated cases.
+TEST(FloatToString, float) {
+  const auto cases = generateFloatTestCases<float>(10'000);
+  const auto& [javaIsBuggy, inputs, javaStrings] = cases;
+  testCastToVarchar(inputs, javaStrings, javaIsBuggy);
+}
+
+// Checks double -> VARCHAR casting against 10'000 Java-generated cases.
+TEST(FloatToString, double) {
+  const auto cases = generateFloatTestCases<double>(10'000);
+  const auto& [javaIsBuggy, inputs, javaStrings] = cases;
+  testCastToVarchar(inputs, javaStrings, javaIsBuggy);
+}
@@ -123,4 +123,15 @@ if(VELOX_ENABLE_BENCHMARKS)
     GTest::gtest_main
     gflags::gflags
     glog::glog)
+
+  # Benchmark executable built from FloatingPointBenchmark.cpp. It links
+  # velox_type, folly and folly's benchmark library, plus gtest, gflags and
+  # glog. NOTE(review): presumably measures the floating-point to string cast
+  # — confirm against the benchmark source.
+  add_executable(velox_floating_point_benchmark FloatingPointBenchmark.cpp)
+  target_link_libraries(
+    velox_floating_point_benchmark
+    velox_type
+    Folly::folly
+    Folly::follybenchmark
+    GTest::gtest
+    GTest::gtest_main
+    gflags::gflags
+    glog::glog)
 endif()