diff --git a/velox/docs/functions/presto/math.rst b/velox/docs/functions/presto/math.rst index c394182c6615..f6d662562d79 100644 --- a/velox/docs/functions/presto/math.rst +++ b/velox/docs/functions/presto/math.rst @@ -341,6 +341,12 @@ Probability Functions: inverse_cdf probability (p): P(N < n). The a, b parameters must be positive real values (all of type DOUBLE). The probability p must lie on the interval [0, 1]. +.. function:: inverse_f_cdf(df1, df2, p) -> double + + Compute the inverse of the F cdf with the given ``df1`` (numerator degrees of freedom) and ``df2`` (denominator degrees of freedom) parameters + for the cumulative probability ``p``: P(N < n). The ``df1`` and ``df2`` parameters must be positive real numbers. + The probability ``p`` must lie on the interval [0, 1]. + .. function:: inverse_weibull_cdf(a, b, p) -> double Compute the inverse of the Weibull cdf with given parameters ``a``, ``b`` for the probability ``p``. diff --git a/velox/functions/prestosql/Probability.h b/velox/functions/prestosql/Probability.h index f0f92cf4e09b..67638dca9d69 100644 --- a/velox/functions/prestosql/Probability.h +++ b/velox/functions/prestosql/Probability.h @@ -186,6 +186,21 @@ struct InverseBetaCDFFunction { } }; +template +struct InverseFCDFFunction { + VELOX_DEFINE_FUNCTION_TYPES(T); + + FOLLY_ALWAYS_INLINE void + call(double& result, double df1, double df2, double p) { + VELOX_USER_CHECK((p >= 0) && (p <= 1), "p must be in the interval [0, 1]"); + VELOX_USER_CHECK_GT(df1, 0, "numerator df must be greater than 0"); + VELOX_USER_CHECK_GT(df2, 0, "denominator df must be greater than 0"); + + boost::math::fisher_f_distribution<> dist(df1, df2); + result = boost::math::quantile(dist, p); + } +}; + template struct ChiSquaredCDFFunction { VELOX_DEFINE_FUNCTION_TYPES(T); diff --git a/velox/functions/prestosql/registration/ProbabilityTrigonometricFunctionsRegistration.cpp b/velox/functions/prestosql/registration/ProbabilityTrigonometricFunctionsRegistration.cpp 
index 4e66db644a2c..1984f3edf914 100644 --- a/velox/functions/prestosql/registration/ProbabilityTrigonometricFunctionsRegistration.cpp +++ b/velox/functions/prestosql/registration/ProbabilityTrigonometricFunctionsRegistration.cpp @@ -50,6 +50,8 @@ void registerProbTrigFunctions(const std::string& prefix) { {prefix + "f_cdf"}); registerFunction( {prefix + "inverse_beta_cdf"}); + registerFunction( + {prefix + "inverse_f_cdf"}); registerFunction( {prefix + "inverse_normal_cdf"}); registerFunction( diff --git a/velox/functions/prestosql/tests/ProbabilityTest.cpp b/velox/functions/prestosql/tests/ProbabilityTest.cpp index 5cb4329ad375..7c877d45374f 100644 --- a/velox/functions/prestosql/tests/ProbabilityTest.cpp +++ b/velox/functions/prestosql/tests/ProbabilityTest.cpp @@ -268,6 +268,57 @@ TEST_F(ProbabilityTest, invBetaCDF) { VELOX_ASSERT_THROW(invBetaCDF(3, 5, 1.1), "p must be in the interval [0, 1]"); } +TEST_F(ProbabilityTest, inverseFCDF) { + const auto inverseFCDF = [&](std::optional df1, + std::optional df2, + std::optional p) { + return evaluateOnce("inverse_f_cdf(c0, c1, c2)", df1, df2, p); + }; + + EXPECT_EQ(inverseFCDF(2.0, 5.0, 0.0), 0.0); + EXPECT_EQ(inverseFCDF(2.0, 5.0, 0.5), 0.79876977693223561); + EXPECT_EQ(inverseFCDF(2.0, 5.0, 0.9), 3.779716078773951); + + EXPECT_EQ(inverseFCDF(2.0, 5.0, std::nullopt), std::nullopt); + EXPECT_EQ(inverseFCDF(2.0, std::nullopt, 3.7797), std::nullopt); + EXPECT_EQ(inverseFCDF(std::nullopt, 5.0, 3.7797), std::nullopt); + + EXPECT_EQ(inverseFCDF(kDoubleMax, 5.0, 1), kInf); + EXPECT_EQ(inverseFCDF(1, kDoubleMax, 1), kInf); + EXPECT_EQ(inverseFCDF(82.6, 901.10, 1), kInf); + EXPECT_EQ(inverseFCDF(kDoubleMin, 50.620, 1), kInf); + EXPECT_EQ( + inverseFCDF(kBigIntMax, 5.0, 0.93256230095450132), 3.7797000000000009); + EXPECT_EQ(inverseFCDF(76.901, kBigIntMax, 1), kInf); + EXPECT_EQ(inverseFCDF(2.0, 5.0, 1), kInf); + + // Test invalid inputs for df1. 
+ VELOX_ASSERT_THROW( + inverseFCDF(0, 3, 0.5), "numerator df must be greater than 0"); + VELOX_ASSERT_THROW( + inverseFCDF(kBigIntMin, 5.0, 0.999), + "numerator df must be greater than 0"); + + // Test invalid inputs for df2. + VELOX_ASSERT_THROW( + inverseFCDF(3, 0, 0.5), "denominator df must be greater than 0"); + VELOX_ASSERT_THROW( + inverseFCDF(2.0, kBigIntMin, 0.0001), + "denominator df must be greater than 0"); + + // Test invalid inputs for p. + VELOX_ASSERT_THROW( + inverseFCDF(3, 5, -0.1), "p must be in the interval [0, 1]"); + VELOX_ASSERT_THROW( + inverseFCDF(2.0, 5.0, kBigIntMin), "p must be in the interval [0, 1]"); + + // Test a combination of invalid inputs. + VELOX_ASSERT_THROW( + inverseFCDF(-1.2, 0, -0.1), "p must be in the interval [0, 1]"); + VELOX_ASSERT_THROW( + inverseFCDF(1, -kInf, -0.1), "p must be in the interval [0, 1]"); +} + TEST_F(ProbabilityTest, chiSquaredCDF) { const auto chiSquaredCDF = [&](std::optional df, std::optional value) {