Skip to content

Commit a5dc8a9

Browse files
committed
implement inverse_f_cdf() function
1 parent a976ba5 commit a5dc8a9

File tree

4 files changed

+74
-0
lines changed

4 files changed

+74
-0
lines changed

velox/docs/functions/presto/math.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,12 @@ Probability Functions: inverse_cdf
341341
probability (p): P(N < n). The a, b parameters must be positive real values (all of type DOUBLE).
342342
The probability p must lie on the interval [0, 1].
343343

344+
.. function:: inverse_f_cdf(df1, df2, p) -> double
345+
346+
Compute the inverse of the F cdf with the given ``df1`` (numerator degrees of freedom) and ``df2`` (denominator degrees of freedom) parameters
347+
for the cumulative probability (p): P(N < n). Both ``df1`` and ``df2`` must be positive real numbers.
348+
The probability p must lie on the interval [0, 1].
349+
344350
.. function:: inverse_weibull_cdf(a, b, p) -> double
345351

346352
Compute the inverse of the Weibull cdf with given parameters ``a``, ``b`` for the probability ``p``.

velox/functions/prestosql/Probability.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,21 @@ struct InverseBetaCDFFunction {
186186
}
187187
};
188188

189+
template <typename T>
190+
struct InverseFCDFFunction {
191+
VELOX_DEFINE_FUNCTION_TYPES(T);
192+
193+
FOLLY_ALWAYS_INLINE void
194+
call(double& result, double df1, double df2, double p) {
195+
VELOX_USER_CHECK((p >= 0) && (p <= 1), "p must be in the interval [0, 1]");
196+
VELOX_USER_CHECK_GT(df1, 0, "numerator df must be greater than 0");
197+
VELOX_USER_CHECK_GT(df2, 0, "denominator df must be greater than 0");
198+
199+
boost::math::fisher_f_distribution<> dist(df1, df2);
200+
result = boost::math::quantile(dist, p);
201+
}
202+
};
203+
189204
template <typename T>
190205
struct ChiSquaredCDFFunction {
191206
VELOX_DEFINE_FUNCTION_TYPES(T);

velox/functions/prestosql/registration/ProbabilityTrigonometricFunctionsRegistration.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ void registerProbTrigFunctions(const std::string& prefix) {
5050
{prefix + "f_cdf"});
5151
registerFunction<InverseBetaCDFFunction, double, double, double, double>(
5252
{prefix + "inverse_beta_cdf"});
53+
registerFunction<InverseFCDFFunction, double, double, double, double>(
54+
{prefix + "inverse_f_cdf"});
5355
registerFunction<InverseNormalCDFFunction, double, double, double, double>(
5456
{prefix + "inverse_normal_cdf"});
5557
registerFunction<PoissonCDFFunction, double, double, int32_t>(

velox/functions/prestosql/tests/ProbabilityTest.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,57 @@ TEST_F(ProbabilityTest, invBetaCDF) {
268268
VELOX_ASSERT_THROW(invBetaCDF(3, 5, 1.1), "p must be in the interval [0, 1]");
269269
}
270270

271+
// Exercises inverse_f_cdf(df1, df2, p): ordinary quantiles, null inputs,
// extreme degrees of freedom, and the error messages for invalid arguments.
TEST_F(ProbabilityTest, inverseFCDF) {
  // Evaluates the SQL expression once with the three (nullable) arguments.
  const auto quantileOf = [&](std::optional<double> df1,
                              std::optional<double> df2,
                              std::optional<double> p) {
    return evaluateOnce<double>("inverse_f_cdf(c0, c1, c2)", df1, df2, p);
  };

  // Ordinary quantiles for df1 = 2, df2 = 5.
  EXPECT_EQ(quantileOf(2.0, 5.0, 0.0), 0.0);
  EXPECT_EQ(quantileOf(2.0, 5.0, 0.5), 0.79876977693223561);
  EXPECT_EQ(quantileOf(2.0, 5.0, 0.9), 3.779716078773951);

  // A null in any argument position produces a null result.
  EXPECT_EQ(quantileOf(2.0, 5.0, std::nullopt), std::nullopt);
  EXPECT_EQ(quantileOf(2.0, std::nullopt, 3.7797), std::nullopt);
  EXPECT_EQ(quantileOf(std::nullopt, 5.0, 3.7797), std::nullopt);

  // Extreme degrees of freedom; every p = 1 case is expected to yield kInf.
  EXPECT_EQ(quantileOf(kDoubleMax, 5.0, 1), kInf);
  EXPECT_EQ(quantileOf(1, kDoubleMax, 1), kInf);
  EXPECT_EQ(quantileOf(82.6, 901.10, 1), kInf);
  EXPECT_EQ(quantileOf(kDoubleMin, 50.620, 1), kInf);
  EXPECT_EQ(
      quantileOf(kBigIntMax, 5.0, 0.93256230095450132), 3.7797000000000009);
  EXPECT_EQ(quantileOf(76.901, kBigIntMax, 1), kInf);
  EXPECT_EQ(quantileOf(2.0, 5.0, 1), kInf);

  // Test invalid inputs for df1.
  VELOX_ASSERT_THROW(
      quantileOf(0, 3, 0.5), "numerator df must be greater than 0");
  VELOX_ASSERT_THROW(
      quantileOf(kBigIntMin, 5.0, 0.999),
      "numerator df must be greater than 0");

  // Test invalid inputs for df2.
  VELOX_ASSERT_THROW(
      quantileOf(3, 0, 0.5), "denominator df must be greater than 0");
  VELOX_ASSERT_THROW(
      quantileOf(2.0, kBigIntMin, 0.0001),
      "denominator df must be greater than 0");

  // Test invalid inputs for p.
  VELOX_ASSERT_THROW(
      quantileOf(3, 5, -0.1), "p must be in the interval [0, 1]");
  VELOX_ASSERT_THROW(
      quantileOf(2.0, 5.0, kBigIntMin), "p must be in the interval [0, 1]");

  // Test a combination of invalid inputs: the error about p is the one
  // expected when the other arguments are invalid as well.
  VELOX_ASSERT_THROW(
      quantileOf(-1.2, 0, -0.1), "p must be in the interval [0, 1]");
  VELOX_ASSERT_THROW(
      quantileOf(1, -kInf, -0.1), "p must be in the interval [0, 1]");
}
321+
271322
TEST_F(ProbabilityTest, chiSquaredCDF) {
272323
const auto chiSquaredCDF = [&](std::optional<double> df,
273324
std::optional<double> value) {

0 commit comments

Comments
 (0)