diff --git a/doc/distributions/students_t.qbk b/doc/distributions/students_t.qbk index 3396048c5f..0a95080bdb 100644 --- a/doc/distributions/students_t.qbk +++ b/doc/distributions/students_t.qbk @@ -29,6 +29,11 @@ RealType beta, RealType sd, RealType hint = 100); + + // degrees of freedom inversion from a quantile and probability: + BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom( + RealType x, + RealType p); }; }} // namespaces @@ -106,6 +111,13 @@ For more information on this function see the [@http://www.itl.nist.gov/div898/handbook/prc/section2/prc222.htm NIST Engineering Statistics Handbook]. + BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom( + RealType x, + RealType p); + +Returns the degrees of freedom [nu] such that CDF(/x/; [nu]) = /p/. +Requires 0 < /p/ < 1 and /x/ [ne] 0, otherwise calls __domain_error. + [h4 Non-member Accessors] All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all @@ -164,6 +176,7 @@ without the subtraction implied above.]] [[skewness][if (v > 3) 0 else NaN ]] [[kurtosis][if (v > 4) 3 * (v - 2) \/ (v - 4) else NaN]] [[kurtosis excess][if (v > 4) 6 \/ (df - 4) else NaN]] +[[invert_probability_with_respect_to_degrees_of_freedom][Uses a 2nd-order Edgeworth expansion as initial guess for a numerical root finder]] ] If the moment index /k/ is less than /v/, then the moment is undefined. diff --git a/include/boost/math/distributions/students_t.hpp b/include/boost/math/distributions/students_t.hpp index 39f20d6e41..02f7497fbc 100644 --- a/include/boost/math/distributions/students_t.hpp +++ b/include/boost/math/distributions/students_t.hpp @@ -58,6 +58,10 @@ class students_t_distribution RealType sd, RealType hint = 100); + BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom( + RealType x, + RealType p); + private: // Data member: RealType df_; // degrees of freedom is a real number > 0 or +infinity. @@ -281,6 +285,11 @@ BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type +struct cdf_to_df_func +{ + BOOST_MATH_GPU_ENABLED cdf_to_df_func(RealType x_val, RealType p_val, bool c) + : x(x_val), p(p_val), comp(c) {} + + BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& df) + { + students_t_distribution t(df); + return comp ? + RealType(p - cdf(complement(t, x))) + : RealType(cdf(t, x) - p); + } + RealType x, p; + bool comp; +}; + +// +// Shared root-finding helper used by both find_degrees_of_freedom and +// invert_probability_with_respect_to_degrees_of_freedom. +// +template +BOOST_MATH_GPU_ENABLED RealType solve_for_degrees_of_freedom( + Func f, + RealType hint, + bool rising, + const char* function, + Policy const&) +{ + tools::eps_tolerance tol(policies::digits()); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::pair r = tools::bracket_and_solve_root( + f, hint, RealType(2), rising, tol, max_iter, Policy()); + RealType result = r.first + (r.second - r.first) / 2; + if (max_iter >= policies::get_max_root_iterations()) + { + return policies::raise_evaluation_error(function, // LCOV_EXCL_LINE + "Unable to locate solution in a reasonable time: either there is no answer to " // LCOV_EXCL_LINE + "the degrees of freedom or the answer is infinite. Current best guess is %1%", // LCOV_EXCL_LINE + result, Policy()); // LCOV_EXCL_LINE + } + return result; +} + +// +// Edgeworth warm-start for invert_probability_with_respect_to_degrees_of_freedom. +// On success writes a df estimate into 'result' and returns true. +// Returns false when no positive root is found; 'result' is left unchanged. +// +template +BOOST_MATH_GPU_ENABLED bool approximate_df_with_edgeworth_expansion(RealType x, RealType p, RealType& result) +{ + BOOST_MATH_STD_USING + // F(x; nu) ~ cdf_normal(x) - pdf_normal(x)*(x + x^3)/(4*nu) + pdf_normal(x)*(3x + 5x^3 + 7x^5 - 3x^7)/(96*nu^2) + // Substituting u = 1/nu and setting F(x; nu) = p gives b*u^2 - a*u + c = 0, where: + // a = pdf_normal(x) * (x + x^3) / 4 + // b = pdf_normal(x) * (3x + 5x^3 + 7x^5 - 3x^7) / 96 + // c = cdf_normal(x) - p + normal_distribution std_normal(0, 1); + RealType pdf_val = pdf(std_normal, x); + RealType cdf_val = cdf(std_normal, x); + + RealType x2 = x * x; + RealType x3 = x2 * x; + RealType x5 = x3 * x2; + RealType x7 = x5 * x2; + + RealType a = pdf_val * (x + x3) / 4; + RealType b = pdf_val * (3 * x + 5 * x3 + 7 * x5 - 3 * x7) / 96; + RealType c = cdf_val - p; + + RealType discriminant = a * a - 4 * b * c; + if (discriminant >= 0 && b != 0) + { + RealType sqrt_disc = sqrt(discriminant); + // The two roots of b*u^2 - a*u + c = 0. Pick the smallest positive u (= largest df = 1/u). + RealType u = (a - sqrt_disc) / (2 * b); + if (u <= 0) + u = (a + sqrt_disc) / (2 * b); + if (u > 0) + { + result = 1 / u; + return true; + } + } + return false; +} + } // namespace detail template @@ -332,16 +429,53 @@ BOOST_MATH_GPU_ENABLED RealType students_t_distribution::find_ hint = 1; detail::sample_size_func f(alpha, beta, sd, difference_from_mean); - tools::eps_tolerance tol(policies::digits()); - boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); - boost::math::pair r = tools::bracket_and_solve_root(f, hint, RealType(2), false, tol, max_iter, Policy()); - RealType result = r.first + (r.second - r.first) / 2; - if(max_iter >= policies::get_max_root_iterations()) + return detail::solve_for_degrees_of_freedom(f, hint, false, function, Policy()); +} + +template +BOOST_MATH_GPU_ENABLED RealType students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + RealType x, + RealType p) +{ + BOOST_MATH_STD_USING // for ADL of std functions + constexpr auto function = "boost::math::students_t_distribution<%1%>::invert_probability_with_respect_to_degrees_of_freedom"; + // + // Check for domain errors: + // + RealType error_result; + if (false == detail::check_probability(function, p, &error_result, Policy())) + return error_result; + + if (x == 0) { - return policies::raise_evaluation_error(function, "Unable to locate solution in a reasonable time: either there is no answer to how many degrees of freedom are required" // LCOV_EXCL_LINE - " or the answer is infinite. Current best guess is %1%", result, Policy()); // LCOV_EXCL_LINE + // CDF(0; df) = 0.5 for all df; only solvable when p == 0.5. + if (p == static_cast(0.5)) + return policies::raise_overflow_error(function, 0, Policy()); + return policies::raise_domain_error( + function, + "No degrees of freedom can satisfy CDF(0; df) == %1% (must be 0.5).", + p, Policy()); } - return result; + + // Edgeworth warm start: compute a df estimate; fall back to df_hint_fallback if it fails + // or is too inaccurate + RealType hint = static_cast(detail::df_hint_fallback); + if (detail::approximate_df_with_edgeworth_expansion(x, p, hint)) + { + // Check that approximation is at least somewhat close; + // for small degrees of freedom it does not fail but is very inaccurate. + students_t_distribution t_approx(hint); + RealType exact_cdf = cdf(t_approx, x); + RealType relative_error = fabs(exact_cdf - p) / p; + if (relative_error > static_cast(0.1)) + hint = static_cast(detail::df_hint_fallback); + } + // Root-find on f(df) = CDF(x; df) - p. + // CDF(x; df) is strictly increasing in df for x > 0, decreasing for x < 0. + // Use the smaller of p and q=1-p to avoid cancellation near 1. + RealType q = 1 - p; + detail::cdf_to_df_func f(x, p < q ? p : q, p < q ? false : true); + return detail::solve_for_degrees_of_freedom(f, hint, x > 0, function, Policy()); } template diff --git a/test/test_students_t.cpp b/test/test_students_t.cpp index a08cf75a01..4d5b42ba84 100644 --- a/test/test_students_t.cpp +++ b/test/test_students_t.cpp @@ -548,6 +548,126 @@ void test_spots(RealType) static_cast(1.0))), 9); + // Tests for invert_probability_with_respect_to_degrees_of_freedom. + // Each case is derived from the CDF spot tests above: the exact df is known, + // so we verify that inverting CDF(x; df) = p recovers df to tight tolerance. + // float has ~7 significant digits; large-df inversion is ill-conditioned at that precision, + // so use a looser tolerance for single precision. + RealType tol_inv_df = std::is_same::value + ? static_cast(0.1) // float: 0.1% + : static_cast(0.01); // double+: 0.01% + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(-6.96455673428326), + static_cast(0.01)), + static_cast(2), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(-3.36492999890721), + static_cast(0.01)), + static_cast(5), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(-0.559429644), + static_cast(0.3)), + static_cast(5), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(1.475884049), + static_cast(0.9)), + static_cast(5), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(-1.475884049), + static_cast(0.1)), + static_cast(5), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(-5.2410429995425), + static_cast(0.00001)), + static_cast(25), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(6.96455673428326), + static_cast(0.99)), + static_cast(2), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(2), + static_cast(0.610822886098362)), + static_cast(0.1), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(2), + static_cast(0.777242554908434)), + static_cast(0.5), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(2), + static_cast(0.822925875908677)), + static_cast(0.75), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(2), + static_cast(0.977114826753374)), + static_cast(1000), + tol_inv_df); + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(1), + static_cast(0.8413326478347855)), + static_cast(1e4), + tol_inv_df * static_cast(5)); // Looser tolerance for ill-conditioned problem with very large df + // Small df case 1: Edgeworth expansion breaks down for small degrees if freedom, use fallback + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(1e20), + static_cast(0.5244796002843015)), + static_cast(1e-3), + tol_inv_df); + // Small df case 2: Edgeworth expansion succeeds but is inaccurate, use fallback + BOOST_CHECK_CLOSE( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(2.0), + static_cast(0.500000000644961)), + static_cast(1e-10), + tol_inv_df); + // Domain error: p outside (0,1) +#ifndef BOOST_NO_EXCEPTIONS + BOOST_MATH_CHECK_THROW( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(2), + static_cast(-0.1)), + std::domain_error); + BOOST_MATH_CHECK_THROW( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(2), + static_cast(1.1)), + std::domain_error); + // x == 0, p != 0.5: no df can satisfy CDF(0; df) == p → domain error + BOOST_MATH_CHECK_THROW( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(0), + static_cast(0.3)), + std::domain_error); + // x == 0, p == 0.5: CDF(0; df) == 0.5 for all df → overflow (infinite solutions) + BOOST_MATH_CHECK_THROW( + students_t_distribution::invert_probability_with_respect_to_degrees_of_freedom( + static_cast(0), + static_cast(0.5)), + std::overflow_error); +#endif + // Test for large degrees of freedom when should be same as normal. RealType inf = std::numeric_limits::infinity(); RealType nan = std::numeric_limits::quiet_NaN();