From dfa5a9eed10420f89e25e3a14b3b311ae482b2c5 Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Thu, 25 Jun 2026 12:31:55 +0200 Subject: [PATCH] [Minuit2] Fix analytical Hessian/G2 transform for parameters with limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a parameter has limits, Minuit2 minimizes in an internal coordinate q that is a non-linear function of the external parameter. Converting a user-provided *external* Hessian (or G2) to internal coordinates therefore requires, on the diagonal, an extra term involving the second derivative of the transformation: ``` H_int(i,i) = (dx/dq)^2 * H_ext(i,i) + (d^2x/dq^2) * g_ext(i) ``` `AnalyticalGradientCalculator::Hessian()` and `::G2()` only applied the first-order Jacobian factor `(dx/dq)^2` and dropped the `(d^2x/dq^2) * g_ext(i)` term. The numerical path does not have this problem because it differentiates directly in internal coordinates, so the term is included implicitly. This was introduced in 888a7677f7b ("[math][minuit2] First implementation in Minuit2 of mNHesse using external Hessian calculator"), which added the external->internal Hessian/G2 transform. At the minimum the external gradient is ~0, so the missing term vanishes and the final errors are correct. But it is non-zero everywhere else, in particular at the seeding point. When a parameter starts close to a limit (dx/dq small) and far from the solution (g_ext large), the dropped term dominates: the seed G2/covariance built by MnSeedGenerator is corrupted and Migrad can take a catastrophic first step, silently converging to a wrong result that is still reported as valid. Relying on the numerical Hessian for the same fit works fine. Fix this by adding the missing curvature term: * add `D2Int2Ext()` to Sin/SqrtLow/SqrtUp parameter transformations and a dispatching `MnUserTransformation::D2Int2Ext()` (returns 0 without limits, so unlimited fits are unchanged) * add `(d^2x/dq^2) * g_ext(i)` to the diagonal in `AnalyticalGradientCalculator::Hessian()` and both branches of `::G2()` The off-diagonal entries are unchanged: the transformation is per-parameter (diagonal), so mixed second derivatives only get the `(dx/dq_i)(dx/dq_j)` factor. Add a regression test (Minuit2.AnalyticalHessianLimitTransformation). It does not rely on the end-to-end Migrad convergence behaviour, which is chaotic and compiler/optimization dependent for the reproducer, but instead checks the analytical internal Hessian/G2 against a central finite difference of the internal gradient at a near-limit, non-minimum point. Without the fix the G2 value is off by several orders of magnitude and has the wrong sign. Closes #22692. 🤖 Done with the help of AI. --- .../inc/Minuit2/MnParameterTransformation.h | 7 + .../inc/Minuit2/MnUserTransformation.h | 1 + .../src/AnalyticalGradientCalculator.cxx | 22 +- .../minuit2/src/MnParameterTransformation.cxx | 20 ++ math/minuit2/src/MnUserTransformation.cxx | 19 ++ math/minuit2/test/testMinuit2.cxx | 190 ++++++++++++++++++ 6 files changed, 254 insertions(+), 5 deletions(-) diff --git a/math/minuit2/inc/Minuit2/MnParameterTransformation.h b/math/minuit2/inc/Minuit2/MnParameterTransformation.h index 3262f2de03ded..a9158cfc4f0de 100644 --- a/math/minuit2/inc/Minuit2/MnParameterTransformation.h +++ b/math/minuit2/inc/Minuit2/MnParameterTransformation.h @@ -26,6 +26,7 @@ class SinParameterTransformation { long double Int2ext(long double Value, long double Upper, long double Lower) const; long double Ext2int(long double Value, long double Upper, long double Lower, const MnMachinePrecision &) const; long double DInt2Ext(long double Value, long double Upper, long double Lower) const; + long double D2Int2Ext(long double Value, long double Upper, long double Lower) const; long double DExt2Int(long double Value, long double Upper, long double Lower) const; }; @@ -46,6 +47,9 @@ class SqrtLowParameterTransformation /* : public ParameterTransformation */ { // derivative of transformation from internal to external long double DInt2Ext(long double Value, long double Lower) const; + // second derivative of transformation from internal to external + long double D2Int2Ext(long double Value, long double Lower) const; + // derivative of transformation from external to internal long double DExt2Int(long double Value, long double Lower) const; }; @@ -67,6 +71,9 @@ class SqrtUpParameterTransformation /* : public ParameterTransformation */ { // derivative of transformation from internal to external long double DInt2Ext(long double Value, long double Upper) const; + // second derivative of transformation from internal to external + long double D2Int2Ext(long double Value, long double Upper) const; + // derivative of transformation from external to internal long double DExt2Int(long double Value, long double Upper) const; }; diff --git a/math/minuit2/inc/Minuit2/MnUserTransformation.h b/math/minuit2/inc/Minuit2/MnUserTransformation.h index a7814a7fdf73e..bd7e63eb32f8f 100644 --- a/math/minuit2/inc/Minuit2/MnUserTransformation.h +++ b/math/minuit2/inc/Minuit2/MnUserTransformation.h @@ -90,6 +90,7 @@ class MnUserTransformation { // Index = internal Parameter double DInt2Ext(unsigned int, double) const; + double D2Int2Ext(unsigned int, double) const; double DExt2Int(unsigned int, double) const; // // Index = external Parameter diff --git a/math/minuit2/src/AnalyticalGradientCalculator.cxx b/math/minuit2/src/AnalyticalGradientCalculator.cxx index 66f8b12728b82..b38956523d86c 100644 --- a/math/minuit2/src/AnalyticalGradientCalculator.cxx +++ b/math/minuit2/src/AnalyticalGradientCalculator.cxx @@ -72,13 +72,15 @@ bool AnalyticalGradientCalculator::Hessian(const MinimumParameters &par, MnAlgeb unsigned int n = par.Vec().size(); assert(hmat.size() == n *(n+1)/2); // compute external Hessian and then transform - std::vector extHessian = fGradFunc.Hessian(fTransformation(par.Vec())); + std::vector extParams = fTransformation(par.Vec()); + std::vector extHessian = fGradFunc.Hessian(extParams); if (extHessian.empty()) { MnPrint print("AnalyticalGradientCalculator::Hessian"); print.Info("FCN cannot compute Hessian matrix"); return false; } unsigned int next = sqrt(extHessian.size()); + std::vector extGradient = fGradFunc.Gradient(extParams); // we need now to transform the matrix from external to internal coordinates for (unsigned int i = 0; i < n; i++) { unsigned int iext = fTransformation.ExtOfInt(i); @@ -87,6 +89,11 @@ bool AnalyticalGradientCalculator::Hessian(const MinimumParameters &par, MnAlgeb double dxdj = fTransformation.DInt2Ext(j, par.Vec()(j)); unsigned int jext = fTransformation.ExtOfInt(j); hmat(i, j) = dxdi * extHessian[iext*next+ jext] * dxdj; + if (i == j) { + double d2xdi2 = fTransformation.D2Int2Ext(i, par.Vec()(i)); + if (d2xdi2 != 0.) + hmat(i, j) += d2xdi2 * extGradient[iext]; + } } } return true; @@ -100,9 +107,12 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV MnPrint print("AnalyticalGradientCalculator::G2"); + std::vector extParams = fTransformation(par.Vec()); + std::vector extGradient = fGradFunc.Gradient(extParams); + // --- Preferred path: direct G2 from FCN --- if (fGradFunc.HasG2()) { - std::vector extG2 = fGradFunc.G2(fTransformation(par.Vec())); + std::vector extG2 = fGradFunc.G2(extParams); if (extG2.empty()) { print.Info("FCN cannot compute the 2nd derivatives vector (G2)"); return false; @@ -111,7 +121,8 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV for (unsigned int i = 0; i < n; i++) { const unsigned int iext = fTransformation.ExtOfInt(i); const double dxdi = fTransformation.DInt2Ext(i, par.Vec()(i)); - g2(i) = dxdi * dxdi * extG2[iext]; + const double d2xdi2 = fTransformation.D2Int2Ext(i, par.Vec()(i)); + g2(i) = dxdi * dxdi * extG2[iext] + d2xdi2 * extGradient[iext]; } return true; } @@ -122,7 +133,7 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV return false; } - std::vector extHessian = fGradFunc.Hessian(fTransformation(par.Vec())); + std::vector extHessian = fGradFunc.Hessian(extParams); if (extHessian.empty()) { print.Info("FCN cannot compute Hessian matrix (needed to derive G2)"); return false; @@ -141,7 +152,8 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV const unsigned int iext = fTransformation.ExtOfInt(i); const double diag = extHessian[iext * nExt + iext]; const double dxdi = fTransformation.DInt2Ext(i, par.Vec()(i)); - g2(i) = dxdi * dxdi * diag; + const double d2xdi2 = fTransformation.D2Int2Ext(i, par.Vec()(i)); + g2(i) = dxdi * dxdi * diag + d2xdi2 * extGradient[iext]; } return true; } diff --git a/math/minuit2/src/MnParameterTransformation.cxx b/math/minuit2/src/MnParameterTransformation.cxx index d5c3200a19895..592c24ce5099e 100644 --- a/math/minuit2/src/MnParameterTransformation.cxx +++ b/math/minuit2/src/MnParameterTransformation.cxx @@ -57,6 +57,12 @@ long double SinParameterTransformation::DInt2Ext(long double Value, long double return 0.5 * ((Upper - Lower) * std::cos(Value)); } +long double SinParameterTransformation::D2Int2Ext(long double Value, long double Upper, long double Lower) const +{ + // return the second derivative of the transformation d^2 Ext/ d Int^2 + return -0.5 * ((Upper - Lower) * std::sin(Value)); +} + long double SinParameterTransformation::DExt2Int(long double Value, long double Upper, long double Lower) const { // return the derivative of the transformation d Int/ d Ext @@ -89,6 +95,13 @@ long double SqrtLowParameterTransformation::DInt2Ext(long double value, long dou return val; } +long double SqrtLowParameterTransformation::D2Int2Ext(long double value, long double) const +{ + // second derivative of internal to external transformation : d^2 (Int2Ext) / d Int^2 + long double val = std::pow(value * value + 1., -1.5); + return val; +} + long double SqrtLowParameterTransformation::DExt2Int(long double value, long double lower) const { // derivative of internal to external transformation : d (Ext2Int) / d Ext @@ -122,6 +135,13 @@ long double SqrtUpParameterTransformation::DInt2Ext(long double value, long doub return val; } +long double SqrtUpParameterTransformation::D2Int2Ext(long double value, long double) const +{ + // second derivative of internal to external transformation : d^2 (Int2Ext) / d Int^2 + long double val = -std::pow(value * value + 1., -1.5); + return val; +} + long double SqrtUpParameterTransformation::DExt2Int(long double value, long double upper) const { // derivative of internal to external transformation : d (Ext2Int ) / d Ext diff --git a/math/minuit2/src/MnUserTransformation.cxx b/math/minuit2/src/MnUserTransformation.cxx index 9bf4f5a4d907a..e957395e429c9 100644 --- a/math/minuit2/src/MnUserTransformation.cxx +++ b/math/minuit2/src/MnUserTransformation.cxx @@ -227,6 +227,25 @@ double MnUserTransformation::DInt2Ext(unsigned int i, double val) const return dd; } +double MnUserTransformation::D2Int2Ext(unsigned int i, double val) const +{ + // return the second derivative of the int->ext transformation: d^2 Pext(i) / d Pint(i)^2 + // for the parameter i with value val + + double dd = 0.; + if (fParameters[fExtOfInt[i]].HasLimits()) { + if (fParameters[fExtOfInt[i]].HasUpperLimit() && fParameters[fExtOfInt[i]].HasLowerLimit()) + dd = fDoubleLimTrafo.D2Int2Ext(val, fParameters[fExtOfInt[i]].UpperLimit(), + fParameters[fExtOfInt[i]].LowerLimit()); + else if (fParameters[fExtOfInt[i]].HasUpperLimit() && !fParameters[fExtOfInt[i]].HasLowerLimit()) + dd = fUpperLimTrafo.D2Int2Ext(val, fParameters[fExtOfInt[i]].UpperLimit()); + else + dd = fLowerLimTrafo.D2Int2Ext(val, fParameters[fExtOfInt[i]].LowerLimit()); + } + + return dd; +} + double MnUserTransformation::DExt2Int(unsigned int i, double val) const { // return the derivative of the ext->int transformation: dPint(i) / dPext(i) diff --git a/math/minuit2/test/testMinuit2.cxx b/math/minuit2/test/testMinuit2.cxx index e12ad691966da..9a50b6140350e 100644 --- a/math/minuit2/test/testMinuit2.cxx +++ b/math/minuit2/test/testMinuit2.cxx @@ -5,11 +5,19 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include #include #include +#include + class QuadraticFCNBase : public ROOT::Minuit2::FCNBase { public: double operator()(const std::vector &p) const override @@ -220,3 +228,185 @@ TEST(Minuit2, NoG2CallsWhenFCHasNoG2) EXPECT_EQ(RosenbrockFCNWithoutG2::G2CallCounter(), 0) << "The G2() method was called, even though we claim to not implement it!"; } + +// Least-squares fit of a simplified, piecewise binding-kinetics model with an +// optional analytical Hessian. The model gradient and Hessian are exact. The +// two rate parameters (ka, kd) have a physical lower limit of zero and are +// started far below the solution. +// +// When parameters have limits, the internal<->external parameter transformation +// is non-linear, so converting the user-provided external Hessian (and G2) to +// internal coordinates requires an extra curvature term proportional to the +// external gradient. Without it, the seed covariance is corrupted and, for this +// configuration, Migrad silently diverges to a completely wrong minimum while +// still reporting validity. +class BindingKineticsFCN : public ROOT::Minuit2::FCNBase { +public: + explicit BindingKineticsFCN(bool hasHessian) : fHasHessian(hasHessian) {} + + double Up() const override { return 1.0; } + bool HasGradient() const override { return true; } + bool HasHessian() const override { return fHasHessian; } + + double operator()(std::vector const &p) const override + { + double chi2 = 0.0; + for (std::size_t i = 0; i < fX.size(); ++i) { + const double r = (fY[i] - Model(fX[i], p)) / fErr; + chi2 += r * r; + } + return chi2; + } + + std::vector Gradient(std::vector const &p) const override + { + std::vector grad(2, 0.0); + for (std::size_t i = 0; i < fX.size(); ++i) { + const double r = (fY[i] - Model(fX[i], p)) / fErr; + const auto g = ModelGradient(fX[i], p); + for (int j = 0; j < 2; ++j) + grad[j] -= 2.0 * r * g[j] / fErr; + } + return grad; + } + + std::vector Hessian(std::vector const &p) const override + { + std::vector hess(4, 0.0); + for (std::size_t i = 0; i < fX.size(); ++i) { + const double r = (fY[i] - Model(fX[i], p)) / fErr; + const auto g = ModelGradient(fX[i], p); + const auto h = ModelHessian(fX[i], p); + for (int j = 0; j < 2; ++j) + for (int k = 0; k < 2; ++k) + hess[j * 2 + k] -= 2.0 / fErr * (r * h[j * 2 + k] - g[j] * g[k] / fErr); + } + return hess; + } + +private: + static double Model(double x, std::vector const &p) + { + const double ka = p[0]; + const double kd = p[1]; + return x < 100 ? 1.0 - std::exp(x * (-ka - kd)) + : (1.0 - std::exp(-100.0 * ka - 100.0 * kd)) * std::exp(-kd * (x - 100.0)); + } + + static std::vector ModelGradient(double x, std::vector const &p) + { + const double ka = p[0]; + const double kd = p[1]; + const double g0 = x < 100 ? x * std::exp(-x * (ka + kd)) : 100.0 * std::exp(-100.0 * ka - kd * x); + const double g1 = x < 100 ? x * std::exp(-x * (ka + kd)) + : ((1.0 - std::exp(100.0 * ka + 100.0 * kd)) * (x - 100.0) + 100.0) * + std::exp(-100.0 * ka - kd * (x - 100.0) - 100.0 * kd); + return {g0, g1}; + } + + static std::vector ModelHessian(double x, std::vector const &p) + { + const double ka = p[0]; + const double kd = p[1]; + const double h00 = x < 100 ? -x * x * std::exp(-x * (ka + kd)) : -10000.0 * std::exp(-100.0 * ka - kd * x); + const double h01 = x < 100 ? -x * x * std::exp(-x * (ka + kd)) : -100.0 * x * std::exp(-100.0 * ka - kd * x); + const double h11 = + x < 100 ? -x * x * std::exp(-x * (ka + kd)) + : (-200.0 * x - (1.0 - std::exp(100.0 * ka + 100.0 * kd)) * (x - 100.0) * (x - 100.0) + 10000.0) * + std::exp(-100.0 * ka - kd * (x - 100.0) - 100.0 * kd); + return {h00, h01, h01, h11}; + } + + bool fHasHessian; + + static constexpr double fErr = 0.001; + + // clang-format off + inline static const std::vector fX = { + 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, + 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300}; + + // Generated with Model(x, {0.01, 0.01}) plus normal noise of width fErr + inline static const std::vector fY = { + 0.0014677825891482553, 0.18188013378785522, 0.32992558356212337, 0.45041553188395367, + 0.54794601921519193, 0.63195112024476019, 0.69853633449106323, 0.75286490803859663, + 0.7981616977365642, 0.83331949155031482, 0.86616863624896234, 0.78174922906099609, + 0.70818268046233734, 0.64271424905745012, 0.57898830590860717, 0.52425958636511283, + 0.47459845673511991, 0.43080972773016701, 0.38834996655153947, 0.35178861510658521, + 0.31925150166578198, 0.28732404660033928, 0.26252634588235924, 0.23540772239857896, + 0.21266927896262663, 0.19394038874520433, 0.17446775399099632, 0.15841619132882989, + 0.14478862098494658, 0.13111385920544755, 0.1156468064187659}; + // clang-format on +}; + +// When a parameter has limits, the internal<->external parameter transformation +// q -> x(q) is non-linear, so the Hessian and G2 (second derivatives) in +// internal coordinates carry an extra diagonal term beyond (dx/dq)^2 H_ext: +// +// H_int(i,i) = (dx/dq)^2 H_ext(i,i) + (d^2x/dq^2) g_ext(i) +// +// The analytical-gradient path used to drop the (d^2x/dq^2) g_ext term. At a +// point with a non-zero gradient (i.e. away from the minimum) and a parameter +// close to its limit, this term dominates, which corrupted the Migrad seed and +// could make the minimization silently diverge to a wrong but "valid" result. +// +// This is checked deterministically (no dependence on the chaotic convergence +// behaviour) by comparing the analytical internal Hessian/G2 against a central +// finite difference of the calculator's own internal gradient. +// +// Covers GitHub issue https://github.com/root-project/root/issues/22692 +TEST(Minuit2, AnalyticalHessianLimitTransformation) +{ + using namespace ROOT::Minuit2; + + BindingKineticsFCN fcn(/*hasHessian=*/true); + + MnUserParameters upar; + // Start far from the solution and close to the lower limit: here the external + // gradient is large and dx/dq is tiny, so the missing curvature term matters. + upar.Add("ka", 1e-7, 1e-8); + upar.Add("kd", 1e-7, 1e-8); + upar.SetLowerLimit(0, 0.0); + upar.SetLowerLimit(1, 0.0); + + const MnUserParameterState state(upar); + const MnUserTransformation &trafo = state.Trafo(); + AnalyticalGradientCalculator agc(fcn, trafo); + + const unsigned int n = 2; + + MnAlgebraicVector q(n); + for (unsigned int i = 0; i < n; ++i) + q(i) = state.IntParameters()[i]; + + // Internal gradient computed by the calculator at an internal point. + auto internalGradient = [&](const MnAlgebraicVector &qq) { return agc(MinimumParameters(qq, 0.0)).Grad(); }; + + // Analytical internal Hessian. + MnAlgebraicSymMatrix hAnalytic(n); + ASSERT_TRUE(agc.Hessian(MinimumParameters(q, 0.0), hAnalytic)); + + // Analytical internal G2 (diagonal of the Hessian) via the dedicated method. + MnAlgebraicVector g2Analytic(n); + ASSERT_TRUE(agc.G2(MinimumParameters(q, 0.0), g2Analytic)); + + // Ground truth: H_int(i,j) = d g_int(i) / d q_j by central finite difference. + const double h = 1e-6; + for (unsigned int j = 0; j < n; ++j) { + MnAlgebraicVector qp(q); + MnAlgebraicVector qm(q); + qp(j) += h; + qm(j) -= h; + const MnAlgebraicVector gp = internalGradient(qp); + const MnAlgebraicVector gm = internalGradient(qm); + for (unsigned int i = 0; i < n; ++i) { + const double fd = (gp(i) - gm(i)) / (2.0 * h); + // Relative tolerance: the second derivatives span several orders of + // magnitude, and dropping the curvature term changes them by ~100%. + const double tol = 1e-3 * std::max(std::abs(fd), 1.0); + EXPECT_NEAR(hAnalytic(i, j), fd, tol) << "Hessian mismatch at (" << i << "," << j << ")"; + if (i == j) + EXPECT_NEAR(g2Analytic(i), fd, tol) << "G2 mismatch at " << i; + } + } +}