From dfa5a9eed10420f89e25e3a14b3b311ae482b2c5 Mon Sep 17 00:00:00 2001
From: Jonas Rembser <jonas.rembser@cern.ch>
Date: Thu, 25 Jun 2026 12:31:55 +0200
Subject: [PATCH] [Minuit2] Fix analytical Hessian/G2 transform for parameters
 with limits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a parameter has limits, Minuit2 minimizes in an internal coordinate
q that is a non-linear function of the external parameter. Converting a
user-provided *external* Hessian (or G2) to internal coordinates
therefore requires, on the diagonal, an extra term involving the second
derivative of the transformation:

```
H_int(i,i) = (dx/dq)^2 * H_ext(i,i) + (d^2x/dq^2) * g_ext(i)
```

`AnalyticalGradientCalculator::Hessian()` and `::G2()` only applied the
first-order Jacobian factor `(dx/dq)^2` and dropped the `(d^2x/dq^2) *
g_ext(i)` term. The numerical path does not have this problem because it
differentiates directly in internal coordinates, so the term is included
implicitly.

This was introduced in 888a7677f7b ("[math][minuit2] First
implementation in Minuit2 of mNHesse using external Hessian
calculator"), which added the external->internal Hessian/G2 transform.

At the minimum the external gradient is ~0, so the missing term vanishes
and the final errors are correct. But it is non-zero everywhere else, in
particular at the seeding point. When a parameter starts close to a
limit (dx/dq small) and far from the solution (g_ext large), the dropped
term dominates: the seed G2/covariance built by MnSeedGenerator is
corrupted and Migrad can take a catastrophic first step, silently
converging to a wrong result that is still reported as valid. Relying on
the numerical Hessian for the same fit works fine.

Fix this by adding the missing curvature term:
 * add `D2Int2Ext()` to Sin/SqrtLow/SqrtUp parameter transformations and
   a dispatching `MnUserTransformation::D2Int2Ext()` (returns 0 without
   limits, so unlimited fits are unchanged)
 * add `(d^2x/dq^2) * g_ext(i)` to the diagonal in
   `AnalyticalGradientCalculator::Hessian()` and both branches of
   `::G2()`

The off-diagonal entries are unchanged: the transformation is
per-parameter (diagonal), so mixed second derivatives only get the
`(dx/dq_i)(dx/dq_j)` factor.

Add a regression test (Minuit2.AnalyticalHessianLimitTransformation). It
does not rely on the end-to-end Migrad convergence behaviour, which is
chaotic and compiler/optimization dependent for the reproducer, but
instead checks the analytical internal Hessian/G2 against a central
finite difference of the internal gradient at a near-limit, non-minimum
point. Without the fix the G2 value is off by several orders of
magnitude and has the wrong sign.

Closes #22692.

🤖 Done with the help of AI.
---
 .../inc/Minuit2/MnParameterTransformation.h   |   7 +
 .../inc/Minuit2/MnUserTransformation.h        |   1 +
 .../src/AnalyticalGradientCalculator.cxx      |  22 +-
 .../minuit2/src/MnParameterTransformation.cxx |  20 ++
 math/minuit2/src/MnUserTransformation.cxx     |  19 ++
 math/minuit2/test/testMinuit2.cxx             | 190 ++++++++++++++++++
 6 files changed, 254 insertions(+), 5 deletions(-)

diff --git a/math/minuit2/inc/Minuit2/MnParameterTransformation.h b/math/minuit2/inc/Minuit2/MnParameterTransformation.h
index 3262f2de03ded..a9158cfc4f0de 100644
--- a/math/minuit2/inc/Minuit2/MnParameterTransformation.h
+++ b/math/minuit2/inc/Minuit2/MnParameterTransformation.h
@@ -26,6 +26,7 @@ class SinParameterTransformation {
    long double Int2ext(long double Value, long double Upper, long double Lower) const;
    long double Ext2int(long double Value, long double Upper, long double Lower, const MnMachinePrecision &) const;
    long double DInt2Ext(long double Value, long double Upper, long double Lower) const;
+   long double D2Int2Ext(long double Value, long double Upper, long double Lower) const;
    long double DExt2Int(long double Value, long double Upper, long double Lower) const;
 };
 
@@ -46,6 +47,9 @@ class SqrtLowParameterTransformation /* : public ParameterTransformation */ {
    // derivative of transformation from internal to external
    long double DInt2Ext(long double Value, long double Lower) const;
 
+   // second derivative of transformation from internal to external
+   long double D2Int2Ext(long double Value, long double Lower) const;
+
    // derivative of transformation from external to internal
    long double DExt2Int(long double Value, long double Lower) const;
 };
@@ -67,6 +71,9 @@ class SqrtUpParameterTransformation /* : public ParameterTransformation */ {
    // derivative of transformation from internal to external
    long double DInt2Ext(long double Value, long double Upper) const;
 
+   // second derivative of transformation from internal to external
+   long double D2Int2Ext(long double Value, long double Upper) const;
+
    // derivative of transformation from external to internal
    long double DExt2Int(long double Value, long double Upper) const;
 };
diff --git a/math/minuit2/inc/Minuit2/MnUserTransformation.h b/math/minuit2/inc/Minuit2/MnUserTransformation.h
index a7814a7fdf73e..bd7e63eb32f8f 100644
--- a/math/minuit2/inc/Minuit2/MnUserTransformation.h
+++ b/math/minuit2/inc/Minuit2/MnUserTransformation.h
@@ -90,6 +90,7 @@ class MnUserTransformation {
 
    // Index = internal Parameter
    double DInt2Ext(unsigned int, double) const;
+   double D2Int2Ext(unsigned int, double) const;
    double DExt2Int(unsigned int, double) const;
 
    //   // Index = external Parameter
diff --git a/math/minuit2/src/AnalyticalGradientCalculator.cxx b/math/minuit2/src/AnalyticalGradientCalculator.cxx
index 66f8b12728b82..b38956523d86c 100644
--- a/math/minuit2/src/AnalyticalGradientCalculator.cxx
+++ b/math/minuit2/src/AnalyticalGradientCalculator.cxx
@@ -72,13 +72,15 @@ bool AnalyticalGradientCalculator::Hessian(const MinimumParameters &par, MnAlgeb
    unsigned int n = par.Vec().size();
    assert(hmat.size() == n *(n+1)/2);
    // compute external Hessian and then transform
-   std::vector<double> extHessian = fGradFunc.Hessian(fTransformation(par.Vec()));
+   std::vector<double> extParams = fTransformation(par.Vec());
+   std::vector<double> extHessian = fGradFunc.Hessian(extParams);
    if (extHessian.empty()) {
       MnPrint print("AnalyticalGradientCalculator::Hessian");
       print.Info("FCN cannot compute Hessian matrix");
       return false;
    }
    unsigned int next = sqrt(extHessian.size());
+   std::vector<double> extGradient = fGradFunc.Gradient(extParams);
    // we need now to transform the matrix from external to internal coordinates
    for (unsigned int i = 0; i < n; i++) {
       unsigned int iext = fTransformation.ExtOfInt(i);
@@ -87,6 +89,11 @@ bool AnalyticalGradientCalculator::Hessian(const MinimumParameters &par, MnAlgeb
          double dxdj = fTransformation.DInt2Ext(j, par.Vec()(j));
          unsigned int jext = fTransformation.ExtOfInt(j);
          hmat(i, j) = dxdi * extHessian[iext*next+ jext] * dxdj;
+         if (i == j) {
+            double d2xdi2 = fTransformation.D2Int2Ext(i, par.Vec()(i));
+            if (d2xdi2 != 0.)
+               hmat(i, j) += d2xdi2 * extGradient[iext];
+         }
       }
    }
    return true;
@@ -100,9 +107,12 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV
 
    MnPrint print("AnalyticalGradientCalculator::G2");
 
+   std::vector<double> extParams = fTransformation(par.Vec());
+   std::vector<double> extGradient = fGradFunc.Gradient(extParams);
+
    // --- Preferred path: direct G2 from FCN ---
    if (fGradFunc.HasG2()) {
-      std::vector<double> extG2 = fGradFunc.G2(fTransformation(par.Vec()));
+      std::vector<double> extG2 = fGradFunc.G2(extParams);
       if (extG2.empty()) {
          print.Info("FCN cannot compute the 2nd derivatives vector (G2)");
          return false;
@@ -111,7 +121,8 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV
       for (unsigned int i = 0; i < n; i++) {
          const unsigned int iext = fTransformation.ExtOfInt(i);
          const double dxdi = fTransformation.DInt2Ext(i, par.Vec()(i));
-         g2(i) = dxdi * dxdi * extG2[iext];
+         const double d2xdi2 = fTransformation.D2Int2Ext(i, par.Vec()(i));
+         g2(i) = dxdi * dxdi * extG2[iext] + d2xdi2 * extGradient[iext];
       }
       return true;
    }
@@ -122,7 +133,7 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV
       return false;
    }
 
-   std::vector<double> extHessian = fGradFunc.Hessian(fTransformation(par.Vec()));
+   std::vector<double> extHessian = fGradFunc.Hessian(extParams);
    if (extHessian.empty()) {
       print.Info("FCN cannot compute Hessian matrix (needed to derive G2)");
       return false;
@@ -141,7 +152,8 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV
       const unsigned int iext = fTransformation.ExtOfInt(i);
       const double diag = extHessian[iext * nExt + iext];
       const double dxdi = fTransformation.DInt2Ext(i, par.Vec()(i));
-      g2(i) = dxdi * dxdi * diag;
+      const double d2xdi2 = fTransformation.D2Int2Ext(i, par.Vec()(i));
+      g2(i) = dxdi * dxdi * diag + d2xdi2 * extGradient[iext];
    }
    return true;
 }
diff --git a/math/minuit2/src/MnParameterTransformation.cxx b/math/minuit2/src/MnParameterTransformation.cxx
index d5c3200a19895..592c24ce5099e 100644
--- a/math/minuit2/src/MnParameterTransformation.cxx
+++ b/math/minuit2/src/MnParameterTransformation.cxx
@@ -57,6 +57,12 @@ long double SinParameterTransformation::DInt2Ext(long double Value, long double
    return 0.5 * ((Upper - Lower) * std::cos(Value));
 }
 
+long double SinParameterTransformation::D2Int2Ext(long double Value, long double Upper, long double Lower) const
+{
+   // return the second derivative of the transformation d^2 Ext/ d Int^2
+   return -0.5 * ((Upper - Lower) * std::sin(Value));
+}
+
 long double SinParameterTransformation::DExt2Int(long double Value, long double Upper, long double Lower) const
 {
    // return the derivative of the transformation d Int/ d Ext
@@ -89,6 +95,13 @@ long double SqrtLowParameterTransformation::DInt2Ext(long double value, long dou
    return val;
 }
 
+long double SqrtLowParameterTransformation::D2Int2Ext(long double value, long double) const
+{
+   // second derivative of internal to external transformation : d^2 (Int2Ext) / d Int^2
+   long double val = std::pow(value * value + 1., -1.5);
+   return val;
+}
+
 long double SqrtLowParameterTransformation::DExt2Int(long double value, long double lower) const
 {
    // derivative of internal to external transformation   :  d (Ext2Int) / d Ext
@@ -122,6 +135,13 @@ long double SqrtUpParameterTransformation::DInt2Ext(long double value, long doub
    return val;
 }
 
+long double SqrtUpParameterTransformation::D2Int2Ext(long double value, long double) const
+{
+   // second derivative of internal to external transformation : d^2 (Int2Ext) / d Int^2
+   long double val = -std::pow(value * value + 1., -1.5);
+   return val;
+}
+
 long double SqrtUpParameterTransformation::DExt2Int(long double value, long double upper) const
 {
    // derivative of internal to external transformation :  d (Ext2Int ) / d Ext
diff --git a/math/minuit2/src/MnUserTransformation.cxx b/math/minuit2/src/MnUserTransformation.cxx
index 9bf4f5a4d907a..e957395e429c9 100644
--- a/math/minuit2/src/MnUserTransformation.cxx
+++ b/math/minuit2/src/MnUserTransformation.cxx
@@ -227,6 +227,25 @@ double MnUserTransformation::DInt2Ext(unsigned int i, double val) const
    return dd;
 }
 
+double MnUserTransformation::D2Int2Ext(unsigned int i, double val) const
+{
+   // return the second derivative of the int->ext transformation: d^2 Pext(i) / d Pint(i)^2
+   // for the parameter i with value val
+
+   double dd = 0.;
+   if (fParameters[fExtOfInt[i]].HasLimits()) {
+      if (fParameters[fExtOfInt[i]].HasUpperLimit() && fParameters[fExtOfInt[i]].HasLowerLimit())
+         dd = fDoubleLimTrafo.D2Int2Ext(val, fParameters[fExtOfInt[i]].UpperLimit(),
+                                        fParameters[fExtOfInt[i]].LowerLimit());
+      else if (fParameters[fExtOfInt[i]].HasUpperLimit() && !fParameters[fExtOfInt[i]].HasLowerLimit())
+         dd = fUpperLimTrafo.D2Int2Ext(val, fParameters[fExtOfInt[i]].UpperLimit());
+      else
+         dd = fLowerLimTrafo.D2Int2Ext(val, fParameters[fExtOfInt[i]].LowerLimit());
+   }
+
+   return dd;
+}
+
 double MnUserTransformation::DExt2Int(unsigned int i, double val) const
 {
    // return the derivative of the ext->int transformation: dPint(i) / dPext(i)
diff --git a/math/minuit2/test/testMinuit2.cxx b/math/minuit2/test/testMinuit2.cxx
index e12ad691966da..9a50b6140350e 100644
--- a/math/minuit2/test/testMinuit2.cxx
+++ b/math/minuit2/test/testMinuit2.cxx
@@ -5,11 +5,19 @@
 #include <Minuit2/FCNBase.h>
 #include <Minuit2/MnMigrad.h>
 #include <Minuit2/MnUserParameters.h>
+#include <Minuit2/MnUserParameterState.h>
+#include <Minuit2/MnUserTransformation.h>
+#include <Minuit2/AnalyticalGradientCalculator.h>
+#include <Minuit2/MinimumParameters.h>
+#include <Minuit2/FunctionGradient.h>
+#include <Minuit2/MnMatrix.h>
 #include <Minuit2/FunctionMinimum.h>
 #include <Minuit2/MnPrint.h>
 
 #include <gtest/gtest.h>
 
+#include <cmath>
+
 class QuadraticFCNBase : public ROOT::Minuit2::FCNBase {
 public:
    double operator()(const std::vector<double> &p) const override
@@ -220,3 +228,185 @@ TEST(Minuit2, NoG2CallsWhenFCHasNoG2)
    EXPECT_EQ(RosenbrockFCNWithoutG2::G2CallCounter(), 0)
       << "The G2() method was called, even though we claim to not implement it!";
 }
+
+// Least-squares fit of a simplified, piecewise binding-kinetics model with an
+// optional analytical Hessian. The model gradient and Hessian are exact. The
+// two rate parameters (ka, kd) have a physical lower limit of zero and are
+// started far below the solution.
+//
+// When parameters have limits, the internal<->external parameter transformation
+// is non-linear, so converting the user-provided external Hessian (and G2) to
+// internal coordinates requires an extra curvature term proportional to the
+// external gradient. Without it, the seed covariance is corrupted and, for this
+// configuration, Migrad silently diverges to a completely wrong minimum while
+// still reporting validity.
+class BindingKineticsFCN : public ROOT::Minuit2::FCNBase {
+public:
+   explicit BindingKineticsFCN(bool hasHessian) : fHasHessian(hasHessian) {}
+
+   double Up() const override { return 1.0; }
+   bool HasGradient() const override { return true; }
+   bool HasHessian() const override { return fHasHessian; }
+
+   double operator()(std::vector<double> const &p) const override
+   {
+      double chi2 = 0.0;
+      for (std::size_t i = 0; i < fX.size(); ++i) {
+         const double r = (fY[i] - Model(fX[i], p)) / fErr;
+         chi2 += r * r;
+      }
+      return chi2;
+   }
+
+   std::vector<double> Gradient(std::vector<double> const &p) const override
+   {
+      std::vector<double> grad(2, 0.0);
+      for (std::size_t i = 0; i < fX.size(); ++i) {
+         const double r = (fY[i] - Model(fX[i], p)) / fErr;
+         const auto g = ModelGradient(fX[i], p);
+         for (int j = 0; j < 2; ++j)
+            grad[j] -= 2.0 * r * g[j] / fErr;
+      }
+      return grad;
+   }
+
+   std::vector<double> Hessian(std::vector<double> const &p) const override
+   {
+      std::vector<double> hess(4, 0.0);
+      for (std::size_t i = 0; i < fX.size(); ++i) {
+         const double r = (fY[i] - Model(fX[i], p)) / fErr;
+         const auto g = ModelGradient(fX[i], p);
+         const auto h = ModelHessian(fX[i], p);
+         for (int j = 0; j < 2; ++j)
+            for (int k = 0; k < 2; ++k)
+               hess[j * 2 + k] -= 2.0 / fErr * (r * h[j * 2 + k] - g[j] * g[k] / fErr);
+      }
+      return hess;
+   }
+
+private:
+   static double Model(double x, std::vector<double> const &p)
+   {
+      const double ka = p[0];
+      const double kd = p[1];
+      return x < 100 ? 1.0 - std::exp(x * (-ka - kd))
+                     : (1.0 - std::exp(-100.0 * ka - 100.0 * kd)) * std::exp(-kd * (x - 100.0));
+   }
+
+   static std::vector<double> ModelGradient(double x, std::vector<double> const &p)
+   {
+      const double ka = p[0];
+      const double kd = p[1];
+      const double g0 = x < 100 ? x * std::exp(-x * (ka + kd)) : 100.0 * std::exp(-100.0 * ka - kd * x);
+      const double g1 = x < 100 ? x * std::exp(-x * (ka + kd))
+                                : ((1.0 - std::exp(100.0 * ka + 100.0 * kd)) * (x - 100.0) + 100.0) *
+                                     std::exp(-100.0 * ka - kd * (x - 100.0) - 100.0 * kd);
+      return {g0, g1};
+   }
+
+   static std::vector<double> ModelHessian(double x, std::vector<double> const &p)
+   {
+      const double ka = p[0];
+      const double kd = p[1];
+      const double h00 = x < 100 ? -x * x * std::exp(-x * (ka + kd)) : -10000.0 * std::exp(-100.0 * ka - kd * x);
+      const double h01 = x < 100 ? -x * x * std::exp(-x * (ka + kd)) : -100.0 * x * std::exp(-100.0 * ka - kd * x);
+      const double h11 =
+         x < 100 ? -x * x * std::exp(-x * (ka + kd))
+                 : (-200.0 * x - (1.0 - std::exp(100.0 * ka + 100.0 * kd)) * (x - 100.0) * (x - 100.0) + 10000.0) *
+                      std::exp(-100.0 * ka - kd * (x - 100.0) - 100.0 * kd);
+      return {h00, h01, h01, h11};
+   }
+
+   bool fHasHessian;
+
+   static constexpr double fErr = 0.001;
+
+   // clang-format off
+   inline static const std::vector<double> fX = {
+      0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150,
+      160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300};
+
+   // Generated with Model(x, {0.01, 0.01}) plus normal noise of width fErr
+   inline static const std::vector<double> fY = {
+      0.0014677825891482553, 0.18188013378785522, 0.32992558356212337, 0.45041553188395367,
+      0.54794601921519193,   0.63195112024476019, 0.69853633449106323, 0.75286490803859663,
+      0.7981616977365642,    0.83331949155031482, 0.86616863624896234, 0.78174922906099609,
+      0.70818268046233734,   0.64271424905745012, 0.57898830590860717, 0.52425958636511283,
+      0.47459845673511991,   0.43080972773016701, 0.38834996655153947, 0.35178861510658521,
+      0.31925150166578198,   0.28732404660033928, 0.26252634588235924, 0.23540772239857896,
+      0.21266927896262663,   0.19394038874520433, 0.17446775399099632, 0.15841619132882989,
+      0.14478862098494658,   0.13111385920544755, 0.1156468064187659};
+   // clang-format on
+};
+
+// When a parameter has limits, the internal<->external parameter transformation
+// q -> x(q) is non-linear, so the Hessian and G2 (second derivatives) in
+// internal coordinates carry an extra diagonal term beyond (dx/dq)^2 H_ext:
+//
+//   H_int(i,i) = (dx/dq)^2 H_ext(i,i) + (d^2x/dq^2) g_ext(i)
+//
+// The analytical-gradient path used to drop the (d^2x/dq^2) g_ext term. At a
+// point with a non-zero gradient (i.e. away from the minimum) and a parameter
+// close to its limit, this term dominates, which corrupted the Migrad seed and
+// could make the minimization silently diverge to a wrong but "valid" result.
+//
+// This is checked deterministically (no dependence on the chaotic convergence
+// behaviour) by comparing the analytical internal Hessian/G2 against a central
+// finite difference of the calculator's own internal gradient.
+//
+// Covers GitHub issue https://github.com/root-project/root/issues/22692
+TEST(Minuit2, AnalyticalHessianLimitTransformation)
+{
+   using namespace ROOT::Minuit2;
+
+   BindingKineticsFCN fcn(/*hasHessian=*/true);
+
+   MnUserParameters upar;
+   // Start far from the solution and close to the lower limit: here the external
+   // gradient is large and dx/dq is tiny, so the missing curvature term matters.
+   upar.Add("ka", 1e-7, 1e-8);
+   upar.Add("kd", 1e-7, 1e-8);
+   upar.SetLowerLimit(0, 0.0);
+   upar.SetLowerLimit(1, 0.0);
+
+   const MnUserParameterState state(upar);
+   const MnUserTransformation &trafo = state.Trafo();
+   AnalyticalGradientCalculator agc(fcn, trafo);
+
+   const unsigned int n = 2;
+
+   MnAlgebraicVector q(n);
+   for (unsigned int i = 0; i < n; ++i)
+      q(i) = state.IntParameters()[i];
+
+   // Internal gradient computed by the calculator at an internal point.
+   auto internalGradient = [&](const MnAlgebraicVector &qq) { return agc(MinimumParameters(qq, 0.0)).Grad(); };
+
+   // Analytical internal Hessian.
+   MnAlgebraicSymMatrix hAnalytic(n);
+   ASSERT_TRUE(agc.Hessian(MinimumParameters(q, 0.0), hAnalytic));
+
+   // Analytical internal G2 (diagonal of the Hessian) via the dedicated method.
+   MnAlgebraicVector g2Analytic(n);
+   ASSERT_TRUE(agc.G2(MinimumParameters(q, 0.0), g2Analytic));
+
+   // Ground truth: H_int(i,j) = d g_int(i) / d q_j by central finite difference.
+   const double h = 1e-6;
+   for (unsigned int j = 0; j < n; ++j) {
+      MnAlgebraicVector qp(q);
+      MnAlgebraicVector qm(q);
+      qp(j) += h;
+      qm(j) -= h;
+      const MnAlgebraicVector gp = internalGradient(qp);
+      const MnAlgebraicVector gm = internalGradient(qm);
+      for (unsigned int i = 0; i < n; ++i) {
+         const double fd = (gp(i) - gm(i)) / (2.0 * h);
+         // Relative tolerance: the second derivatives span several orders of
+         // magnitude, and dropping the curvature term changes them by ~100%.
+         const double tol = 1e-3 * std::max(std::abs(fd), 1.0);
+         EXPECT_NEAR(hAnalytic(i, j), fd, tol) << "Hessian mismatch at (" << i << "," << j << ")";
+         if (i == j)
+            EXPECT_NEAR(g2Analytic(i), fd, tol) << "G2 mismatch at " << i;
+      }
+   }
+}