Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions math/minuit2/inc/Minuit2/MnParameterTransformation.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class SinParameterTransformation {
long double Int2ext(long double Value, long double Upper, long double Lower) const;
long double Ext2int(long double Value, long double Upper, long double Lower, const MnMachinePrecision &) const;
long double DInt2Ext(long double Value, long double Upper, long double Lower) const;
long double D2Int2Ext(long double Value, long double Upper, long double Lower) const;
long double DExt2Int(long double Value, long double Upper, long double Lower) const;
};

Expand All @@ -46,6 +47,9 @@ class SqrtLowParameterTransformation /* : public ParameterTransformation */ {
// derivative of transformation from internal to external
long double DInt2Ext(long double Value, long double Lower) const;

// second derivative of transformation from internal to external
long double D2Int2Ext(long double Value, long double Lower) const;

// derivative of transformation from external to internal
long double DExt2Int(long double Value, long double Lower) const;
};
Expand All @@ -67,6 +71,9 @@ class SqrtUpParameterTransformation /* : public ParameterTransformation */ {
// derivative of transformation from internal to external
long double DInt2Ext(long double Value, long double Upper) const;

// second derivative of transformation from internal to external
long double D2Int2Ext(long double Value, long double Upper) const;

// derivative of transformation from external to internal
long double DExt2Int(long double Value, long double Upper) const;
};
Expand Down
1 change: 1 addition & 0 deletions math/minuit2/inc/Minuit2/MnUserTransformation.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class MnUserTransformation {

// Index = internal Parameter
double DInt2Ext(unsigned int, double) const;
double D2Int2Ext(unsigned int, double) const;
double DExt2Int(unsigned int, double) const;

// // Index = external Parameter
Expand Down
22 changes: 17 additions & 5 deletions math/minuit2/src/AnalyticalGradientCalculator.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,15 @@ bool AnalyticalGradientCalculator::Hessian(const MinimumParameters &par, MnAlgeb
unsigned int n = par.Vec().size();
assert(hmat.size() == n *(n+1)/2);
// compute external Hessian and then transform
std::vector<double> extHessian = fGradFunc.Hessian(fTransformation(par.Vec()));
std::vector<double> extParams = fTransformation(par.Vec());
std::vector<double> extHessian = fGradFunc.Hessian(extParams);
if (extHessian.empty()) {
MnPrint print("AnalyticalGradientCalculator::Hessian");
print.Info("FCN cannot compute Hessian matrix");
return false;
}
unsigned int next = sqrt(extHessian.size());
std::vector<double> extGradient = fGradFunc.Gradient(extParams);
// we need now to transform the matrix from external to internal coordinates
for (unsigned int i = 0; i < n; i++) {
unsigned int iext = fTransformation.ExtOfInt(i);
Expand All @@ -87,6 +89,11 @@ bool AnalyticalGradientCalculator::Hessian(const MinimumParameters &par, MnAlgeb
double dxdj = fTransformation.DInt2Ext(j, par.Vec()(j));
unsigned int jext = fTransformation.ExtOfInt(j);
hmat(i, j) = dxdi * extHessian[iext*next+ jext] * dxdj;
if (i == j) {
double d2xdi2 = fTransformation.D2Int2Ext(i, par.Vec()(i));
if (d2xdi2 != 0.)
hmat(i, j) += d2xdi2 * extGradient[iext];
}
}
}
return true;
Expand All @@ -100,9 +107,12 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV

MnPrint print("AnalyticalGradientCalculator::G2");

std::vector<double> extParams = fTransformation(par.Vec());
std::vector<double> extGradient = fGradFunc.Gradient(extParams);

// --- Preferred path: direct G2 from FCN ---
if (fGradFunc.HasG2()) {
std::vector<double> extG2 = fGradFunc.G2(fTransformation(par.Vec()));
std::vector<double> extG2 = fGradFunc.G2(extParams);
if (extG2.empty()) {
print.Info("FCN cannot compute the 2nd derivatives vector (G2)");
return false;
Expand All @@ -111,7 +121,8 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV
for (unsigned int i = 0; i < n; i++) {
const unsigned int iext = fTransformation.ExtOfInt(i);
const double dxdi = fTransformation.DInt2Ext(i, par.Vec()(i));
g2(i) = dxdi * dxdi * extG2[iext];
const double d2xdi2 = fTransformation.D2Int2Ext(i, par.Vec()(i));
g2(i) = dxdi * dxdi * extG2[iext] + d2xdi2 * extGradient[iext];
}
return true;
}
Expand All @@ -122,7 +133,7 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV
return false;
}

std::vector<double> extHessian = fGradFunc.Hessian(fTransformation(par.Vec()));
std::vector<double> extHessian = fGradFunc.Hessian(extParams);
if (extHessian.empty()) {
print.Info("FCN cannot compute Hessian matrix (needed to derive G2)");
return false;
Expand All @@ -141,7 +152,8 @@ bool AnalyticalGradientCalculator::G2(const MinimumParameters &par, MnAlgebraicV
const unsigned int iext = fTransformation.ExtOfInt(i);
const double diag = extHessian[iext * nExt + iext];
const double dxdi = fTransformation.DInt2Ext(i, par.Vec()(i));
g2(i) = dxdi * dxdi * diag;
const double d2xdi2 = fTransformation.D2Int2Ext(i, par.Vec()(i));
g2(i) = dxdi * dxdi * diag + d2xdi2 * extGradient[iext];
}
return true;
}
Expand Down
20 changes: 20 additions & 0 deletions math/minuit2/src/MnParameterTransformation.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ long double SinParameterTransformation::DInt2Ext(long double Value, long double
return 0.5 * ((Upper - Lower) * std::cos(Value));
}

long double SinParameterTransformation::D2Int2Ext(long double Value, long double Upper, long double Lower) const
{
// return the second derivative of the transformation d^2 Ext/ d Int^2
return -0.5 * ((Upper - Lower) * std::sin(Value));
}

long double SinParameterTransformation::DExt2Int(long double Value, long double Upper, long double Lower) const
{
// return the derivative of the transformation d Int/ d Ext
Expand Down Expand Up @@ -89,6 +95,13 @@ long double SqrtLowParameterTransformation::DInt2Ext(long double value, long dou
return val;
}

long double SqrtLowParameterTransformation::D2Int2Ext(long double value, long double) const
{
// second derivative of internal to external transformation : d^2 (Int2Ext) / d Int^2
long double val = std::pow(value * value + 1., -1.5);
return val;
}

long double SqrtLowParameterTransformation::DExt2Int(long double value, long double lower) const
{
// derivative of internal to external transformation : d (Ext2Int) / d Ext
Expand Down Expand Up @@ -122,6 +135,13 @@ long double SqrtUpParameterTransformation::DInt2Ext(long double value, long doub
return val;
}

long double SqrtUpParameterTransformation::D2Int2Ext(long double value, long double) const
{
// second derivative of internal to external transformation : d^2 (Int2Ext) / d Int^2
long double val = -std::pow(value * value + 1., -1.5);
return val;
}

long double SqrtUpParameterTransformation::DExt2Int(long double value, long double upper) const
{
// derivative of internal to external transformation : d (Ext2Int ) / d Ext
Expand Down
19 changes: 19 additions & 0 deletions math/minuit2/src/MnUserTransformation.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,25 @@ double MnUserTransformation::DInt2Ext(unsigned int i, double val) const
return dd;
}

double MnUserTransformation::D2Int2Ext(unsigned int i, double val) const
{
// return the second derivative of the int->ext transformation: d^2 Pext(i) / d Pint(i)^2
// for the parameter i with value val

double dd = 0.;
if (fParameters[fExtOfInt[i]].HasLimits()) {
if (fParameters[fExtOfInt[i]].HasUpperLimit() && fParameters[fExtOfInt[i]].HasLowerLimit())
dd = fDoubleLimTrafo.D2Int2Ext(val, fParameters[fExtOfInt[i]].UpperLimit(),
fParameters[fExtOfInt[i]].LowerLimit());
else if (fParameters[fExtOfInt[i]].HasUpperLimit() && !fParameters[fExtOfInt[i]].HasLowerLimit())
dd = fUpperLimTrafo.D2Int2Ext(val, fParameters[fExtOfInt[i]].UpperLimit());
else
dd = fLowerLimTrafo.D2Int2Ext(val, fParameters[fExtOfInt[i]].LowerLimit());
}

return dd;
}

double MnUserTransformation::DExt2Int(unsigned int i, double val) const
{
// return the derivative of the ext->int transformation: dPint(i) / dPext(i)
Expand Down
190 changes: 190 additions & 0 deletions math/minuit2/test/testMinuit2.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,19 @@
#include <Minuit2/FCNBase.h>
#include <Minuit2/MnMigrad.h>
#include <Minuit2/MnUserParameters.h>
#include <Minuit2/MnUserParameterState.h>
#include <Minuit2/MnUserTransformation.h>
#include <Minuit2/AnalyticalGradientCalculator.h>
#include <Minuit2/MinimumParameters.h>
#include <Minuit2/FunctionGradient.h>
#include <Minuit2/MnMatrix.h>
#include <Minuit2/FunctionMinimum.h>
#include <Minuit2/MnPrint.h>

#include <gtest/gtest.h>

#include <cmath>

class QuadraticFCNBase : public ROOT::Minuit2::FCNBase {
public:
double operator()(const std::vector<double> &p) const override
Expand Down Expand Up @@ -220,3 +228,185 @@
EXPECT_EQ(RosenbrockFCNWithoutG2::G2CallCounter(), 0)
<< "The G2() method was called, even though we claim to not implement it!";
}

// Least-squares fit of a simplified, piecewise binding-kinetics model with an
// optional analytical Hessian. The model gradient and Hessian are exact. The
// two rate parameters (ka, kd) have a physical lower limit of zero and are
// started far below the solution.
//
// When parameters have limits, the internal<->external parameter transformation
// is non-linear, so converting the user-provided external Hessian (and G2) to
// internal coordinates requires an extra curvature term proportional to the
// external gradient. Without it, the seed covariance is corrupted and, for this
// configuration, Migrad silently diverges to a completely wrong minimum while
// still reporting validity.
class BindingKineticsFCN : public ROOT::Minuit2::FCNBase {
public:
explicit BindingKineticsFCN(bool hasHessian) : fHasHessian(hasHessian) {}

double Up() const override { return 1.0; }
bool HasGradient() const override { return true; }
bool HasHessian() const override { return fHasHessian; }

double operator()(std::vector<double> const &p) const override
{
double chi2 = 0.0;
for (std::size_t i = 0; i < fX.size(); ++i) {
const double r = (fY[i] - Model(fX[i], p)) / fErr;
chi2 += r * r;
}
return chi2;
}

std::vector<double> Gradient(std::vector<double> const &p) const override
{
std::vector<double> grad(2, 0.0);
for (std::size_t i = 0; i < fX.size(); ++i) {
const double r = (fY[i] - Model(fX[i], p)) / fErr;
const auto g = ModelGradient(fX[i], p);
for (int j = 0; j < 2; ++j)
grad[j] -= 2.0 * r * g[j] / fErr;
}
return grad;
}

std::vector<double> Hessian(std::vector<double> const &p) const override
{
std::vector<double> hess(4, 0.0);
for (std::size_t i = 0; i < fX.size(); ++i) {
const double r = (fY[i] - Model(fX[i], p)) / fErr;
const auto g = ModelGradient(fX[i], p);
const auto h = ModelHessian(fX[i], p);
for (int j = 0; j < 2; ++j)
for (int k = 0; k < 2; ++k)
hess[j * 2 + k] -= 2.0 / fErr * (r * h[j * 2 + k] - g[j] * g[k] / fErr);
}
return hess;
}

private:
static double Model(double x, std::vector<double> const &p)
{
const double ka = p[0];
const double kd = p[1];
return x < 100 ? 1.0 - std::exp(x * (-ka - kd))
: (1.0 - std::exp(-100.0 * ka - 100.0 * kd)) * std::exp(-kd * (x - 100.0));
}

static std::vector<double> ModelGradient(double x, std::vector<double> const &p)
{
const double ka = p[0];
const double kd = p[1];
const double g0 = x < 100 ? x * std::exp(-x * (ka + kd)) : 100.0 * std::exp(-100.0 * ka - kd * x);
const double g1 = x < 100 ? x * std::exp(-x * (ka + kd))
: ((1.0 - std::exp(100.0 * ka + 100.0 * kd)) * (x - 100.0) + 100.0) *
std::exp(-100.0 * ka - kd * (x - 100.0) - 100.0 * kd);
return {g0, g1};
}

static std::vector<double> ModelHessian(double x, std::vector<double> const &p)
{
const double ka = p[0];
const double kd = p[1];
const double h00 = x < 100 ? -x * x * std::exp(-x * (ka + kd)) : -10000.0 * std::exp(-100.0 * ka - kd * x);
const double h01 = x < 100 ? -x * x * std::exp(-x * (ka + kd)) : -100.0 * x * std::exp(-100.0 * ka - kd * x);
const double h11 =
x < 100 ? -x * x * std::exp(-x * (ka + kd))
: (-200.0 * x - (1.0 - std::exp(100.0 * ka + 100.0 * kd)) * (x - 100.0) * (x - 100.0) + 10000.0) *
std::exp(-100.0 * ka - kd * (x - 100.0) - 100.0 * kd);
return {h00, h01, h01, h11};
}

bool fHasHessian;

static constexpr double fErr = 0.001;

// clang-format off
inline static const std::vector<double> fX = {
0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150,
160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300};

// Generated with Model(x, {0.01, 0.01}) plus normal noise of width fErr
inline static const std::vector<double> fY = {
0.0014677825891482553, 0.18188013378785522, 0.32992558356212337, 0.45041553188395367,
0.54794601921519193, 0.63195112024476019, 0.69853633449106323, 0.75286490803859663,
0.7981616977365642, 0.83331949155031482, 0.86616863624896234, 0.78174922906099609,
0.70818268046233734, 0.64271424905745012, 0.57898830590860717, 0.52425958636511283,
0.47459845673511991, 0.43080972773016701, 0.38834996655153947, 0.35178861510658521,
0.31925150166578198, 0.28732404660033928, 0.26252634588235924, 0.23540772239857896,
0.21266927896262663, 0.19394038874520433, 0.17446775399099632, 0.15841619132882989,
0.14478862098494658, 0.13111385920544755, 0.1156468064187659};
// clang-format on
};

// When a parameter has limits, the internal<->external parameter transformation
// q -> x(q) is non-linear, so the Hessian and G2 (second derivatives) in
// internal coordinates carry an extra diagonal term beyond (dx/dq)^2 H_ext:
//
// H_int(i,i) = (dx/dq)^2 H_ext(i,i) + (d^2x/dq^2) g_ext(i)
//
// The analytical-gradient path used to drop the (d^2x/dq^2) g_ext term. At a
// point with a non-zero gradient (i.e. away from the minimum) and a parameter
// close to its limit, this term dominates, which corrupted the Migrad seed and
// could make the minimization silently diverge to a wrong but "valid" result.
//
// This is checked deterministically (no dependence on the chaotic convergence
// behaviour) by comparing the analytical internal Hessian/G2 against a central
// finite difference of the calculator's own internal gradient.
//
// Covers GitHub issue https://github.com/root-project/root/issues/22692
TEST(Minuit2, AnalyticalHessianLimitTransformation)
{
using namespace ROOT::Minuit2;

BindingKineticsFCN fcn(/*hasHessian=*/true);

MnUserParameters upar;
// Start far from the solution and close to the lower limit: here the external
// gradient is large and dx/dq is tiny, so the missing curvature term matters.
upar.Add("ka", 1e-7, 1e-8);
upar.Add("kd", 1e-7, 1e-8);
upar.SetLowerLimit(0, 0.0);
upar.SetLowerLimit(1, 0.0);

const MnUserParameterState state(upar);
const MnUserTransformation &trafo = state.Trafo();
AnalyticalGradientCalculator agc(fcn, trafo);

const unsigned int n = 2;

MnAlgebraicVector q(n);
for (unsigned int i = 0; i < n; ++i)
q(i) = state.IntParameters()[i];

// Internal gradient computed by the calculator at an internal point.
auto internalGradient = [&](const MnAlgebraicVector &qq) { return agc(MinimumParameters(qq, 0.0)).Grad(); };

// Analytical internal Hessian.
MnAlgebraicSymMatrix hAnalytic(n);
ASSERT_TRUE(agc.Hessian(MinimumParameters(q, 0.0), hAnalytic));

// Analytical internal G2 (diagonal of the Hessian) via the dedicated method.
MnAlgebraicVector g2Analytic(n);
ASSERT_TRUE(agc.G2(MinimumParameters(q, 0.0), g2Analytic));

// Ground truth: H_int(i,j) = d g_int(i) / d q_j by central finite difference.
const double h = 1e-6;
for (unsigned int j = 0; j < n; ++j) {
MnAlgebraicVector qp(q);
MnAlgebraicVector qm(q);
qp(j) += h;
qm(j) -= h;
const MnAlgebraicVector gp = internalGradient(qp);
const MnAlgebraicVector gm = internalGradient(qm);
for (unsigned int i = 0; i < n; ++i) {
const double fd = (gp(i) - gm(i)) / (2.0 * h);
// Relative tolerance: the second derivatives span several orders of
// magnitude, and dropping the curvature term changes them by ~100%.
const double tol = 1e-3 * std::max(std::abs(fd), 1.0);
EXPECT_NEAR(hAnalytic(i, j), fd, tol) << "Hessian mismatch at (" << i << "," << j << ")";
if (i == j)

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / alma10 minimal build CMAKE_CXX_STANDARD=20

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / alma10 asan ROOT_CTEST_CUSTOM_FLAGS='-E \(^tutorial-\|cppinterop-CppInterOpTest\|roottest-cling-specialobj-runf02$\|roottest-root-collection-DeleteWarning$\|roottest-root-io-evolution-fixarr2$\|roottest-root-meta-rlibmap$\|roottest-root-treeproxy-vectorint-vectorint$\)'

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / alma10 benchmark build CMAKE_CXX_STANDARD=20

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / alma9 modules_off CMAKE_CXX_STANDARD=20

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / ubuntu22 imt=Off, CMAKE_BUILD_TYPE=Debug

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / alma10

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / alma9 CMAKE_BUILD_TYPE=Debug

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / ubuntu2404 CMAKE_BUILD_TYPE=Debug

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / alma8

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / ubuntu2604

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / debian13 dev=ON, CMAKE_CXX_FLAGS=-Wsuggest-override

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / opensuse16 march_native

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / rawhide Fedora pydebug no GIL CMAKE_CXX_STANDARD=23

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / fedora44

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]

Check warning on line 408 in math/minuit2/test/testMinuit2.cxx

View workflow job for this annotation

GitHub Actions / fedora43

suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else]
EXPECT_NEAR(g2Analytic(i), fd, tol) << "G2 mismatch at " << i;
}
}
}
Loading