Providing analytical Hessian reduces robustness of minimization to initial values in Minuit2

### Check duplicate issues.

- [x] Checked for duplicates

### Description

I ran into this issue when trying to minimize piecewise-defined binding kinetics functions with Migrad:

- If I don't provide an analytical Hessian to the cost function but rely on numerical approximation, the minimization is reliable and robust against imperfect initial parameter values. 
- However, when I provide an analytical Hessian -- which I would expect to only improve things -- the minimization gets far less robust against the initial values and I get weird failures, even "silent failures" where the minimizer reports success even when the result is off.

I have attached a (hopefully minimal enough) reproducer, representing a simplified binding kinetics example*. The model function gradient and Hessian are auto-generated using a symbolics library (sympy), so that mistakes in the derivative function definitions can be ruled out.

+++
*Example assumptions (just for the sake of context/completeness):
- constant free molecule concentration during association phase (1), no rebinding during dissociation phase (2) 
- free molecule concentration during association and equilibrium signal amplitude hardcoded to 1
- association and dissociation phase start time hardcoded to 0 and 100, respectively
- association rate (ka) and dissociation rate (kd) have physical lower limit of 0

### Reproducer

```cpp
#include <Minuit2/FCNBase.h>
#include <Minuit2/MnMigrad.h>
#include <Minuit2/MnUserParameters.h>
#include <Minuit2/FunctionMinimum.h>

#include <vector>
#include <iostream>
#include <string>
#include <functional>
#include <cmath>

class LeastSquaresFCN : public ROOT::Minuit2::FCNBase {
public:
    LeastSquaresFCN(
        std::vector<double> x,
        std::vector<double> y,
        std::vector<double> yErrors,
        const size_t nParameters,
        std::function<double(double, const std::vector<double>&)> model,
        std::function<std::vector<double>(double, const std::vector<double>&)> modelGradient = nullptr,
        std::function<std::vector<double>(double, const std::vector<double>&)> modelHessian = nullptr)
        : fX(std::move(x)),
          fY(std::move(y)),
          fYErrors(std::move(yErrors)),
          fNParameters(nParameters),
          fModel(std::move(model)),
          fModelGradient(std::move(modelGradient)),
          fModelHessian(std::move(modelHessian)) {
    }

    double Up() const override { return 1.0; }
    bool HasGradient() const override { return static_cast<bool>(fModelGradient); }
    bool HasHessian() const override { return static_cast<bool>(fModelGradient) && static_cast<bool>(fModelHessian); }

    double operator()(const std::vector<double>& p) const override {
        double value = 0;
        for (size_t i = 0; i < fX.size(); ++i) {
            const double r = (fY[i] - fModel(fX[i], p)) / fYErrors[i];
            value += r * r;
        }
        return value;
    }

    std::vector<double> Gradient(const std::vector<double>& p) const override {
        std::vector grad(fNParameters, 0.0);
        for (size_t i = 0; i < fX.size(); ++i) {
            const double r = (fY[i] - fModel(fX[i], p)) / fYErrors[i];
            std::vector<double> g = fModelGradient(fX[i], p);
            for (size_t j = 0; j < fNParameters; ++j) {
                grad[j] -= 2.0 * r * g[j] / fYErrors[i];
            }
        }
        return grad;
    }

    std::vector<double> Hessian(const std::vector<double>& p) const override {
        std::vector hess(fNParameters * fNParameters, 0.0);
        for (size_t i = 0; i < fX.size(); ++i) {
            const double r = (fY[i] - fModel(fX[i], p)) / fYErrors[i];
            std::vector<double> g = fModelGradient(fX[i], p);
            std::vector<double> h = fModelHessian(fX[i], p);
            for (size_t j = 0; j < fNParameters; ++j) {
                for (size_t k = 0; k < fNParameters; ++k) {
                    const size_t jk = j * fNParameters + k;
                    hess[jk] -= 2.0 / fYErrors[i] * (r * h[jk] - g[j] * g[k] / fYErrors[i]);
                }
            }
        }
        return hess;
    }

private:
    std::vector<double> fX;
    std::vector<double> fY;
    std::vector<double> fYErrors;
    size_t fNParameters;
    std::function<double(double, const std::vector<double>&)> fModel;
    std::function<std::vector<double>(double, const std::vector<double>&)> fModelGradient;
    std::function<std::vector<double>(double, const std::vector<double>&)> fModelHessian;
};

class BindingKineticsFCN : public LeastSquaresFCN {
public:
    explicit BindingKineticsFCN(const bool useHessian)
        : LeastSquaresFCN(XValues,
                          YValues,
                          YErrors,
                          2,
                          Model,
                          ModelGradient,
                          useHessian ? ModelHessian : nullptr) {
    }

private:
    static double Model(const double x, const std::vector<double>& p) {
        const double ka = p[0];
        const double kd = p[1];
        return x < 100
                   ? 1.0 - std::exp(x * (-ka - kd))
                   : (1.0 - std::exp(-100.0 * ka - 100.0 * kd)) * std::exp(-kd * (x - 100.0));
    }

    static std::vector<double> ModelGradient(const double x, const std::vector<double>& p) {
        const double ka = p[0];
        const double kd = p[1];
        double g0 = x < 100
                        ? x * std::exp(-x * (ka + kd))
                        : 100.0 * std::exp(-100.0 * ka - kd * x);
        double g1 = x < 100
                        ? x * std::exp(-x * (ka + kd))
                        : ((1.0 - std::exp(100.0 * ka + 100.0 * kd)) * (x - 100.0) + 100.0) *
                        std::exp(-100.0 * ka - kd * (x - 100.0) - 100.0 * kd);
        return {g0, g1};
    }

    static std::vector<double> ModelHessian(const double x, const std::vector<double>& p) {
        const double ka = p[0];
        const double kd = p[1];
        double h00 = x < 100
                         ? -std::pow(x, 2) * std::exp(-x * (ka + kd))
                         : -10000.0 * std::exp(-100.0 * ka - kd * x);
        double h01 = x < 100
                         ? -std::pow(x, 2) * std::exp(-x * (ka + kd))
                         : -100.0 * x * std::exp(-100.0 * ka - kd * x);
        double h11 = x < 100
                         ? -std::pow(x, 2) * std::exp(-x * (ka + kd))
                         : (-200.0 * x - (1.0 - std::exp(100.0 * ka + 100.0 * kd)) * std::pow(x - 100.0, 2) + 10000.0) *
                         std::exp(-100.0 * ka - kd * (x - 100.0) - 100.0 * kd);
        return {h00, h01, h01, h11};
    }

    inline static const std::vector<double> XValues = {
        0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230,
        240, 250, 260, 270, 280, 290, 300
    };

    // Generated using Model(x, [0.01, 0.01]) adding random normal noise with a standard deviation of YError = 0.001
    inline static const std::vector<double> YValues = {
        0.0014677825891482553, 0.18188013378785522, 0.32992558356212337, 0.45041553188395367, 0.54794601921519193,
        0.63195112024476019, 0.69853633449106323, 0.75286490803859663, 0.7981616977365642, 0.83331949155031482,
        0.86616863624896234, 0.78174922906099609, 0.70818268046233734, 0.64271424905745012, 0.57898830590860717,
        0.52425958636511283, 0.47459845673511991, 0.43080972773016701, 0.38834996655153947, 0.35178861510658521,
        0.31925150166578198, 0.28732404660033928, 0.26252634588235924, 0.23540772239857896, 0.21266927896262663,
        0.19394038874520433, 0.17446775399099632, 0.15841619132882989, 0.14478862098494658, 0.13111385920544755,
        0.1156468064187659
    };

    inline static const auto YErrors = std::vector(XValues.size(), 0.001);
};

void MinimizeBindingKineticsFCN(const bool useHessian) {
    std::cout << "\nFitting " << (useHessian ? "with" : "without") << " Hessian:\n";

    const BindingKineticsFCN function(useHessian);
    ROOT::Minuit2::MnUserParameters parameters;
    parameters.Add("ka", 1e-7, 1e-8);
    parameters.Add("kd", 1e-7, 1e-8);
    parameters.SetLowerLimit(0, 0);
    parameters.SetLowerLimit(1, 0);

    ROOT::Minuit2::MnMigrad migrad(function, parameters);
    const ROOT::Minuit2::FunctionMinimum minimum = migrad();

    std::cout << "minimum valid?: " << minimum.IsValid() << "\n";
    std::cout << "function value: " << minimum.Fval() << "\n";
    std::cout << "ka = " << minimum.UserState().Value("ka") << "\n";
    std::cout << "kd = " << minimum.UserState().Value("kd") << "\n";
}

int main() {
    MinimizeBindingKineticsFCN(false);
    MinimizeBindingKineticsFCN(true);

    return 0;
}
```

### ROOT version

Tested for `v6-36-14`, `v6-40-02` and latest `master` (39a2e3b0b4fa11064d2ec8451b5a5afe7fbe7733)

### Installation method

Build from source

### Operating system

Windows

### Additional context

_No response_

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Providing analytical Hessian reduces robustness of minimization to initial values in Minuit2 #22692

Check duplicate issues.

Description

Reproducer

ROOT version

Installation method

Operating system

Additional context

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Uh oh!

Providing analytical Hessian reduces robustness of minimization to initial values in Minuit2 #22692

Description

Check duplicate issues.

Description

Reproducer

ROOT version

Installation method

Operating system

Additional context

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions