Skip to content

Providing analytical Hessian reduces robustness of minimization to initial values in Minuit2 #22692

Description

@AdrianDBS

Check duplicate issues.

  • Checked for duplicates

Description

I ran into this issue when trying to minimize piecewise-defined binding kinetics functions with Migrad:

  • If I don't provide an analytical Hessian to the cost function but rely on numerical approximation, the minimization is reliable and robust against imperfect initial parameter values.
  • However, when I provide an analytical Hessian -- which I would expect to only improve things -- the minimization gets far less robust against the initial values and I get weird failures, even "silent failures" where the minimizer reports success even when the result is off.

I have attached a (hopefully minimal enough) reproducer, representing a simplified binding kinetics example*. The model function gradient and Hessian are auto-generated using a symbolics library (sympy), so that mistakes in the derivative function definitions can be ruled out.

+++
*Example assumptions (just for the sake of context/completeness):

  • constant free molecule concentration during association phase (1), no rebinding during dissociation phase (2)
  • free molecule concentration during association and equilibrium signal amplitude hardcoded to 1
  • association and dissociation phase start time hardcoded to 0 and 100, respectively
  • association rate (ka) and dissociation rate (kd) have physical lower limit of 0

Reproducer

#include <Minuit2/FCNBase.h>
#include <Minuit2/MnMigrad.h>
#include <Minuit2/MnUserParameters.h>
#include <Minuit2/FunctionMinimum.h>

#include <vector>
#include <iostream>
#include <string>
#include <functional>
#include <cmath>

class LeastSquaresFCN : public ROOT::Minuit2::FCNBase {
public:
    LeastSquaresFCN(
        std::vector<double> x,
        std::vector<double> y,
        std::vector<double> yErrors,
        const size_t nParameters,
        std::function<double(double, const std::vector<double>&)> model,
        std::function<std::vector<double>(double, const std::vector<double>&)> modelGradient = nullptr,
        std::function<std::vector<double>(double, const std::vector<double>&)> modelHessian = nullptr)
        : fX(std::move(x)),
          fY(std::move(y)),
          fYErrors(std::move(yErrors)),
          fNParameters(nParameters),
          fModel(std::move(model)),
          fModelGradient(std::move(modelGradient)),
          fModelHessian(std::move(modelHessian)) {
    }

    double Up() const override { return 1.0; }
    bool HasGradient() const override { return static_cast<bool>(fModelGradient); }
    bool HasHessian() const override { return static_cast<bool>(fModelGradient) && static_cast<bool>(fModelHessian); }

    double operator()(const std::vector<double>& p) const override {
        double value = 0;
        for (size_t i = 0; i < fX.size(); ++i) {
            const double r = (fY[i] - fModel(fX[i], p)) / fYErrors[i];
            value += r * r;
        }
        return value;
    }

    std::vector<double> Gradient(const std::vector<double>& p) const override {
        std::vector grad(fNParameters, 0.0);
        for (size_t i = 0; i < fX.size(); ++i) {
            const double r = (fY[i] - fModel(fX[i], p)) / fYErrors[i];
            std::vector<double> g = fModelGradient(fX[i], p);
            for (size_t j = 0; j < fNParameters; ++j) {
                grad[j] -= 2.0 * r * g[j] / fYErrors[i];
            }
        }
        return grad;
    }

    std::vector<double> Hessian(const std::vector<double>& p) const override {
        std::vector hess(fNParameters * fNParameters, 0.0);
        for (size_t i = 0; i < fX.size(); ++i) {
            const double r = (fY[i] - fModel(fX[i], p)) / fYErrors[i];
            std::vector<double> g = fModelGradient(fX[i], p);
            std::vector<double> h = fModelHessian(fX[i], p);
            for (size_t j = 0; j < fNParameters; ++j) {
                for (size_t k = 0; k < fNParameters; ++k) {
                    const size_t jk = j * fNParameters + k;
                    hess[jk] -= 2.0 / fYErrors[i] * (r * h[jk] - g[j] * g[k] / fYErrors[i]);
                }
            }
        }
        return hess;
    }

private:
    std::vector<double> fX;
    std::vector<double> fY;
    std::vector<double> fYErrors;
    size_t fNParameters;
    std::function<double(double, const std::vector<double>&)> fModel;
    std::function<std::vector<double>(double, const std::vector<double>&)> fModelGradient;
    std::function<std::vector<double>(double, const std::vector<double>&)> fModelHessian;
};

class BindingKineticsFCN : public LeastSquaresFCN {
public:
    explicit BindingKineticsFCN(const bool useHessian)
        : LeastSquaresFCN(XValues,
                          YValues,
                          YErrors,
                          2,
                          Model,
                          ModelGradient,
                          useHessian ? ModelHessian : nullptr) {
    }

private:
    static double Model(const double x, const std::vector<double>& p) {
        const double ka = p[0];
        const double kd = p[1];
        return x < 100
                   ? 1.0 - std::exp(x * (-ka - kd))
                   : (1.0 - std::exp(-100.0 * ka - 100.0 * kd)) * std::exp(-kd * (x - 100.0));
    }

    static std::vector<double> ModelGradient(const double x, const std::vector<double>& p) {
        const double ka = p[0];
        const double kd = p[1];
        double g0 = x < 100
                        ? x * std::exp(-x * (ka + kd))
                        : 100.0 * std::exp(-100.0 * ka - kd * x);
        double g1 = x < 100
                        ? x * std::exp(-x * (ka + kd))
                        : ((1.0 - std::exp(100.0 * ka + 100.0 * kd)) * (x - 100.0) + 100.0) *
                        std::exp(-100.0 * ka - kd * (x - 100.0) - 100.0 * kd);
        return {g0, g1};
    }

    static std::vector<double> ModelHessian(const double x, const std::vector<double>& p) {
        const double ka = p[0];
        const double kd = p[1];
        double h00 = x < 100
                         ? -std::pow(x, 2) * std::exp(-x * (ka + kd))
                         : -10000.0 * std::exp(-100.0 * ka - kd * x);
        double h01 = x < 100
                         ? -std::pow(x, 2) * std::exp(-x * (ka + kd))
                         : -100.0 * x * std::exp(-100.0 * ka - kd * x);
        double h11 = x < 100
                         ? -std::pow(x, 2) * std::exp(-x * (ka + kd))
                         : (-200.0 * x - (1.0 - std::exp(100.0 * ka + 100.0 * kd)) * std::pow(x - 100.0, 2) + 10000.0) *
                         std::exp(-100.0 * ka - kd * (x - 100.0) - 100.0 * kd);
        return {h00, h01, h01, h11};
    }

    inline static const std::vector<double> XValues = {
        0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230,
        240, 250, 260, 270, 280, 290, 300
    };

    // Generated using Model(x, [0.01, 0.01]) adding random normal noise with a standard deviation of YError = 0.001
    inline static const std::vector<double> YValues = {
        0.0014677825891482553, 0.18188013378785522, 0.32992558356212337, 0.45041553188395367, 0.54794601921519193,
        0.63195112024476019, 0.69853633449106323, 0.75286490803859663, 0.7981616977365642, 0.83331949155031482,
        0.86616863624896234, 0.78174922906099609, 0.70818268046233734, 0.64271424905745012, 0.57898830590860717,
        0.52425958636511283, 0.47459845673511991, 0.43080972773016701, 0.38834996655153947, 0.35178861510658521,
        0.31925150166578198, 0.28732404660033928, 0.26252634588235924, 0.23540772239857896, 0.21266927896262663,
        0.19394038874520433, 0.17446775399099632, 0.15841619132882989, 0.14478862098494658, 0.13111385920544755,
        0.1156468064187659
    };

    inline static const auto YErrors = std::vector(XValues.size(), 0.001);
};

void MinimizeBindingKineticsFCN(const bool useHessian) {
    std::cout << "\nFitting " << (useHessian ? "with" : "without") << " Hessian:\n";

    const BindingKineticsFCN function(useHessian);
    ROOT::Minuit2::MnUserParameters parameters;
    parameters.Add("ka", 1e-7, 1e-8);
    parameters.Add("kd", 1e-7, 1e-8);
    parameters.SetLowerLimit(0, 0);
    parameters.SetLowerLimit(1, 0);

    ROOT::Minuit2::MnMigrad migrad(function, parameters);
    const ROOT::Minuit2::FunctionMinimum minimum = migrad();

    std::cout << "minimum valid?: " << minimum.IsValid() << "\n";
    std::cout << "function value: " << minimum.Fval() << "\n";
    std::cout << "ka = " << minimum.UserState().Value("ka") << "\n";
    std::cout << "kd = " << minimum.UserState().Value("kd") << "\n";
}

int main() {
    MinimizeBindingKineticsFCN(false);
    MinimizeBindingKineticsFCN(true);

    return 0;
}

ROOT version

Tested for v6-36-14, v6-40-02 and latest master (39a2e3b)

Installation method

Build from source

Operating system

Windows

Additional context

No response

Metadata

Metadata

Assignees

Type

No type
No fields configured for issues without a type.

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions