From c6a2913c5904bb46fd531b62777b91ada5b104c0 Mon Sep 17 00:00:00 2001 From: Abel Soares Siqueira Date: Fri, 7 Aug 2020 10:48:03 -0300 Subject: [PATCH 1/3] First draft of merit function --- .gitignore | 2 + src/SolverTools.jl | 1 + src/linesearch/line_model.jl | 10 ++- src/merit/auglagmerit.jl | 43 +++++++++++++ src/merit/l1merit.jl | 50 +++++++++++++++ src/merit/merit.jl | 35 ++++++++++ test/dummy_linesearch_solver.jl | 90 ++++++++++++++++++++++++++ test/merit.jl | 110 ++++++++++++++++++++++++++++++++ test/runtests.jl | 4 +- 9 files changed, 338 insertions(+), 7 deletions(-) create mode 100644 src/merit/auglagmerit.jl create mode 100644 src/merit/l1merit.jl create mode 100644 src/merit/merit.jl create mode 100644 test/dummy_linesearch_solver.jl create mode 100644 test/merit.jl diff --git a/.gitignore b/.gitignore index 2cd4ac3f..98474722 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ docs/build docs/Manifest.toml + +Manifest.toml \ No newline at end of file diff --git a/src/SolverTools.jl b/src/SolverTools.jl index ed6ecde2..5947635b 100644 --- a/src/SolverTools.jl +++ b/src/SolverTools.jl @@ -16,6 +16,7 @@ include("auxiliary/logger.jl") include("stats/stats.jl") # Algorithmic components. +include("merit/merit.jl") include("linesearch/linesearch.jl") include("trust-region/trust-region.jl") diff --git a/src/linesearch/line_model.jl b/src/linesearch/line_model.jl index 54f21426..08e4b78b 100644 --- a/src/linesearch/line_model.jl +++ b/src/linesearch/line_model.jl @@ -1,5 +1,3 @@ -import NLPModels: obj, grad, grad!, hess - export LineModel export obj, grad, derivative, grad!, derivative!, hess, redirect! @@ -38,7 +36,7 @@ end ϕ(t) := f(x + td). """ -function obj(f :: LineModel, t :: AbstractFloat) +function NLPModels.obj(f :: LineModel, t :: AbstractFloat) NLPModels.increment!(f, :neval_obj) return obj(f.nlp, f.x + t * f.d) end @@ -51,7 +49,7 @@ i.e., ϕ'(t) = ∇f(x + td)ᵀd. """ -function grad(f :: LineModel, t :: AbstractFloat) +function NLPModels.grad(f :: LineModel, t :: AbstractFloat) NLPModels.increment!(f, :neval_grad) return dot(grad(f.nlp, f.x + t * f.d), f.d) end @@ -67,7 +65,7 @@ i.e., The gradient ∇f(x + td) is stored in `g`. """ -function grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) +function NLPModels.grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) NLPModels.increment!(f, :neval_grad) return dot(grad!(f.nlp, f.x + t * f.d, g), f.d) end @@ -81,7 +79,7 @@ i.e., ϕ"(t) = dᵀ∇²f(x + td)d. 
""" -function hess(f :: LineModel, t :: AbstractFloat) +function NLPModels.hess(f :: LineModel, t :: AbstractFloat) NLPModels.increment!(f, :neval_hess) return dot(f.d, hprod(f.nlp, f.x + t * f.d, f.d)) end diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl new file mode 100644 index 00000000..93aa2a12 --- /dev/null +++ b/src/merit/auglagmerit.jl @@ -0,0 +1,43 @@ +export AugLagMerit + +mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V + y :: V +end + +function AugLagMerit( + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon), + y :: V = fill(T(Inf), nlp.meta.ncon) +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + AugLagMerit{M,T,V}(nlp, η, fx, gx, cx, Ad, y) +end + +function NLPModels.obj(merit :: AugLagMerit, x :: AbstractVector; update :: Bool = true) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx - dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 +end + +function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) - dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) + end +end \ No newline at end of file diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl new file mode 100644 index 00000000..853b4e74 --- /dev/null +++ b/src/merit/l1merit.jl @@ -0,0 +1,50 @@ +export L1Merit + +mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V +end + +function L1Merit( + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon) +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + L1Merit{M,T,V}(nlp, η, fx, gx, cx, Ad) +end + +function NLPModels.obj(merit :: L1Merit, x :: AbstractVector; update :: Bool = true) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx + merit.η * norm(merit.cx, 1) +end + +function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + merit.η * sum( + if ci > 0 + Adi + elseif ci < 0 + -Adi + else + abs(Adi) + end + for (ci, Adi) in zip(merit.cx, merit.Ad) + ) + end +end \ No newline at end of file diff --git a/src/merit/merit.jl b/src/merit/merit.jl new file mode 100644 index 00000000..68d6ff6e --- /dev/null +++ b/src/merit/merit.jl @@ -0,0 +1,35 @@ +import NLPModels.obj +export AbstractMeritModel, obj, derivative + +""" + AbstractMeritModel + +Model for merit functions. All models should store +- `nlp`: The NLP with the corresponding problem. +- `fx`: The objective at some point. +- `cx`: The constraints vector at some point. +- `gx`: The constraints vector at some point. +- `Ad`: The Jacobian-direction product. 
+""" +abstract type AbstractMeritModel end + +""" + obj(merit, x; update=true) + +Computes the `merit` function value at `x`. +This will call `obj` and `cons` for the internal model, unless `update` is called with `false`. +The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. +""" +function obj end + +""" + derivative(merit, x, d; update=true) + +Computes the derivative derivative of `merit` at `x` on direction `d`. +This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct. +The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. +""" +function derivative! end + +include("auglagmerit.jl") +include("l1merit.jl") diff --git a/test/dummy_linesearch_solver.jl b/test/dummy_linesearch_solver.jl new file mode 100644 index 00000000..19374908 --- /dev/null +++ b/test/dummy_linesearch_solver.jl @@ -0,0 +1,90 @@ +function dummy_linesearch_solver( + nlp :: AbstractNLPModel; + x :: AbstractVector = nlp.meta.x0, + atol :: Real = 1e-6, + rtol :: Real = 1e-6, + max_eval :: Int = 1000, + max_time :: Float64 = 30.0, + merit_constructor = L1Merit +) + + start_time = time() + elapsed_time = 0.0 + + nvar, ncon = nlp.meta.nvar, nlp.meta.ncon + T = eltype(x) + + cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0) + gx = grad(nlp, x) + Jx = ncon > 0 ? jac(nlp, x) : zeros(T, 0, nvar) + y = -Jx' \ gx + Hxy = ncon > 0 ? hess(nlp, x, y) : hess(nlp, x) + + dual = gx + Jx' * y + + iter = 0 + + ϕ = merit_constructor(nlp, 1e-3, cx=cx, gx=gx) + + ϵd = atol + rtol * norm(dual) + ϵp = atol + + fx = obj(nlp, x) + @info log_header([:iter, :f, :c, :dual, :time, :η], [Int, T, T, T, T, T]) + @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η]) + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time + + while !(solved || tired) + Hxy = ncon > 0 ? 
+    W = Symmetric([Hxy zeros(T, nvar, ncon); Jx zeros(T, ncon, ncon)], :L)
+    Δxy = -W \ [dual; cx]
+    Δx = Δxy[1:nvar]
+    Δy = Δxy[nvar+1:end]
+
+    ϕ.fx = fx
+    ncon > 0 && jprod!(nlp, x, Δx, ϕ.Ad)
+    Dϕx = derivative(ϕ, x, Δx, update=false)
+    while Dϕx ≥ 0
+      ϕ.η *= 2
+      Dϕx = derivative(ϕ, x, Δx, update=false)
+    end
+    ϕx = obj(ϕ, x, update=false)
+
+    t = 1.0
+    ϕt = obj(ϕ, x + Δx) # Updates cx
+    while !(ϕt ≤ ϕx + 1e-2 * t * Dϕx)
+      t /= 2
+      ϕt = obj(ϕ, x + t * Δx) # Updates cx
+    end
+    x += t * Δx
+    y += t * Δy
+
+    grad!(nlp, x, gx) # Updates ϕ.gx
+    if ncon > 0
+      Jx = jac(nlp, x)
+    end
+    dual = gx + Jx' * y
+    elapsed_time = time() - start_time
+    solved = norm(dual) < ϵd && norm(cx) < ϵp
+    tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time
+
+    iter += 1
+    fx = obj(nlp, x)
+    @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η])
+  end
+
+  status = if solved
+    :first_order
+  elseif elapsed_time > max_time
+    :max_time
+  else
+    :max_eval
+  end
+
+  return GenericExecutionStats(status, nlp,
+    objective=fx, dual_feas=norm(dual), primal_feas=norm(cx),
+    multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar),
+    elapsed_time=elapsed_time, solution=x, iter=iter
+  )
+end
diff --git a/test/merit.jl b/test/merit.jl
new file mode 100644
index 00000000..b8c44085
--- /dev/null
+++ b/test/merit.jl
@@ -0,0 +1,110 @@
+function test_merit(merit_constructor, ϕ)
+  @testset "Consistency" begin
+    nlp = ADNLPModel(x -> dot(x, x), zeros(5), x -> [sum(x.^2) - 5; sum(x) - 1; prod(x)], zeros(3), zeros(3))
+
+    for x in (zeros(5), ones(5), -ones(5), BigFloat.([π, -2π, 3π, -4π, 0.0])),
+        d in (-ones(5), ones(5))
+      η = one(eltype(x))
+      merit = merit_constructor(nlp, η)
+      ϕx = obj(merit, x)
+      @test ϕx ≈ ϕ(nlp, x, η)
+      @test obj(nlp, x) == merit.fx
+      @test cons(nlp, x) == merit.cx
+
+      Dϕx = derivative(merit, x, d)
+      ϵ = √eps(eltype(η))
+      Dϕxa = (ϕ(nlp, x + ϵ * d, η) - ϕ(nlp, x, η)) / ϵ
+      @test isapprox(Dϕx, Dϕxa, rtol=1e-6)
+      @test grad(nlp, x) == merit.gx
+      @test jac(nlp, x) * d == merit.Ad
+
+      ϕt = obj(merit, x + d)
+      @test ϕt ≈ ϕ(nlp, x + d, η)
+      @test obj(nlp, x + d) == merit.fx
+      @test cons(nlp, x + d) == merit.cx
+    end
+  end
+
+  @testset "Simple line search" begin
+    nlp = ADNLPModel(x -> dot(x, x), [-1.0; 1.0], x -> [sum(x) - 1], [0.0], [0.0])
+
+    sol = [0.5; 0.5]
+    x = [-1.0; 1.0]
+    d = 30 * (sol - x)
+    η = 1.0
+
+    merit = merit_constructor(nlp, η)
+
+    @assert obj(nlp, x + d) > obj(nlp, x)
+    @assert norm(cons(nlp, x + d)) > norm(cons(nlp, x))
+
+    reset!(nlp)
+    bk = 0
+    ϕx = obj(merit, x)
+    obj(merit, x, update=false)
+    Dϕx = derivative(merit, x, d)
+    derivative(merit, x, d, update=false)
+    @test Dϕx < 0 # If d is not a descent direction for your merit, change your parameters
+    t = 1.0
+    ϕt = obj(merit, x + d)
+    while ϕt > ϕx + 0.5 * t * Dϕx
+      t /= 2
+      ϕt = obj(merit, x + t * d)
+      if t < 1e-16
+        error("Failure")
+      end
+      bk += 1
+    end
+    @test t < 1
+    @test neval_obj(nlp) == 2 + bk
+    @test neval_cons(nlp) == 2 + bk
+    @test neval_grad(nlp) == 1
+    @test neval_jprod(nlp) == 1
+  end
+
+  @testset "Safe for unconstrained" begin
+    nlp = ADNLPModel(x -> dot(x, x), ones(2))
+    x = nlp.meta.x0
+    d = ones(2)
+
+    merit = merit_constructor(nlp, 1.0)
+    @test obj(merit, x) == obj(nlp, x)
+    @test derivative(merit, x, d) == dot(grad(nlp, x), d)
+  end
+
+  @testset "Use on solver" begin
+    nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0])
+    output = dummy_linesearch_solver(nlp, merit_constructor=merit_constructor, rtol=0)
+    @test
isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas == 0 + @test output.dual_feas < 1e-6 + + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [exp(x[1] - 1) - 1], [0.0], [0.0]) + output = dummy_linesearch_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas < 1e-6 + @test output.dual_feas < 1e-6 + end +end + +@testset "Merit functions" begin + merits = [ # Name, Constructor, ϕ(nlp, x, η) + ( + "L1Merit", + L1Merit, + (nlp, x, η) -> obj(nlp, x) + η * norm(cons(nlp, x), 1) + ), + ( + "AugLagMerit", + (nlp, η; kwargs...) -> AugLagMerit(nlp, η, y=ones(typeof(η), nlp.meta.ncon); kwargs...), + (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (1,…,1)ᵀ + ) + ] + for (name, merit, ϕ) in merits + @testset "Merit $name" begin + test_merit(merit, ϕ) + end + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index e56c4737..f4b46944 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,9 +8,11 @@ using NLPModels using LinearAlgebra, Logging, Test include("dummy_solver.jl") +include("dummy_linesearch_solver.jl") include("test_auxiliary.jl") include("test_linesearch.jl") include("test_logging.jl") +include("merit.jl") include("test_stats.jl") -include("test_trust_region.jl") +include("test_trust_region.jl") \ No newline at end of file From 80ad155898d60b80292aa2e54474d56cb32c073b Mon Sep 17 00:00:00 2001 From: Abel Date: Fri, 7 Aug 2020 14:43:47 -0300 Subject: [PATCH 2/3] Merit docs --- docs/src/api.md | 11 +++++++++++ docs/src/reference.md | 10 +++++++++- src/merit/auglagmerit.jl | 15 +++++++++++++++ src/merit/l1merit.jl | 12 ++++++++++++ src/merit/merit.jl | 17 ++++++++++++++--- 5 files changed, 61 insertions(+), 4 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index 1585a3af..a0e4d821 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -27,6 +27,17 @@ redirect! armijo_wolfe ``` +## Merit + +See also [`obj`](@ref). + +```@docs +AbstractMeritModel +derivative +L1Merit +AugLagMerit +``` + ## Stats ```@docs diff --git a/docs/src/reference.md b/docs/src/reference.md index 2dec9b13..b0d59bcf 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -1,4 +1,12 @@ # Reference -```@index +## Contents + +```@contents +Pages = ["reference.md"] ``` + +## Index + +```@index +``` \ No newline at end of file diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl index 93aa2a12..cbb05c12 100644 --- a/src/merit/auglagmerit.jl +++ b/src/merit/auglagmerit.jl @@ -1,5 +1,20 @@ export AugLagMerit +@doc raw""" + AugLagMerit(nlp, η; kwargs...) + +Creates an augmented Lagrangian merit function for the equality constrained problem +```math +\min f(x) \quad \text{s.to} \quad c(x) = 0 +``` +defined by +```math +\phi(x, yₖ; η) = f(x) - yₖᵀc(x) + η ½\|c(x)\|² +``` + +In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also +accepts the argument `y`. +""" mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel nlp :: M η :: T diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl index 853b4e74..754a07d1 100644 --- a/src/merit/l1merit.jl +++ b/src/merit/l1merit.jl @@ -1,5 +1,17 @@ export L1Merit +@doc raw""" + L1Merit(nlp, η; kwargs...) 
+
+Creates an ℓ₁ merit function for the equality constrained problem
+```math
+\min f(x) \quad \text{s.to} \quad c(x) = 0
+```
+defined by
+```math
+\phi_1(x; η) = f(x) + η\|c(x)\|₁
+```
+"""
 mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel
   nlp :: M
   η :: T
diff --git a/src/merit/merit.jl b/src/merit/merit.jl
index 68d6ff6e..787cb21a 100644
--- a/src/merit/merit.jl
+++ b/src/merit/merit.jl
@@ -1,4 +1,4 @@
-import NLPModels.obj
+using NLPModels
 export AbstractMeritModel, obj, derivative
 
 """
@@ -6,10 +6,17 @@ export AbstractMeritModel, obj, derivative
 
 Model for merit functions. All models should store
 - `nlp`: The NLP with the corresponding problem.
+- `η`: The merit parameter.
 - `fx`: The objective at some point.
 - `cx`: The constraints vector at some point.
 - `gx`: The gradient vector at some point.
 - `Ad`: The Jacobian-direction product.
+
+All models allow a constructor of the form
+
+    Merit(nlp, η; fx=Inf, cx=[Inf,…,Inf], gx=[Inf,…,Inf], Ad=[Inf,…,Inf])
+
+Additional arguments and constructors may be provided.
 """
 abstract type AbstractMeritModel end
 
@@ -20,7 +27,11 @@ Computes the `merit` function value at `x`.
 This will call `obj` and `cons` for the internal model, unless `update` is set to `false`.
 The option exists to allow updating the `η` parameter without recomputing `fx` and `cx`.
 """
-function obj end
+function NLPModels.obj(merit::AbstractMeritModel, x::AbstractVector)
+  # I'd prefer to use only `function NLPModels.obj end` instead, but it doesn't work and using
+  # only `function obj end` overwrites the docstring
+  throw(MethodError(NLPModels.obj, (merit, x)))
+end
 
 """
     derivative(merit, x, d; update=true)
 
 Computes the derivative derivative of `merit` at `x` on direction `d`.
 This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct.
 The option exists to allow updating the `η` parameter without recomputing `fx` and `cx`.
 """
-function derivative! end
+function derivative end
 
 include("auglagmerit.jl")
 include("l1merit.jl")

From c689b3f8c733adefc24b5e70dfdb3ca65610f3fa Mon Sep 17 00:00:00 2001
From: Abel
Date: Fri, 7 Aug 2020 17:18:16 -0300
Subject: [PATCH 3/3] Change merit to an NLPModel

---
 src/merit/auglagmerit.jl | 119 +++++++++++++++++++++++++++++----------
 src/merit/l1merit.jl     |  78 +++++++++++++------------
 src/merit/merit.jl       |  20 +++----
 test/merit.jl            |   4 +-
 4 files changed, 139 insertions(+), 82 deletions(-)

diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl
index cbb05c12..f2663dc6 100644
--- a/src/merit/auglagmerit.jl
+++ b/src/merit/auglagmerit.jl
 export AugLagMerit
 
 @doc raw"""
     AugLagMerit(nlp, η; kwargs...)
 
 Creates an augmented Lagrangian merit function for the equality constrained problem
 ```math
 \min f(x) \quad \text{s.to} \quad c(x) = 0
 ```
 defined by
 ```math
-\phi(x, yₖ; η) = f(x) - yₖᵀc(x) + η ½\|c(x)\|²
+\phi(x, yₖ; η) = f(x) + yₖᵀc(x) + η ½\|c(x)\|²
 ```
 
 In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also
 accepts the argument `y`.
""" mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel - nlp :: M - η :: T - fx :: T - gx :: V - cx :: V - Ad :: V - y :: V + meta :: NLPModelMeta + counters :: Counters + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V + y :: V + y⁺ :: V + Jᵀy⁺ :: V + Jv :: V + JᵀJv :: V end function AugLagMerit( - nlp :: M, - η :: T; - fx :: T = T(Inf), - gx :: V = fill(T(Inf), nlp.meta.nvar), - cx :: V = fill(T(Inf), nlp.meta.ncon), - Ad :: V = fill(T(Inf), nlp.meta.ncon), - y :: V = fill(T(Inf), nlp.meta.ncon) + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon), + y :: V = fill(T(Inf), nlp.meta.ncon), + y⁺ :: V = fill(T(Inf), nlp.meta.ncon), # y + η * c(x) + Jᵀy⁺ :: V = fill(T(Inf), nlp.meta.nvar), + Jv :: V = fill(T(Inf), nlp.meta.ncon), + JᵀJv :: V = fill(T(Inf), nlp.meta.nvar) ) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} - AugLagMerit{M,T,V}(nlp, η, fx, gx, cx, Ad, y) + meta = NLPModelMeta(nlp.meta.nvar) + AugLagMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad, y, y⁺, Jᵀy⁺, Jv, JᵀJv) end function NLPModels.obj(merit :: AugLagMerit, x :: AbstractVector; update :: Bool = true) - if update - merit.fx = obj(merit.nlp, x) - merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) - end - return merit.fx - dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx + dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 end function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) - if update - grad!(merit.nlp, x, merit.gx) - merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) - end - if merit.nlp.meta.ncon == 0 - return dot(merit.gx, d) - else - return dot(merit.gx, d) - dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) - end + @lencheck merit.meta.nvar x d + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) + end +end + +function NLPModels.grad!(merit :: AugLagMerit, x :: AbstractVector, g :: AbstractVector; update :: Bool = true) + @lencheck merit.meta.nvar x g + NLPModels.increment!(merit, :neval_grad) + if update + grad!(nlp.model, x, merit.gx) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + merit.nlp.meta.ncon > 0 && jtprod!(merit.nlp, x, merit.y⁺, merit.Jᵀy⁺) + end + g .= merit.gx .+ merit.Jᵀy⁺ + return g +end + +function NLPModels.objgrad!(merit :: AugLagMerit, x :: AbstractVector, g :: AbstractVector) + @lencheck merit.meta.nvar x g + NLPModels.increment!(merit, :neval_obj) + NLPModels.increment!(merit, :neval_grad) + if update + merit.fx = obj(merit.nlp, x) + grad!(nlp.model, x, merit.gx) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + merit.nlp.meta.ncon > 0 && jtprod!(merit.nlp, x, merit.y⁺, merit.Jᵀy⁺) + end + f = merit.fx + dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 + g .= merit.gx .+ merit.Jᵀy⁺ + return f, g +end + +function NLPModels.hprod!(merit :: AugLagMerit, x :: 
AbstractVector, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Float64 = 1.0) + @lencheck merit.meta.nvar x v Hv + NLPModels.increment!(merit, :neval_hprod) + if update + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + end + jprod!(merit.model, x, v, merit.Jv) + jtprod!(merit.model, x, merit.Jv, merit.JᵀJv) + hprod!(merit.model, x, merit.y⁺, v, Hv, obj_weight = obj_weight) + Hv .+= merit.η * merit.JᵀJv + return Hv end \ No newline at end of file diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl index 754a07d1..6a09da2c 100644 --- a/src/merit/l1merit.jl +++ b/src/merit/l1merit.jl @@ -13,50 +13,56 @@ defined by ``` """ mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel - nlp :: M - η :: T - fx :: T - gx :: V - cx :: V - Ad :: V + meta :: NLPModelMeta + counters :: Counters + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V end function L1Merit( - nlp :: M, - η :: T; - fx :: T = T(Inf), - gx :: V = fill(T(Inf), nlp.meta.nvar), - cx :: V = fill(T(Inf), nlp.meta.ncon), - Ad :: V = fill(T(Inf), nlp.meta.ncon) + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon) ) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} - L1Merit{M,T,V}(nlp, η, fx, gx, cx, Ad) + meta = NLPModelMeta(nlp.meta.nvar) + L1Merit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad) end function NLPModels.obj(merit :: L1Merit, x :: AbstractVector; update :: Bool = true) - if update - merit.fx = obj(merit.nlp, x) - merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) - end - return merit.fx + merit.η * norm(merit.cx, 1) + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx + merit.η * norm(merit.cx, 1) end function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) - if update - grad!(merit.nlp, x, merit.gx) - merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) - end - if merit.nlp.meta.ncon == 0 - return dot(merit.gx, d) - else - return dot(merit.gx, d) + merit.η * sum( - if ci > 0 - Adi - elseif ci < 0 - -Adi - else - abs(Adi) - end - for (ci, Adi) in zip(merit.cx, merit.Ad) - ) - end + @lencheck merit.meta.nvar x d + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + merit.η * sum( + if ci > 0 + Adi + elseif ci < 0 + -Adi + else + abs(Adi) + end + for (ci, Adi) in zip(merit.cx, merit.Ad) + ) + end end \ No newline at end of file diff --git a/src/merit/merit.jl b/src/merit/merit.jl index 787cb21a..c4869a9d 100644 --- a/src/merit/merit.jl +++ b/src/merit/merit.jl @@ -17,26 +17,20 @@ All models allow a constructor of form Merit(nlp, η; fx=Inf, cx=[Inf,…,Inf], gx=[Inf,…,Inf], Ad=[Inf,…,Inf]) Additional arguments and constructors may be provided. -""" -abstract type AbstractMeritModel end -""" - obj(merit, x; update=true) +An AbstractMeritModel is an AbstractNLPModel, but the API may not be completely implemented. For +instance, the `L1Merit` model doesn't provide any gradient function, but it provides a directional +derivative function. -Computes the `merit` function value at `x`. 
-This will call `obj` and `cons` for the internal model, unless `update` is set to `false`.
-The option exists to allow updating the `η` parameter without recomputing `fx` and `cx`.
+
+An AbstractMeritModel is an AbstractNLPModel, but the API may not be completely implemented. For
+instance, the `L1Merit` model doesn't provide any gradient function, but it provides a directional
+derivative function.
+
+Furthermore, all implemented methods accept an `update` keyword that defaults to `true`. It is used
+to determine whether the internally stored values should be updated or not.
 """
-function NLPModels.obj(merit::AbstractMeritModel, x::AbstractVector)
-  # I'd prefer to use only `function NLPModels.obj end` instead, but it doesn't work and using
-  # only `function obj end` overwrites the docstring
-  throw(MethodError(NLPModels.obj, (merit, x)))
-end
+abstract type AbstractMeritModel <: AbstractNLPModel end
 
 """
     derivative(merit, x, d; update=true)
 
-Computes the derivative derivative of `merit` at `x` on direction `d`.
+Computes the directional derivative of `merit` at `x` on direction `d`.
 This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct.
 The option exists to allow updating the `η` parameter without recomputing `fx` and `cx`.
 """
 function derivative end
 
 include("auglagmerit.jl")
 include("l1merit.jl")
diff --git a/test/merit.jl b/test/merit.jl
index b8c44085..8c11e6a6 100644
--- a/test/merit.jl
+++ b/test/merit.jl
@@ -98,8 +98,8 @@ end
     ),
     (
       "AugLagMerit",
-      (nlp, η; kwargs...) -> AugLagMerit(nlp, η, y=ones(typeof(η), nlp.meta.ncon); kwargs...),
-      (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (1,…,1)ᵀ
+      (nlp, η; kwargs...) -> AugLagMerit(nlp, η, y=-ones(typeof(η), nlp.meta.ncon); kwargs...),
+      (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (-1,…,-1)ᵀ
     )
   ]
   for (name, merit, ϕ) in merits
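
---

For reviewers, a minimal usage sketch of the merit API introduced by this series; it is not part of the patches. It assumes the three patches are applied and uses `ADNLPModel` as the tests do; the sample problem, the direction `d`, and the `backtrack` helper are illustrative only.

```julia
using SolverTools, NLPModels, LinearAlgebra

# Same small equality-constrained problem as in test/merit.jl.
nlp = ADNLPModel(x -> dot(x, x), [-1.0; 1.0], x -> [sum(x) - 1], [0.0], [0.0])
ϕ = L1Merit(nlp, 1.0)  # ℓ₁ merit with penalty parameter η = 1

# Armijo backtracking on the merit function, as the dummy solver does.
# Assumes d is a descent direction for ϕ at x, i.e., derivative(ϕ, x, d) < 0.
function backtrack(ϕ, x, d; τ = 1.0e-4)
  ϕx  = obj(ϕ, x)              # evaluates f(x) and c(x) and stores them in ϕ
  Dϕx = derivative(ϕ, x, d)    # directional derivative of ϕ at x along d
  t = 1.0
  ϕt = obj(ϕ, x + t * d)       # re-evaluates f and c at the trial point
  while ϕt > ϕx + τ * t * Dϕx  # Armijo condition on the merit value
    t /= 2
    ϕt = obj(ϕ, x + t * d)
  end
  return t
end

x = nlp.meta.x0
d = [1.5; -0.5]                # points toward the solution [0.5; 0.5]

ϕx = obj(ϕ, x)                 # computes and stores fx and cx
# η can be changed and ϕ re-evaluated without recomputing f(x) and c(x):
ϕ.η *= 10
ϕx = obj(ϕ, x, update=false)

t = backtrack(ϕ, x, d)         # step size accepted by the Armijo condition
```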
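After the third patch, an `AugLagMerit` is itself an `AbstractNLPModel`, so its value, gradient, and Hessian-vector products go through the usual NLPModels calls. A sketch under the same assumptions as above, reusing `nlp`, `x`, and `d`; the multiplier choice `y = 0` is arbitrary:

```julia
ψ = AugLagMerit(nlp, 10.0, y = zeros(nlp.meta.ncon))  # η = 10, y fixed at zero

g  = zeros(nlp.meta.nvar)
Hv = zeros(nlp.meta.nvar)

ψx = obj(ψ, x)       # f(x) + yᵀc(x) + η‖c(x)‖²/2
grad!(ψ, x, g)       # ∇f(x) + J(x)ᵀ(y + η c(x))
hprod!(ψ, x, d, Hv)  # (∇²f(x) + Σᵢ (y + η c(x))ᵢ ∇²cᵢ(x) + η J(x)ᵀJ(x)) d
```

This is the point of giving the merit structs their own `meta` and `counters`: a generic unconstrained solver can minimize the merit model directly through the NLPModels API.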