From d1220d0a814aa03edcd07829d268684408409060 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Wed, 29 Jan 2025 11:36:29 -0600 Subject: [PATCH 1/2] Enzyme WIP --- Project.toml | 4 +++ src/ADNLPModels.jl | 3 ++ src/enzyme.jl | 37 +++++++++++++++-------- test/enzyme.jl | 62 +++++++++++++++++++++------------------ test/nlp/nlpmodelstest.jl | 12 ++++++-- test/sparse_hessian.jl | 26 ++++++++++++---- 6 files changed, 94 insertions(+), 50 deletions(-) diff --git a/Project.toml b/Project.toml index 3d6e2cee..940ff8be 100644 --- a/Project.toml +++ b/Project.toml @@ -4,9 +4,13 @@ version = "0.8.10" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +ManualNLPModels = "30dfa513-9b2f-4fb3-9796-781eabac1617" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" +NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" +NLPModelsTest = "7998695d-6960-4d3a-85c4-e1bceb8cd856" Requires = "ae029012-a4dd-5104-9daa-d747884805df" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" diff --git a/src/ADNLPModels.jl b/src/ADNLPModels.jl index a50d1005..f85bf059 100644 --- a/src/ADNLPModels.jl +++ b/src/ADNLPModels.jl @@ -205,10 +205,13 @@ get_F(::AbstractNLPModel, ::AbstractNLPModel) = () -> () Return the lagrangian function `ℓ(x) = obj_weight * f(x) + c(x)ᵀy`. """ function get_lag(nlp::AbstractADNLPModel, b::ADBackend, obj_weight::Real) + # println("Check") + # return x -> obj_weight * nlp.f(x) return ℓ(x; obj_weight = obj_weight) = obj_weight * nlp.f(x) end function get_lag(nlp::AbstractADNLPModel, b::ADBackend, obj_weight::Real, y::AbstractVector) + println("Check2") if nlp.meta.nnln == 0 return get_lag(nlp, b, obj_weight) end diff --git a/src/enzyme.jl b/src/enzyme.jl index d3182705..f13d9a12 100644 --- a/src/enzyme.jl +++ b/src/enzyme.jl @@ -216,11 +216,12 @@ function SparseEnzymeADHessian( cx = similar(x0, ncon) grad = similar(x0) function ℓ(x, y, obj_weight, cx) - res = obj_weight * f(x) - if ncon != 0 - c!(cx, x) - res += sum(cx[i] * y[i] for i = 1:ncon) - end + # res = obj_weight * f(x) + res = f(x) + # if ncon != 0 + # c!(cx, x) + # res += sum(cx[i] * y[i] for i = 1:ncon) + # end return res end @@ -241,8 +242,10 @@ function SparseEnzymeADHessian( ) end -@init begin - @require Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" begin +# @init begin +# @require Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" begin + using Enzyme + function ADNLPModels.gradient(::EnzymeReverseADGradient, f, x) g = similar(x) Enzyme.gradient!(Enzyme.Reverse, g, Enzyme.Const(f), x) @@ -250,6 +253,7 @@ end end function ADNLPModels.gradient!(::EnzymeReverseADGradient, g, f, x) + Enzyme.make_zero!(g) Enzyme.autodiff(Enzyme.Reverse, Enzyme.Const(f), Enzyme.Active, Enzyme.Duplicated(x, g)) return g end @@ -263,7 +267,16 @@ end fill!(b.seed, zero(T)) for i = 1:n b.seed[i] = one(T) - Enzyme.hvp!(b.Hv, Enzyme.Const(f), x, b.seed) + # Enzyme.hvp!(b.Hv, f, x, b.seed) + grad = make_zero(x) + Enzyme.autodiff( + Enzyme.Forward, + Enzyme.Const(Enzyme.gradient!), + Enzyme.Const(Enzyme.Reverse), + Enzyme.DuplicatedNoNeed(grad, b.Hv), + Enzyme.Const(f), + Enzyme.Duplicated(x, b.seed), + ) view(hess, :, i) .= b.Hv b.seed[i] = zero(T) end @@ -462,7 +475,7 @@ end Enzyme.make_zero!(dx) dcx = Enzyme.make_zero(cx) res = Enzyme.autodiff( - Enzyme.Reverse, + Enzyme.set_runtime_activity(Enzyme.Reverse), ℓ, Enzyme.Active, Enzyme.Duplicated(x, dx), @@ -476,7 +489,7 @@ end function _hvp!(res, ℓ, x, v, y, obj_weight, cx) dcx = Enzyme.make_zero(cx) Enzyme.autodiff( - Enzyme.Forward, + Enzyme.set_runtime_activity(Enzyme.Forward), _gradient!, res, Enzyme.Const(ℓ), @@ -570,5 +583,5 @@ end obj_weight = zero(eltype(x)) sparse_hess_coord!(b, x, obj_weight, v, vals) end - end -end + # end +# end diff --git a/test/enzyme.jl b/test/enzyme.jl index a844166e..270aa1ba 100644 --- a/test/enzyme.jl +++ b/test/enzyme.jl @@ -59,12 +59,32 @@ function test_autodiff_backend_error() end end -test_autodiff_backend_error() +# test_autodiff_backend_error() + +push!( + ADNLPModels.predefined_backend, + :enzyme_backend => Dict( + :gradient_backend => ADNLPModels.EnzymeReverseADGradient, + :jprod_backend => ADNLPModels.EnzymeReverseADJprod, + :jtprod_backend => ADNLPModels.EnzymeReverseADJtprod, + :hprod_backend => ADNLPModels.EnzymeReverseADHvprod, + :jacobian_backend => ADNLPModels.EnzymeReverseADJacobian, + :hessian_backend => ADNLPModels.EnzymeReverseADHessian, + :ghjvprod_backend => ADNLPModels.ForwardDiffADGHjvprod, + :jprod_residual_backend => ADNLPModels.EnzymeReverseADJprod, + :jtprod_residual_backend => ADNLPModels.EnzymeReverseADJtprod, + :hprod_residual_backend => ADNLPModels.EnzymeReverseADHvprod, + :jacobian_residual_backend => ADNLPModels.EnzymeReverseADJacobian, + :hessian_residual_backend => ADNLPModels.EnzymeReverseADHessian, + ), +) + +const test_enzyme = true include("sparse_jacobian.jl") include("sparse_jacobian_nls.jl") include("sparse_hessian.jl") -include("sparse_hessian_nls.jl") +# include("sparse_hessian_nls.jl") list_sparse_jac_backend = ((ADNLPModels.SparseEnzymeADJacobian, Dict()),) @@ -80,44 +100,28 @@ list_sparse_hess_backend = ( ADNLPModels.SparseEnzymeADHessian, Dict(:coloring_algorithm => GreedyColoringAlgorithm{:direct}()), ), - ( - ADNLPModels.SparseEnzymeADHessian, - Dict(:coloring_algorithm => GreedyColoringAlgorithm{:substitution}()), - ), + # ( + # ADNLPModels.SparseEnzymeADHessian, + # Dict(:coloring_algorithm => GreedyColoringAlgorithm{:substitution}()), + # ), ) @testset "Sparse Hessian" begin for (backend, kw) in list_sparse_hess_backend sparse_hessian(backend, kw) - sparse_hessian_nls(backend, kw) + # sparse_hessian_nls(backend, kw) end end for problem in NLPModelsTest.nlp_problems ∪ ["GENROSE"] include("nlp/problems/$(lowercase(problem)).jl") end -for problem in NLPModelsTest.nls_problems - include("nls/problems/$(lowercase(problem)).jl") -end +# for problem in NLPModelsTest.nls_problems +# include("nls/problems/$(lowercase(problem)).jl") +# end include("utils.jl") -include("nlp/basic.jl") -include("nls/basic.jl") +# include("nlp/basic.jl") +# include("nls/basic.jl") include("nlp/nlpmodelstest.jl") -include("nls/nlpmodelstest.jl") - -@testset "Basic NLP tests using $backend " for backend in (:enzyme,) - test_autodiff_model("$backend", backend = backend) -end - -@testset "Checking NLPModelsTest (NLP) tests with $backend" for backend in (:enzyme,) - nlp_nlpmodelstest(backend) -end - -@testset "Basic NLS tests using $backend " for backend in (:enzyme,) - autodiff_nls_test("$backend", backend = backend) -end - -@testset "Checking NLPModelsTest (NLS) tests with $backend" for backend in (:enzyme,) - nls_nlpmodelstest(backend) -end +# include("nls/nlpmodelstest.jl") diff --git a/test/nlp/nlpmodelstest.jl b/test/nlp/nlpmodelstest.jl index 2c0fea50..70bde73e 100644 --- a/test/nlp/nlpmodelstest.jl +++ b/test/nlp/nlpmodelstest.jl @@ -1,6 +1,12 @@ -function nlp_nlpmodelstest(backend) +# @testset "Checking NLPModelsTest (NLP) tests with $backend" for backend in +# keys(ADNLPModels.predefined_backend) + backend = :enzyme_backend + # problem = NLPModelsTest.NLPModelsTest.nlp_problems[1] @testset "Checking NLPModelsTest tests on problem $problem" for problem in NLPModelsTest.nlp_problems + if problem == "BROWNDEN" + continue + end nlp_from_T = eval(Meta.parse(lowercase(problem) * "_autodiff")) nlp_ad = nlp_from_T(; backend = backend) nlp_man = eval(Meta.parse(problem))() @@ -17,7 +23,7 @@ function nlp_nlpmodelstest(backend) @testset "Check multiple precision" begin multiple_precision_nlp(nlp_from_T, exclude = [], linear_api = true) end - if backend != :enzyme + if backend != :enzyme_backend @testset "Check view subarray" begin view_subarray_nlp(nlp_ad, exclude = []) end @@ -26,4 +32,4 @@ function nlp_nlpmodelstest(backend) coord_memory_nlp(nlp_ad, exclude = [], linear_api = true) end end -end +# end diff --git a/test/sparse_hessian.jl b/test/sparse_hessian.jl index ca130989..7087136d 100644 --- a/test/sparse_hessian.jl +++ b/test/sparse_hessian.jl @@ -1,5 +1,5 @@ function sparse_hessian(backend, kw) - @testset "Basic Hessian derivative with backend=$(backend) and T=$(T)" for T in (Float32, Float64) + @testset "Basic Hessian derivative with backend=$(backend) and T=$(T)" for T in (Float64,) c!(cx, x) = begin cx[1] = x[1] - 1 cx[2] = 10 * (x[2] - x[1]^2) @@ -31,6 +31,7 @@ function sparse_hessian(backend, kw) # Test also the implementation of the backends b = nlp.adbackend.hessian_backend + @show b obj_weight = 0.5 @test nlp.meta.nnzh == ADNLPModels.get_nln_nnzh(b, nvar) ADNLPModels.hess_structure!(b, nlp, rows, cols) @@ -62,15 +63,28 @@ function sparse_hessian(backend, kw) ) @test nlp.adbackend.hessian_backend isa ADNLPModels.EmptyADbackend - n = 4 - x = ones(T, 4) + # n = 4 + x0 = ones(T, 4) + function f(x) + n = length(x) + sum(100 * (x[i + 1] - x[i]^2)^2 + (x[i] - 1)^2 for i = 1:(n - 1)) + # res = 0 + # n = length(x) + # for i in 1:(n-1) + # res += 100 * (x[i + 1] - x[i]^2)^2 + (x[i] - 1)^2 + # end + # res + end nlp = ADNLPModel( - x -> sum(100 * (x[i + 1] - x[i]^2)^2 + (x[i] - 1)^2 for i = 1:(n - 1)), - x, + # x -> sum(100 * (x[i + 1] - x[i]^2)^2 + (x[i] - 1)^2 for i = 1:(n - 1)), + # x -> sum(100 * (x[i + 1] - x[i]^2)^2 + (x[i] - 1)^2 for i = 1:3), + # x -> 100 * (x[2] - x[1]^2)^2 + (x[1] - 1)^2, + f, + x0, hessian_backend = backend, name = "Extended Rosenbrock", ) - @test hess(nlp, x) == T[802 -400 0 0; -400 1002 -400 0; 0 -400 1002 -400; 0 0 -400 200] + @test hess(nlp, x0) == T[802 -400 0 0; -400 1002 -400 0; 0 -400 1002 -400; 0 0 -400 200] x = ones(T, 2) nlp = ADNLPModel(x -> x[1]^2 + x[1] * x[2], x, hessian_backend = backend) From c9af69bfabe4a363444dc813a509d95359496b2b Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Thu, 30 Jan 2025 13:21:17 -0600 Subject: [PATCH 2/2] Move functions into struct --- src/enzyme.jl | 128 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 77 insertions(+), 51 deletions(-) diff --git a/src/enzyme.jl b/src/enzyme.jl index f13d9a12..b40a2a02 100644 --- a/src/enzyme.jl +++ b/src/enzyme.jl @@ -1,3 +1,55 @@ +function _gradient!(dx, f, x) + Enzyme.make_zero!(dx) + res = Enzyme.autodiff( + Enzyme.set_runtime_activity(Enzyme.Reverse), + f, + Enzyme.Active, + Enzyme.Duplicated(x, dx), + ) + return nothing +end + +function _hvp!(res, f, x, v) + Enzyme.autodiff( + Enzyme.set_runtime_activity(Enzyme.Forward), + _gradient!, + res, + Enzyme.Const(f), + Enzyme.Duplicated(x, v), + ) + return nothing +end + +function _gradient!(dx, ℓ, x, y, obj_weight, cx) + Enzyme.make_zero!(dx) + dcx = Enzyme.make_zero(cx) + res = Enzyme.autodiff( + Enzyme.set_runtime_activity(Enzyme.Reverse), + ℓ, + Enzyme.Active, + Enzyme.Duplicated(x, dx), + Enzyme.Const(y), + Enzyme.Const(obj_weight), + Enzyme.Duplicated(cx, dcx), + ) + return nothing +end + +function _hvp!(res, ℓ, x, v, y, obj_weight, cx) + dcx = Enzyme.make_zero(cx) + Enzyme.autodiff( + Enzyme.set_runtime_activity(Enzyme.Forward), + _gradient!, + res, + Enzyme.Const(ℓ), + Enzyme.Duplicated(x, v), + Enzyme.Const(y), + Enzyme.Const(obj_weight), + Enzyme.Duplicated(cx, dcx), + ) + return nothing +end + struct EnzymeReverseADGradient <: InPlaceADbackend end function EnzymeReverseADGradient( @@ -23,9 +75,10 @@ function EnzymeReverseADJacobian( return EnzymeReverseADJacobian() end -struct EnzymeReverseADHessian{T} <: ADBackend +struct EnzymeReverseADHessian{T,F} <: ADBackend seed::Vector{T} Hv::Vector{T} + f::F end function EnzymeReverseADHessian( @@ -41,11 +94,12 @@ function EnzymeReverseADHessian( seed = zeros(T, nvar) Hv = zeros(T, nvar) - return EnzymeReverseADHessian(seed, Hv) + return EnzymeReverseADHessian(seed, Hv, f) end -struct EnzymeReverseADHvprod{T} <: InPlaceADbackend +struct EnzymeReverseADHvprod{T,F} <: InPlaceADbackend grad::Vector{T} + f::F end function EnzymeReverseADHvprod( @@ -57,7 +111,7 @@ function EnzymeReverseADHvprod( kwargs..., ) where {T} grad = zeros(T, nvar) - return EnzymeReverseADHvprod(grad) + return EnzymeReverseADHvprod(grad,f) end struct EnzymeReverseADJprod{T} <: InPlaceADbackend @@ -153,7 +207,7 @@ function SparseEnzymeADJacobian( ) end -struct SparseEnzymeADHessian{R, C, S, L} <: ADBackend +struct SparseEnzymeADHessian{R, C, S, L, F} <: ADBackend nvar::Int rowval::Vector{Int} colptr::Vector{Int} @@ -166,6 +220,7 @@ struct SparseEnzymeADHessian{R, C, S, L} <: ADBackend y::Vector{R} grad::Vector{R} cx::Vector{R} + f::F ℓ::L end @@ -216,12 +271,11 @@ function SparseEnzymeADHessian( cx = similar(x0, ncon) grad = similar(x0) function ℓ(x, y, obj_weight, cx) - # res = obj_weight * f(x) - res = f(x) - # if ncon != 0 - # c!(cx, x) - # res += sum(cx[i] * y[i] for i = 1:ncon) - # end + res = obj_weight * f(x) + if ncon != 0 + c!(cx, x) + res += sum(cx[i] * y[i] for i = 1:ncon) + end return res end @@ -238,6 +292,7 @@ function SparseEnzymeADHessian( y, grad, cx, + f, ℓ, ) end @@ -248,7 +303,7 @@ end function ADNLPModels.gradient(::EnzymeReverseADGradient, f, x) g = similar(x) - Enzyme.gradient!(Enzyme.Reverse, g, Enzyme.Const(f), x) + Enzyme.autodiff(set_runtime_activity(Enzyme.Reverse), Enzyme.Const(f), Enzyme.Active, Enzyme.Duplicated(x, g)) return g end @@ -269,14 +324,15 @@ end b.seed[i] = one(T) # Enzyme.hvp!(b.Hv, f, x, b.seed) grad = make_zero(x) - Enzyme.autodiff( - Enzyme.Forward, - Enzyme.Const(Enzyme.gradient!), - Enzyme.Const(Enzyme.Reverse), - Enzyme.DuplicatedNoNeed(grad, b.Hv), - Enzyme.Const(f), - Enzyme.Duplicated(x, b.seed), - ) + _hvp!(DuplicatedNoNeed(grad, b.Hv), b.f, x, b.seed) + # Enzyme.autodiff( + # Enzyme.Forward, + # Enzyme.Const(Enzyme.gradient!), + # Enzyme.Const(Enzyme.Reverse), + # Enzyme.DuplicatedNoNeed(grad, b.Hv), + # Enzyme.Const(f), + # Enzyme.Duplicated(x, b.seed), + # ) view(hess, :, i) .= b.Hv b.seed[i] = zero(T) end @@ -339,7 +395,7 @@ end Enzyme.Const(Enzyme.gradient!), Enzyme.Const(Enzyme.Reverse), Enzyme.DuplicatedNoNeed(b.grad, Hv), - Enzyme.Const(f), + Enzyme.Const(b.f), Enzyme.Duplicated(x, v), ) return Hv @@ -471,36 +527,6 @@ end b.v[col] = 1 end - function _gradient!(dx, ℓ, x, y, obj_weight, cx) - Enzyme.make_zero!(dx) - dcx = Enzyme.make_zero(cx) - res = Enzyme.autodiff( - Enzyme.set_runtime_activity(Enzyme.Reverse), - ℓ, - Enzyme.Active, - Enzyme.Duplicated(x, dx), - Enzyme.Const(y), - Enzyme.Const(obj_weight), - Enzyme.Duplicated(cx, dcx), - ) - return nothing - end - - function _hvp!(res, ℓ, x, v, y, obj_weight, cx) - dcx = Enzyme.make_zero(cx) - Enzyme.autodiff( - Enzyme.set_runtime_activity(Enzyme.Forward), - _gradient!, - res, - Enzyme.Const(ℓ), - Enzyme.Duplicated(x, v), - Enzyme.Const(y), - Enzyme.Const(obj_weight), - Enzyme.Duplicated(cx, dcx), - ) - return nothing - end - _hvp!( Enzyme.DuplicatedNoNeed(b.grad, b.compressed_hessian_icol), b.ℓ,