From 531edb47714709d295d91d073782d6821174a2b9 Mon Sep 17 00:00:00 2001 From: franckgaga Date: Sun, 20 Apr 2025 12:05:11 -0400 Subject: [PATCH 1/9] doc: minor modification --- src/controller/execute.jl | 2 +- src/estimator/execute.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/controller/execute.jl b/src/controller/execute.jl index a3925111f..7d6086a43 100644 --- a/src/controller/execute.jl +++ b/src/controller/execute.jl @@ -488,7 +488,7 @@ Call `periodsleep(mpc.estim.model)`. periodsleep(mpc::PredictiveController, busywait=false) = periodsleep(mpc.estim.model, busywait) """ - setstate!(mpc::PredictiveController, x̂, P̂=nothing) -> mpc + setstate!(mpc::PredictiveController, x̂[, P̂]) -> mpc Call [`setstate!`](@ref) on `mpc.estim` [`StateEstimator`](@ref). """ diff --git a/src/estimator/execute.jl b/src/estimator/execute.jl index 23408995b..98b63c9e2 100644 --- a/src/estimator/execute.jl +++ b/src/estimator/execute.jl @@ -330,7 +330,7 @@ function validate_args(estim::StateEstimator, ym, d, u=nothing) end """ - setstate!(estim::StateEstimator, x̂, P̂=nothing) -> estim + setstate!(estim::StateEstimator, x̂[, P̂]) -> estim Set `estim.x̂0` to `x̂ - estim.x̂op` from the argument `x̂`, and `estim.P̂` to `P̂` if applicable. From 18a75aba4b7fe07e051e8244cd807e8590dedef2 Mon Sep 17 00:00:00 2001 From: franckgaga Date: Sun, 20 Apr 2025 12:08:29 -0400 Subject: [PATCH 2/9] added: hessian kwarg in `NonLinMPC` --- src/controller/nonlinmpc.jl | 42 ++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/src/controller/nonlinmpc.jl b/src/controller/nonlinmpc.jl index 0794088da..6b43a96ef 100644 --- a/src/controller/nonlinmpc.jl +++ b/src/controller/nonlinmpc.jl @@ -14,7 +14,8 @@ struct NonLinMPC{ TM<:TranscriptionMethod, JM<:JuMP.GenericModel, GB<:AbstractADType, - JB<:AbstractADType, + JB<:AbstractADType, + HB<:Union{Nothing, AbstractADType}, PT<:Any, JEfunc<:Function, GCfunc<:Function @@ -27,6 +28,7 @@ struct NonLinMPC{ con::ControllerConstraint{NT, GCfunc} gradient::GB jacobian::JB + hessian::HB Z̃::Vector{NT} ŷ::Vector{NT} Hp::Int @@ -63,7 +65,7 @@ struct NonLinMPC{ function NonLinMPC{NT}( estim::SE, Hp, Hc, M_Hp, N_Hc, L_Hp, Cwt, Ewt, JE::JEfunc, gc!::GCfunc, nc, p::PT, - transcription::TM, optim::JM, gradient::GB, jacobian::JB + transcription::TM, optim::JM, gradient::GB, jacobian::JB, hessian::HB ) where { NT<:Real, SE<:StateEstimator, @@ -71,6 +73,7 @@ struct NonLinMPC{ JM<:JuMP.GenericModel, GB<:AbstractADType, JB<:AbstractADType, + HB<:Union{Nothing, AbstractADType}, PT<:Any, JEfunc<:Function, GCfunc<:Function, @@ -107,9 +110,9 @@ struct NonLinMPC{ nZ̃ = get_nZ(estim, transcription, Hp, Hc) + nϵ Z̃ = zeros(NT, nZ̃) buffer = PredictiveControllerBuffer(estim, transcription, Hp, Hc, nϵ) - mpc = new{NT, SE, TM, JM, GB, JB, PT, JEfunc, GCfunc}( + mpc = new{NT, SE, TM, JM, GB, JB, HB, PT, JEfunc, GCfunc}( estim, transcription, optim, con, - gradient, jacobian, + gradient, jacobian, hessian, Z̃, ŷ, Hp, Hc, nϵ, weights, @@ -206,6 +209,8 @@ This controller allocates memory at each time step for the optimization. function, see [`DifferentiationInterface` doc](@extref DifferentiationInterface List). - `jacobian=default_jacobian(transcription)` : an `AbstractADType` backend for the Jacobian of the nonlinear constraints, see `gradient` above for the options (default in Extended Help). 
+- `hessian=nothing` : an `AbstractADType` backend for the Hessian of the objective function, + see `gradient` above for the options, use `nothing` for the LBFGS approximation of `optim`. - additional keyword arguments are passed to [`UnscentedKalmanFilter`](@ref) constructor (or [`SteadyKalmanFilter`](@ref), for [`LinModel`](@ref)). @@ -265,8 +270,11 @@ NonLinMPC controller with a sample time Ts = 10.0 s, Ipopt optimizer, UnscentedK coloring_algorithm = GreedyColoringAlgorithm() ) ``` - Optimizers generally benefit from exact derivatives like AD. However, the [`NonLinModel`](@ref) - state-space functions must be compatible with this feature. See [`JuMP` documentation](@extref JuMP Common-mistakes-when-writing-a-user-defined-operator) + Also, the `hessian` argument defaults to `nothing` meaning the built-in second-order + approximation of `solver`. Otherwise, a sparse backend like above is recommended to test + different `hessian` methods. Optimizers generally benefit from exact derivatives like AD. + However, the [`NonLinModel`](@ref) state-space functions must be compatible with this + feature. See [`JuMP` documentation](@extref JuMP Common-mistakes-when-writing-a-user-defined-operator) for common mistakes when writing these functions. Note that if `Cwt≠Inf`, the attribute `nlp_scaling_max_gradient` of `Ipopt` is set to @@ -293,13 +301,14 @@ function NonLinMPC( optim::JuMP.GenericModel = JuMP.Model(DEFAULT_NONLINMPC_OPTIMIZER, add_bridges=false), gradient::AbstractADType = DEFAULT_NONLINMPC_GRADIENT, jacobian::AbstractADType = default_jacobian(transcription), + hessian::Union{Nothing, AbstractADType} = nothing, kwargs... ) estim = UnscentedKalmanFilter(model; kwargs...) return NonLinMPC( estim; Hp, Hc, Mwt, Nwt, Lwt, Cwt, Ewt, JE, gc, nc, p, M_Hp, N_Hc, L_Hp, - transcription, optim, gradient, jacobian + transcription, optim, gradient, jacobian, hessian ) end @@ -324,13 +333,14 @@ function NonLinMPC( optim::JuMP.GenericModel = JuMP.Model(DEFAULT_NONLINMPC_OPTIMIZER, add_bridges=false), gradient::AbstractADType = DEFAULT_NONLINMPC_GRADIENT, jacobian::AbstractADType = default_jacobian(transcription), + hessian::Union{Nothing, AbstractADType} = nothing, kwargs... ) estim = SteadyKalmanFilter(model; kwargs...) return NonLinMPC( estim; Hp, Hc, Mwt, Nwt, Lwt, Cwt, Ewt, JE, gc, nc, p, M_Hp, N_Hc, L_Hp, - transcription, optim, gradient, jacobian + transcription, optim, gradient, jacobian, hessian ) end @@ -379,6 +389,7 @@ function NonLinMPC( optim::JuMP.GenericModel = JuMP.Model(DEFAULT_NONLINMPC_OPTIMIZER, add_bridges=false), gradient::AbstractADType = DEFAULT_NONLINMPC_GRADIENT, jacobian::AbstractADType = default_jacobian(transcription), + hessian::Union{Nothing, AbstractADType} = nothing, ) where { NT<:Real, SE<:StateEstimator{NT} @@ -392,7 +403,7 @@ function NonLinMPC( gc! = get_mutating_gc(NT, gc) return NonLinMPC{NT}( estim, Hp, Hc, M_Hp, N_Hc, L_Hp, Cwt, Ewt, JE, gc!, nc, p, - transcription, optim, gradient, jacobian + transcription, optim, gradient, jacobian, hessian ) end @@ -540,7 +551,7 @@ function init_optimization!(mpc::NonLinMPC, model::SimModel, optim::JuMP.Generic JuMP.set_attribute(optim, "nlp_scaling_max_gradient", 10.0/C) end end - Jfunc, ∇Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! = get_optim_functions( + Jfunc, ∇Jfunc!, ∇²Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! = get_optim_functions( mpc, optim ) @operator(optim, J, nZ̃, Jfunc, ∇Jfunc!) 
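For reference, enabling an exact Hessian in `JuMP` only amounts to passing a third callback to `@operator`, which is the mechanism these commits rely on when a `hessian` backend is provided. Below is a minimal, self-contained sketch on a toy Rosenbrock objective (not part of the package; the hand-coded derivatives stand in for the `Jfunc`/`∇Jfunc!`/`∇²Jfunc!` triplet, and only the lower triangle of the Hessian is filled, per the `JuMP` convention):

```julia
using JuMP, Ipopt

f(x...) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
function ∇f!(g, x...)
    g[1] = -2 * (1 - x[1]) - 400 * x[1] * (x[2] - x[1]^2)
    g[2] = 200 * (x[2] - x[1]^2)
    return nothing
end
function ∇²f!(H, x...)
    H[1, 1] = 2 - 400 * x[2] + 1200 * x[1]^2
    H[2, 1] = -400 * x[1]        # lower triangle only, the upper part is never read
    H[2, 2] = 200.0
    return nothing
end

model = Model(Ipopt.Optimizer)
@variable(model, x[1:2], start = 0.5)
@operator(model, op_f, 2, f, ∇f!, ∇²f!)   # third callback activates the exact Hessian
@objective(model, Min, op_f(x...))
optimize!(model)
```

Omitting the third callback falls back to the solver's own second-order approximation, which is exactly the `hessian=nothing` behavior described above.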
@@ -553,14 +564,15 @@ end """ get_optim_functions( mpc::NonLinMPC, optim::JuMP.GenericModel - ) -> Jfunc, ∇Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! + ) -> Jfunc, ∇Jfunc!, ∇J²Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! Return the functions for the nonlinear optimization of `mpc` [`NonLinMPC`](@ref) controller. -Return the nonlinear objective `Jfunc` function, and `∇Jfunc!`, to compute its gradient. -Also return vectors with the nonlinear inequality constraint functions `gfuncs`, and -`∇gfuncs!`, for the associated gradients. Lastly, also return vectors with the nonlinear -equality constraint functions `geqfuncs` and gradients `∇geqfuncs!`. +Return the nonlinear objective `Jfunc` function, and `∇Jfunc!` and `∇²Jfunc!`, to compute +its gradient and hessian, respectively. Also return vectors with the nonlinear inequality +constraint functions `gfuncs`, and `∇gfuncs!`, for the associated gradients. Lastly, also +return vectors with the nonlinear equality constraint functions `geqfuncs` and gradients +`∇geqfuncs!`. This method is really intricate and I'm not proud of it. That's because of 3 elements: From 4772323a29808c670d9aab6895a7adeab989cbcc Mon Sep 17 00:00:00 2001 From: franckgaga Date: Sun, 20 Apr 2025 12:20:10 -0400 Subject: [PATCH 3/9] =?UTF-8?q?debug:=20assign=20`=E2=88=87=C2=B2Jfunc!`?= =?UTF-8?q?=20to=20nothing=20for=20now?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/controller/nonlinmpc.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/controller/nonlinmpc.jl b/src/controller/nonlinmpc.jl index 6b43a96ef..f13e96606 100644 --- a/src/controller/nonlinmpc.jl +++ b/src/controller/nonlinmpc.jl @@ -639,6 +639,7 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT return ∇J # multivariate syntax, see JuMP.@operator doc end end + ∇²Jfunc! = nothing # --------------------- inequality constraint functions ------------------------------- gfuncs = Vector{Function}(undef, ng) for i in eachindex(gfuncs) @@ -733,7 +734,7 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT end ∇geqfuncs![i] = ∇geqfuncs_i! end - return Jfunc, ∇Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! + return Jfunc, ∇Jfunc!, ∇²Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! 
end """ From 8afb697cdc75b46fb7d3df260effbb174315d1e7 Mon Sep 17 00:00:00 2001 From: franckgaga Date: Sun, 20 Apr 2025 12:36:01 -0400 Subject: [PATCH 4/9] test: uncomment noise in MHE vs KF tests --- test/2_test_state_estim.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/2_test_state_estim.jl b/test/2_test_state_estim.jl index c1cb8e764..082e0c45a 100644 --- a/test/2_test_state_estim.jl +++ b/test/2_test_state_estim.jl @@ -1364,7 +1364,7 @@ end X̂_mhe = zeros(4, 6) X̂_kf = zeros(4, 6) for i in 1:6 - y = [50,31] #+ randn(2) + y = [50,31] + randn(2) x̂_mhe = preparestate!(mhe, y, [25]) x̂_kf = preparestate!(kf, y, [25]) X̂_mhe[:,i] = x̂_mhe From 744370054ec2481fd73656226c40134c295f239c Mon Sep 17 00:00:00 2001 From: franckgaga Date: Sat, 26 Apr 2025 15:59:04 -0400 Subject: [PATCH 5/9] debug: correct method of `rethrow` --- src/controller/execute.jl | 2 +- src/general.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/controller/execute.jl b/src/controller/execute.jl index 7d6086a43..a72f7d748 100644 --- a/src/controller/execute.jl +++ b/src/controller/execute.jl @@ -405,7 +405,7 @@ function optim_objective!(mpc::PredictiveController{NT}) where {NT<:Real} MOIU.reset_optimizer(optim) JuMP.optimize!(optim) else - rethrow(err) + rethrow() end end if !issolved(optim) diff --git a/src/general.jl b/src/general.jl index 90a411dbc..94ed3e63f 100644 --- a/src/general.jl +++ b/src/general.jl @@ -51,7 +51,7 @@ function limit_solve_time(optim::GenericModel, Ts) if isa(err, MOI.UnsupportedAttribute{MOI.TimeLimitSec}) @warn "Solving time limit is not supported by the optimizer." else - rethrow(err) + rethrow() end end end From 20f2b2dd6310378d894d625985098d2a7e140527 Mon Sep 17 00:00:00 2001 From: franckgaga Date: Wed, 30 Apr 2025 11:02:26 -0400 Subject: [PATCH 6/9] wip: `NonLinMPC` wip hessian backend --- src/ModelPredictiveControl.jl | 4 +- src/controller/nonlinmpc.jl | 134 ++++++++++++++++++++------------- src/estimator/mhe/construct.jl | 34 +++++---- src/general.jl | 19 +++++ 4 files changed, 124 insertions(+), 67 deletions(-) diff --git a/src/ModelPredictiveControl.jl b/src/ModelPredictiveControl.jl index adefdfef2..70c0e3603 100644 --- a/src/ModelPredictiveControl.jl +++ b/src/ModelPredictiveControl.jl @@ -8,8 +8,10 @@ using RecipesBase using ProgressLogging using DifferentiationInterface: ADTypes.AbstractADType, AutoForwardDiff, AutoSparse -using DifferentiationInterface: gradient!, jacobian!, prepare_gradient, prepare_jacobian +using DifferentiationInterface: prepare_gradient, prepare_jacobian, prepare_hessian +using DifferentiationInterface: gradient!, jacobian!, hessian! using DifferentiationInterface: value_and_gradient!, value_and_jacobian! +using DifferentiationInterface: value_gradient_and_hessian! 
using DifferentiationInterface: Constant, Cache using SparseConnectivityTracer: TracerSparsityDetector using SparseMatrixColorings: GreedyColoringAlgorithm, sparsity_pattern diff --git a/src/controller/nonlinmpc.jl b/src/controller/nonlinmpc.jl index b11db6e80..6e43fa9f8 100644 --- a/src/controller/nonlinmpc.jl +++ b/src/controller/nonlinmpc.jl @@ -13,9 +13,9 @@ struct NonLinMPC{ SE<:StateEstimator, TM<:TranscriptionMethod, JM<:JuMP.GenericModel, - GB<:AbstractADType, - JB<:AbstractADType, + GB<:Union{Nothing, AbstractADType}, HB<:Union{Nothing, AbstractADType}, + JB<:AbstractADType, PT<:Any, JEfunc<:Function, GCfunc<:Function @@ -27,8 +27,8 @@ struct NonLinMPC{ optim::JM con::ControllerConstraint{NT, GCfunc} gradient::GB + hessian ::HB jacobian::JB - hessian::HB Z̃::Vector{NT} ŷ::Vector{NT} Hp::Int @@ -65,15 +65,15 @@ struct NonLinMPC{ function NonLinMPC{NT}( estim::SE, Hp, Hc, M_Hp, N_Hc, L_Hp, Cwt, Ewt, JE::JEfunc, gc!::GCfunc, nc, p::PT, - transcription::TM, optim::JM, gradient::GB, jacobian::JB, hessian::HB + transcription::TM, optim::JM, gradient::GB, hessian::HB, jacobian::JB, ) where { NT<:Real, SE<:StateEstimator, TM<:TranscriptionMethod, JM<:JuMP.GenericModel, - GB<:AbstractADType, - JB<:AbstractADType, + GB<:Union{Nothing, AbstractADType}, HB<:Union{Nothing, AbstractADType}, + JB<:AbstractADType, PT<:Any, JEfunc<:Function, GCfunc<:Function, @@ -110,9 +110,9 @@ struct NonLinMPC{ nZ̃ = get_nZ(estim, transcription, Hp, Hc) + nϵ Z̃ = zeros(NT, nZ̃) buffer = PredictiveControllerBuffer(estim, transcription, Hp, Hc, nϵ) - mpc = new{NT, SE, TM, JM, GB, JB, HB, PT, JEfunc, GCfunc}( + mpc = new{NT, SE, TM, JM, GB, HB, JB, PT, JEfunc, GCfunc}( estim, transcription, optim, con, - gradient, jacobian, hessian, + gradient, hessian, jacobian, Z̃, ŷ, Hp, Hc, nϵ, weights, @@ -205,12 +205,14 @@ This controller allocates memory at each time step for the optimization. - `transcription=SingleShooting()` : a [`TranscriptionMethod`](@ref) for the optimization. - `optim=JuMP.Model(Ipopt.Optimizer)` : nonlinear optimizer used in the predictive controller, provided as a [`JuMP.Model`](@extref) object (default to [`Ipopt`](https://github.com/jump-dev/Ipopt.jl) optimizer). -- `gradient=AutoForwardDiff()` : an `AbstractADType` backend for the gradient of the objective - function, see [`DifferentiationInterface` doc](@extref DifferentiationInterface List). +- `hessian=nothing` : an `AbstractADType` backend for the Hessian of the objective function + (see [`DifferentiationInterface` doc](@extref DifferentiationInterface List)), or + `nothing` for the LBFGS approximation provided by `optim` (details in Extended Help). +- `gradient=isnothing(hessian) ? AutoForwardDiff() : nothing` : an `AbstractADType` backend + for the gradient of the objective function (see `hessian` for the options), or `nothing` + to retrieve lower-order derivatives from `hessian`. - `jacobian=default_jacobian(transcription)` : an `AbstractADType` backend for the Jacobian - of the nonlinear constraints, see `gradient` above for the options (default in Extended Help). -- `hessian=nothing` : an `AbstractADType` backend for the Hessian of the objective function, - see `gradient` above for the options, use `nothing` for the LBFGS approximation of `optim`. + of the nonlinear constraints (see `hessian` for the options, defaults in Extended Help). - additional keyword arguments are passed to [`UnscentedKalmanFilter`](@ref) constructor (or [`SteadyKalmanFilter`](@ref), for [`LinModel`](@ref)). 
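For illustration, the new `hessian` keyword documented above could be exercised as follows once the feature lands. The toy model, horizons and backend choices are placeholders, not part of the package sources:

```julia
using ModelPredictiveControl, DifferentiationInterface
import ForwardDiff
using SparseConnectivityTracer: TracerSparsityDetector
using SparseMatrixColorings: GreedyColoringAlgorithm

# placeholder first-order model, only to have a NonLinModel to hand to NonLinMPC:
function f!(ẋ, x, u, _ , _ )
    ẋ .= -0.2 .* x .+ u
    return nothing
end
function h!(y, x, _ , _ )
    y .= x
    return nothing
end
model = NonLinModel(f!, h!, 5.0, 1, 1, 1)

# exact sparse Hessian of the objective instead of the LBFGS approximation of Ipopt:
sparseAD = AutoSparse(
    AutoForwardDiff();
    sparsity_detector  = TracerSparsityDetector(),
    coloring_algorithm = GreedyColoringAlgorithm()
)
mpc = NonLinMPC(model; Hp=10, Hc=2, hessian=sparseAD)
```

Passing only `hessian` leaves `gradient=nothing`, so the first-order derivatives are retrieved from the Hessian backend instead of being computed twice.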
@@ -264,16 +266,16 @@ NonLinMPC controller with a sample time Ts = 10.0 s, Ipopt optimizer, UnscentedK exception: if `transcription` is not a [`SingleShooting`](@ref), the `jacobian` argument defaults to this [sparse backend](@extref DifferentiationInterface AutoSparse-object): ```julia - AutoSparse( + sparseAD = AutoSparse( AutoForwardDiff(); sparsity_detector = TracerSparsityDetector(), coloring_algorithm = GreedyColoringAlgorithm() ) ``` - Also, the `hessian` argument defaults to `nothing` meaning the built-in second-order - approximation of `solver`. Otherwise, a sparse backend like above is recommended to test - different `hessian` methods. Optimizers generally benefit from exact derivatives like AD. - However, the [`NonLinModel`](@ref) state-space functions must be compatible with this + Also, the `hessian` argument defaults to `nothing` meaning the LBFGS approximation of + `optim`. Otherwise, a sparse backend like above is recommended to test a different + `hessian` method. In general, optimizers benefit from exact derivatives like AD. + However, the [`NonLinModel`](@ref) state-space functions must be compatible with this feature. See [`JuMP` documentation](@extref JuMP Common-mistakes-when-writing-a-user-defined-operator) for common mistakes when writing these functions. @@ -299,16 +301,16 @@ function NonLinMPC( p = model.p, transcription::TranscriptionMethod = DEFAULT_NONLINMPC_TRANSCRIPTION, optim::JuMP.GenericModel = JuMP.Model(DEFAULT_NONLINMPC_OPTIMIZER, add_bridges=false), - gradient::AbstractADType = DEFAULT_NONLINMPC_GRADIENT, + hessian ::Union{Nothing, AbstractADType} = nothing, + gradient::Union{Nothing, AbstractADType} = isnothing(hessian) ? DEFAULT_NONLINMPC_GRADIENT : nothing, jacobian::AbstractADType = default_jacobian(transcription), - hessian::Union{Nothing, AbstractADType} = nothing, kwargs... ) estim = UnscentedKalmanFilter(model; kwargs...) return NonLinMPC( estim; Hp, Hc, Mwt, Nwt, Lwt, Cwt, Ewt, JE, gc, nc, p, M_Hp, N_Hc, L_Hp, - transcription, optim, gradient, jacobian, hessian + transcription, optim, gradient, hessian, jacobian ) end @@ -331,16 +333,16 @@ function NonLinMPC( p = model.p, transcription::TranscriptionMethod = DEFAULT_NONLINMPC_TRANSCRIPTION, optim::JuMP.GenericModel = JuMP.Model(DEFAULT_NONLINMPC_OPTIMIZER, add_bridges=false), - gradient::AbstractADType = DEFAULT_NONLINMPC_GRADIENT, + hessian ::Union{Nothing, AbstractADType} = nothing, + gradient::Union{Nothing, AbstractADType} = isnothing(hessian) ? DEFAULT_NONLINMPC_GRADIENT : nothing, jacobian::AbstractADType = default_jacobian(transcription), - hessian::Union{Nothing, AbstractADType} = nothing, kwargs... ) estim = SteadyKalmanFilter(model; kwargs...) return NonLinMPC( estim; Hp, Hc, Mwt, Nwt, Lwt, Cwt, Ewt, JE, gc, nc, p, M_Hp, N_Hc, L_Hp, - transcription, optim, gradient, jacobian, hessian + transcription, optim, gradient, hessian, jacobian ) end @@ -387,9 +389,9 @@ function NonLinMPC( p = estim.model.p, transcription::TranscriptionMethod = DEFAULT_NONLINMPC_TRANSCRIPTION, optim::JuMP.GenericModel = JuMP.Model(DEFAULT_NONLINMPC_OPTIMIZER, add_bridges=false), - gradient::AbstractADType = DEFAULT_NONLINMPC_GRADIENT, + hessian ::Union{Nothing, AbstractADType} = nothing, + gradient::Union{Nothing, AbstractADType} = isnothing(hessian) ? DEFAULT_NONLINMPC_GRADIENT : nothing, jacobian::AbstractADType = default_jacobian(transcription), - hessian::Union{Nothing, AbstractADType} = nothing, ) where { NT<:Real, SE<:StateEstimator{NT} @@ -403,7 +405,7 @@ function NonLinMPC( gc! 
= get_mutating_gc(NT, gc) return NonLinMPC{NT}( estim, Hp, Hc, M_Hp, N_Hc, L_Hp, Cwt, Ewt, JE, gc!, nc, p, - transcription, optim, gradient, jacobian, hessian + transcription, optim, gradient, hessian, jacobian ) end @@ -551,10 +553,12 @@ function init_optimization!(mpc::NonLinMPC, model::SimModel, optim::JuMP.Generic JuMP.set_attribute(optim, "nlp_scaling_max_gradient", 10.0/C) end end + validate_backends(mpc.gradient, mpc.hessian) Jfunc, ∇Jfunc!, ∇²Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! = get_optim_functions( mpc, optim ) - @operator(optim, J, nZ̃, Jfunc, ∇Jfunc!) + Jargs = isnothing(∇²Jfunc!) ? (Jfunc, ∇Jfunc!) : (Jfunc, ∇Jfunc!, ∇²Jfunc!) + @operator(optim, J, nZ̃, Jargs...) @objective(optim, Min, J(Z̃var...)) init_nonlincon!(mpc, model, transcription, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs!) set_nonlincon!(mpc, model, transcription, optim) @@ -591,7 +595,7 @@ Inspired from: [User-defined operators with vector outputs](@extref JuMP User-de function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT<:Real # ----------- common cache for Jfunc, gfuncs and geqfuncs ---------------------------- model = mpc.estim.model - grad, jac = mpc.gradient, mpc.jacobian + grad, hess, jac = mpc.gradient, mpc.hessian, mpc.jacobian nu, ny, nx̂, nϵ, nk = model.nu, model.ny, mpc.estim.nx̂, mpc.nϵ, model.nk Hp, Hc = mpc.Hp, mpc.Hc ng, nc, neq = length(mpc.con.i_g), mpc.con.nc, mpc.con.neq @@ -613,32 +617,58 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) return obj_nonlinprog!(Ŷ0, U0, mpc, model, Ue, Ŷe, ΔŨ) end - Z̃_∇J = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call - ∇J_context = ( + Z̃_J = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call + context_J = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(g), Cache(geq), ) - ∇J_prep = prepare_gradient(Jfunc!, grad, Z̃_∇J, ∇J_context...; strict) - ∇J = Vector{JNT}(undef, nZ̃) - function update_objective!(J, ∇J, Z̃, Z̃arg) + if !isnothing(grad) + prep_∇J = prepare_gradient(Jfunc!, grad, Z̃_J, context_J...; strict) + else + prep_∇J = nothing + end + if !isnothing(hess) + prep_∇²J = prepare_hessian(Jfunc!, hess, Z̃_J, context_J...; strict) + display(sparsity_pattern(prep_∇²J)) + else + prep_∇²J = nothing + end + ∇J = Vector{JNT}(undef, nZ̃) + ∇²J = init_diffmat(JNT, hess, prep_∇²J, nZ̃, nZ̃) + + + + function update_objective!(J, ∇J, Z̃, Z̃arg, hess::Nothing, grad::AbstractADType) + if isdifferent(Z̃arg, Z̃) + Z̃ .= Z̃arg + J[], _ = value_and_gradient!(Jfunc!, ∇J, prep_∇J, grad, Z̃_J, context_J...) + end + end + function update_objective!(J, ∇J, Z̃, Z̃arg, hess::AbstractADType, grad::Nothing) if isdifferent(Z̃arg, Z̃) Z̃ .= Z̃arg - J[], _ = value_and_gradient!(Jfunc!, ∇J, ∇J_prep, grad, Z̃_∇J, ∇J_context...) + J[], _ = value_gradient_and_hessian!( + Jfunc!, ∇J, ∇²J, prep_∇²J, hess, Z̃, context_J... + ) + #display(∇J) + #display(∇²J) + #println(∇²J) end - end + end + function Jfunc(Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_∇J, Z̃arg) + update_objective!(J, ∇J, Z̃_J, Z̃arg, hess, grad) return J[]::T end ∇Jfunc! 
= if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): function (Z̃arg) - update_objective!(J, ∇J, Z̃_∇J, Z̃arg) + update_objective!(J, ∇J, Z̃_J, Z̃arg, hess, grad) return ∇J[begin] end else # multivariate syntax (see JuMP.@operator doc): function (∇Jarg::AbstractVector{T}, Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_∇J, Z̃arg) + update_objective!(J, ∇J, Z̃_J, Z̃arg, hess, grad) return ∇Jarg .= ∇J end end @@ -648,27 +678,27 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) return g end - Z̃_∇g = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call - ∇g_context = ( + Z̃_g = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call + context_g = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(geq), ) # temporarily enable all the inequality constraints for sparsity detection: mpc.con.i_g[1:end-nc] .= true - ∇g_prep = prepare_jacobian(gfunc!, g, jac, Z̃_∇g, ∇g_context...; strict) + ∇g_prep = prepare_jacobian(gfunc!, g, jac, Z̃_g, context_g...; strict) mpc.con.i_g[1:end-nc] .= false ∇g = init_diffmat(JNT, jac, ∇g_prep, nZ̃, ng) function update_con!(g, ∇g, Z̃, Z̃arg) if isdifferent(Z̃arg, Z̃) Z̃ .= Z̃arg - value_and_jacobian!(gfunc!, g, ∇g, ∇g_prep, jac, Z̃, ∇g_context...) + value_and_jacobian!(gfunc!, g, ∇g, ∇g_prep, jac, Z̃, context_g...) end end gfuncs = Vector{Function}(undef, ng) for i in eachindex(gfuncs) gfunc_i = function (Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con!(g, ∇g, Z̃_∇g, Z̃arg) + update_con!(g, ∇g, Z̃_g, Z̃arg) return g[i]::T end gfuncs[i] = gfunc_i @@ -677,12 +707,12 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT for i in eachindex(∇gfuncs!) ∇gfuncs_i! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): function (Z̃arg::T) where T<:Real - update_con!(g, ∇g, Z̃_∇g, Z̃arg) + update_con!(g, ∇g, Z̃_g, Z̃arg) return ∇g[i, begin] end else # multivariate syntax (see JuMP.@operator doc): function (∇g_i, Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con!(g, ∇g, Z̃_∇g, Z̃arg) + update_con!(g, ∇g, Z̃_g, Z̃arg) return ∇g_i .= @views ∇g[i, :] end end @@ -693,24 +723,24 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) return geq end - Z̃_∇geq = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call - ∇geq_context = ( + Z̃_geq = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call + context_geq = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(g) ) - ∇geq_prep = prepare_jacobian(geqfunc!, geq, jac, Z̃_∇geq, ∇geq_context...; strict) + ∇geq_prep = prepare_jacobian(geqfunc!, geq, jac, Z̃_geq, context_geq...; strict) ∇geq = init_diffmat(JNT, jac, ∇geq_prep, nZ̃, neq) function update_con_eq!(geq, ∇geq, Z̃, Z̃arg) if isdifferent(Z̃arg, Z̃) Z̃ .= Z̃arg - value_and_jacobian!(geqfunc!, geq, ∇geq, ∇geq_prep, jac, Z̃, ∇geq_context...) + value_and_jacobian!(geqfunc!, geq, ∇geq, ∇geq_prep, jac, Z̃, context_geq...) 
end end geqfuncs = Vector{Function}(undef, neq) for i in eachindex(geqfuncs) geqfunc_i = function (Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con_eq!(geq, ∇geq, Z̃_∇geq, Z̃arg) + update_con_eq!(geq, ∇geq, Z̃_geq, Z̃arg) return geq[i]::T end geqfuncs[i] = geqfunc_i @@ -721,7 +751,7 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT # constraints imply MultipleShooting, thus input increment ΔU and state X̂0 in Z̃: ∇geqfuncs_i! = function (∇geq_i, Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con_eq!(geq, ∇geq, Z̃_∇geq, Z̃arg) + update_con_eq!(geq, ∇geq, Z̃_geq, Z̃arg) return ∇geq_i .= @views ∇geq[i, :] end ∇geqfuncs![i] = ∇geqfuncs_i! diff --git a/src/estimator/mhe/construct.jl b/src/estimator/mhe/construct.jl index 4c6ebb71c..beba35cb9 100644 --- a/src/estimator/mhe/construct.jl +++ b/src/estimator/mhe/construct.jl @@ -250,33 +250,37 @@ transcription for now. - `model::SimModel` : (deterministic) model for the estimations. - `He=nothing` : estimation horizon ``H_e``, must be specified. - `i_ym=1:model.ny` : `model` output indices that are measured ``\mathbf{y^m}``, the rest - are unmeasured ``\mathbf{y^u}``. + are unmeasured ``\mathbf{y^u}``. - `σP_0=fill(1/model.nx,model.nx)` or *`sigmaP_0`* : main diagonal of the initial estimate - covariance ``\mathbf{P}(0)``, specified as a standard deviation vector. + covariance ``\mathbf{P}(0)``, specified as a standard deviation vector. - `σQ=fill(1/model.nx,model.nx)` or *`sigmaQ`* : main diagonal of the process noise - covariance ``\mathbf{Q}`` of `model`, specified as a standard deviation vector. + covariance ``\mathbf{Q}`` of `model`, specified as a standard deviation vector. - `σR=fill(1,length(i_ym))` or *`sigmaR`* : main diagonal of the sensor noise covariance - ``\mathbf{R}`` of `model` measured outputs, specified as a standard deviation vector. + ``\mathbf{R}`` of `model` measured outputs, specified as a standard deviation vector. - `nint_u=0`: integrator quantity for the stochastic model of the unmeasured disturbances at - the manipulated inputs (vector), use `nint_u=0` for no integrator (see Extended Help). + the manipulated inputs (vector), use `nint_u=0` for no integrator (see Extended Help). - `nint_ym=default_nint(model,i_ym,nint_u)` : same than `nint_u` but for the unmeasured - disturbances at the measured outputs, use `nint_ym=0` for no integrator (see Extended Help). + disturbances at the measured outputs, use `nint_ym=0` for no integrator (see Extended Help). - `σQint_u=fill(1,sum(nint_u))` or *`sigmaQint_u`* : same than `σQ` but for the unmeasured - disturbances at manipulated inputs ``\mathbf{Q_{int_u}}`` (composed of integrators). + disturbances at manipulated inputs ``\mathbf{Q_{int_u}}`` (composed of integrators). - `σPint_u_0=fill(1,sum(nint_u))` or *`sigmaPint_u_0`* : same than `σP_0` but for the unmeasured - disturbances at manipulated inputs ``\mathbf{P_{int_u}}(0)`` (composed of integrators). + disturbances at manipulated inputs ``\mathbf{P_{int_u}}(0)`` (composed of integrators). - `σQint_ym=fill(1,sum(nint_ym))` or *`sigmaQint_u`* : same than `σQ` for the unmeasured - disturbances at measured outputs ``\mathbf{Q_{int_{ym}}}`` (composed of integrators). + disturbances at measured outputs ``\mathbf{Q_{int_{ym}}}`` (composed of integrators). - `σPint_ym_0=fill(1,sum(nint_ym))` or *`sigmaPint_ym_0`* : same than `σP_0` but for the unmeasured - disturbances at measured outputs ``\mathbf{P_{int_{ym}}}(0)`` (composed of integrators). 
+ disturbances at measured outputs ``\mathbf{P_{int_{ym}}}(0)`` (composed of integrators). - `Cwt=Inf` : slack variable weight ``C``, default to `Inf` meaning hard constraints only. - `optim=default_optim_mhe(model)` : a [`JuMP.Model`](@extref) object with a quadratic or nonlinear optimizer for solving (default to [`Ipopt`](https://github.com/jump-dev/Ipopt.jl), or [`OSQP`](https://osqp.org/docs/parsers/jump.html) if `model` is a [`LinModel`](@ref)). -- `gradient=AutoForwardDiff()` : an `AbstractADType` backend for the gradient of the objective - function when `model` is not a [`LinModel`](@ref), see [`DifferentiationInterface` doc](@extref DifferentiationInterface List). +- `hessian=nothing` : an `AbstractADType` backend for the Hessian of the objective function + when `model` is not a [`LinModel`](@ref) (see [`DifferentiationInterface` doc](@extref DifferentiationInterface List)), + or `nothing` for the LBFGS approximation provided by `optim` (details in Extended Help). +- `gradient=isnothing(hessian) ? AutoForwardDiff() : nothing` : an `AbstractADType` backend + for the gradient of the objective function when `model` is not a [`LinModel`](@ref) (see + `hessian` for the options), or `nothing` to retrieve lower-order derivatives from `hessian`. - `jacobian=AutoForwardDiff()` : an `AbstractADType` backend for the Jacobian of the - constraints when `model` is not a [`LinModel`](@ref), see `gradient` above for the options. + constraints when `model` is not a [`LinModel`](@ref) (see `hessian` for the options). - `direct=true`: construct with a direct transmission from ``\mathbf{y^m}`` (a.k.a. current estimator, in opposition to the delayed/predictor form). @@ -359,7 +363,9 @@ MovingHorizonEstimator estimator with a sample time Ts = 10.0 s, Ipopt optimizer default, a [`KalmanFilter`](@ref) estimates the arrival covariance (customizable). - Else, a nonlinear program with dense [`ForwardDiff`](@extref ForwardDiff) automatic differentiation (AD) compute the objective and constraint derivatives by default - (customizable). Optimizers generally benefit from exact derivatives like AD. However, + (customizable). The `hessian` argument defaults the LBFGS approximation of `optim`, + but see [`NonLinMPC`](@ref) extended help for a sparse backend that would be otherwise + recommended. Optimizers generally benefit from exact derivatives like AD. However, the `f` and `h` functions must be compatible with this feature. See the [`JuMP` documentation](@extref JuMP Common-mistakes-when-writing-a-user-defined-operator) for common mistakes when writing these functions. Also, an [`UnscentedKalmanFilter`](@ref) diff --git a/src/general.jl b/src/general.jl index 94ed3e63f..b2b1b9c4a 100644 --- a/src/general.jl +++ b/src/general.jl @@ -56,9 +56,28 @@ function limit_solve_time(optim::GenericModel, Ts) end end +"Verify that provided 1st and 2nd order differentiation backends are possible and efficient." +validate_backends(firstOrder::AbstractADType, secondOrder::Nothing) = nothing +validate_backends(firstOrder::Nothing, secondOrder::AbstractADType) = nothing +function validate_backends(firstOrder::AbstractADType, secondOrder::AbstractADType) + @warn( + """ + Two AbstractADType backends were provided for the 1st and 2nd order differentiations, + meaning that 1st order derivatives will be computed twice. Use gradient=nothing to + retrieve the result from the hessian backend, which is more efficient. 
+ """ + ) + return nothing +end +function validate_backends(firstOrder::Nothing, secondOrder::Nothing) + throw(ArgumentError("1st and 2nd order differentiation backends cannot be both nothing.")) +end + + "Init a differentiation result matrix as dense or sparse matrix, as required by `backend`." init_diffmat(T, backend::AbstractADType, _ , nx , ny) = Matrix{T}(undef, ny, nx) init_diffmat(T, backend::AutoSparse ,prep , _ , _ ) = similar(sparsity_pattern(prep), T) +init_diffmat(T, backend::Nothing , _ , nx , ny) = Matrix{T}(undef, ny, nx) "Verify that x and y elements are different using `!==`." isdifferent(x, y) = any(xi !== yi for (xi, yi) in zip(x, y)) From d29673dff75ca70bff9b5e5d5323c308dc4454c5 Mon Sep 17 00:00:00 2001 From: franckgaga Date: Wed, 30 Apr 2025 14:06:15 -0400 Subject: [PATCH 7/9] wip: print some info on `NonLinMPC` hessians --- src/controller/nonlinmpc.jl | 80 ++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/src/controller/nonlinmpc.jl b/src/controller/nonlinmpc.jl index 6e43fa9f8..0eb777b1e 100644 --- a/src/controller/nonlinmpc.jl +++ b/src/controller/nonlinmpc.jl @@ -630,45 +630,31 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT end if !isnothing(hess) prep_∇²J = prepare_hessian(Jfunc!, hess, Z̃_J, context_J...; strict) + @warn "Here's the objective Hessian sparsity pattern:" display(sparsity_pattern(prep_∇²J)) else prep_∇²J = nothing end ∇J = Vector{JNT}(undef, nZ̃) ∇²J = init_diffmat(JNT, hess, prep_∇²J, nZ̃, nZ̃) - - - - function update_objective!(J, ∇J, Z̃, Z̃arg, hess::Nothing, grad::AbstractADType) - if isdifferent(Z̃arg, Z̃) - Z̃ .= Z̃arg - J[], _ = value_and_gradient!(Jfunc!, ∇J, prep_∇J, grad, Z̃_J, context_J...) - end - end - function update_objective!(J, ∇J, Z̃, Z̃arg, hess::AbstractADType, grad::Nothing) - if isdifferent(Z̃arg, Z̃) - Z̃ .= Z̃arg - J[], _ = value_gradient_and_hessian!( - Jfunc!, ∇J, ∇²J, prep_∇²J, hess, Z̃, context_J... - ) - #display(∇J) - #display(∇²J) - #println(∇²J) - end - end - function Jfunc(Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_J, Z̃arg, hess, grad) + update_diff_objective!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg + ) return J[]::T end ∇Jfunc! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): function (Z̃arg) - update_objective!(J, ∇J, Z̃_J, Z̃arg, hess, grad) + update_diff_objective!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg + ) return ∇J[begin] end else # multivariate syntax (see JuMP.@operator doc): function (∇Jarg::AbstractVector{T}, Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_J, Z̃arg, hess, grad) + update_diff_objective!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg + ) return ∇Jarg .= ∇J end end @@ -784,6 +770,52 @@ function update_predictions!( return nothing end +""" + update_diff_objective!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J , context_J, + grad::AbstractADType, hess::Nothing, Jfunc!, Z̃arg + ) + +TBW +""" +function update_diff_objective!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, _ , context_J, + grad::AbstractADType, hess::Nothing, Jfunc!::F, Z̃arg +) where F <: Function + if isdifferent(Z̃arg, Z̃_J) + Z̃_J .= Z̃arg + J[], _ = value_and_gradient!(Jfunc!, ∇J, prep_∇J, grad, Z̃_J, context...) 
+ end + return nothing +end + +function update_diff_objective!( + Z̃_J, J, ∇J, ∇²J, _ , prep_∇²J, context_J, + grad::Nothing, hess::AbstractADType, Jfunc!::F, Z̃arg +) where F <: Function + if isdifferent(Z̃arg, Z̃_J) + Z̃_J .= Z̃arg + J[], _ = value_gradient_and_hessian!( + Jfunc!, ∇J, ∇²J, prep_∇²J, hess, Z̃_J, context_J... + ) + @warn "Here's the current Hessian:" + println(∇²J) + end + return nothing +end + +function update_diff_objective!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, + grad::AbstractADType, hess::AbstractADType, Jfunc!::F, Z̃arg +) where F<: Function + if isdifferent(Z̃arg, Z̃_J) + Z̃_J .= Z̃arg # inefficient, as warned by validate_backends(), but still possible: + hessian!(Jfunc!, ∇²J, prep_∇²J, hess, Z̃_J, context_J...) + J[], _ = value_and_gradient!(Jfunc!, ∇J, prep_∇J, grad, Z̃_J, context_J...) + end + return nothing +end + @doc raw""" con_custom!(gc, mpc::NonLinMPC, Ue, Ŷe, ϵ) -> gc From 8e94d752eee65192c978daea583d776d8e66fa2e Mon Sep 17 00:00:00 2001 From: franckgaga Date: Wed, 30 Apr 2025 14:12:02 -0400 Subject: [PATCH 8/9] commented out the print Hessian line --- src/controller/nonlinmpc.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/controller/nonlinmpc.jl b/src/controller/nonlinmpc.jl index 0eb777b1e..41859e71d 100644 --- a/src/controller/nonlinmpc.jl +++ b/src/controller/nonlinmpc.jl @@ -798,8 +798,8 @@ function update_diff_objective!( J[], _ = value_gradient_and_hessian!( Jfunc!, ∇J, ∇²J, prep_∇²J, hess, Z̃_J, context_J... ) - @warn "Here's the current Hessian:" - println(∇²J) + @warn "Uncomment the following line to print the current Hessian" + # println(∇²J) end return nothing end From 84a22d491acba3e2e1ece9f27dcc91ae57d1e48e Mon Sep 17 00:00:00 2001 From: franckgaga Date: Fri, 2 May 2025 17:10:51 -0400 Subject: [PATCH 9/9] wip: cleanup `NonLinMPC` optim functions --- src/controller/nonlinmpc.jl | 142 ++++++++++++-------------------- src/controller/transcription.jl | 32 +++---- src/general.jl | 70 +++++++++++++++- 3 files changed, 133 insertions(+), 111 deletions(-) diff --git a/src/controller/nonlinmpc.jl b/src/controller/nonlinmpc.jl index 41859e71d..fb3d36191 100644 --- a/src/controller/nonlinmpc.jl +++ b/src/controller/nonlinmpc.jl @@ -554,13 +554,11 @@ function init_optimization!(mpc::NonLinMPC, model::SimModel, optim::JuMP.Generic end end validate_backends(mpc.gradient, mpc.hessian) - Jfunc, ∇Jfunc!, ∇²Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! = get_optim_functions( - mpc, optim - ) - Jargs = isnothing(∇²Jfunc!) ? (Jfunc, ∇Jfunc!) : (Jfunc, ∇Jfunc!, ∇²Jfunc!) - @operator(optim, J, nZ̃, Jargs...) + J_args, g_vec_args, geq_vec_args = get_optim_functions(mpc, optim) + #display(J_args) + @operator(optim, J, nZ̃, J_args...) @objective(optim, Min, J(Z̃var...)) - init_nonlincon!(mpc, model, transcription, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs!) + init_nonlincon!(mpc, model, transcription, g_vec_args, geq_vec_args) set_nonlincon!(mpc, model, transcription, optim) return nothing end @@ -568,15 +566,15 @@ end """ get_optim_functions( mpc::NonLinMPC, optim::JuMP.GenericModel - ) -> Jfunc, ∇Jfunc!, ∇J²Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! + ) -> J_args, g_vec_args, geq_vec_args Return the functions for the nonlinear optimization of `mpc` [`NonLinMPC`](@ref) controller. - -Return the nonlinear objective `Jfunc` function, and `∇Jfunc!` and `∇²Jfunc!`, to compute -its gradient and hessian, respectively. 
Also return vectors with the nonlinear inequality -constraint functions `gfuncs`, and `∇gfuncs!`, for the associated gradients. Lastly, also -return vectors with the nonlinear equality constraint functions `geqfuncs` and gradients -`∇geqfuncs!`. + +Return the tuple `J_args` containing the functions to compute the objective function +value and its derivatives. Also return the tuple `g_vec_args` containing 2 vectors of +functions to compute the nonlinear inequality values and associated gradients. Lastly, also +return `geq_vec_args` containing 2 vectors of functions to compute the nonlinear equality +values and associated gradients. This method is really intricate and I'm not proud of it. That's because of 3 elements: @@ -630,7 +628,6 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT end if !isnothing(hess) prep_∇²J = prepare_hessian(Jfunc!, hess, Z̃_J, context_J...; strict) - @warn "Here's the objective Hessian sparsity pattern:" display(sparsity_pattern(prep_∇²J)) else prep_∇²J = nothing @@ -638,27 +635,46 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT ∇J = Vector{JNT}(undef, nZ̃) ∇²J = init_diffmat(JNT, hess, prep_∇²J, nZ̃, nZ̃) function Jfunc(Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_diff_objective!( + update_memoized_diff!( Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg ) return J[]::T end - ∇Jfunc! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): + ∇Jfunc! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): function (Z̃arg) - update_diff_objective!( + update_memoized_diff!( Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg ) return ∇J[begin] end - else # multivariate syntax (see JuMP.@operator doc): + else # multivariate syntax (see JuMP.@operator doc): function (∇Jarg::AbstractVector{T}, Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_diff_objective!( + update_memoized_diff!( Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg ) return ∇Jarg .= ∇J end end - ∇²Jfunc! = nothing + ∇²Jfunc! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): + function (Z̃arg) + update_memoized_diff!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg + ) + return ∇²J[begin, begin] + end + else # multivariate syntax (see JuMP.@operator doc): + function (∇²Jarg::AbstractMatrix{T}, Z̃arg::Vararg{T, N}) where {N, T<:Real} + print("d") + update_memoized_diff!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg + ) + for i in 1:N, j in 1:i + ∇²Jarg[i, j] = ∇²J[i, j] + end + return ∇²Jarg + end + end + J_args = isnothing(hess) ? (Jfunc, ∇Jfunc!) : (Jfunc, ∇Jfunc!, ∇²Jfunc!) 
# --------------------- inequality constraint functions ------------------------------- function gfunc!(g, Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, geq) update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) @@ -672,19 +688,13 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT ) # temporarily enable all the inequality constraints for sparsity detection: mpc.con.i_g[1:end-nc] .= true - ∇g_prep = prepare_jacobian(gfunc!, g, jac, Z̃_g, context_g...; strict) + prep_∇g = prepare_jacobian(gfunc!, g, jac, Z̃_g, context_g...; strict) mpc.con.i_g[1:end-nc] .= false - ∇g = init_diffmat(JNT, jac, ∇g_prep, nZ̃, ng) - function update_con!(g, ∇g, Z̃, Z̃arg) - if isdifferent(Z̃arg, Z̃) - Z̃ .= Z̃arg - value_and_jacobian!(gfunc!, g, ∇g, ∇g_prep, jac, Z̃, context_g...) - end - end + ∇g = init_diffmat(JNT, jac, prep_∇g, nZ̃, ng) gfuncs = Vector{Function}(undef, ng) for i in eachindex(gfuncs) gfunc_i = function (Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con!(g, ∇g, Z̃_g, Z̃arg) + update_memoized_diff!(Z̃_g, g, ∇g, prep_∇g, context_g, jac, gfunc!, Z̃arg) return g[i]::T end gfuncs[i] = gfunc_i @@ -693,17 +703,18 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT for i in eachindex(∇gfuncs!) ∇gfuncs_i! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): function (Z̃arg::T) where T<:Real - update_con!(g, ∇g, Z̃_g, Z̃arg) + update_memoized_diff!(Z̃_g, g, ∇g, prep_∇g, context_g, jac, gfunc!, Z̃arg) return ∇g[i, begin] end - else # multivariate syntax (see JuMP.@operator doc): + else # multivariate syntax (see JuMP.@operator doc): function (∇g_i, Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con!(g, ∇g, Z̃_g, Z̃arg) + update_memoized_diff!(Z̃_g, g, ∇g, prep_∇g, context_g, jac, gfunc!, Z̃arg) return ∇g_i .= @views ∇g[i, :] end end ∇gfuncs![i] = ∇gfuncs_i! end + g_vec_args = (gfuncs, ∇gfuncs!) # --------------------- equality constraint functions --------------------------------- function geqfunc!(geq, Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g) update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) @@ -715,18 +726,14 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(g) ) - ∇geq_prep = prepare_jacobian(geqfunc!, geq, jac, Z̃_geq, context_geq...; strict) - ∇geq = init_diffmat(JNT, jac, ∇geq_prep, nZ̃, neq) - function update_con_eq!(geq, ∇geq, Z̃, Z̃arg) - if isdifferent(Z̃arg, Z̃) - Z̃ .= Z̃arg - value_and_jacobian!(geqfunc!, geq, ∇geq, ∇geq_prep, jac, Z̃, context_geq...) - end - end + prep_∇geq = prepare_jacobian(geqfunc!, geq, jac, Z̃_geq, context_geq...; strict) + ∇geq = init_diffmat(JNT, jac, prep_∇geq, nZ̃, neq) geqfuncs = Vector{Function}(undef, neq) for i in eachindex(geqfuncs) geqfunc_i = function (Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con_eq!(geq, ∇geq, Z̃_geq, Z̃arg) + update_memoized_diff!( + Z̃_geq, geq, ∇geq, prep_∇geq, context_geq, jac, geqfunc!, Z̃arg + ) return geq[i]::T end geqfuncs[i] = geqfunc_i @@ -737,12 +744,15 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT # constraints imply MultipleShooting, thus input increment ΔU and state X̂0 in Z̃: ∇geqfuncs_i! = function (∇geq_i, Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con_eq!(geq, ∇geq, Z̃_geq, Z̃arg) + update_memoized_diff!( + Z̃_geq, geq, ∇geq, prep_∇geq, context_geq, jac, geqfunc!, Z̃arg + ) return ∇geq_i .= @views ∇geq[i, :] end ∇geqfuncs![i] = ∇geqfuncs_i! 
end - return Jfunc, ∇Jfunc!, ∇²Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! + geq_vec_args = (geqfuncs, ∇geqfuncs!) + return J_args, g_vec_args, geq_vec_args end """ @@ -770,52 +780,6 @@ function update_predictions!( return nothing end -""" - update_diff_objective!( - Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J , context_J, - grad::AbstractADType, hess::Nothing, Jfunc!, Z̃arg - ) - -TBW -""" -function update_diff_objective!( - Z̃_J, J, ∇J, ∇²J, prep_∇J, _ , context_J, - grad::AbstractADType, hess::Nothing, Jfunc!::F, Z̃arg -) where F <: Function - if isdifferent(Z̃arg, Z̃_J) - Z̃_J .= Z̃arg - J[], _ = value_and_gradient!(Jfunc!, ∇J, prep_∇J, grad, Z̃_J, context...) - end - return nothing -end - -function update_diff_objective!( - Z̃_J, J, ∇J, ∇²J, _ , prep_∇²J, context_J, - grad::Nothing, hess::AbstractADType, Jfunc!::F, Z̃arg -) where F <: Function - if isdifferent(Z̃arg, Z̃_J) - Z̃_J .= Z̃arg - J[], _ = value_gradient_and_hessian!( - Jfunc!, ∇J, ∇²J, prep_∇²J, hess, Z̃_J, context_J... - ) - @warn "Uncomment the following line to print the current Hessian" - # println(∇²J) - end - return nothing -end - -function update_diff_objective!( - Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, - grad::AbstractADType, hess::AbstractADType, Jfunc!::F, Z̃arg -) where F<: Function - if isdifferent(Z̃arg, Z̃_J) - Z̃_J .= Z̃arg # inefficient, as warned by validate_backends(), but still possible: - hessian!(Jfunc!, ∇²J, prep_∇²J, hess, Z̃_J, context_J...) - J[], _ = value_and_gradient!(Jfunc!, ∇J, prep_∇J, grad, Z̃_J, context_J...) - end - return nothing -end - @doc raw""" con_custom!(gc, mpc::NonLinMPC, Ue, Ŷe, ϵ) -> gc diff --git a/src/controller/transcription.jl b/src/controller/transcription.jl index b737092f5..6c0b69d2d 100644 --- a/src/controller/transcription.jl +++ b/src/controller/transcription.jl @@ -604,21 +604,18 @@ end """ init_nonlincon!( - mpc::PredictiveController, model::LinModel, transcription::TranscriptionMethod, - gfuncs , ∇gfuncs!, - geqfuncs, ∇geqfuncs! - ) + mpc::PredictiveController, ::LinModel, ::TranscriptionMethod, g_vec_args, geq_vec_args + ) -> nothing Init nonlinear constraints for [`LinModel`](@ref) for all [`TranscriptionMethod`](@ref). The only nonlinear constraints are the custom inequality constraints `gc`. """ function init_nonlincon!( - mpc::PredictiveController, ::LinModel, ::TranscriptionMethod, - gfuncs, ∇gfuncs!, - _ , _ + mpc::PredictiveController, ::LinModel, ::TranscriptionMethod, g_vec_args, _ ) optim, con = mpc.optim, mpc.con + gfuncs, ∇gfuncs! = g_vec_args nZ̃ = length(mpc.Z̃) if length(con.i_g) ≠ 0 i_base = 0 @@ -634,10 +631,8 @@ end """ init_nonlincon!( - mpc::PredictiveController, model::NonLinModel, transcription::MultipleShooting, - gfuncs, ∇gfuncs!, - geqfuncs, ∇geqfuncs! - ) + mpc::PredictiveController, ::NonLinModel, ::MultipleShooting, g_vec_args, geq_vec_args + ) -> nothing Init nonlinear constraints for [`NonLinModel`](@ref) and [`MultipleShooting`](@ref). @@ -645,11 +640,11 @@ The nonlinear constraints are the output prediction `Ŷ` bounds, the custom ine constraints `gc` and all the nonlinear equality constraints `geq`. """ function init_nonlincon!( - mpc::PredictiveController, ::NonLinModel, ::MultipleShooting, - gfuncs, ∇gfuncs!, - geqfuncs, ∇geqfuncs! + mpc::PredictiveController, ::NonLinModel, ::MultipleShooting, g_vec_args, geq_vec_args ) optim, con = mpc.optim, mpc.con + gfuncs , ∇gfuncs! = g_vec_args + geqfuncs, ∇geqfuncs! 
= geq_vec_args ny, nx̂, Hp, nZ̃ = mpc.estim.model.ny, mpc.estim.nx̂, mpc.Hp, length(mpc.Z̃) # --- nonlinear inequality constraints --- if length(con.i_g) ≠ 0 @@ -691,10 +686,8 @@ end """ init_nonlincon!( - mpc::PredictiveController, model::NonLinModel, ::SingleShooting, - gfuncs, ∇gfuncs!, - geqfuncs, ∇geqfuncs! - ) + mpc::PredictiveController, ::NonLinModel, ::SingleShooting, g_vec_args, geq_vec_args + ) -> nothing Init nonlinear constraints for [`NonLinModel`](@ref) and [`SingleShooting`](@ref). @@ -702,9 +695,10 @@ The nonlinear constraints are the custom inequality constraints `gc`, the output prediction `Ŷ` bounds and the terminal state `x̂end` bounds. """ function init_nonlincon!( - mpc::PredictiveController, ::NonLinModel, ::SingleShooting, gfuncs, ∇gfuncs!, _ , _ + mpc::PredictiveController, ::NonLinModel, ::SingleShooting, g_vec_args, _ ) optim, con = mpc.optim, mpc.con + gfuncs, ∇gfuncs! = g_vec_args ny, nx̂, Hp, nZ̃ = mpc.estim.model.ny, mpc.estim.nx̂, mpc.Hp, length(mpc.Z̃) if length(con.i_g) ≠ 0 i_base = 0 diff --git a/src/general.jl b/src/general.jl index b2b1b9c4a..0bb2fc28c 100644 --- a/src/general.jl +++ b/src/general.jl @@ -63,8 +63,8 @@ function validate_backends(firstOrder::AbstractADType, secondOrder::AbstractADTy @warn( """ Two AbstractADType backends were provided for the 1st and 2nd order differentiations, - meaning that 1st order derivatives will be computed twice. Use gradient=nothing to - retrieve the result from the hessian backend, which is more efficient. + meaning that 1st order derivatives will be computed twice. Use nothing for the 1st + order backend to retrieve results from the hessian backend, which is more efficient. """ ) return nothing @@ -73,12 +73,76 @@ function validate_backends(firstOrder::Nothing, secondOrder::Nothing) throw(ArgumentError("1st and 2nd order differentiation backends cannot be both nothing.")) end - "Init a differentiation result matrix as dense or sparse matrix, as required by `backend`." init_diffmat(T, backend::AbstractADType, _ , nx , ny) = Matrix{T}(undef, ny, nx) init_diffmat(T, backend::AutoSparse ,prep , _ , _ ) = similar(sparsity_pattern(prep), T) init_diffmat(T, backend::Nothing , _ , nx , ny) = Matrix{T}(undef, ny, nx) +""" + update_memoized_diff!( + x, y, ∇f, ∇²f, prep_∇f, prep_∇²f, context, + gradient::AbstractADType, hessian::Nothing, f!, xarg + ) -> nothing + +Update `f!` value `y` and and its gradient `∇f` in-place if `x ≠ xarg`. + +The method mutates all the arguments before `gradient`. This function is used for the +memoization of the `f!` function derivatives, to avoid redundant computations with the +splatting syntax of `JuMP.@operator`. +""" +function update_memoized_diff!( + x, y, ∇f, _ , prep_∇f, _ , context, + gradient::AbstractADType, hessian::Nothing, f!::F, xarg +) where F <: Function + if isdifferent(xarg, x) + x .= xarg # more efficient than individual f! and gradient! calls: + y[], _ = value_and_gradient!(f!, ∇f, prep_∇f, gradient, x, context...) + end + return nothing +end + +"Also update the Hessian `∇²f` if `hessian isa AbstractADType` and `isnothing(gradient)`." +function update_memoized_diff!( + x, y, ∇f, ∇²f, _ , prep_∇²f, context, + gradient::Nothing, hessian::AbstractADType, f!::F, xarg +) where F <: Function + if isdifferent(xarg, x) + x .= xarg # more efficient than individual f!, gradient! and hessian! calls: + y[], _ = value_gradient_and_hessian!(f!, ∇f, ∇²f, prep_∇²f, hessian, x, context...) 
+ end + return nothing +end + +"Update `∇f` and `∇²f` individually if both backends are `AbstractADType`." +function update_memoized_diff!( + x, y, ∇f, ∇²f, prep_∇f, prep_∇²f, context, + gradient::AbstractADType, hessian::AbstractADType, f!::F, xarg +) where F <: Function + if isdifferent(xarg, x) + x .= xarg # inefficient, as warned by validate_backends(), but still possible: + hessian!(f!, ∇²f, prep_∇²f, hessian, x, context...) + y[], _ = value_and_gradient!(f!, ∇f, prep_∇f, gradient, x, context...) + end + return nothing +end + +""" + update_memoized_diff!(x, y, ∇f, prep_∇f, context, jacobian::AbstractADType, f!, xarg) + +Update `f!` value `y` (vector) and and its jacobian `∇f` in-place if `x ≠ xarg`. + +This method mutates all the arguments before `jacobian`. +""" +function update_memoized_diff!( + x, y, ∇f, prep_∇f, context, jacobian::AbstractADType, f!::F, xarg +) where F <: Function + if isdifferent(xarg, x) + x .= xarg # more efficient than individual f! and jacobian! calls: + value_and_jacobian!(f!, y, ∇f, prep_∇f, jacobian, x, context...) + end + return nothing +end + "Verify that x and y elements are different using `!==`." isdifferent(x, y) = any(xi !== yi for (xi, yi) in zip(x, y))
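To make the memoization pattern behind `update_memoized_diff!` concrete, here is a standalone sketch (outside the package, every name below is illustrative) of caching one `value_gradient_and_hessian!` evaluation and serving it to the separate value, gradient and Hessian callbacks that `JuMP` queries with identical splatted arguments:

```julia
using DifferentiationInterface
import ForwardDiff

f(x) = sum(abs2, x) + 0.1 * sum(x)^3

backend = AutoForwardDiff()
x_last  = fill(NaN, 3)        # never equal to a real query point: forces the 1st update
prep    = prepare_hessian(f, backend, zeros(3))
y       = Ref(0.0)
∇f      = zeros(3)
∇²f     = zeros(3, 3)

isdifferent(a, b) = any(ai !== bi for (ai, bi) in zip(a, b))

function update!(xarg)
    if isdifferent(xarg, x_last)
        x_last .= xarg        # one sweep refreshes the value, gradient and Hessian caches
        y[], _, _ = value_gradient_and_hessian!(f, ∇f, ∇²f, prep, backend, x_last)
    end
    return nothing
end

value_cb(xarg...)    = (update!(xarg); y[])
grad_cb!(g, xarg...) = (update!(xarg); g .= ∇f)
function hess_cb!(H, xarg...)                     # fill the lower triangle only,
    update!(xarg)                                 # as the JuMP convention requires
    for i in eachindex(xarg), j in 1:i
        H[i, j] = ∇²f[i, j]
    end
    return nothing
end
```

As long as the solver evaluates the three callbacks at the same point, the `!==` comparison means only the first call pays for the differentiation, which is the same trade-off `isdifferent` implements in `src/general.jl`.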