diff --git a/lib/OrdinaryDiffEqRKIP/LICENSE.md b/lib/OrdinaryDiffEqRKIP/LICENSE.md new file mode 100644 index 0000000000..4a7df96ac5 --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/LICENSE.md @@ -0,0 +1,24 @@ +The OrdinaryDiffEq.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2016-2020: ChrisRackauckas, Yingbo Ma, Julia Computing Inc, and +> other contributors: +> +> https://github.com/SciML/OrdinaryDiffEq.jl/graphs/contributors +> +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in all +> copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +> SOFTWARE. diff --git a/lib/OrdinaryDiffEqRKIP/Project.toml b/lib/OrdinaryDiffEqRKIP/Project.toml new file mode 100644 index 0000000000..2c30c83f7a --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/Project.toml @@ -0,0 +1,44 @@ +name = "OrdinaryDiffEqRKIP" +uuid = "a4daff8c-1d43-4ff3-8eff-f78720aeecdc" +authors = ["Azercoco <20425137+Azercoco@users.noreply.github.com>"] +version = "1.0.0" + + +[sources] +OrdinaryDiffEqCore = {path = "../OrdinaryDiffEqCore"} + +[deps] +DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" +DiffEqDevTools = "f3b72e0c-5b89-59e1-b016-84e28bfd966d" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MaybeInplace = "bb5d69b7-63fc-4a16-80bd-7e42200c7bdb" +OrdinaryDiffEqCore = "bbf590c4-e513-4bbe-9b18-05decba2e5d8" +SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" +SciMLOperators = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" +UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" + +[compat] +DiffEqBase = "6.175" +DiffEqDevTools = "2.48" +MaybeInplace = "0.1.4" +OrdinaryDiffEqCore = "1.26" +SciMLBase = "2.99" +SciMLOperators = "1.3.1" +StaticArrays = "1.9.13" +UnPack = "1.0.2" +LinearAlgebra = "1.11" +CUDA = "5.5.2" +FFTW = "1.8.0" +SafeTestsets = "0.1.0" + +julia = "1.11" + +[extras] +FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + +[targets] +test = ["FFTW", "Test", "SafeTestsets", "CUDA"] diff --git a/lib/OrdinaryDiffEqRKIP/src/OrdinaryDiffEqRKIP.jl b/lib/OrdinaryDiffEqRKIP/src/OrdinaryDiffEqRKIP.jl new file mode 100644 index 0000000000..f6d1a0ea7e --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/src/OrdinaryDiffEqRKIP.jl @@ -0,0 +1,26 @@ +module OrdinaryDiffEqRKIP + +using LinearAlgebra: ldiv!, exp, axpy!, norm, mul! +using SciMLOperators: AbstractSciMLOperator +using UnPack: @pack!, @unpack +using MaybeInplace: @bb +using SciMLBase: isinplace +using DiffEqBase: ExplicitRKTableau +using DiffEqDevTools: constructVerner6 + +import OrdinaryDiffEqCore: OrdinaryDiffEqAdaptiveExponentialAlgorithm, alg_adaptive_order, + alg_order, alg_cache, @cache, SplitFunction, get_fsalfirstlast, + initialize!, perform_step!, + has_dtnew_modification, calculate_residuals, + calculate_residuals!, increment_nf!, + OrdinaryDiffEqAdaptiveAlgorithm, OrdinaryDiffEqMutableCache, + dtnew_modification, generic_solver_docstring + +include("rkip_cache.jl") +include("algorithms.jl") +include("rkip_utils.jl") +include("rkip_perform_step.jl") + +export RKIP + +end diff --git a/lib/OrdinaryDiffEqRKIP/src/algorithms.jl b/lib/OrdinaryDiffEqRKIP/src/algorithms.jl new file mode 100644 index 0000000000..507068ea81 --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/src/algorithms.jl @@ -0,0 +1,176 @@ +using OrdinaryDiffEqCore: OrdinaryDiffEqCore + +METHOD_DESCRIPTION = """ +Runge-Kutta in the interaction picture. + +This is suited for solving semilinear problem of the form: + +```math +\frac{du}{dt} = Au + f(u,p,t) +``` + +where A is possibly stiff time-independent linear operator whose scaled exponential exp(Ah) can be calculated efficiently for any h. +The problem is first transformed in a non-stiff variant (interaction picture) + +```math +\begin{aligned} +u_I(t) &= \exp(-At) u(t) \\ +\frac{du_I}{dt} &= f_I(u_I,p,t) \\ +f_I(u_I,p,t) &= f(exp(-At)u_I, p, t) \\ +\end{aligned} +``` +and is then solved with an explicit (adaptive) Runge-Kutta method. + +This solver is only implemented for semilinear problem: `SplitODEProblem` when the first function `f1` is a `AbstractSciMLOperator` A implementing: + +```julia +LinearAlgebra.exp(A, t) # = exp(A*t) +``` +`A` and the return value of `exp(A, t)` must either also both implement the `AbstractSciMLOperator` interface: +```julia +A(du, u, v, p, t) # for in-place problem +A(u, v, p, t) # for out-of-place problem +``` + +For performance, the algorithm will cache and reuse the computed operator-exponential for a fixed set of time steps. + +# Arguments +- `dtmin::T`: the smallest step `dt` for which `exp(A*dt)` will be cached. Default is `1e-3` +- `dtmax::T`: the largest step `dt` for which `exp(A*dt)` will be cached. Default is `1.0` + +The fixed steps will follow a geometric progression. +Time stepping can still happen outside the bounds (for the end step for e.g) but no cache will occur (`exp(A*dt)` getting computed each step) degrading the performances. +The time step can be forcibly clamped within the cache range through the keywords `clamp_lower_dt` and `clamp_higher_dt`. + +The cached operator exponentials are also directly stored in the algorithm such that: + +```julia +rkip = RKIP() +solve(ode_prob_1, rkip, t1) +solve(ode_prob_2, rkip, t2) +```` + +will reuse the precomputed exponential cached during the first `solve` call. +This can be useful for solving several times successively problems with a common `A`. + +""" +REFERENCE = """Zhongxi Zhang, Liang Chen, and Xiaoyi Bao, "A fourth-order Runge-Kutta in the interaction picture method for numerically solving the coupled nonlinear Schrödinger equation," Opt. Express 18, 8261-8276 (2010)""" + +KEYWORD_DESCRIPTION = """ +- `nb_of_cache_step::Integer`: the number of steps. Default is `100`. +- `tableau::ExplicitRKTableau`: the Runge-Kutta Tableau to use. Default is `constructVerner6()`. +- `clamp_lower_dt::Bool`: whether to clamp proposed step to the smallest cached step in order to force the use of cached exponential, improving performance. + This may prevent reaching the desired tolerance. Default is `false`. +- `clamp_higher_dt::Bool`: whether to clamp proposed step to the largest cached step in order to force the use of cached exponential, improving performance. + This can cause performance degradation if `integrator.dtmax` is too small. Default is `true`. +- `use_ldiv::Bool`: whether, to use `ldiv(exp(A, t), v)` instead of caching `exp(A, -t)*v`. Reduces the memory usage but is slightly less efficient. `ldiv` must be implemented. Only works for in-place problems. Default is `false`. +""" + +@doc generic_solver_docstring( + METHOD_DESCRIPTION, "RKIP", "Adaptative Exponential Runge-Kutta", + REFERENCE, KEYWORD_DESCRIPTION, "") +mutable struct RKIP{ + tableauType <: ExplicitRKTableau, elType, dtType <: AbstractVector{elType}} <: + OrdinaryDiffEqAdaptiveAlgorithm + tableau::tableauType + dt_for_expÂ_caching::dtType + clamp_lower_dt::Bool + clamp_higher_dt::Bool + use_ldiv::Bool + cache::Union{Nothing, RKIPCache} +end + +function RKIP(dtmin::T = 1e-3, dtmax::T = 1.0; nb_of_cache_step::Int = 100, + tableau = constructVerner6(T), clamp_lower_dt::Bool = false, + clamp_higher_dt::Bool = true, use_ldiv = false) where {T} + RKIP{ + typeof(tableau), T, Vector{T}}( + tableau, + logrange(dtmin, dtmax, nb_of_cache_step), + clamp_lower_dt, + clamp_higher_dt, + use_ldiv, + nothing + ) +end + +alg_order(alg::RKIP) = alg.tableau.order +alg_adaptive_order(alg::RKIP) = alg.tableau.adaptiveorder + +has_dtnew_modification(alg::RKIP) = true + +function dtnew_modification(alg::RKIP{tableauType, elType, dtType}, + dtnew) where {tableauType, elType, dtType} + @unpack dt_for_expÂ_caching = alg + if first(alg.dt_for_expÂ_caching) > dtnew && alg.clamp_lower_dt + dtnew = first(alg.dt_for_expÂ_caching) + elseif last(alg.dt_for_expÂ_caching) < dtnew && alg.clamp_higher_dt + dtnew = last(alg.dt_for_expÂ_caching) + else + dtnew = alg.dt_for_expÂ_caching[searchsortedfirst(alg.dt_for_expÂ_caching, dtnew)] + end + return dtnew +end + +dtnew_modification(_, alg::RKIP, dtnew) = dtnew_modification(alg, dtnew) + +function alg_cache( + alg::RKIP, u::uType, rate_prototype, uEltypeNoUnits, uBottomEltypeNoUnits, + tTypeNoUnits, uprev, uprev2, f, t, dt, reltol, p, calck, iip) where {uType} + tmp = zero(u) + utilde = zero(u) + kk = [zero(u) for _ in 1:(alg.tableau.stages)] + + Â = isa(f, SplitFunction) ? f.f1.f : + throw(ArgumentError("RKIP is only implemented for semilinear problems")) + opType = typeof(Â) + expOpType = typeof(exp(Â, 1.0)) + + if isnothing(alg.cache) + is_cached = Vector{Bool}(undef, length(alg.dt_for_expÂ_caching)) + fill!(is_cached, false) + + c_extended = vcat(alg.tableau.c, 1.0) # all the c values of Runge-Kutta and 1 wich is needed for the RKIP step + c_unique = unique(c_extended) # in some tableau, there is duplicate: we only keep the unique value to save on caching time and memory + c_index = [findfirst(==(c), c_unique) for c in c_extended] # index mapping + + exp_cache = ExpCache{expOpType}( + Array{expOpType, 2}(undef, length(alg.dt_for_expÂ_caching), length(c_unique)), + Vector{expOpType}(undef, length(c_unique))) + + if !alg.use_ldiv + exp_cache = ExpCacheNoLdiv(exp_cache, + ExpCache{expOpType}( + Array{expOpType, 2}( + undef, length(alg.dt_for_expÂ_caching), length(c_unique)), + Vector{expOpType}(undef, length(c_unique)))) + expCacheType = ExpCacheNoLdiv{expOpType} + else + expCacheType = ExpCache{expOpType} + end + + alg.cache = RKIPCache{expOpType, expCacheType, tTypeNoUnits, opType, uType, iip}( + exp_cache, + zero(tTypeNoUnits), + is_cached, + tmp, + utilde, + kk, + c_unique, + c_index + ) + else # cache recycling + alg.cache = RKIPCache{ + expOpType, typeof(alg.cache.exp_cache), tTypeNoUnits, opType, uType, iip}( + alg.cache.exp_cache, + alg.cache.last_step, + alg.cache.cached, + tmp, + utilde, + kk, + alg.cache.c_unique, + alg.cache.c_mapping + ) + end + return alg.cache +end diff --git a/lib/OrdinaryDiffEqRKIP/src/rkip_cache.jl b/lib/OrdinaryDiffEqRKIP/src/rkip_cache.jl new file mode 100644 index 0000000000..5b64330b19 --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/src/rkip_cache.jl @@ -0,0 +1,92 @@ +abstract type AbstractExpCache{expOpType <: AbstractSciMLOperator} end + +struct ExpCache{expOpType} <: AbstractExpCache{expOpType} + expÂ_cached::Array{expOpType, 2} + expÂ_for_this_step::Vector{expOpType} +end +struct ExpCacheNoLdiv{expOpType} <: AbstractExpCache{expOpType} + exp_cache::ExpCache{expOpType} + exp_cache_inv::ExpCache{expOpType} +end + +function get_op_for_this_step(cache::ExpCache{expOpType}, index::Int) where {expOpType} + cache.expÂ_for_this_step[index] +end +function get_op_for_this_step(cache_no_ldiv::ExpCacheNoLdiv{expOpType}, + positive::Bool, index::Int) where {expOpType} + positive ? cache_no_ldiv.exp_cache.expÂ_for_this_step[index] : + cache_no_ldiv.exp_cache_inv.expÂ_for_this_step[index] +end + +mutable struct RKIPCache{ + expOpType <: AbstractSciMLOperator, cacheType <: AbstractExpCache{expOpType}, + tType <: Number, opType <: AbstractSciMLOperator, uType, iip} <: + OrdinaryDiffEqMutableCache + exp_cache::cacheType + last_step::tType + cached::Vector{Bool} + tmp::uType + utilde::uType + kk::Vector{uType} + c_unique::Vector{tType} + c_mapping::Vector{Integer} +end + +get_fsalfirstlast(cache::RKIPCache, u) = (zero(cache.tmp), zero(cache.tmp)) + +@inline function cache_exp!(cache::ExpCache{expOpType}, + A::opType, + h::T, + action::Symbol, + step_index::Int, + unique_stage_index::Int) where { + expOpType <: AbstractSciMLOperator, opType <: AbstractSciMLOperator, T <: Number} + @unpack expÂ_for_this_step, expÂ_cached = cache + expÂ_for_this_step[unique_stage_index] = (action == :use_cached) ? + expÂ_cached[step_index, unique_stage_index] : + exp(A, h) # fetching or generating exp(Â*c_i*dt) + if action == :cache + expÂ_cached[step_index, unique_stage_index] = expÂ_for_this_step[unique_stage_index] # storing exp(Â*c_i*dt) + end +end + +@inline function cache_exp!(cache::ExpCacheNoLdiv{expOpType}, + Â::opType, + h::T, + action::Symbol, + step_index::Int, + unique_stage_index::Int) where { + expOpType <: AbstractSciMLOperator, opType <: AbstractSciMLOperator, T <: Number} + cache_exp!(cache.exp_cache, Â, h, action, step_index, unique_stage_index) + cache_exp!(cache.exp_cache_inv, Â, -h, action, step_index, unique_stage_index) +end + +""" + Prepare/generate all the needed exp(± A * dt * c[i]) for a step size dt +""" +@inline function cache_exp_op_for_this_step!( + cache::RKIPCache{expOpType, cacheType, tType, opType, uType, iip}, + Â::opType, dt::tType, + alg::algType) where {expOpType, cacheType, tType, opType, uType, algType, iip} + @unpack dt_for_expÂ_caching = alg + + if !iszero(dt) && !(dt ≈ cache.last_step) # we check that new exp(A dt) are needed + dt_abs = abs(dt) # only the positive dt are used for indexing + action = :single_use # exp(A*dt) is only computed for this step + + step_index = clamp(searchsortedlast(dt_for_expÂ_caching, dt_abs), + 1, lastindex(dt_for_expÂ_caching)) # fetching the index corresponding to the step size + + if dt_for_expÂ_caching[step_index] ≈ dt_abs # if dt corresponds to a cahing step + action = (cache.cached[step_index] ? :use_cached : :cache) # if alreay present, we reuse the cached, otherwise it is generated + end + + for (unique_stage_index, c) in enumerate(cache.c_unique) # iterating over all unique c_i of the RK tableau + cache_exp!( + cache.exp_cache, Â, abs(dt * c), action, step_index, unique_stage_index) # generating and caching + end + cache.cached[step_index] |= (action == :cache) # set the flag that we have already cached exp(Â*c_i*dt) for this dt + end + + cache.last_step = dt +end diff --git a/lib/OrdinaryDiffEqRKIP/src/rkip_perform_step.jl b/lib/OrdinaryDiffEqRKIP/src/rkip_perform_step.jl new file mode 100644 index 0000000000..4a4cf63bf4 --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/src/rkip_perform_step.jl @@ -0,0 +1,127 @@ +""" +Helper function for the (nonlinear) part of the problem maybe in place. +Overwrite compute f(u, p, t) and if possible, overwrite u with the result. +Same principle of operation as `matvec_prod_mip` for mutability/in-place handling. +""" +function nl_part_mip( + tmp::uType, f!, u::uType, p, t::tType, ::Val{true}) where {tType, uType} + f!(tmp, u, p, t) + copyto!(u, tmp) + return u +end +function nl_part_mip(_::uType, f, u::uType, p, t::tType, ::Val{false}) where {tType, uType} + f(u, p, t) +end + +""" +Helper function to compute Au + f(u, p, t) and if in place, store the result in res. +Retunr res if in place, otherwise return Au + f(u, p, t) +""" +function f_mip!(res::uType, tmp::uType, A::opType, f, u::uType, p, + t::tType, ::Val{true}) where {tType, uType, opType} + res .= u + res = matvec_prod_mip(tmp, A, res, Val(true), p, t) + f(tmp, u, p, t) + res .+= tmp + return res +end + +function f_mip!(_::uType, tmp::uType, A::opType, f, u::uType, p, + t::tType, ::Val{false}) where {tType, uType, opType} + matvec_prod_mip(tmp, A, u, Val(false), p, t) + f(u, p, t) +end + +""" +Helper function for the residual maybe in place. +Same principle of operation as `_safe_matvec_prod` for mutability/in-place handling. +""" +function calculate_residuals_mip(tmp::uType, utilde::uType, uprev::uType, u::uType, abstol, + reltol, internalnorm, t, ::Val{true}) where {uType} + calculate_residuals!(tmp, utilde, uprev, u, abstol, reltol, internalnorm, t) + return tmp +end +function calculate_residuals_mip(_::uType, utilde::uType, uprev::uType, u::uType, abstol, + reltol, internalnorm, t, ::Val{false}) where {uType} + calculate_residuals(utilde, uprev, u, abstol, reltol, internalnorm, t) +end + +@fastmath function perform_step!(integrator, + cache::RKIPCache{expOpType, cacheType, tType, opType, uType, iip}) where { + expOpType, cacheType, tType, opType, uType, iip} + @unpack t, dt, uprev, u, f, p, fsalfirst, fsallast, alg = integrator + @unpack c, α, αEEst, stages, A = alg.tableau + @unpack kk, utilde, tmp = cache + @unpack adaptive, abstol, reltol, internalnorm = integrator.opts + + Â::opType = f.f1.f # Linear Operator + + cache_exp_op_for_this_step!(cache, Â, dt, alg) # compute/reuse cached exp(± Â * [dt * cᵢ]) for this dt and cache them if possible + + @bb u .= uprev + + for i in 1:(stages) + @bb kk[i] .= uprev + + for j in 1:(i - 1) + g = A[i, j] * dt + kk[i] = axpy_mip(g, kk[j], kk[i], iip) # kk_i += dt*A[i, j] * kk_j + end + + t_ = t + c[i] * dt + + # if mutable/heaps type, assignment does nothing as the function is in-place, + kk[i] = expmv_rkip_mip(cache, kk[i], dt, i, p, t_) # kᵢ = exp(Â * [dt * cᵢ])*kᵢ ➡ Change from interaction picture to "true" coordinate + + kk[i] = nl_part_mip(cache.tmp, f.f2, kk[i], p, t_, iip) # kᵢ = f(u + Σⱼ dt*(Aᵢⱼ kⱼ), t + dt*cᵢ) + + kk[i] = expmv_rkip_mip(cache, kk[i], -dt, i, p, t_) # kᵢ = exp(-Â * [dt * cᵢ])*kᵢ ➡ Going back in interaction picture + + integrator.stats.nf += 2 # two exp vec product + integrator.stats.nf2 += 1 # one function evaluation + + u = axpy_mip(α[i] * dt, kk[i], u, iip) # uₙ = uₙ₋₁ + dt Σᵢ * (αᵢ - α*ᵢ) kᵢ + + if adaptive # error estimation ũ = Σᵢ dt * (αᵢ - α*ᵢ) kᵢ + if i == 1 + @bb @. utilde = dt * (α[i] - αEEst[i]) * kk[i] # to avoid filling with zero, we set it to i == 1 + else + utilde = axpy_mip(dt * (α[i] - αEEst[i]), kk[i], utilde, iip) # otherwise we add it + end + end + end + + u = expmv_rkip_mip(cache, u, dt, p, t) # stepping forward into the interaction u = exp(Â dt)*u + + if adaptive + utilde = expmv_rkip_mip(cache, utilde, dt, p, t) # stepping forward into the interaction ũ = exp(Â dt)*ũ + tmp = calculate_residuals_mip( + tmp, utilde, uprev, u, abstol, reltol, internalnorm, t, iip) # error computation maybe in place + integrator.EEst = internalnorm(tmp, t) + end + + fsallast = f_mip!(fsallast, cache.tmp, Â, f.f2, u, p, t + dt, iip) # derivative estimation for interpolation + integrator.stats.nf += 1 + integrator.stats.nf2 += 1 + @pack! integrator = fsalfirst, fsallast, u + + @bb copyto!(integrator.k[1], fsalfirst) + @bb copyto!(integrator.k[2], fsallast) +end + +function initialize!(integrator, + cache::RKIPCache{expOpType, cacheType, tType, opType, uType, iip}) where { + expOpType, cacheType, tType, opType, uType, iip} + @unpack f, u, p, t, fsalfirst, fsallast = integrator + + kshortsize = 2 + k = [zero(u) for _ in 1:kshortsize] + + fsalfirst = f_mip!(fsalfirst, cache.tmp, f.f1.f, f.f2, u, p, t, iip) # first derivative for interpolation computation, maybe in place + integrator.stats.nf += 1 + integrator.stats.nf2 += 1 + + @pack! integrator = kshortsize, k, fsalfirst, fsallast + + @bb copyto!(integrator.k[1], fsalfirst) + @bb copyto!(integrator.k[2], fsallast) +end diff --git a/lib/OrdinaryDiffEqRKIP/src/rkip_utils.jl b/lib/OrdinaryDiffEqRKIP/src/rkip_utils.jl new file mode 100644 index 0000000000..0eff0d5e69 --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/src/rkip_utils.jl @@ -0,0 +1,64 @@ +# cannot use @bb on axpy due to https://github.com/SciML/MaybeInplace.jl/issues/14 +@inline axpy_mip(α, x::uType, y::uType, ::Val{true}) where {uType} = axpy!(α, x, y) #BLAS call +@inline axpy_mip(α, x::uType, y::uType, ::Val{false}) where {uType} = y .+ α .* x + +""" +Maybe-In Place "mip" +Safe function for computing the matrix exponential vector product for both mutable and immutable type. +Same principle of operation as `_safe_matvec_prod` for mutability/in-place handling. +""" +@inline expmv_rkip_mip(cache::RKIPCache{expOpType, cacheType, tType, opType, uType, iip}, v::uType, h::tType, p, t) where {expOpType, cacheType, tType, opType, uType, iip} = expmv_rkip_mip( + cache, v, h, lastindex(cache.c_mapping), p, t) # If i is not precised, this is the final step in the RKIP -> h = dt + +@inline function expmv_rkip_mip( + cache::RKIPCache{expOpType, cacheType, tType, opType, uType, iip}, v::uType, + h::tType, stage_index::Int, p, t) where { + expOpType, cacheType, tType, opType, uType, iip} + if !(h ≈ tType(0.0)) + c = cache.c_mapping[stage_index] + exp_cache = cache.exp_cache + tmp = cache.tmp + return _expmv_rkip_mip(exp_cache, tmp, v, c, h > tType(0.0), iip, p, t) + end + return v +end + +@inline function _expmv_rkip_mip( + cache::ExpCacheNoLdiv{expOpType}, tmp::vType, v::vType, stage_index::Integer, + positive, iip, p, t) where {expOpType <: AbstractSciMLOperator, vType} + op = get_op_for_this_step(cache, positive, stage_index) + v = matvec_prod_mip(tmp, op, v, iip, p, t) + return v +end + +@inline function _expmv_rkip_mip( + cache::ExpCache{expOpType}, tmp::vType, v::vType, stage_index::Integer, + positive, ::Val{true}, p, t) where {expOpType <: AbstractSciMLOperator, vType} + op = get_op_for_this_step(cache, stage_index) + if positive + matvec_prod_mip(tmp, op, v, Val(true), p, t) + else + ldiv!(tmp, op, v) #ldiv can only be used for in place call + copyto!(v, tmp) + end + return v +end + +""" +Maybe-In Place "mip" +Safe function for matrix vector product for both mutable and immutable type. +For mutable type, overwrite `v` with `mat*v` and return `v`. Otherwise return `v` +Use the dispatch between ::Val{true} (in place) and ::Val{false} immutable to decide +""" +@inline function matvec_prod_mip( + tmp::V, mat::M, v::V, ::Val{true}, p, t) where {V, M <: AbstractSciMLOperator} + mat(tmp, v, v, p, t) + copyto!(v, tmp) + return v +end + +@inline function matvec_prod_mip( + _::V, mat::M, v::V, ::Val{false}, p, t) where {V, M <: AbstractSciMLOperator} + v = mat(v, v, p, t) + return v +end diff --git a/lib/OrdinaryDiffEqRKIP/test/cache_recycling_test.jl b/lib/OrdinaryDiffEqRKIP/test/cache_recycling_test.jl new file mode 100644 index 0000000000..2e51e5ee7f --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/test/cache_recycling_test.jl @@ -0,0 +1,21 @@ +using SciMLOperators: MatrixOperator +using OrdinaryDiffEqRKIP: RKIP +using SciMLBase: SplitODEProblem, SplitFunction, solve + +import LinearAlgebra + +@testset "Cache Recycling Test" begin + matrix = [1 0.2 -0.2; 0.4 1 -1; 0.2 0.0 1.2] + Â = MatrixOperator(matrix) + u0 = [0.0, 0.0, 0.0] + f! = (dx, x, p, t) -> dx .= cos.(x) + + alg = RKIP(1e-2, 1e1) + + splfc = SplitFunction{true}(Â, f!) + spltode = SplitODEProblem(splfc, u0, (0, 1.0)) + + sol_1 = solve(spltode, alg).u[end] + sol_2 = solve(spltode, alg).u[end] + @test all(sol_1 .≈ sol_2) +end diff --git a/lib/OrdinaryDiffEqRKIP/test/runtests.jl b/lib/OrdinaryDiffEqRKIP/test/runtests.jl new file mode 100644 index 0000000000..e492c53cf3 --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/test/runtests.jl @@ -0,0 +1,6 @@ +using SafeTestsets + +@safetestset "Type Safety Tests" include("type_test.jl") +@safetestset "Cache Test" include("cache_recycling_test.jl") +@safetestset "Fourier Semilinear PDE Tests" include("semilinear_pde_test_cpu.jl") +@safetestset "CUDA Test" include("semilinear_pde_test_cuda.jl") diff --git a/lib/OrdinaryDiffEqRKIP/test/semilinear_fft.jl b/lib/OrdinaryDiffEqRKIP/test/semilinear_fft.jl new file mode 100644 index 0000000000..23acb406ca --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/test/semilinear_fft.jl @@ -0,0 +1,164 @@ +import LinearAlgebra +using CUDA +using FFTW + +using OrdinaryDiffEqRKIP: RKIP +using SciMLBase: SplitODEProblem, SplitFunction, solve +using SciMLOperators: AbstractSciMLOperator + +struct DiagonalFourierOperator{T, uType <: AbstractVector{T}, fftType, ifftType} <: + AbstractSciMLOperator{T} + diag::uType + plan_fft!::fftType + plan_ifft!::ifftType + tmp::uType +end + +function DiagonalFourierOperator(diag, plan_fft!_prototype, plan_ifft!_prototype) + tmp = similar(diag) + return DiagonalFourierOperator( + diag, plan_fft!_prototype(tmp), plan_ifft!_prototype(tmp), tmp) +end + +function DiagonalFourierOperator(diag::Vector{T}) where {T <: Complex} + DiagonalFourierOperator(diag, FFTW.plan_fft!, FFTW.plan_ifft!) +end +function DiagonalFourierOperator(diag::CuArray{T, 1}) where {T <: Complex} + DiagonalFourierOperator(diag, CUDA.CUFFT.plan_fft!, CUDA.CUFFT.plan_ifft!) +end + +function LinearAlgebra.exp(D::DiagonalFourierOperator, t) + exp_kernel = similar(D.diag) + D.tmp .= D.diag + D.tmp .*= t + exp_kernel .= exp.(D.tmp) + DiagonalFourierOperator(exp_kernel, D.plan_fft!, D.plan_ifft!, D.tmp) +end + +@inline @fastmath function apply_kernel!(du, u, tmp, kernel, op_fft!, op_ifft!) + copyto!(tmp, u) + op_fft! * tmp + tmp .*= kernel + op_ifft! * tmp + copyto!(du, tmp) +end + +function (D::DiagonalFourierOperator)(du, u, _, _, _) + apply_kernel!(du, u, D.tmp, D.diag, D.plan_fft!, D.plan_ifft!) +end + +gpu_init(u0::CuArray, ::Vector{T}) where {T <: Real} = u0 +gpu_init(u0::Vector, ::Vector{T}) where {T <: Real} = CuArray{Complex{T}, 1}(u0) +gpu_init(u0::Function, τ::Vector{T}) where {T <: Real} = CuArray{Complex{T}, 1}(u0.(τ)) + +cpu_init(u0::Vector, ::Vector{T}) where {T <: Real} = u0 +cpu_init(u0::Function, τ::Vector{T}) where {T <: Real} = Vector{Complex{T}}(u0.(τ)) + +init(u0, τ, ::Val{true}) = gpu_init(u0, τ) +init(u0, τ, ::Val{false}) = cpu_init(u0, τ) + +function create_semilinear_problem( + nb_pts::Int, + dτ::T, + integration_limit::T, + u0, + semilinear_fourier_part::Function, + nonlinear_part!::Function, + use_gpu +) where {T <: Real} + domain_size = dτ * nb_pts + τ = Vector{T}(range(-domain_size / 2.0, domain_size / 2.0, nb_pts)) + + u0_good_type = init(u0, τ, use_gpu) + + ω = 2π .* Vector{T}(FFTW.fftfreq(nb_pts, nb_pts / domain_size)) + diag = typeof(u0_good_type)(semilinear_fourier_part.(ω)) + + D̂ = DiagonalFourierOperator(diag) + N̂ = (du, u, _, t) -> nonlinear_part!(du, u, t) + + splfc = SplitFunction{true}(D̂, N̂) + return SplitODEProblem(splfc, u0_good_type, (0.0, integration_limit)) +end + +@inline @fastmath function nlse_non_linpart(res, u) + res .= u + res .= abs2.(res) + res .*= u + res .*= 1im +end + +@inline @fastmath function lle_non_linpart!(res, u, Δ, S) + nlse_non_linpart(res, u) + LinearAlgebra.axpby!(-1im * Δ, u, 1, res) + res .+= S +end + +function create_nlse( + nb_pts::Int, dτ::T, integration_limit::T, u0, use_gpu) where {T <: Real} + create_semilinear_problem(nb_pts, dτ, integration_limit, u0, ω -> -0.5im * ω^2, + (du, u, t) -> nlse_non_linpart(du, u), use_gpu) +end + +function create_lle( + nb_pts::Int, dτ::T, integration_limit::T, u0, use_gpu, Δ::T, S::T) where {T <: Real} + create_semilinear_problem(nb_pts, dτ, integration_limit, u0, ω -> -1 - 1im * ω^2, + (du, u, t) -> lle_non_linpart!(du, u, Δ, S), use_gpu) +end + +function create_lle_scan(nb_pts::Int, dτ::T, scan_time::T, u0, use_gpu, + Δ_min::T, Δ_max::T, S::T) where {T <: Real} + create_semilinear_problem(nb_pts, dτ, scan_time, u0, ω -> -1 - 1im * ω^2, + (du, u, t) -> lle_non_linpart!(du, u, Δ_min + (Δ_max - Δ_min) * t / scan_time, S), use_gpu) +end + +""" +Test function of the NLSE on a conservative soliton +""" +function nlse_test( + ::Type{T}, use_gpu; B = 5.0, nb_pts = 512, dτ = 5e-3, propag_dist = 10.0, + save_everystep = false, reltol = 1e-6) where {T <: Real} + B = T(B) + + problem = create_nlse( + nb_pts, + T(dτ), + T(propag_dist), + τ -> B * sech(B * τ), + use_gpu + ) + return solve(problem, RKIP(T(1e-4), T(1.0)); save_everystep, reltol = T(reltol)) +end + +function nlse_test(type::Type{T}; kwargs...) where {T <: Real} + nlse_test(type, Val(false); kwargs...) +end +nlse_test(use_gpu::Val{true}; kwargs...) = nlse_test(Float64, use_gpu; kwargs...) +nlse_test(use_gpu::Val{false}; kwargs...) = nlse_test(Float64, use_gpu; kwargs...) +nlse_test(; kwargs...) = nlse_test(Float64; kwargs...) + +""" +Lugiato-Lefever equation test. Useful for benchmark due to its particular dynamic +""" +function lle_scan_test( + ::Type{T}, use_gpu; nb_pts = 1024, dτ = 5e-2, save_everystep = false, + reltol = 1e-6, S = 3, Δ_min = -2.0, Δ_max = 12.0, t_max = 10) where {T <: Real} + problem = create_lle_scan( + nb_pts, + T(dτ), + T(t_max), + 1e-1 * exp.(2im * π * rand(T, nb_pts)), + use_gpu, + T(Δ_min), + T(Δ_max), + T(S) + ) + return solve(problem, RKIP(T(1e-4), T(1.0)); save_everystep, reltol = T(reltol)) +end + +function lle_scan_test(type::Type{T}; kwargs...) where {T <: Real} + lle_scan_test(type, Val(false); kwargs...) +end +lle_scan_test(use_gpu::Val{true}; kwargs...) = lle_scan_test(Float64, use_gpu; kwargs...) +lle_scan_test(use_gpu::Val{false}; kwargs...) = lle_scan_test(Float64, use_gpu; kwargs...) +lle_scan_test(; kwargs...) = lle_scan_test(Float64; kwargs...) diff --git a/lib/OrdinaryDiffEqRKIP/test/semilinear_pde_test_cpu.jl b/lib/OrdinaryDiffEqRKIP/test/semilinear_pde_test_cpu.jl new file mode 100644 index 0000000000..c322c2e1d1 --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/test/semilinear_pde_test_cpu.jl @@ -0,0 +1,32 @@ +using Test +using LinearAlgebra: norm + +include("semilinear_fft.jl") + +@testset "Lugiato-Lefever equation test - Float64" begin + @test !isnothing(lle_scan_test()) +end + +@testset "Lugiato-Lefever equation test - Float32" begin + @test !isnothing(lle_scan_test(Float32)) +end + +@testset "NLSE test - Float64" begin + sol = nlse_test() + + # NLSE Soliton solution, only the phase change + p1 = abs2.(sol.u[1]) + p2 = abs2.(sol.u[end]) + + @test norm(p1 .- p2) / norm(p1) < 1e-2 # small difference with analytical solution due to boundary periodic condition +end + +@testset "NLSE test - Float32" begin + sol = nlse_test(Float32) + + # NLSE Soliton solution, only the phase change + p1 = abs2.(sol.u[1]) + p2 = abs2.(sol.u[end]) + + @test norm(p1 .- p2) / norm(p1) < 1e-2 # small difference with analytical solution due to boundary periodic condition +end diff --git a/lib/OrdinaryDiffEqRKIP/test/semilinear_pde_test_cuda.jl b/lib/OrdinaryDiffEqRKIP/test/semilinear_pde_test_cuda.jl new file mode 100644 index 0000000000..dde5dbc3de --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/test/semilinear_pde_test_cuda.jl @@ -0,0 +1,35 @@ +using Test +using LinearAlgebra: norm +using CUDA + +include("semilinear_fft.jl") + +CUDA.allowscalar(false) # ensure no scalar indexing is used (for performance) + +@testset "Lugiato-Lefever equation test on GPU with CUDA - Float64" begin + @test !isnothing(lle_scan_test(Val(true))) +end + +@testset "Lugiato-Lefever equation test on GPU with CUDA - Float32" begin + @test !isnothing(lle_scan_test(Float32, Val(true))) +end + +@testset "NLSE test on GPU with CUDA - Float64" begin + sol = nlse_test(Val(true)) + + # NLSE Soliton solution, only the phase change + p1 = abs2.(sol.u[1]) + p2 = abs2.(sol.u[end]) + + @test norm(p1 .- p2) / norm(p1) < 1.5e-2 # small difference with analytical sech solution due to boundary periodic condition and PDE discretization +end + +@testset "NLSE test on GPU with CUDA - Float32" begin + sol = nlse_test(Float32, Val(true)) + + # NLSE Soliton solution, only the phase change + p1 = abs2.(sol.u[1]) + p2 = abs2.(sol.u[end]) + + @test norm(p1 .- p2) / norm(p1) < 1.5e-2 # small difference with analytical sech solution due to boundary periodic condition and PDE discretization +end \ No newline at end of file diff --git a/lib/OrdinaryDiffEqRKIP/test/type_test.jl b/lib/OrdinaryDiffEqRKIP/test/type_test.jl new file mode 100644 index 0000000000..d33c51abc6 --- /dev/null +++ b/lib/OrdinaryDiffEqRKIP/test/type_test.jl @@ -0,0 +1,86 @@ +using Test +using OrdinaryDiffEqRKIP: RKIP +using SciMLBase: SplitODEProblem, SplitFunction, solve +using SciMLOperators: ScalarOperator, DiagonalOperator, MatrixOperator, + AbstractSciMLOperator + +using StaticArrays + +import LinearAlgebra + +LinearAlgebra.exp(d::ScalarOperator, t) = ScalarOperator(exp(t * d.val)) # Temporary Fix for missing exp dispatch +LinearAlgebra.exp(d::MatrixOperator, t) = MatrixOperator(exp(t * d.A)) + +function test(A_prototype, u_prototype, iip; use_ldiv = false) + for reltol in [1e-3, 1e-6, 1e-8, 1e-10, 1e-12], + μ in 1.05 + + analytic = (u0, _, t) -> u0 .* exp(2μ * t) + + for μₐ in [2μ, μ, 0.0] + Â = A_prototype(μₐ) + f = (du, u, _, _) -> du .= (2μ - μₐ) .* u + + if !iip + f = (u, _, _) -> (2μ - μₐ) .* u + end + + u0 = u_prototype(1.0) + t = (0.0, 1.0) + + splfc = SplitFunction{iip}(Â, f; analytic = analytic) + spltode = SplitODEProblem(splfc, u0, t) + sol = solve( + spltode, RKIP(; use_ldiv = use_ldiv); reltol = reltol, abstol = 1e-10) + + @test isapprox(sol(t[end]), splfc.analytic(u0, nothing, t[end]); + rtol = 1e2 * reltol, atol = 1e-8) + end + end +end + +@testset "In-Place ScalarOperator Vector Adaptative Ldiv" begin + test(μ -> ScalarOperator(μ), u0 -> [u0], true; use_ldiv = true) +end + +@testset "In-Place DiagonalOperator Vector Adaptative Ldiv" begin + test(μ -> DiagonalOperator([μ]), u0 -> [u0], true; use_ldiv = true) +end + +@testset "In-Place ScalarOperator Vector Adaptative" begin + test(μ -> ScalarOperator(μ), u0 -> [u0], true) +end + +@testset "In-Place DiagonalOperator Vector Adaptative" begin + test(μ -> DiagonalOperator([μ]), u0 -> [u0], true) +end + +@testset "In-Place MatrixOperator Vector Float Adaptative" begin + test(μ -> MatrixOperator([μ;;]), u0 -> [u0], true) +end + +@testset "Out-of-place Scalar Operator Float Adaptative" begin + test(μ -> ScalarOperator(μ), u0 -> u0, false) +end + +@testset "Out-of-place Scalar Operator SVector Adaptative" begin + test(μ -> ScalarOperator(μ), u0 -> SVector(u0), false) +end + +# Failing Test due to missing dispatch in SciMLOperator + +# fails as calling a MatrixOperator on a StaticVector change its type to Vector, causing a type instability + +# @testset "Out-of-place Diagonal 1x1 Operator SVector Adaptative" begin +# test((μ) -> DiagonalOperator([μ]), u0 -> SVector(u0), false) +# end + +# @testset "Out-of-place Matrix 1x1 Operator SVector Adaptative" begin +# test((μ) -> MatrixOperator([μ;;]), u0 -> SVector(u0), false) +# end + +# # Fails for a strange reason as calling ldiv!(a, B, c) on a MatrixOperator dispatch toward an inexisting method + +# @testset "In-Place MatrixOperator Vector Float Adaptative Ldiv" begin +# test(μ -> MatrixOperator([μ;;]), u0 -> [u0], true; use_ldiv=true) +# end diff --git a/src/OrdinaryDiffEq.jl b/src/OrdinaryDiffEq.jl index e689440fbf..64cfcad11d 100644 --- a/src/OrdinaryDiffEq.jl +++ b/src/OrdinaryDiffEq.jl @@ -208,6 +208,8 @@ export LawsonEuler, NorsettEuler, ETD1, ETDRK2, ETDRK3, ETDRK4, HochOst4, Exp4, EPIRK4s3B, EPIRK5s3, EXPRB53s3, EPIRK5P1, EPIRK5P2, ETD2, Exprb32, Exprb43 + + import PrecompileTools import Preferences import DocStringExtensions