Skip to content

make tensor products faster #58

Open
@vpuri3

Description

@vpuri3

with current @views based implementation:

function LinearAlgebra.mul!(v::AbstractVecOrMat, L::TensorProductOperator, u::AbstractVecOrMat)
@assert L.isset "cache needs to be set up for operator of type $(typeof(L)).
set up cache by calling cache_operator(L::AbstractSciMLOperator, u::AbstractArray)"
mi, ni = size(L.inner)
mo, no = size(L.outer)
k = size(u, 2)
C = L.cache
U = _reshape(u, (ni, no*k))
"""
v .= kron(B, A) * u
V .= A * U * B'
"""
# C .= A * U
mul!(C, L.inner, U)
# V .= U * B' <===> V' .= B * C'
if k>1
V = _reshape(v, (mi, mo, k))
C = _reshape(C, (mi, no, k))
@views for i=1:k
mul!(transpose(V[:,:,i]), L.outer, transpose(C[:,:,i]))
end
else
V = _reshape(v, (mi, mo))
C = _reshape(C, (mi, no))
mul!(transpose(V), L.outer, transpose(C))
end
v
end

using SciMLOperators, LinearAlgebra
using BenchmarkTools

A = TensorProductOperator(rand(12,12), rand(12,12), rand(12,12))

u = rand(12^3, 100)
v = rand(12^3, 100)

A = cache_operator(A, u)

mul!(v, A, u) # dunny
@btime mul!($v, $A, $u); # 16.901 ms (348801 allocations: 45.51 MiB)

B = convert(AbstractMatrix, A)
mul!(v, B, u) # dummy
@btime mul!($v, $B, $u); # 10.337 ms (0 allocations: 0 bytes)
julia> versioninfo()
Julia Version 1.8.0-rc1
Commit 6368fdc6565 (2022-05-27 18:33 UTC)
Platform Info:
  OS: macOS (x86_64-apple-darwin21.4.0)
  CPU: 4 × Intel(R) Core(TM) i5-5257U CPU @ 2.70GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-13.0.1 (ORCJIT, broadwell)
  Threads: 4 on 4 virtual cores
Environment:
  JULIA_NUM_PRECOMPILE_TASKS = 4
  JULIA_DEPOT_PATH = /Users/vp/.julia
  JULIA_NUM_THREADS = 4

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions