diff --git a/benchmark/pybench/benchmarks/bench_blas.py b/benchmark/pybench/benchmarks/bench_blas.py index 064be1ead9..70ea030731 100644 --- a/benchmark/pybench/benchmarks/bench_blas.py +++ b/benchmark/pybench/benchmarks/bench_blas.py @@ -1,15 +1,15 @@ import pytest import numpy as np -from openblas_wrap import ( - # level 1 - dnrm2, ddot, daxpy, - # level 3 - dgemm, dsyrk, - # lapack - dgesv, # linalg.solve - dgesdd, dgesdd_lwork, # linalg.svd - dsyev, dsyev_lwork, # linalg.eigh -) +import openblas_wrap as ow + +dtype_map = { + 's': np.float32, + 'd': np.float64, + 'c': np.complex64, + 'z': np.complex128, + 'dz': np.complex128, +} + # ### BLAS level 1 ### @@ -17,53 +17,119 @@ dnrm2_sizes = [100, 1000] -def run_dnrm2(n, x, incx): - res = dnrm2(x, n, incx=incx) +def run_dnrm2(n, x, incx, func): + res = func(x, n, incx=incx) return res +@pytest.mark.parametrize('variant', ['d', 'dz']) @pytest.mark.parametrize('n', dnrm2_sizes) -def test_nrm2(benchmark, n): +def test_nrm2(benchmark, n, variant): rndm = np.random.RandomState(1234) - x = np.array(rndm.uniform(size=(n,)), dtype=float) - result = benchmark(run_dnrm2, n, x, 1) + dtyp = dtype_map[variant] + + x = np.array(rndm.uniform(size=(n,)), dtype=dtyp) + nrm2 = ow.get_func('nrm2', variant) + result = benchmark(run_dnrm2, n, x, 1, nrm2) # ddot ddot_sizes = [100, 1000] -def run_ddot(x, y,): - res = ddot(x, y) +def run_ddot(x, y, func): + res = func(x, y) return res @pytest.mark.parametrize('n', ddot_sizes) def test_dot(benchmark, n): rndm = np.random.RandomState(1234) + x = np.array(rndm.uniform(size=(n,)), dtype=float) y = np.array(rndm.uniform(size=(n,)), dtype=float) - result = benchmark(run_ddot, x, y) + dot = ow.get_func('dot', 'd') + result = benchmark(run_ddot, x, y, dot) # daxpy daxpy_sizes = [100, 1000] -def run_daxpy(x, y,): - res = daxpy(x, y, a=2.0) +def run_daxpy(x, y, func): + res = func(x, y, a=2.0) return res +@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z']) @pytest.mark.parametrize('n', daxpy_sizes) -def test_daxpy(benchmark, n): +def test_daxpy(benchmark, n, variant): rndm = np.random.RandomState(1234) - x = np.array(rndm.uniform(size=(n,)), dtype=float) - y = np.array(rndm.uniform(size=(n,)), dtype=float) - result = benchmark(run_daxpy, x, y) + dtyp = dtype_map[variant] + + x = np.array(rndm.uniform(size=(n,)), dtype=dtyp) + y = np.array(rndm.uniform(size=(n,)), dtype=dtyp) + axpy = ow.get_func('axpy', variant) + result = benchmark(run_daxpy, x, y, axpy) + + +# ### BLAS level 2 ### + +gemv_sizes = [100, 1000] + +def run_gemv(a, x, y, func): + res = func(1.0, a, x, y=y, overwrite_y=True) + return res + + +@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z']) +@pytest.mark.parametrize('n', gemv_sizes) +def test_dgemv(benchmark, n, variant): + rndm = np.random.RandomState(1234) + dtyp = dtype_map[variant] + + x = np.array(rndm.uniform(size=(n,)), dtype=dtyp) + y = np.empty(n, dtype=dtyp) + + a = np.array(rndm.uniform(size=(n,n)), dtype=dtyp) + x = np.array(rndm.uniform(size=(n,)), dtype=dtyp) + y = np.zeros(n, dtype=dtyp) + + gemv = ow.get_func('gemv', variant) + result = benchmark(run_gemv, a, x, y, gemv) + assert result is y +# dgbmv + +dgbmv_sizes = [100, 1000] + +def run_gbmv(m, n, kl, ku, a, x, y, func): + res = func(m, n, kl, ku, 1.0, a, x, y=y, overwrite_y=True) + return res + + + +@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z']) +@pytest.mark.parametrize('n', dgbmv_sizes) +@pytest.mark.parametrize('kl', [1]) +def test_dgbmv(benchmark, n, kl, variant): + rndm = np.random.RandomState(1234) + dtyp = dtype_map[variant] + + x = np.array(rndm.uniform(size=(n,)), dtype=dtyp) + y = np.empty(n, dtype=dtyp) + + m = n + + a = rndm.uniform(size=(2*kl + 1, n)) + a = np.array(a, dtype=dtyp, order='F') + + gbmv = ow.get_func('gbmv', variant) + result = benchmark(run_gbmv, m, n, kl, kl, a, x, y, gbmv) + assert result is y + # ### BLAS level 3 ### @@ -71,19 +137,22 @@ def test_daxpy(benchmark, n): gemm_sizes = [100, 1000] -def run_gemm(a, b, c): +def run_gemm(a, b, c, func): alpha = 1.0 - res = dgemm(alpha, a, b, c=c, overwrite_c=True) + res = func(alpha, a, b, c=c, overwrite_c=True) return res +@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z']) @pytest.mark.parametrize('n', gemm_sizes) -def test_gemm(benchmark, n): +def test_gemm(benchmark, n, variant): rndm = np.random.RandomState(1234) - a = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F') - b = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F') - c = np.empty((n, n), dtype=float, order='F') - result = benchmark(run_gemm, a, b, c) + dtyp = dtype_map[variant] + a = np.array(rndm.uniform(size=(n, n)), dtype=dtyp, order='F') + b = np.array(rndm.uniform(size=(n, n)), dtype=dtyp, order='F') + c = np.empty((n, n), dtype=dtyp, order='F') + gemm = ow.get_func('gemm', variant) + result = benchmark(run_gemm, a, b, c, gemm) assert result is c @@ -92,17 +161,20 @@ def test_gemm(benchmark, n): syrk_sizes = [100, 1000] -def run_syrk(a, c): - res = dsyrk(1.0, a, c=c, overwrite_c=True) +def run_syrk(a, c, func): + res = func(1.0, a, c=c, overwrite_c=True) return res +@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z']) @pytest.mark.parametrize('n', syrk_sizes) -def test_syrk(benchmark, n): +def test_syrk(benchmark, n, variant): rndm = np.random.RandomState(1234) - a = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F') - c = np.empty((n, n), dtype=float, order='F') - result = benchmark(run_syrk, a, c) + dtyp = dtype_map[variant] + a = np.array(rndm.uniform(size=(n, n)), dtype=dtyp, order='F') + c = np.empty((n, n), dtype=dtyp, order='F') + syrk = ow.get_func('syrk', variant) + result = benchmark(run_syrk, a, c, syrk) assert result is c @@ -113,18 +185,22 @@ def test_syrk(benchmark, n): gesv_sizes = [100, 1000] -def run_gesv(a, b): - res = dgesv(a, b, overwrite_a=True, overwrite_b=True) +def run_gesv(a, b, func): + res = func(a, b, overwrite_a=True, overwrite_b=True) return res +@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z']) @pytest.mark.parametrize('n', gesv_sizes) -def test_gesv(benchmark, n): +def test_gesv(benchmark, n, variant): rndm = np.random.RandomState(1234) - a = (np.array(rndm.uniform(size=(n, n)), dtype=float, order='F') + - np.eye(n, order='F')) - b = np.array(rndm.uniform(size=(n, 1)), order='F') - lu, piv, x, info = benchmark(run_gesv, a, b) + dtyp = dtype_map[variant] + + a = (np.array(rndm.uniform(size=(n, n)), dtype=dtyp, order='F') + + np.eye(n, dtype=dtyp, order='F')) + b = np.array(rndm.uniform(size=(n, 1)), dtype=dtyp, order='F') + gesv = ow.get_func('gesv', variant) + lu, piv, x, info = benchmark(run_gesv, a, b, gesv) assert lu is a assert x is b assert info == 0 @@ -135,25 +211,34 @@ def test_gesv(benchmark, n): gesdd_sizes = [(100, 5), (1000, 222)] -def run_gesdd(a, lwork): - res = dgesdd(a, lwork=lwork, full_matrices=False, overwrite_a=False) +def run_gesdd(a, lwork, func): + res = func(a, lwork=lwork, full_matrices=False, overwrite_a=False) return res +@pytest.mark.parametrize('variant', ['s', 'd']) @pytest.mark.parametrize('mn', gesdd_sizes) -def test_gesdd(benchmark, mn): +def test_gesdd(benchmark, mn, variant): m, n = mn rndm = np.random.RandomState(1234) - a = np.array(rndm.uniform(size=(m, n)), dtype=float, order='F') + dtyp = dtype_map[variant] + + a = np.array(rndm.uniform(size=(m, n)), dtype=dtyp, order='F') - lwork, info = dgesdd_lwork(m, n) + gesdd_lwork = ow.get_func('gesdd_lwork', variant) + + lwork, info = gesdd_lwork(m, n) lwork = int(lwork) assert info == 0 - u, s, vt, info = benchmark(run_gesdd, a, lwork) + gesdd = ow.get_func('gesdd', variant) + u, s, vt, info = benchmark(run_gesdd, a, lwork, gesdd) assert info == 0 - np.testing.assert_allclose(u @ np.diag(s) @ vt, a, atol=1e-13) + + atol = {'s': 1e-5, 'd': 1e-13} + + np.testing.assert_allclose(u @ np.diag(s) @ vt, a, atol=atol[variant]) # linalg.eigh @@ -161,23 +246,28 @@ def test_gesdd(benchmark, mn): syev_sizes = [50, 200] -def run_syev(a, lwork): - res = dsyev(a, lwork=lwork, overwrite_a=True) +def run_syev(a, lwork, func): + res = func(a, lwork=lwork, overwrite_a=True) return res +@pytest.mark.parametrize('variant', ['s', 'd']) @pytest.mark.parametrize('n', syev_sizes) -def test_syev(benchmark, n): +def test_syev(benchmark, n, variant): rndm = np.random.RandomState(1234) + dtyp = dtype_map[variant] + a = rndm.uniform(size=(n, n)) - a = np.asarray(a + a.T, dtype=float, order='F') + a = np.asarray(a + a.T, dtype=dtyp, order='F') a_ = a.copy() + dsyev_lwork = ow.get_func('syev_lwork', variant) lwork, info = dsyev_lwork(n) lwork = int(lwork) assert info == 0 - w, v, info = benchmark(run_syev, a, lwork) + syev = ow.get_func('syev', variant) + w, v, info = benchmark(run_syev, a, lwork, syev) assert info == 0 assert a is v # overwrite_a=True diff --git a/benchmark/pybench/openblas_wrap/__init__.py b/benchmark/pybench/openblas_wrap/__init__.py index 06e16a665a..9babb1917e 100644 --- a/benchmark/pybench/openblas_wrap/__init__.py +++ b/benchmark/pybench/openblas_wrap/__init__.py @@ -6,23 +6,12 @@ __version__ = "0.1" -#from scipy.linalg.blas import ( -from ._flapack import ( - # level 1 - dnrm2 as dnrm2, - ddot as ddot, - daxpy as daxpy, - # level 3 - dgemm as dgemm, - dsyrk as dsyrk, -) +from . import _flapack + +PREFIX = '' + + +def get_func(name, variant): + """get_func('gesv', 'c') -> cgesv etc.""" + return getattr(_flapack, PREFIX + variant + name) -#from scipy.linalg.lapack import ( -from openblas_wrap._flapack import ( - # linalg.solve - dgesv as dgesv, - # linalg.svd - dgesdd as dgesdd, dgesdd_lwork as dgesdd_lwork, - # linalg.eigh - dsyev as dsyev, dsyev_lwork as dsyev_lwork -) diff --git a/benchmark/pybench/openblas_wrap/blas_lapack.pyf.src b/benchmark/pybench/openblas_wrap/blas_lapack.pyf.src index d2d94baa0e..1ee1d3c30b 100644 --- a/benchmark/pybench/openblas_wrap/blas_lapack.pyf.src +++ b/benchmark/pybench/openblas_wrap/blas_lapack.pyf.src @@ -111,6 +111,97 @@ function nrm2(n,x,offx,incx) result(n2) end function nrm2 + +! +! Level 2 BLAS +! + + +subroutine gemv(m,n,alpha,a,x,beta,y,offx,incx,offy,incy,trans,rows,cols,ly) + ! Computes a matrix-vector product using a general matrix + ! + ! y = gemv(alpha,a,x,beta=0,y=0,offx=0,incx=1,offy=0,incy=0,trans=0) + ! Calculate y <- alpha * op(A) * x + beta * y + + callstatement (*f2py_func)((trans?(trans==2?"C":"T"):"N"),&m,&n,&alpha,a,&m, & + x+offx,&incx,&beta,y+offy,&incy) + callprotoargument char*,F_INT*,F_INT*,*,*,F_INT*,*,F_INT*,*, & + *,F_INT* + + integer optional, intent(in), check(trans>=0 && trans <=2) :: trans = 0 + integer optional, intent(in), check(incx>0||incx<0) :: incx = 1 + integer optional, intent(in), check(incy>0||incy<0) :: incy = 1 + intent(in) :: alpha + intent(in), optional :: beta = <0.0,\0,(0.0\,0.0),\2> + + dimension(*), intent(in) :: x + dimension(ly), intent(in,copy,out), depend(ly),optional :: y + integer intent(hide), depend(incy,rows,offy) :: ly = & + (y_capi==Py_None?1+offy+(rows-1)*abs(incy):-1) + dimension(m,n), intent(in) :: a + integer depend(a), intent(hide):: m = shape(a,0) + integer depend(a), intent(hide):: n = shape(a,1) + + integer optional, intent(in) :: offx=0 + integer optional, intent(in) :: offy=0 + check(offx>=0 && offxoffx+(cols-1)*abs(incx)) :: x + depend(offx,cols,incx) :: x + + check(offy>=0 && offyoffy+(rows-1)*abs(incy)) :: y + depend(offy,rows,incy) :: y + + integer depend(m,n,trans), intent(hide) :: rows = (trans?n:m) + integer depend(m,n,trans), intent(hide) :: cols = (trans?m:n) + +end subroutine gemv + + +subroutine gbmv(m,n,kl,ku,alpha,a,lda,x,incx,offx,beta,y,incy,offy,trans,ly) + ! Performs one of the matrix-vector operations + ! + ! y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, + ! or y := alpha*A**H*x + beta*y, + ! + ! where alpha and beta are scalars, x and y are vectors and A is an + ! m by n band matrix, with kl sub-diagonals and ku super-diagonals. + + callstatement (*f2py_func)((trans?(trans==2?"C":"T"):"N"),&m,&n,&kl,&ku,&alpha,a,&lda,x+offx,&incx,&beta,y+offy,&incy) + callprotoargument char*,F_INT*,F_INT*,F_INT*,F_INT*,*,*,F_INT*,*,F_INT*,*,*,F_INT* + + integer optional,intent(in),check(trans>=0 && trans <=2) :: trans = 0 + integer intent(in), depend(ku,kl),check(m>=ku+kl+1) :: m + integer intent(in),check(n>=0&&n==shape(a,1)),depend(a) :: n + integer intent(in),check(kl>=0) :: kl + integer intent(in),check(ku>=0) :: ku + integer intent(hide),depend(a) :: lda = MAX(shape(a,0),1) + integer optional, intent(in),check(incx>0||incx<0) :: incx = 1 + integer optional, intent(in),check(incy>0||incy<0) :: incy = 1 + integer intent(hide),depend(m,n,incy,offy,trans) :: ly = & + (y_capi==Py_None?1+offy+(trans==0?m-1:n-1)*abs(incy):-1) + integer optional, intent(in) :: offx=0 + integer optional, intent(in) :: offy=0 + + intent(in) :: alpha + intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2> + + dimension(lda,n),intent(in) :: a + + dimension(ly), intent(in,out,copy,out=yout),depend(ly),optional :: y + check(offy>=0 && offyoffy+(trans==0?m-1:n-1)*abs(incy)) :: y + depend(offy,n,incy) :: y + + dimension(*), intent(in) :: x + check(offx>=0 && offxoffx+(trans==0?n-1:m-1)*abs(incx)) :: x + depend(offx,n,incx) :: x + +end subroutine gbmv + + + ! ! Level 3 BLAS !