Skip to content

Commit 25b34e6

Browse files
authored
Merge pull request #4678 from ev-br/codspeed
WIP: add codspeed benchmarks [skip cirrus]
2 parents 5015548 + 81cf0db commit 25b34e6

File tree

10 files changed

+1148
-0
lines changed

10 files changed

+1148
-0
lines changed

.github/workflows/codspeed-bench.yml

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
name: Run codspeed benchmarks

on: [push, pull_request]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read  # to fetch code (actions/checkout)

jobs:
  benchmarks:
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        fortran: [gfortran]
        build: [make]
        pyver: ["3.12"]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v3
      # NOTE(review): setup-python@v3 predates Python 3.12 runners — confirm it
      # resolves 3.12 or bump to a newer major version of the action.
      - uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.pyver }}

      - name: Print system information
        run: |
          if [ "$RUNNER_OS" == "Linux" ]; then
            cat /proc/cpuinfo
          fi

      - name: Install Dependencies
        run: |
          if [ "$RUNNER_OS" == "Linux" ]; then
            sudo apt-get update
            sudo apt-get install -y gfortran cmake ccache libtinfo5
          else
            echo "::error::$RUNNER_OS not supported"
            exit 1
          fi

      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          # We include the commit sha in the cache key, as new cache entries are
          # only created if there is no existing entry for the key yet.
          # GNU make and cmake call the compilers differently. It looks like
          # that causes the cache to mismatch. Keep the ccache for both build
          # tools separate to avoid polluting each other.
          key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
          # Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
            ccache-${{ runner.os }}-${{ matrix.build }}

      - name: Write out the .pc
        run: |
          cd benchmark/pybench
          # The delimiter is quoted ('EOF') so the shell does NOT expand
          # ${version}: it must land in the .pc file literally so pkg-config
          # resolves it against the 'version=' variable defined above it.
          # ${{ ... }} expressions are substituted by GitHub Actions before the
          # shell ever runs, so they are unaffected by the quoting.
          cat > openblas.pc << 'EOF'
          libdir=${{ github.workspace }}
          includedir= ${{ github.workspace }}
          openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64
          version=0.0.99
          extralib=-lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
          Name: openblas
          Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
          Version: ${version}
          URL: https://github.com/xianyi/OpenBLAS
          Libs: ${{ github.workspace }}/libopenblas.so -Wl,-rpath,${{ github.workspace }}
          Libs.private: -lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
          Cflags: -I${{ github.workspace}}
          EOF
          cat openblas.pc

      - name: Configure ccache
        run: |
          if [ "${{ matrix.build }}" = "make" ]; then
            # Add ccache to path
            if [ "$RUNNER_OS" = "Linux" ]; then
              echo "/usr/lib/ccache" >> $GITHUB_PATH
            elif [ "$RUNNER_OS" = "macOS" ]; then
              echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
            else
              echo "::error::$RUNNER_OS not supported"
              exit 1
            fi
          fi
          # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      - name: Build OpenBLAS
        run: |
          case "${{ matrix.build }}" in
            "make")
              make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
              ;;
            "cmake")
              mkdir build && cd build
              cmake -DDYNAMIC_ARCH=1 \
                    -DNOFORTRAN=0 \
                    -DBUILD_WITHOUT_LAPACK=0 \
                    -DCMAKE_VERBOSE_MAKEFILE=ON \
                    -DCMAKE_BUILD_TYPE=Release \
                    -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
                    -DCMAKE_C_COMPILER_LAUNCHER=ccache \
                    -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
                    ..
              cmake --build .
              ;;
            *)
              echo "::error::Configuration not supported"
              exit 1
              ;;
          esac

      - name: Show ccache status
        continue-on-error: true
        run: ccache -s

      - name: Install benchmark dependencies
        run: pip install meson ninja numpy pytest pytest-codspeed --user

      - name: Build the wrapper
        run: |
          cd benchmark/pybench
          export PKG_CONFIG_PATH=$PWD
          meson setup build --prefix=$PWD/build-install
          meson install -C build
          #
          # sanity check
          cd build/openblas_wrap
          python -c'import _flapack; print(dir(_flapack))'

      - name: Run benchmarks
        uses: CodSpeedHQ/action@v2
        with:
          token: ${{ secrets.CODSPEED_TOKEN }}
          run: |
            cd benchmark/pybench
            export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/
            OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py --codspeed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,4 @@ benchmark/smallscaling
109109
CMakeCache.txt
110110
CMakeFiles/*
111111
.vscode
112+
**/__pycache__

benchmark/pybench/README.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Continuous benchmarking of OpenBLAS performance
2+
3+
We run a set of benchmarks on a subset of OpenBLAS functionality.
4+
5+
## Benchmark runner
6+
7+
[![CodSpeed Badge](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/OpenMathLib/OpenBLAS/)
8+
9+
Click on [benchmarks](https://codspeed.io/OpenMathLib/OpenBLAS/benchmarks) to see the performance of a particular benchmark over time;
10+
Click on [branches](https://codspeed.io/OpenMathLib/OpenBLAS/branches/) and then on the last PR link to see the flamegraphs.
11+
12+
## What are the benchmarks
13+
14+
We run raw BLAS/LAPACK subroutines, via f2py-generated python wrappers. The wrappers themselves are equivalent to [those from SciPy](https://docs.scipy.org/doc/scipy/reference/linalg.lapack.html).
15+
In fact, the wrappers _are_ from SciPy, we take a small subset simply to avoid having to build the whole SciPy for each CI run.
16+
17+
18+
## Adding a new benchmark
19+
20+
`.github/workflows/codspeed-bench.yml` does all the orchestration on CI.
21+
22+
Benchmarks live in the `benchmark/pybench` directory. It is organized as follows:
23+
24+
- benchmarks themselves live in the `benchmarks` folder. Note that the LAPACK routines are imported from the `openblas_wrap` package.
25+
- the `openblas_wrap` package is a simple trampoline: it contains an f2py extension, `_flapack`, which talks to OpenBLAS, and exports the python names in its `__init__.py`.
26+
This way, the `openblas_wrap` package shields the benchmarks from the details of where a particular LAPACK function comes from. If wanted, you may for instance swap the `_flapack` extension to
27+
`scipy.linalg.blas` and `scipy.linalg.lapack`.
28+
29+
To change parameters of an existing benchmark, edit python files in the `benchmark/pybench/benchmarks` directory.
30+
31+
To add a benchmark for a new BLAS or LAPACK function, you need to:
32+
33+
- add an f2py wrapper for the bare LAPACK function. You can simply copy a wrapper from SciPy (look for `*.pyf.src` files in https://github.com/scipy/scipy/tree/main/scipy/linalg)
34+
- add an import to `benchmark/pybench/openblas_wrap/__init__.py`
35+
36+
37+
## Running benchmarks locally
38+
39+
This benchmarking layer is orchestrated from Python; therefore you'll need
everything it takes to build OpenBLAS from source, plus `python` and the
following packages:
42+
```
43+
$ python -mpip install numpy meson ninja pytest pytest-benchmark
44+
```
45+
46+
The benchmark syntax is consistent with that of the `pytest-benchmark` framework. The incantation to run the suite locally is `$ pytest benchmark/pybench/benchmarks/bench_blas.py`.
47+
48+
An ASV compatible benchmark suite is planned but currently not implemented.
49+
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
import pytest
2+
import numpy as np
3+
from openblas_wrap import (
4+
# level 1
5+
dnrm2, ddot, daxpy,
6+
# level 3
7+
dgemm, dsyrk,
8+
# lapack
9+
dgesv, # linalg.solve
10+
dgesdd, dgesdd_lwork, # linalg.svd
11+
dsyev, dsyev_lwork, # linalg.eigh
12+
)
13+
14+
# ### BLAS level 1 ###

# dnrm2

dnrm2_sizes = [100, 1000]


def run_dnrm2(n, x, incx):
    # Thin wrapper so the benchmark fixture times only the BLAS call itself.
    return dnrm2(x, n, incx=incx)


@pytest.mark.parametrize('n', dnrm2_sizes)
def test_nrm2(benchmark, n):
    # Fixed seed keeps inputs identical across runs for stable timings.
    rng = np.random.RandomState(1234)
    x = np.array(rng.uniform(size=(n,)), dtype=float)
    benchmark(run_dnrm2, n, x, 1)
30+
31+
32+
# ddot

ddot_sizes = [100, 1000]


def run_ddot(x, y):
    # Thin wrapper so the benchmark fixture times only the BLAS call itself.
    return ddot(x, y)


@pytest.mark.parametrize('n', ddot_sizes)
def test_dot(benchmark, n):
    # Fixed seed keeps inputs identical across runs for stable timings.
    rng = np.random.RandomState(1234)
    x = np.array(rng.uniform(size=(n,)), dtype=float)
    y = np.array(rng.uniform(size=(n,)), dtype=float)
    benchmark(run_ddot, x, y)
47+
48+
49+
# daxpy

daxpy_sizes = [100, 1000]


def run_daxpy(x, y):
    # Thin wrapper so the benchmark fixture times only the BLAS call itself.
    return daxpy(x, y, a=2.0)


@pytest.mark.parametrize('n', daxpy_sizes)
def test_daxpy(benchmark, n):
    # Fixed seed keeps inputs identical across runs for stable timings.
    rng = np.random.RandomState(1234)
    x = np.array(rng.uniform(size=(n,)), dtype=float)
    y = np.array(rng.uniform(size=(n,)), dtype=float)
    benchmark(run_daxpy, x, y)
64+
65+
66+
67+
68+
# ### BLAS level 3 ###

# dgemm

gemm_sizes = [100, 1000]


def run_gemm(a, b, c):
    # overwrite_c=True: the product is written into the preallocated `c`,
    # so allocation cost is excluded from the timed region.
    return dgemm(1.0, a, b, c=c, overwrite_c=True)


@pytest.mark.parametrize('n', gemm_sizes)
def test_gemm(benchmark, n):
    # Fixed seed keeps inputs identical across runs for stable timings.
    rng = np.random.RandomState(1234)
    a = np.array(rng.uniform(size=(n, n)), dtype=float, order='F')
    b = np.array(rng.uniform(size=(n, n)), dtype=float, order='F')
    c = np.empty((n, n), dtype=float, order='F')
    result = benchmark(run_gemm, a, b, c)
    # Confirm the in-place write actually happened (no hidden copy).
    assert result is c
88+
89+
90+
# dsyrk

syrk_sizes = [100, 1000]


def run_syrk(a, c):
    # overwrite_c=True: the result is written into the preallocated `c`,
    # so allocation cost is excluded from the timed region.
    return dsyrk(1.0, a, c=c, overwrite_c=True)


@pytest.mark.parametrize('n', syrk_sizes)
def test_syrk(benchmark, n):
    # Fixed seed keeps inputs identical across runs for stable timings.
    rng = np.random.RandomState(1234)
    a = np.array(rng.uniform(size=(n, n)), dtype=float, order='F')
    c = np.empty((n, n), dtype=float, order='F')
    result = benchmark(run_syrk, a, c)
    # Confirm the in-place write actually happened (no hidden copy).
    assert result is c
107+
108+
109+
# ### LAPACK ###

# linalg.solve

gesv_sizes = [100, 1000]


def run_gesv(a, b):
    # overwrite_* flags let the factorization/solution reuse the input
    # buffers, keeping allocations out of the timed region.
    return dgesv(a, b, overwrite_a=True, overwrite_b=True)


@pytest.mark.parametrize('n', gesv_sizes)
def test_gesv(benchmark, n):
    # Fixed seed keeps inputs identical across runs for stable timings.
    rng = np.random.RandomState(1234)
    # Adding the identity nudges the random matrix away from singularity.
    a = (np.array(rng.uniform(size=(n, n)), dtype=float, order='F') +
         np.eye(n, order='F'))
    b = np.array(rng.uniform(size=(n, 1)), order='F')
    lu, piv, x, info = benchmark(run_gesv, a, b)
    # The overwrite flags mean the outputs alias the inputs.
    assert lu is a
    assert x is b
    assert info == 0
131+
132+
133+
# linalg.svd

gesdd_sizes = [(100, 5), (1000, 222)]


def run_gesdd(a, lwork):
    # overwrite_a=False: `a` must survive so the result can be verified below.
    return dgesdd(a, lwork=lwork, full_matrices=False, overwrite_a=False)


@pytest.mark.parametrize('mn', gesdd_sizes)
def test_gesdd(benchmark, mn):
    m, n = mn
    # Fixed seed keeps inputs identical across runs for stable timings.
    rng = np.random.RandomState(1234)
    a = np.array(rng.uniform(size=(m, n)), dtype=float, order='F')

    # Workspace query is done once, outside the timed region.
    lwork, info = dgesdd_lwork(m, n)
    lwork = int(lwork)
    assert info == 0

    u, s, vt, info = benchmark(run_gesdd, a, lwork)

    assert info == 0
    # Sanity check: the factors must reproduce the input matrix.
    np.testing.assert_allclose(u @ np.diag(s) @ vt, a, atol=1e-13)
157+
158+
159+
# linalg.eigh

syev_sizes = [50, 200]


def run_syev(a, lwork):
    # overwrite_a=True: the eigenvector output reuses `a`'s buffer,
    # keeping allocations out of the timed region.
    return dsyev(a, lwork=lwork, overwrite_a=True)


@pytest.mark.parametrize('n', syev_sizes)
def test_syev(benchmark, n):
    # Fixed seed keeps inputs identical across runs for stable timings.
    rng = np.random.RandomState(1234)
    a = rng.uniform(size=(n, n))
    # Symmetrize: dsyev expects a symmetric matrix.
    a = np.asarray(a + a.T, dtype=float, order='F')
    # (An unused backup copy `a_` was removed here — it was dead code.)

    # Workspace query is done once, outside the timed region.
    lwork, info = dsyev_lwork(n)
    lwork = int(lwork)
    assert info == 0

    w, v, info = benchmark(run_syev, a, lwork)

    assert info == 0
    assert a is v  # overwrite_a=True: eigenvectors alias the input buffer
184+
185+

0 commit comments

Comments
 (0)