Skip to content

Commit 8670644

Browse files
authored
Introduce write-mostly sums
wmsum counters are a reduced version of aggsum counters, optimized for write-mostly scenarios. They do not provide optimized read functions, but instead allow a much cheaper add function. The primary usage is infrequently read statistic counters that do not require exact precision. The Linux implementation is directly mapped onto the percpu_counter KPI. The FreeBSD implementation is directly mapped onto the counter(9) KPI. In user space, for lack of a better implementation, wmsum is mapped onto aggsum. Unfortunately, neither Linux percpu_counter nor FreeBSD counter(9) provides sufficient functionality to completely replace aggsum, so aggsum still remains in use for several hot counters. Reviewed-by: Paul Dagnelie <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Ryan Moeller <[email protected]> Signed-off-by: Alexander Motin <[email protected]> Sponsored-By: iXsystems, Inc. Closes #12114
1 parent 2041d6e commit 8670644

File tree

10 files changed

+319
-72
lines changed

10 files changed

+319
-72
lines changed

config/kernel-percpu.m4

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,31 @@ AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_INIT], [
2525
])
2626
])
2727

28+
dnl #
29+
dnl # 4.13 API change,
30+
dnl # __percpu_counter_add() was renamed to percpu_counter_add_batch().
31+
dnl #
dnl # The test program registered below under the name
dnl # "percpu_counter_add_batch" includes <linux/percpu_counter.h> and calls
dnl # percpu_counter_add_batch() on an uninitialized local
dnl # struct percpu_counter.
dnl #
32+
AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_ADD_BATCH], [
33+
ZFS_LINUX_TEST_SRC([percpu_counter_add_batch], [
34+
#include <linux/percpu_counter.h>
35+
],[
36+
struct percpu_counter counter;
37+
38+
percpu_counter_add_batch(&counter, 1, 1);
39+
])
40+
])
41+
42+
dnl #
dnl # Report the outcome of the "percpu_counter_add_batch" test.  The branch
dnl # that prints "yes" also defines HAVE_PERCPU_COUNTER_ADD_BATCH; the
dnl # branch that prints "no" defines nothing.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_ADD_BATCH], [
43+
AC_MSG_CHECKING([whether percpu_counter_add_batch() is defined])
44+
ZFS_LINUX_TEST_RESULT([percpu_counter_add_batch], [
45+
AC_MSG_RESULT(yes)
46+
AC_DEFINE(HAVE_PERCPU_COUNTER_ADD_BATCH, 1,
47+
[percpu_counter_add_batch() is defined])
48+
],[
49+
AC_MSG_RESULT(no)
50+
])
51+
])
52+
2853
dnl #
2954
dnl # 5.10 API change,
3055
dnl # The "count" was moved into ref->data, from ref
@@ -51,10 +76,12 @@ AC_DEFUN([ZFS_AC_KERNEL_PERCPU_REF_COUNT_IN_DATA], [
5176
])
5277
dnl #
dnl # Single entry point that expands the three percpu *_SRC_* macros listed
dnl # below, in this order: counter init, counter add_batch, ref count in data.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU], [
5378
ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT
79+
ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_ADD_BATCH
5480
ZFS_AC_KERNEL_SRC_PERCPU_REF_COUNT_IN_DATA
5581
])
5682

5783
dnl #
dnl # Single entry point that expands the three percpu result macros listed
dnl # below, in this order: counter init, counter add_batch, ref count in data.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_PERCPU], [
5884
ZFS_AC_KERNEL_PERCPU_COUNTER_INIT
85+
ZFS_AC_KERNEL_PERCPU_COUNTER_ADD_BATCH
5986
ZFS_AC_KERNEL_PERCPU_REF_COUNT_IN_DATA
6087
])

include/os/freebsd/spl/sys/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ KERNEL_H = \
6868
vmsystm.h \
6969
vnode_impl.h \
7070
vnode.h \
71+
wmsum.h \
7172
zmod.h \
7273
zone.h
7374

include/os/freebsd/spl/sys/wmsum.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* This file and its contents are supplied under the terms of the
5+
* Common Development and Distribution License ("CDDL"), version 1.0.
6+
* You may only use this file in accordance with the terms of version
7+
* 1.0 of the CDDL.
8+
*
9+
* A full copy of the text of the CDDL should have accompanied this
10+
* source. A copy of the CDDL is also available via the Internet at
11+
* http://www.illumos.org/license/CDDL.
12+
*
13+
* CDDL HEADER END
14+
*/
15+
16+
/*
17+
* wmsum counters are a reduced version of aggsum counters, optimized for
18+
* write-mostly scenarios. They do not provide optimized read functions,
19+
* but instead allow much cheaper add function. The primary usage is
20+
* infrequently read statistic counters, not requiring exact precision.
21+
*
22+
* The FreeBSD implementation is directly mapped into counter(9) KPI.
23+
*/
24+
25+
#ifndef _SYS_WMSUM_H
26+
#define _SYS_WMSUM_H
27+
28+
#include <sys/types.h>
29+
#include <sys/systm.h>
30+
#include <sys/counter.h>
31+
#include <sys/malloc.h>
32+
33+
#ifdef __cplusplus
34+
extern "C" {
35+
#endif
36+
37+
#define wmsum_t counter_u64_t
38+
39+
/*
 * Initialize *ws: allocate a counter with counter_u64_alloc(M_WAITOK),
 * store it in *ws, then add the caller-supplied starting value to it.
 *
 * The allocation result is used without a NULL check.
 * NOTE(review): presumably this relies on M_WAITOK allocations not
 * failing -- confirm against counter(9).
 */
static inline void
40+
wmsum_init(wmsum_t *ws, uint64_t value)
41+
{
42+
43+
*ws = counter_u64_alloc(M_WAITOK);
44+
counter_u64_add(*ws, value);
45+
}
46+
47+
/*
 * Tear down *ws by passing the stored counter to counter_u64_free().
 * *ws itself is left unchanged, so it still holds the freed handle on
 * return.
 */
static inline void
47+
wmsum_fini(wmsum_t *ws)
48+
{
49+
50+
counter_u64_free(*ws);
51+
}
53+
54+
/*
 * Return the current value of *ws as reported by counter_u64_fetch().
 */
static inline uint64_t
54+
wmsum_value(wmsum_t *ws)
55+
{
56+
57+
return (counter_u64_fetch(*ws));
58+
}
60+
61+
/*
 * Add delta to *ws through counter_u64_add().
 *
 * NOTE(review): delta is a signed int64_t handed to a "u64" counter API;
 * negative deltas presumably rely on unsigned wraparound -- confirm
 * against counter(9).
 */
static inline void
61+
wmsum_add(wmsum_t *ws, int64_t delta)
62+
{
63+
64+
counter_u64_add(*ws, delta);
65+
}
67+
68+
#ifdef __cplusplus
69+
}
70+
#endif
71+
72+
#endif /* _SYS_WMSUM_H */

include/os/linux/spl/sys/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ KERNEL_H = \
5454
vmsystm.h \
5555
vnode.h \
5656
wait.h \
57+
wmsum.h \
5758
zmod.h \
5859
zone.h
5960

include/os/linux/spl/sys/wmsum.h

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* This file and its contents are supplied under the terms of the
5+
* Common Development and Distribution License ("CDDL"), version 1.0.
6+
* You may only use this file in accordance with the terms of version
7+
* 1.0 of the CDDL.
8+
*
9+
* A full copy of the text of the CDDL should have accompanied this
10+
* source. A copy of the CDDL is also available via the Internet at
11+
* http://www.illumos.org/license/CDDL.
12+
*
13+
* CDDL HEADER END
14+
*/
15+
16+
/*
17+
* wmsum counters are a reduced version of aggsum counters, optimized for
18+
* write-mostly scenarios. They do not provide optimized read functions,
19+
* but instead allow much cheaper add function. The primary usage is
20+
* infrequently read statistic counters, not requiring exact precision.
21+
*
22+
* The Linux implementation is directly mapped into percpu_counter KPI.
23+
*/
24+
25+
#ifndef _SYS_WMSUM_H
26+
#define _SYS_WMSUM_H
27+
28+
#include <linux/percpu_counter.h>
29+
30+
#ifdef __cplusplus
31+
extern "C" {
32+
#endif
33+
34+
typedef struct percpu_counter wmsum_t;
35+
36+
/*
 * Initialize the percpu_counter behind *ws with the given starting value.
 *
 * HAVE_PERCPU_COUNTER_INIT_WITH_GFP selects between the three-argument
 * form of percpu_counter_init(), called here with GFP_KERNEL, and the
 * two-argument form.
 *
 * NOTE(review): any status returned by percpu_counter_init() is not
 * checked here, and this function returns void -- confirm that an
 * initialization failure cannot occur or is acceptable to callers.
 */
static inline void
36+
wmsum_init(wmsum_t *ws, uint64_t value)
37+
{
38+
39+
#ifdef HAVE_PERCPU_COUNTER_INIT_WITH_GFP
40+
percpu_counter_init(ws, value, GFP_KERNEL);
41+
#else
42+
percpu_counter_init(ws, value);
43+
#endif
44+
}
46+
47+
/*
 * Tear down *ws by handing it to percpu_counter_destroy().
 */
static inline void
47+
wmsum_fini(wmsum_t *ws)
48+
{
49+
50+
percpu_counter_destroy(ws);
51+
}
53+
54+
/*
 * Return the current value of *ws: the result of percpu_counter_sum(),
 * converted to uint64_t by the return type.
 */
static inline uint64_t
54+
wmsum_value(wmsum_t *ws)
55+
{
56+
57+
return (percpu_counter_sum(ws));
58+
}
60+
61+
/*
 * Add delta to *ws using a batch argument of INT_MAX / 2.
 *
 * HAVE_PERCPU_COUNTER_ADD_BATCH selects percpu_counter_add_batch();
 * otherwise the older name __percpu_counter_add() is called with the
 * same arguments.
 *
 * NOTE(review): the very large batch value presumably keeps additions
 * in per-CPU storage rather than folding them into the shared count --
 * confirm against the percpu_counter documentation.
 */
static inline void
61+
wmsum_add(wmsum_t *ws, int64_t delta)
62+
{
63+
64+
#ifdef HAVE_PERCPU_COUNTER_ADD_BATCH
65+
percpu_counter_add_batch(ws, delta, INT_MAX / 2);
66+
#else
67+
__percpu_counter_add(ws, delta, INT_MAX / 2);
68+
#endif
69+
}
71+
72+
#ifdef __cplusplus
73+
}
74+
#endif
75+
76+
#endif /* _SYS_WMSUM_H */

include/sys/dataset_kstats.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,18 +27,18 @@
2727
#ifndef _SYS_DATASET_KSTATS_H
2828
#define _SYS_DATASET_KSTATS_H
2929

30-
#include <sys/aggsum.h>
30+
#include <sys/wmsum.h>
3131
#include <sys/dmu.h>
3232
#include <sys/kstat.h>
3333

34-
typedef struct dataset_aggsum_stats_t {
35-
aggsum_t das_writes;
36-
aggsum_t das_nwritten;
37-
aggsum_t das_reads;
38-
aggsum_t das_nread;
39-
aggsum_t das_nunlinks;
40-
aggsum_t das_nunlinked;
41-
} dataset_aggsum_stats_t;
34+
/*
 * Set of wmsum_t counters kept for a dataset's statistics.
 *
 * NOTE(review): the meaning of each field is inferred from its name only
 * (writes/reads/nunlinks look like operation counts, nwritten/nread like
 * byte totals, nunlinked like a completed-unlink count) -- confirm
 * against the code that updates them.
 */
typedef struct dataset_sum_stats_t {
35+
wmsum_t dss_writes;
36+
wmsum_t dss_nwritten;
37+
wmsum_t dss_reads;
38+
wmsum_t dss_nread;
39+
wmsum_t dss_nunlinks;
40+
wmsum_t dss_nunlinked;
41+
} dataset_sum_stats_t;
4242

4343
typedef struct dataset_kstat_values {
4444
kstat_named_t dkv_ds_name;
@@ -59,7 +59,7 @@ typedef struct dataset_kstat_values {
5959
} dataset_kstat_values_t;
6060

6161
typedef struct dataset_kstats {
62-
dataset_aggsum_stats_t dk_aggsums;
62+
dataset_sum_stats_t dk_sums;
6363
kstat_t *dk_kstats;
6464
} dataset_kstats_t;
6565

lib/libspl/include/sys/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,5 @@ libspl_HEADERS = \
4444
varargs.h \
4545
vnode.h \
4646
vtoc.h \
47+
wmsum.h \
4748
zone.h

lib/libspl/include/sys/wmsum.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* This file and its contents are supplied under the terms of the
5+
* Common Development and Distribution License ("CDDL"), version 1.0.
6+
* You may only use this file in accordance with the terms of version
7+
* 1.0 of the CDDL.
8+
*
9+
* A full copy of the text of the CDDL should have accompanied this
10+
* source. A copy of the CDDL is also available via the Internet at
11+
* http://www.illumos.org/license/CDDL.
12+
*
13+
* CDDL HEADER END
14+
*/
15+
16+
/*
17+
* wmsum counters are a reduced version of aggsum counters, optimized for
18+
* write-mostly scenarios. They do not provide optimized read functions,
19+
* but instead allow much cheaper add function. The primary usage is
20+
* infrequently read statistic counters, not requiring exact precision.
21+
*
22+
* In user space, for lack of a better implementation, wmsum is mapped onto aggsum.
23+
*/
24+
25+
#ifndef _SYS_WMSUM_H
26+
#define _SYS_WMSUM_H
27+
28+
#include <sys/aggsum.h>
29+
30+
#ifdef __cplusplus
31+
extern "C" {
32+
#endif
33+
34+
#define wmsum_t aggsum_t
35+
36+
/*
 * Initialize *ws with the given starting value.  In this header wmsum_t
 * is defined as aggsum_t, so the call forwards to aggsum_init().
 */
static inline void
36+
wmsum_init(wmsum_t *ws, uint64_t value)
37+
{
38+
39+
aggsum_init(ws, value);
40+
}
42+
43+
/*
 * Tear down *ws by forwarding to aggsum_fini().
 */
static inline void
43+
wmsum_fini(wmsum_t *ws)
44+
{
45+
46+
aggsum_fini(ws);
47+
}
49+
50+
/*
 * Return the current value of *ws as reported by aggsum_value().
 */
static inline uint64_t
50+
wmsum_value(wmsum_t *ws)
51+
{
52+
53+
return (aggsum_value(ws));
54+
}
56+
57+
/*
 * Add the signed delta to *ws by forwarding to aggsum_add().
 */
static inline void
57+
wmsum_add(wmsum_t *ws, int64_t delta)
58+
{
59+
60+
aggsum_add(ws, delta);
61+
}
63+
64+
#ifdef __cplusplus
65+
}
66+
#endif
67+
68+
#endif /* _SYS_WMSUM_H */

0 commit comments

Comments
 (0)