Skip to content

Commit bd44fa2

Browse files
committed
RAIDZ: Use cache blocking during parity math
RAIDZ parity is calculated by adding data one column at a time. This works well for small blocks, but for large blocks the results of the previous addition may already have been evicted from the CPU caches to main memory, so in addition to the extra memory write, an extra read is required to bring them back. This patch splits large parity operations into 64KB chunks, which should in most cases fit into the L2 caches of CPUs from the last decade. I haven't touched the more complicated cases of data reconstruction, so as not to overcomplicate the code; those should be relatively rare. My tests on a Xeon Gold 6242R CPU with 1MB of L2 cache per core show up to a 10/20% memory traffic reduction when writing to 4-wide RAIDZ/RAIDZ2 blocks of ~4MB and up. Older CPUs with 256KB of L2 cache should see the effect even on smaller blocks. Wider vdevs may need bigger blocks to be affected. Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc.
1 parent e007908 commit bd44fa2

File tree

3 files changed

+104
-82
lines changed

3 files changed

+104
-82
lines changed

include/sys/abd.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,11 +133,11 @@ int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
133133
void abd_zero_off(abd_t *, size_t, size_t);
134134
void abd_verify(abd_t *);
135135

136-
void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
137-
ssize_t csize, ssize_t dsize, const unsigned parity,
136+
void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
137+
size_t csize, size_t dsize, const unsigned parity,
138138
void (*func_raidz_gen)(void **, const void *, size_t, size_t));
139139
void abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
140-
ssize_t tsize, const unsigned parity,
140+
size_t tsize, const unsigned parity,
141141
void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
142142
const unsigned *mul),
143143
const unsigned *mul);

module/zfs/abd.c

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,12 +1017,12 @@ abd_cmp(abd_t *dabd, abd_t *sabd)
10171017
* is the same when taking linear and when taking scatter
10181018
*/
10191019
void
1020-
abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
1021-
ssize_t csize, ssize_t dsize, const unsigned parity,
1020+
abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
1021+
size_t csize, size_t dsize, const unsigned parity,
10221022
void (*func_raidz_gen)(void **, const void *, size_t, size_t))
10231023
{
10241024
int i;
1025-
ssize_t len, dlen;
1025+
size_t len, dlen;
10261026
struct abd_iter caiters[3];
10271027
struct abd_iter daiter;
10281028
void *caddrs[3];
@@ -1033,16 +1033,15 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
10331033
ASSERT3U(parity, <=, 3);
10341034
for (i = 0; i < parity; i++) {
10351035
abd_verify(cabds[i]);
1036-
ASSERT3U(csize, <=, cabds[i]->abd_size);
1037-
c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], 0);
1036+
ASSERT3U(off + csize, <=, cabds[i]->abd_size);
1037+
c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], off);
10381038
}
10391039

1040-
ASSERT3S(dsize, >=, 0);
10411040
if (dsize > 0) {
10421041
ASSERT(dabd);
10431042
abd_verify(dabd);
1044-
ASSERT3U(dsize, <=, dabd->abd_size);
1045-
c_dabd = abd_init_abd_iter(dabd, &daiter, 0);
1043+
ASSERT3U(off + dsize, <=, dabd->abd_size);
1044+
c_dabd = abd_init_abd_iter(dabd, &daiter, off);
10461045
}
10471046

10481047
abd_enter_critical(flags);
@@ -1064,7 +1063,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
10641063
dlen = 0;
10651064

10661065
/* must be progressive */
1067-
ASSERT3S(len, >, 0);
1066+
ASSERT3U(len, >, 0);
10681067
/*
10691068
* The iterated function likely will not do well if each
10701069
* segment except the last one is not multiple of 512 (raidz).
@@ -1089,9 +1088,6 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
10891088
}
10901089

10911090
csize -= len;
1092-
1093-
ASSERT3S(dsize, >=, 0);
1094-
ASSERT3S(csize, >=, 0);
10951091
}
10961092
abd_exit_critical(flags);
10971093
}
@@ -1108,13 +1104,13 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
11081104
*/
11091105
void
11101106
abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
1111-
ssize_t tsize, const unsigned parity,
1107+
size_t tsize, const unsigned parity,
11121108
void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
11131109
const unsigned *mul),
11141110
const unsigned *mul)
11151111
{
11161112
int i;
1117-
ssize_t len;
1113+
size_t len;
11181114
struct abd_iter citers[3];
11191115
struct abd_iter xiters[3];
11201116
void *caddrs[3], *xaddrs[3];

0 commit comments

Comments
 (0)