
Commit e716630

Notable upstream pull request merges:
  #15022 5caeef0 RAID-Z expansion feature
  #15457 887a3c5 Increase L2ARC write rate and headroom
  #15504 1c1be60 Unbreak FreeBSD world build after 3bd4df3

Obtained from: OpenZFS
OpenZFS commit: 887a3c5
2 parents f5b3e68 + 887a3c5 commit e716630


67 files changed: +5748, -884 lines

cddl/lib/libzpool/Makefile

Lines changed: 1 addition & 0 deletions
@@ -142,6 +142,7 @@ KERNEL_C = \
 	vdev_indirect_mapping.c \
 	vdev_initialize.c \
 	vdev_label.c \
+	vdev_label_os.c \
 	vdev_mirror.c \
 	vdev_missing.c \
 	vdev_queue.c \

sys/contrib/openzfs/cmd/raidz_test/raidz_bench.c

Lines changed: 6 additions & 6 deletions
@@ -84,10 +84,10 @@ run_gen_bench_impl(const char *impl)
 
 	if (rto_opts.rto_expand) {
 		rm_bench = vdev_raidz_map_alloc_expanded(
-		    zio_bench.io_abd,
-		    zio_bench.io_size, zio_bench.io_offset,
+		    &zio_bench,
 		    rto_opts.rto_ashift, ncols+1, ncols,
-		    fn+1, rto_opts.rto_expand_offset);
+		    fn+1, rto_opts.rto_expand_offset,
+		    0, B_FALSE);
 	} else {
 		rm_bench = vdev_raidz_map_alloc(&zio_bench,
 		    BENCH_ASHIFT, ncols, fn+1);
@@ -172,10 +172,10 @@ run_rec_bench_impl(const char *impl)
 
 	if (rto_opts.rto_expand) {
 		rm_bench = vdev_raidz_map_alloc_expanded(
-		    zio_bench.io_abd,
-		    zio_bench.io_size, zio_bench.io_offset,
+		    &zio_bench,
 		    BENCH_ASHIFT, ncols+1, ncols,
-		    PARITY_PQR, rto_opts.rto_expand_offset);
+		    PARITY_PQR,
+		    rto_opts.rto_expand_offset, 0, B_FALSE);
 	} else {
 		rm_bench = vdev_raidz_map_alloc(&zio_bench,
 		    BENCH_ASHIFT, ncols, PARITY_PQR);
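
Note: both hunks above show the reworked interface of vdev_raidz_map_alloc_expanded(). The caller now hands over the zio_t itself rather than the abd/size/offset triple, and supplies two extra trailing arguments (0 and B_FALSE at these call sites). A minimal before/after sketch follows; the meaning of the trailing arguments is inferred from these call sites, not quoted from the upstream header:

/* Old call shape: buffer and its geometry passed separately. */
rm_bench = vdev_raidz_map_alloc_expanded(
    zio_bench.io_abd, zio_bench.io_size, zio_bench.io_offset,
    rto_opts.rto_ashift, ncols + 1, ncols,
    fn + 1, rto_opts.rto_expand_offset);

/*
 * New call shape: the zio itself, plus a second reflow-related argument
 * (0 here) and a boolean flag (B_FALSE here) that the benchmark does not
 * exercise.
 */
rm_bench = vdev_raidz_map_alloc_expanded(
    &zio_bench,
    rto_opts.rto_ashift, ncols + 1, ncols,
    fn + 1, rto_opts.rto_expand_offset, 0, B_FALSE);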

sys/contrib/openzfs/cmd/raidz_test/raidz_test.c

Lines changed: 6 additions & 190 deletions
@@ -327,14 +327,12 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
 
 	if (opts->rto_expand) {
 		opts->rm_golden =
-		    vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
-		    opts->zio_golden->io_size, opts->zio_golden->io_offset,
+		    vdev_raidz_map_alloc_expanded(opts->zio_golden,
 		    opts->rto_ashift, total_ncols+1, total_ncols,
-		    parity, opts->rto_expand_offset);
-		rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
-		    zio_test->io_size, zio_test->io_offset,
+		    parity, opts->rto_expand_offset, 0, B_FALSE);
+		rm_test = vdev_raidz_map_alloc_expanded(zio_test,
 		    opts->rto_ashift, total_ncols+1, total_ncols,
-		    parity, opts->rto_expand_offset);
+		    parity, opts->rto_expand_offset, 0, B_FALSE);
 	} else {
 		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
 		    opts->rto_ashift, total_ncols, parity);
@@ -361,187 +359,6 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
 	return (err);
 }
 
-/*
- * If reflow is not in progress, reflow_offset should be UINT64_MAX.
- * For each row, if the row is entirely before reflow_offset, it will
- * come from the new location. Otherwise this row will come from the
- * old location. Therefore, rows that straddle the reflow_offset will
- * come from the old location.
- *
- * NOTE: Until raidz expansion is implemented this function is only
- * needed by raidz_test.c to the multi-row raid_map_t functionality.
- */
-raidz_map_t *
-vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
-    uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
-    uint64_t nparity, uint64_t reflow_offset)
-{
-	/* The zio's size in units of the vdev's minimum sector size. */
-	uint64_t s = size >> ashift;
-	uint64_t q, r, bc, devidx, asize = 0, tot;
-
-	/*
-	 * "Quotient": The number of data sectors for this stripe on all but
-	 * the "big column" child vdevs that also contain "remainder" data.
-	 * AKA "full rows"
-	 */
-	q = s / (logical_cols - nparity);
-
-	/*
-	 * "Remainder": The number of partial stripe data sectors in this I/O.
-	 * This will add a sector to some, but not all, child vdevs.
-	 */
-	r = s - q * (logical_cols - nparity);
-
-	/* The number of "big columns" - those which contain remainder data. */
-	bc = (r == 0 ? 0 : r + nparity);
-
-	/*
-	 * The total number of data and parity sectors associated with
-	 * this I/O.
-	 */
-	tot = s + nparity * (q + (r == 0 ? 0 : 1));
-
-	/* How many rows contain data (not skip) */
-	uint64_t rows = howmany(tot, logical_cols);
-	int cols = MIN(tot, logical_cols);
-
-	raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
-	    KM_SLEEP);
-	rm->rm_nrows = rows;
-
-	for (uint64_t row = 0; row < rows; row++) {
-		raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
-		    rr_col[cols]), KM_SLEEP);
-		rm->rm_row[row] = rr;
-
-		/* The starting RAIDZ (parent) vdev sector of the row. */
-		uint64_t b = (offset >> ashift) + row * logical_cols;
-
-		/*
-		 * If we are in the middle of a reflow, and any part of this
-		 * row has not been copied, then use the old location of
-		 * this row.
-		 */
-		int row_phys_cols = physical_cols;
-		if (b + (logical_cols - nparity) > reflow_offset >> ashift)
-			row_phys_cols--;
-
-		/* starting child of this row */
-		uint64_t child_id = b % row_phys_cols;
-		/* The starting byte offset on each child vdev. */
-		uint64_t child_offset = (b / row_phys_cols) << ashift;
-
-		/*
-		 * We set cols to the entire width of the block, even
-		 * if this row is shorter. This is needed because parity
-		 * generation (for Q and R) needs to know the entire width,
-		 * because it treats the short row as though it was
-		 * full-width (and the "phantom" sectors were zero-filled).
-		 *
-		 * Another approach to this would be to set cols shorter
-		 * (to just the number of columns that we might do i/o to)
-		 * and have another mechanism to tell the parity generation
-		 * about the "entire width". Reconstruction (at least
-		 * vdev_raidz_reconstruct_general()) would also need to
-		 * know about the "entire width".
-		 */
-		rr->rr_cols = cols;
-		rr->rr_bigcols = bc;
-		rr->rr_missingdata = 0;
-		rr->rr_missingparity = 0;
-		rr->rr_firstdatacol = nparity;
-		rr->rr_abd_empty = NULL;
-		rr->rr_nempty = 0;
-
-		for (int c = 0; c < rr->rr_cols; c++, child_id++) {
-			if (child_id >= row_phys_cols) {
-				child_id -= row_phys_cols;
-				child_offset += 1ULL << ashift;
-			}
-			rr->rr_col[c].rc_devidx = child_id;
-			rr->rr_col[c].rc_offset = child_offset;
-			rr->rr_col[c].rc_orig_data = NULL;
-			rr->rr_col[c].rc_error = 0;
-			rr->rr_col[c].rc_tried = 0;
-			rr->rr_col[c].rc_skipped = 0;
-			rr->rr_col[c].rc_need_orig_restore = B_FALSE;
-
-			uint64_t dc = c - rr->rr_firstdatacol;
-			if (c < rr->rr_firstdatacol) {
-				rr->rr_col[c].rc_size = 1ULL << ashift;
-				rr->rr_col[c].rc_abd =
-				    abd_alloc_linear(rr->rr_col[c].rc_size,
-				    B_TRUE);
-			} else if (row == rows - 1 && bc != 0 && c >= bc) {
-				/*
-				 * Past the end, this for parity generation.
-				 */
-				rr->rr_col[c].rc_size = 0;
-				rr->rr_col[c].rc_abd = NULL;
-			} else {
-				/*
-				 * "data column" (col excluding parity)
-				 * Add an ASCII art diagram here
-				 */
-				uint64_t off;
-
-				if (c < bc || r == 0) {
-					off = dc * rows + row;
-				} else {
-					off = r * rows +
-					    (dc - r) * (rows - 1) + row;
-				}
-				rr->rr_col[c].rc_size = 1ULL << ashift;
-				rr->rr_col[c].rc_abd = abd_get_offset_struct(
-				    &rr->rr_col[c].rc_abdstruct,
-				    abd, off << ashift, 1 << ashift);
-			}
-
-			asize += rr->rr_col[c].rc_size;
-		}
-		/*
-		 * If all data stored spans all columns, there's a danger that
-		 * parity will always be on the same device and, since parity
-		 * isn't read during normal operation, that that device's I/O
-		 * bandwidth won't be used effectively. We therefore switch
-		 * the parity every 1MB.
-		 *
-		 * ...at least that was, ostensibly, the theory. As a practical
-		 * matter unless we juggle the parity between all devices
-		 * evenly, we won't see any benefit. Further, occasional writes
-		 * that aren't a multiple of the LCM of the number of children
-		 * and the minimum stripe width are sufficient to avoid pessimal
-		 * behavior. Unfortunately, this decision created an implicit
-		 * on-disk format requirement that we need to support for all
-		 * eternity, but only for single-parity RAID-Z.
-		 *
-		 * If we intend to skip a sector in the zeroth column for
-		 * padding we must make sure to note this swap. We will never
-		 * intend to skip the first column since at least one data and
-		 * one parity column must appear in each row.
-		 */
-		if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
-		    (offset & (1ULL << 20))) {
-			ASSERT(rr->rr_cols >= 2);
-			ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);
-			devidx = rr->rr_col[0].rc_devidx;
-			uint64_t o = rr->rr_col[0].rc_offset;
-			rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
-			rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
-			rr->rr_col[1].rc_devidx = devidx;
-			rr->rr_col[1].rc_offset = o;
-		}
-
-	}
-	ASSERT3U(asize, ==, tot << ashift);
-
-	/* init RAIDZ parity ops */
-	rm->rm_ops = vdev_raidz_math_get_ops();
-
-	return (rm);
-}
-
 static raidz_map_t *
 init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
 {
@@ -561,10 +378,9 @@ init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
 	init_zio_abd(*zio);
 
 	if (opts->rto_expand) {
-		rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
-		    (*zio)->io_size, (*zio)->io_offset,
+		rm = vdev_raidz_map_alloc_expanded(*zio,
 		    opts->rto_ashift, total_ncols+1, total_ncols,
-		    parity, opts->rto_expand_offset);
+		    parity, opts->rto_expand_offset, 0, B_FALSE);
 	} else {
 		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
 		    total_ncols, parity);
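
Note: the function deleted above (its map-allocation logic now lives with the kernel-side RAID-Z code) also documents the map geometry: quotient q, remainder r, "big columns" bc, total sectors tot, and the row count. The following standalone sketch reproduces just that arithmetic from the deleted code; the helper name and the example parameters are illustrative, not part of the commit:

#include <stdint.h>
#include <stdio.h>

/*
 * Row/column geometry for an expanded RAID-Z map, following the formulas
 * in the function removed above.  Illustrative only.
 */
static void
raidz_expanded_geometry(uint64_t size, uint64_t ashift,
    uint64_t logical_cols, uint64_t nparity)
{
	/* The I/O size in units of the vdev's minimum sector size. */
	uint64_t s = size >> ashift;

	/* "Quotient": full rows of data sectors. */
	uint64_t q = s / (logical_cols - nparity);

	/* "Remainder": data sectors in the final, partial row. */
	uint64_t r = s - q * (logical_cols - nparity);

	/* "Big columns": columns that also hold a remainder sector. */
	uint64_t bc = (r == 0 ? 0 : r + nparity);

	/* Total data + parity sectors for this I/O. */
	uint64_t tot = s + nparity * (q + (r == 0 ? 0 : 1));

	/* Rows that contain data; howmany() is ceiling division. */
	uint64_t rows = (tot + logical_cols - 1) / logical_cols;

	printf("s=%ju q=%ju r=%ju bc=%ju tot=%ju rows=%ju\n",
	    (uintmax_t)s, (uintmax_t)q, (uintmax_t)r,
	    (uintmax_t)bc, (uintmax_t)tot, (uintmax_t)rows);
}

int
main(void)
{
	/* Example: a 128 KiB block on a 6-wide RAIDZ2 with 4 KiB sectors. */
	raidz_expanded_geometry(128 * 1024, 12, 6, 2);
	return (0);
}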

sys/contrib/openzfs/cmd/raidz_test/raidz_test.h

Lines changed: 0 additions & 3 deletions
@@ -119,7 +119,4 @@ void init_zio_abd(zio_t *zio);
 
 void run_raidz_benchmark(void);
 
-struct raidz_map *vdev_raidz_map_alloc_expanded(abd_t *, uint64_t, uint64_t,
-    uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
-
 #endif /* RAIDZ_TEST_H */

sys/contrib/openzfs/cmd/zdb/zdb.c

Lines changed: 5 additions & 0 deletions
@@ -4134,6 +4134,11 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
 	}
 	(void) printf("\tcheckpoint_txg = %llu\n",
 	    (u_longlong_t)ub->ub_checkpoint_txg);
+
+	(void) printf("\traidz_reflow state=%u off=%llu\n",
+	    (int)RRSS_GET_STATE(ub),
+	    (u_longlong_t)RRSS_GET_OFFSET(ub));
+
 	(void) printf("%s", footer ? footer : "");
 }
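
Note: dump_uberblock() is what `zdb -u <pool>` uses to display the uberblock, so after this change that output should gain a line of the form "raidz_reflow state=<state> off=<offset>", per the format string added above. RRSS_GET_STATE() and RRSS_GET_OFFSET() come in with the RAID-Z expansion feature merged by this commit; their exact encoding is not shown in this hunk.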
41394144
