Skip to content

Commit 141368a

Browse files
authored
Restrict raidz faulted vdev count
Specifically, a child in a replacing vdev won't count when assessing the DTL during a vdev_fault().

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Tino Reichardt <[email protected]>
Signed-off-by: Don Brady <[email protected]>
Closes #16569
1 parent 7e957fd commit 141368a

File tree

4 files changed

+132
-11
lines changed

4 files changed

+132
-11
lines changed

module/zfs/vdev.c

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3149,9 +3149,9 @@ vdev_dtl_should_excise(vdev_t *vd, boolean_t rebuild_done)
31493149
* Reassess DTLs after a config change or scrub completion. If txg == 0 no
31503150
* write operations will be issued to the pool.
31513151
*/
3152-
void
3153-
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
3154-
boolean_t scrub_done, boolean_t rebuild_done)
3152+
static void
3153+
vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
3154+
boolean_t scrub_done, boolean_t rebuild_done, boolean_t faulting)
31553155
{
31563156
spa_t *spa = vd->vdev_spa;
31573157
avl_tree_t reftree;
@@ -3160,8 +3160,8 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
31603160
ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
31613161

31623162
for (int c = 0; c < vd->vdev_children; c++)
3163-
vdev_dtl_reassess(vd->vdev_child[c], txg,
3164-
scrub_txg, scrub_done, rebuild_done);
3163+
vdev_dtl_reassess_impl(vd->vdev_child[c], txg,
3164+
scrub_txg, scrub_done, rebuild_done, faulting);
31653165

31663166
if (vd == spa->spa_root_vdev || !vdev_is_concrete(vd) || vd->vdev_aux)
31673167
return;
@@ -3255,11 +3255,21 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
32553255
if (scrub_done)
32563256
range_tree_vacate(vd->vdev_dtl[DTL_SCRUB], NULL, NULL);
32573257
range_tree_vacate(vd->vdev_dtl[DTL_OUTAGE], NULL, NULL);
3258-
if (!vdev_readable(vd))
3258+
3259+
/*
3260+
* For the faulting case, treat members of a replacing vdev
3261+
* as if they are not available. It's more likely than not that
3262+
* a vdev in a replacing vdev could encounter read errors so
3263+
* treat it as not being able to contribute.
3264+
*/
3265+
if (!vdev_readable(vd) ||
3266+
(faulting && vd->vdev_parent != NULL &&
3267+
vd->vdev_parent->vdev_ops == &vdev_replacing_ops)) {
32593268
range_tree_add(vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL);
3260-
else
3269+
} else {
32613270
range_tree_walk(vd->vdev_dtl[DTL_MISSING],
32623271
range_tree_add, vd->vdev_dtl[DTL_OUTAGE]);
3272+
}
32633273

32643274
/*
32653275
* If the vdev was resilvering or rebuilding and no longer
@@ -3321,6 +3331,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
33213331
}
33223332
}
33233333

3334+
void
3335+
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
3336+
boolean_t scrub_done, boolean_t rebuild_done)
3337+
{
3338+
return (vdev_dtl_reassess_impl(vd, txg, scrub_txg, scrub_done,
3339+
rebuild_done, B_FALSE));
3340+
}
3341+
33243342
/*
33253343
* Iterate over all the vdevs except spare, and post kobj events
33263344
*/
@@ -3548,7 +3566,11 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
35483566
}
35493567

35503568
/*
3551-
* Determine whether the specified vdev can be offlined/detached/removed
3569+
* Determine whether the specified vdev can be
3570+
* - offlined
3571+
* - detached
3572+
* - removed
3573+
* - faulted
35523574
* without losing data.
35533575
*/
35543576
boolean_t
@@ -3558,6 +3580,7 @@ vdev_dtl_required(vdev_t *vd)
35583580
vdev_t *tvd = vd->vdev_top;
35593581
uint8_t cant_read = vd->vdev_cant_read;
35603582
boolean_t required;
3583+
boolean_t faulting = vd->vdev_state == VDEV_STATE_FAULTED;
35613584

35623585
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
35633586

@@ -3570,10 +3593,10 @@ vdev_dtl_required(vdev_t *vd)
35703593
* If not, we can safely offline/detach/remove the device.
35713594
*/
35723595
vd->vdev_cant_read = B_TRUE;
3573-
vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
3596+
vdev_dtl_reassess_impl(tvd, 0, 0, B_FALSE, B_FALSE, faulting);
35743597
required = !vdev_dtl_empty(tvd, DTL_OUTAGE);
35753598
vd->vdev_cant_read = cant_read;
3576-
vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
3599+
vdev_dtl_reassess_impl(tvd, 0, 0, B_FALSE, B_FALSE, faulting);
35773600

35783601
if (!required && zio_injection_enabled) {
35793602
required = !!zio_handle_device_injection(vd, NULL,

tests/runfiles/linux.run

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
125125
'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
126126
'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift',
127127
'auto_spare_shared', 'decrypt_fault', 'decompress_fault',
128-
'scrub_after_resilver', 'suspend_resume_single', 'zpool_status_-s']
128+
'fault_limits', 'scrub_after_resilver', 'suspend_resume_single',
129+
'zpool_status_-s']
129130
tags = ['functional', 'fault']
130131

131132
[tests/functional/features/large_dnode:Linux]

tests/zfs-tests/tests/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,6 +1525,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
15251525
functional/fault/cleanup.ksh \
15261526
functional/fault/decompress_fault.ksh \
15271527
functional/fault/decrypt_fault.ksh \
1528+
functional/fault/fault_limits.ksh \
15281529
functional/fault/scrub_after_resilver.ksh \
15291530
functional/fault/suspend_resume_single.ksh \
15301531
functional/fault/setup.ksh \
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2024 by Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION: Verify that raidz children vdev fault count is restricted
#
# STRATEGY:
# 1. Create a raidz2 or raidz3 pool and add some data to it
# 2. Replace one of the child vdevs to create a replacing vdev
# 3. While it is resilvering, attempt to fault disks
# 4. Verify that fewer than parity count disks were faulted while replacing
#

TESTPOOL="fault-test-pool"
# Randomly pick parity 2 or 3, and size the pool so one spare disk remains
# (VDEV_CNT file devices are created; the pool uses VDEV_CNT - 1 of them).
PARITY=$((RANDOM%(2) + 2))
VDEV_CNT=$((4 + (2 * PARITY)))
VDEV_SIZ=512M

# Destroy the pool and remove the backing file devices.
function cleanup
{
	poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL"

	for i in {0..$((VDEV_CNT - 1))}; do
		log_must rm -f "$TEST_BASE_DIR/dev-$i"
	done
}

log_onexit cleanup
log_assert "restricts raidz children vdev fault count"

log_note "creating $VDEV_CNT vdevs for parity $PARITY test"
typeset -a disks
for i in {0..$((VDEV_CNT - 1))}; do
	device=$TEST_BASE_DIR/dev-$i
	log_must truncate -s $VDEV_SIZ $device
	# Append to the array starting at index 1 (ksh arrays are sparse).
	disks[${#disks[*]}+1]=$device
done

# Build the pool from all but the last device; the last is the replacement.
log_must zpool create -f ${TESTPOOL} raidz${PARITY} ${disks[1..$((VDEV_CNT - 1))]}

# Add some data to the pool
log_must zfs create $TESTPOOL/fs
MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)"
log_must fill_fs $MNTPOINT $PARITY 200 32768 1000 Z
sync_pool $TESTPOOL

# Replace the last child vdev to form a replacing vdev
log_must zpool replace ${TESTPOOL} ${disks[$((VDEV_CNT - 1))]} ${disks[$VDEV_CNT]}
# Immediately offline the replacement disk to keep the replacing vdev around
log_must zpool offline ${TESTPOOL} ${disks[$VDEV_CNT]}

# Fault disks while a replacing vdev is still active
for disk in ${disks[0..$PARITY]}; do
	log_must zpool offline -tf ${TESTPOOL} $disk
done

zpool status $TESTPOOL

# Count the faults that succeeded
faults=0
for disk in ${disks[0..$PARITY]}; do
	state=$(zpool get -H -o value state ${TESTPOOL} ${disk})
	if [ "$state" = "FAULTED" ] ; then
		((faults=faults+1))
	fi
done

# With a replacing vdev present, the kernel must refuse to fault a full
# parity's worth of children, yet at least one fault should succeed.
log_must test "$faults" -lt "$PARITY"
log_must test "$faults" -gt 0

log_pass "restricts raidz children vdev fault count"

0 commit comments

Comments
 (0)