|
| 1 | +#!/bin/ksh -p |
| 2 | +# |
| 3 | +# CDDL HEADER START |
| 4 | +# |
| 5 | +# The contents of this file are subject to the terms of the |
| 6 | +# Common Development and Distribution License (the "License"). |
| 7 | +# You may not use this file except in compliance with the License. |
| 8 | +# |
| 9 | +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| 10 | +# or https://opensource.org/licenses/CDDL-1.0. |
| 11 | +# See the License for the specific language governing permissions |
| 12 | +# and limitations under the License. |
| 13 | +# |
| 14 | +# When distributing Covered Code, include this CDDL HEADER in each |
| 15 | +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| 16 | +# If applicable, add the following below this CDDL HEADER, with the |
| 17 | +# fields enclosed by brackets "[]" replaced with your own identifying |
| 18 | +# information: Portions Copyright [yyyy] [name of copyright owner] |
| 19 | +# |
| 20 | +# CDDL HEADER END |
| 21 | +# |
| 22 | + |
| 23 | +# |
| 24 | +# Copyright (c) 2024, Klara Inc. |
| 25 | +# |
| 26 | + |
| 27 | +. $STF_SUITE/include/libtest.shlib |
| 28 | +. $STF_SUITE/include/blkdev.shlib |
| 29 | + |
| 30 | +# |
| 31 | +# DESCRIPTION: Verify that 4 disks removed from a raidz3 will suspend the pool |
| 32 | +# |
| 33 | +# STRATEGY: |
| 34 | +# 1. Disable ZED -- this test is focused on vdev_probe errors |
| 35 | +# 2. Create a raidz3 pool where 4 disks can be removed (i.e., using scsi_debug) |
| 36 | +# 3. Add some data to it for a resilver workload |
| 37 | +# 4. Replace one of the child vdevs to start a replacing vdev |
| 38 | +# 5. During the resilver, remove 4 disks including one from the replacing vdev |
| 39 | +# 6. Verify that the pool is suspended (it used to remain online) |
| 40 | +# |
| 41 | + |
# Number and size of the file-backed vdevs created under TEST_BASE_DIR.
FILE_VDEV_CNT=8
FILE_VDEV_SIZ=256M

# Size in MB handed to load_scsi_debug. The resulting device is split into
# four equal partitions, each meant to match FILE_VDEV_SIZ.
DEV_SIZE_MB=1024
| 46 | + |
#
# Exit handler (registered with log_onexit): destroy the test pool, unload
# the scsi_debug device, delete the file vdevs and restart ZED.
#
# The removals deliberately do not go through log_must: assuming log_must
# aborts on failure, a failed rm must not be able to skip the remaining
# teardown, in particular the zed_start at the end.
#
function cleanup
{
	destroy_pool $TESTPOOL
	unload_scsi_debug

	# DATA_FILE is not assigned anywhere in this script; only remove it
	# when something else has set it.
	if [[ -n "$DATA_FILE" ]]; then
		rm -f "$DATA_FILE"
	fi

	typeset -i idx
	for ((idx = 0; idx < FILE_VDEV_CNT; idx++)); do
		rm -f "$TEST_BASE_DIR/dev-$idx"
	done

	zed_start
}
| 57 | + |
log_onexit cleanup

log_assert "VDEV probe errors for more disks than parity should suspend a pool"

# This test is focused on vdev probe errors, so ZED is stopped while it runs
# (cleanup starts it again).
log_note "Stopping ZED process"
zed_stop

# Clear the pool event log before the test begins
log_must zpool events -c
| 65 | + |
# Make a debug device that we can "unplug" and lose 4 drives at once
unload_scsi_debug
load_scsi_debug $DEV_SIZE_MB 1 1 1 '512b'
sd=$(get_debug_device)
# Without a device name every path below would be bogus (e.g. "/dev/1")
[[ -n "$sd" ]] || log_fail "scsi_debug device not found"

# Create 4 equal partitions that match the FILE_VDEV_SIZ
log_must parted "/dev/${sd}" --script mklabel gpt
for pct in 0 25 50 75; do
	log_must parted "/dev/${sd}" --script mkpart primary \
	    "${pct}%" "$((pct + 25))%"
done
block_device_wait "/dev/${sd}"
blkdevs="/dev/${sd}1 /dev/${sd}2 /dev/${sd}3 /dev/${sd}4"
| 79 | + |
# Create FILE_VDEV_CNT file vdevs of FILE_VDEV_SIZ each
typeset -a filedevs
for ((i = 0; i < FILE_VDEV_CNT; i++)); do
	device=$TEST_BASE_DIR/dev-$i
	log_must truncate -s $FILE_VDEV_SIZ $device
	# Use all but the last one for pool create; the bound is derived
	# from FILE_VDEV_CNT so the two cannot drift apart
	if ((i < FILE_VDEV_CNT - 1)); then
		filedevs+=("$device")
	fi
done
| 90 | + |
# Build the raidz3 pool out of the file vdevs plus the partitions of the
# debug device, so that 4 members can be pulled from it at once
log_must zpool create -f $TESTPOOL raidz3 ${filedevs[@]} $blkdevs
sync_pool $TESTPOOL

# Populate a filesystem in the pool and report how long the fill took
log_must zfs create $TESTPOOL/fs
MNTPOINT=$(get_prop mountpoint $TESTPOOL/fs)

SECONDS=0
log_must fill_fs $MNTPOINT 3 200 32768 300 Z
log_note "fill_fs took $SECONDS seconds"

sync_pool $TESTPOOL
| 102 | + |
# Kick off a replacing vdev: swap the 4th debug partition for a file vdev
log_must zpool replace -f $TESTPOOL /dev/${sd}4 $TEST_BASE_DIR/dev-7

# Offline the debug device, which removes all 4 of its partitions at once
log_must eval "echo offline > /sys/block/${sd}/device/state"

# Write some data to drive the vdev probe errors
log_must dd if=/dev/urandom of=$MNTPOINT/writes bs=1M count=1

# Poll the pool state kstat once a second until it reads SUSPENDED,
# failing the test once the retry budget is used up
log_note "waiting for pool to suspend"
typeset -i tries=10
while [[ $(cat /proc/spl/kstat/zfs/$TESTPOOL/state) != "SUSPENDED" ]]; do
	if ((tries == 0)); then
		zpool status -s
		log_fail "UNEXPECTED -- pool did not suspend"
	fi
	((tries -= 1))
	sleep 1
done
| 122 | + |
# Record the pool status for the test log
zpool status $TESTPOOL

# Bring the debug device, and with it the missing disks, back into service
log_must eval "echo running > /sys/block/$sd/device/state"

# Clearing the vdev error states reopens the vdevs and resumes the pool
log_must zpool clear $TESTPOOL

# Poll the pool state kstat once a second until it no longer reads
# SUSPENDED, failing the test once the retry budget is used up
log_note "waiting for pool to resume"
tries=10
while [[ $(cat /proc/spl/kstat/zfs/$TESTPOOL/state) == "SUSPENDED" ]]; do
	if ((tries == 0)); then
		log_fail "pool did not resume"
	fi
	((tries -= 1))
	sleep 1
done
| 140 | + |
# Make sure a pool scrub comes back clean. The status check must name
# $TESTPOOL explicitly; no other pool variable is set in this script.
log_must zpool scrub -w $TESTPOOL
log_must check_pool_status $TESTPOOL "errors" "No known data errors"

log_pass "VDEV probe errors for more disks than parity should suspend a pool"
0 commit comments