Skip to content

Fix dRAID self-healing short columns #12010

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion module/zfs/vdev_draid.c
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,12 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
/* this is a "big column", nothing to add */
ASSERT3P(rc->rc_abd, !=, NULL);
} else {
/* short data column, add a skip sector */
/*
* short data column, add a skip sector and clear
* rc_tried to force the entire column to be re-read
* thereby including the missing skip sector data
* which is needed for reconstruction.
*/
ASSERT3U(rc->rc_size + skip_size, ==, parity_size);
ASSERT3U(rr->rr_nempty, !=, 0);
ASSERT3P(rc->rc_abd, !=, NULL);
Expand All @@ -823,6 +828,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
abd_gang_add(rc->rc_abd, abd_get_offset_size(
rr->rr_abd_empty, skip_off, skip_size), B_TRUE);
skip_off += skip_size;
rc->rc_tried = 0;
}

/*
Expand Down
4 changes: 2 additions & 2 deletions tests/runfiles/common.run
Original file line number Diff line number Diff line change
Expand Up @@ -741,8 +741,8 @@ tests = ['raidz_001_neg', 'raidz_002_pos', 'raidz_003_pos', 'raidz_004_pos']
tags = ['functional', 'raidz']

[tests/functional/redundancy]
tests = ['redundancy_draid1', 'redundancy_draid2', 'redundancy_draid3',
'redundancy_draid_spare1', 'redundancy_draid_spare2',
tests = ['redundancy_draid', 'redundancy_draid1', 'redundancy_draid2',
'redundancy_draid3', 'redundancy_draid_spare1', 'redundancy_draid_spare2',
'redundancy_draid_spare3', 'redundancy_mirror', 'redundancy_raidz',
'redundancy_raidz1', 'redundancy_raidz2', 'redundancy_raidz3',
'redundancy_stripe']
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/redundancy/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/redundancy
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
redundancy_draid.ksh \
redundancy_draid1.ksh \
redundancy_draid2.ksh \
redundancy_draid3.ksh \
Expand Down
248 changes: 248 additions & 0 deletions tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2020 by vStack. All rights reserved.
# Copyright (c) 2021 by Delphix. All rights reserved.
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib

#
# DESCRIPTION:
# dRAID should provide redundancy
#
# STRATEGY:
# 1. Create block device files for the test draid pool
# 2. For each parity value [1..3]
# - create draid pool
# - fill it with some directories/files
# - verify self-healing by overwriting devices
# - verify resilver by replacing devices
# - verify scrub by zeroing devices
# - destroy the draid pool

# Number of file-backed devices that make up the pool (dev-0 .. dev-5).
typeset -r devs=6
# Size of each device file in megabytes; used as "${dev_size_mb}M" for
# truncate and as a count of 1M blocks for dd.
typeset -r dev_size_mb=512

# Paths of the device files; filled in by the setup loop further down.
typeset -a disks

# Remember the current PREFETCH_DISABLE setting so cleanup can restore it.
prefetch_disable=$(get_tunable PREFETCH_DISABLE)

#
# Tear down everything this test created: destroy the test pool if it
# still exists, delete the backing files dev-0 .. dev-$devs, and put
# the PREFETCH_DISABLE tunable back to the value saved at startup.
#
function cleanup
{
	if poolexists "$TESTPOOL"; then
		destroy_pool "$TESTPOOL"
	fi

	# Inclusive upper bound: dev-$devs is the one extra file created
	# after the pool members.
	for (( n = 0; n <= devs; n = n + 1 )); do
		rm -f "$TEST_BASE_DIR/dev-$n"
	done

	set_tunable32 PREFETCH_DISABLE $prefetch_disable
}

#
# Verify self-healing after device contents are overwritten.
#
# Runs two identical rounds. Round 0 damages devices 0 .. nparity-1 and
# round 1 damages devices nparity .. 2*nparity-1. In each round the pool
# is exported, the chosen device files are zeroed past their first 4M,
# the pool is re-imported, every file under <pool>/fs is read, and a
# scrub is run; the pool must report no data errors after both the
# reads and the scrub.
#
function test_selfheal # <pool> <parity> <dir>
{
	typeset pool=$1
	typeset nparity=$2
	typeset dir=$3
	typeset round first d

	for round in 0 1; do
		first=$(( round * nparity ))

		log_must zpool export $pool

		# Zero each victim from the 4M mark to the end of the file.
		for (( d = first; d < first + nparity; d = d + 1 )); do
			log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$d \
			    bs=1M seek=4 count=$(( dev_size_mb - 4 ))
		done

		log_must zpool import -o cachefile=none -d $dir $pool

		# Checksum every file so that each one is actually read.
		typeset mnt=$(get_prop mountpoint $pool/fs)
		log_must find $mnt -type f -exec cksum {} + >> /dev/null 2>&1
		log_must check_pool_status $pool "errors" "No known data errors"

		#
		# Reading files only heals the blocks that were read. A
		# scrub repairs every remaining block so that the next set
		# of devices can safely be overwritten.
		#
		log_must zpool scrub -w $pool
		log_must check_pool_status $pool "errors" "No known data errors"

		log_must zpool clear $pool
	done
}

#
# Verify resilvering by replacing devices in place.
#
# Runs two identical passes. Pass 0 works on devices 0 .. nparity-1 and
# pass 1 on devices nparity .. 2*nparity-1. In each pass the chosen
# devices are offlined, the pool is exported, their labels are cleared,
# the pool is re-imported, and each device is replaced with itself
# ("zpool replace -fw"). Afterwards the pool must report no data errors
# and cksum_pool must report 0; otherwise the pool status is logged and
# the test fails.
#
# All working variables are declared with typeset so nothing leaks into
# the caller's scope.
#
function test_resilver # <pool> <parity> <dir>
{
	typeset pool=$1
	typeset nparity=$2
	typeset dir=$3
	typeset pass first last i resilver_cksum

	for pass in 0 1; do
		first=$(( pass * nparity ))
		last=$(( first + nparity ))

		for (( i = first; i < last; i = i + 1 )); do
			log_must zpool offline "$pool" "$dir/dev-$i"
		done

		log_must zpool export "$pool"

		for (( i = first; i < last; i = i + 1 )); do
			log_must zpool labelclear -f "$dir/dev-$i"
		done

		log_must zpool import -o cachefile=none -d "$dir" "$pool"

		for (( i = first; i < last; i = i + 1 )); do
			log_must zpool replace -fw "$pool" "$dir/dev-$i"
		done

		log_must check_pool_status "$pool" "errors" \
		    "No known data errors"

		# Assigned separately from the typeset declaration above.
		resilver_cksum=$(cksum_pool "$pool")
		if [[ $resilver_cksum != 0 ]]; then
			log_must zpool status -v "$pool"
			log_fail "resilver cksum errors: $resilver_cksum"
		fi

		log_must zpool clear "$pool"
	done
}

#
# Verify that a scrub repairs devices whose contents were zeroed.
#
# Runs two identical passes. Pass 0 works on devices 0 .. nparity-1 and
# pass 1 on devices nparity .. 2*nparity-1. In each pass the pool is
# exported, the chosen device files are zeroed past their first 4M, the
# pool is re-imported and scrubbed, and the pool must then report no
# data errors.
#
# The dd overwrite goes through log_must, as it does in test_selfheal,
# so a failed overwrite fails the test instead of letting the scrub
# pass against undamaged devices.
#
function test_scrub # <pool> <parity> <dir>
{
	typeset pool=$1
	typeset nparity=$2
	typeset dir=$3
	typeset pass first i

	for pass in 0 1; do
		first=$(( pass * nparity ))

		log_must zpool export "$pool"

		# Zero each victim from the 4M mark to the end of the file.
		for (( i = first; i < first + nparity; i = i + 1 )); do
			log_must dd conv=notrunc if=/dev/zero \
			    of="$dir/dev-$i" \
			    bs=1M seek=4 count=$(( dev_size_mb - 4 ))
		done

		log_must zpool import -o cachefile=none -d "$dir" "$pool"

		log_must zpool scrub -w "$pool"
		log_must check_pool_status "$pool" "errors" \
		    "No known data errors"

		log_must zpool clear "$pool"
	done
}

# Register the cleanup function with the test framework.
log_onexit cleanup

# The original value was saved in prefetch_disable; cleanup restores it.
log_must set_tunable32 PREFETCH_DISABLE 1

# Disk files which will be used by pool
for (( i = 0; i < devs; i = i + 1 )); do
	device=$TEST_BASE_DIR/dev-$i
	log_must truncate -s ${dev_size_mb}M "$device"
	# Append; the first element lands at index 1, which is harmless
	# because the array is only ever expanded as a whole.
	disks[${#disks[*]}+1]=$device
done

# One extra file, dev-$devs, sized like the pool members.
# NOTE(review): nothing in this script attaches it to the pool; it is
# only removed again by cleanup.
log_must truncate -s ${dev_size_mb}M "$TEST_BASE_DIR/dev-$devs"

for nparity in 1 2 3; do
	raid=draid$nparity
	dir=$TEST_BASE_DIR

	log_must zpool create -f -o cachefile=none $TESTPOOL $raid \
	    "${disks[@]}"
	log_must zfs set primarycache=metadata $TESTPOOL

	# Three datasets with different properties: defaults, compression
	# on, and compression on with an 8k recordsize.
	log_must zfs create $TESTPOOL/fs
	log_must fill_fs /$TESTPOOL/fs 1 512 100 1024 R

	log_must zfs create -o compress=on $TESTPOOL/fs2
	log_must fill_fs /$TESTPOOL/fs2 1 512 100 1024 R

	log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
	log_must fill_fs /$TESTPOOL/fs3 1 512 100 1024 R

	# NOTE(review): pool_size is not referenced again in this script.
	typeset pool_size=$(get_pool_prop size $TESTPOOL)

	# Export and re-import once before any device is damaged; the
	# pool must be error-free at this point.
	log_must zpool export $TESTPOOL
	log_must zpool import -o cachefile=none -d "$dir" $TESTPOOL

	log_must check_pool_status $TESTPOOL "errors" "No known data errors"

	test_selfheal $TESTPOOL $nparity "$dir"
	test_resilver $TESTPOOL $nparity "$dir"
	test_scrub $TESTPOOL $nparity "$dir"

	log_must zpool destroy "$TESTPOOL"
done

log_pass "draid redundancy test succeeded."
52 changes: 51 additions & 1 deletion tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#
# Copyright (c) 2020 by vStack. All rights reserved.
# Copyright (c) 2021 by Delphix. All rights reserved.
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib
Expand All @@ -37,6 +38,7 @@
# 2. For each parity value [1..3]
# - create raidz pool
# - fill it with some directories/files
# - verify self-healing by overwriting devices
# - verify resilver by replacing devices
# - verify scrub by zeroing devices
# - destroy the raidz pool
Expand All @@ -59,6 +61,54 @@ function cleanup
set_tunable32 PREFETCH_DISABLE $prefetch_disable
}

#
# Verify self-healing after device contents are overwritten.
#
# Runs two identical rounds. Round 0 damages devices 0 .. nparity-1 and
# round 1 damages devices nparity .. 2*nparity-1. In each round the pool
# is exported, the chosen device files are zeroed past their first 4M,
# the pool is re-imported, every file under <pool>/fs is read, and a
# scrub is run; the pool must report no data errors after both the
# reads and the scrub.
#
function test_selfheal # <pool> <parity> <dir>
{
	typeset pool=$1
	typeset nparity=$2
	typeset dir=$3
	typeset round first d

	for round in 0 1; do
		first=$(( round * nparity ))

		log_must zpool export $pool

		# Zero each victim from the 4M mark to the end of the file.
		for (( d = first; d < first + nparity; d = d + 1 )); do
			log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$d \
			    bs=1M seek=4 count=$(( dev_size_mb - 4 ))
		done

		log_must zpool import -o cachefile=none -d $dir $pool

		# Checksum every file so that each one is actually read.
		typeset mnt=$(get_prop mountpoint $pool/fs)
		log_must find $mnt -type f -exec cksum {} + >> /dev/null 2>&1
		log_must check_pool_status $pool "errors" "No known data errors"

		#
		# Reading files only heals the blocks that were read. A
		# scrub repairs every remaining block so that the next set
		# of devices can safely be overwritten.
		#
		log_must zpool scrub -w $pool
		log_must check_pool_status $pool "errors" "No known data errors"

		log_must zpool clear $pool
	done
}

function test_resilver # <pool> <parity> <dir>
{
typeset pool=$1
Expand Down Expand Up @@ -121,7 +171,6 @@ function test_scrub # <pool> <parity> <dir>
typeset pool=$1
typeset nparity=$2
typeset dir=$3
typeset combrec=$4

log_must zpool export $pool

Expand Down Expand Up @@ -189,6 +238,7 @@ for nparity in 1 2 3; do

log_must check_pool_status $TESTPOOL "errors" "No known data errors"

test_selfheal $TESTPOOL $nparity $dir
test_resilver $TESTPOOL $nparity $dir
test_scrub $TESTPOOL $nparity $dir

Expand Down