From b3ad60a20891cbfff2f542e38165a5b10299b631 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 18 Mar 2024 14:19:53 -0400 Subject: [PATCH 1/3] BRT: Fix holes cloning. - When reading L0 block pointers handle buffers without ones and without dirty records as a holes. Those appear when dnode size was increased, but the end was never written, so there are no new indirection levels to store the pointers. It makes no sense to return EAGAIN here, since sync won't create new indirection levels until there will be actual writes. - When cloning blocks set destination hole logical birth time to the current TXG. Otherwise if we are cloning over existing data, newly created holes may not be properly replicated later. Use BP_SET_BIRTH() when possible to not replicate its logic. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes: #15994 --- module/zfs/dmu.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index b88cf447d296..753dde6d5205 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2265,11 +2265,13 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, if (bp == NULL) { /* - * The block was created in this transaction group, - * so it has no BP yet. + * The file size was increased, but the block was never + * written, otherwise we would either have the block + * pointer or the dirty record and would not get here. + * It is effectively a hole, so report it as such. */ - error = SET_ERROR(EAGAIN); - goto out; + BP_ZERO(&bps[i]); + continue; } /* * Make sure we clone only data blocks. @@ -2361,19 +2363,17 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, ASSERT3U(dr->dr_txg, ==, tx->tx_txg); dl = &dr->dt.dl; dl->dr_overridden_by = *bp; - dl->dr_brtwrite = B_TRUE; - dl->dr_override_state = DR_OVERRIDDEN; - if (BP_IS_HOLE(bp)) { - BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by, 0); - BP_SET_PHYSICAL_BIRTH(&dl->dr_overridden_by, 0); - } else { - BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by, - dr->dr_txg); + if (!BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) != 0) { if (!BP_IS_EMBEDDED(bp)) { - BP_SET_PHYSICAL_BIRTH(&dl->dr_overridden_by, + BP_SET_BIRTH(&dl->dr_overridden_by, dr->dr_txg, BP_GET_BIRTH(bp)); + } else { + BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by, + dr->dr_txg); } } + dl->dr_brtwrite = B_TRUE; + dl->dr_override_state = DR_OVERRIDDEN; mutex_exit(&db->db_mtx); From 714bd9820f901c5041e360936a9641f9ac1903a9 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 19 Mar 2024 12:25:14 -0400 Subject: [PATCH 2/3] BRT: Fix tests to work on non-empty pools It should not normally happen, but if it does, better to not fail everything for no good reason, or it may be hard to debug. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. --- .../functional/bclone/bclone_common.kshlib | 47 ++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib b/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib index 3b8eaea5bb54..84b92b4dcdc9 100644 --- a/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib +++ b/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib @@ -97,20 +97,19 @@ function verify_pool_prop_eq function verify_pool_props { - typeset -r dsize=$1 - typeset -r ratio=$2 + typeset -r oused=$1 + typeset -r osaved=$2 + typeset dsize=$3 + typeset ratio=$4 if [[ $dsize -eq 0 ]]; then - verify_pool_prop_eq bcloneused 0 - verify_pool_prop_eq bclonesaved 0 - verify_pool_prop_eq bcloneratio 1.00 - else - if [[ $ratio -eq 1 ]]; then - verify_pool_prop_eq bcloneused 0 - else - verify_pool_prop_eq bcloneused $dsize - fi - verify_pool_prop_eq bclonesaved $((dsize*(ratio-1))) + ratio=1 + elif [[ $ratio -eq 1 ]]; then + dsize=0 + fi + verify_pool_prop_eq bcloneused $(($oused+$dsize)) + verify_pool_prop_eq bclonesaved $(($osaved+dsize*(ratio-1))) + if [[ $oused -eq 0 ]]; then verify_pool_prop_eq bcloneratio "${ratio}.00" fi } @@ -124,16 +123,22 @@ function bclone_test typeset -r srcdir=$4 typeset -r dstdir=$5 typeset dsize + typeset oused + typeset osaved typeset -r original="${srcdir}/original" typeset -r clone="${dstdir}/clone" log_note "Testing file copy with datatype $datatype, file size $filesize, embedded $embedded" + # Save current block cloning stats for later use. + sync_pool $TESTPOOL + oused=$(get_pool_prop bcloneused $TESTPOOL) + osaved=$(get_pool_prop bclonesaved $TESTPOOL) + # Create a test file with known content. case $datatype in random|text) - sync_pool $TESTPOOL if [[ $datatype = "random" ]]; then dd if=/dev/urandom of=$original bs=$filesize count=1 2>/dev/null else @@ -146,13 +151,13 @@ function bclone_test sync_pool $TESTPOOL # It is hard to predict block sizes that will be used, # so just do one clone and take it from bcloneused. - filesize=$(zpool get -Hp -o value bcloneused $TESTPOOL) + dsize=$(get_pool_prop bcloneused $TESTPOOL) + dsize=$(($dsize-$oused)) if [[ $embedded = "false" ]]; then - log_must test $filesize -gt 0 + log_must test $dsize -gt 0 fi rm -f "${clone}-tmp" sync_pool $TESTPOOL - dsize=$filesize ;; hole) log_must truncate_test -s $filesize -f $original @@ -217,7 +222,7 @@ function bclone_test test_file_integrity $original_checksum "${clone}4" $filesize test_file_integrity $original_checksum "${clone}5" $filesize - verify_pool_props $dsize 7 + verify_pool_props $oused $osaved $dsize 7 # Clear cache and test after fresh import. log_must zpool export $TESTPOOL @@ -240,7 +245,7 @@ function bclone_test sync_pool $TESTPOOL - verify_pool_props $dsize 11 + verify_pool_props $oused $osaved $dsize 11 log_must zpool export $TESTPOOL log_must zpool import $TESTPOOL @@ -268,7 +273,7 @@ function bclone_test test_file_integrity $original_checksum "${clone}8" $filesize test_file_integrity $original_checksum "${clone}9" $filesize - verify_pool_props $dsize 6 + verify_pool_props $oused $osaved $dsize 6 rm -f "${clone}0" "${clone}2" "${clone}4" "${clone}8" "${clone}9" @@ -276,11 +281,11 @@ function bclone_test test_file_integrity $original_checksum "${clone}6" $filesize - verify_pool_props $dsize 1 + verify_pool_props $oused $osaved $dsize 1 rm -f "${clone}6" sync_pool $TESTPOOL - verify_pool_props $dsize 1 + verify_pool_props $oused $osaved $dsize 1 } From a2f659c1f2eeddeea573c337a1cb5c555ff3ee0a Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 19 Mar 2024 13:08:05 -0400 Subject: [PATCH 3/3] BRT: Check pool clone stats in more tests This should allow to catch some leaks, if those happen. While there fix some cosmetic issues. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. --- .../bclone/bclone_corner_cases.kshlib | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib b/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib index ddfbfc999c4e..aeb8efe91715 100644 --- a/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib +++ b/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib @@ -66,7 +66,7 @@ function bclone_corner_cases_init export SECOND_HALF_ORIG0_CHECKSUM=$(second_half_checksum $ORIG0) export SECOND_HALF_ORIG1_CHECKSUM=$(second_half_checksum $ORIG1) export SECOND_HALF_ORIG2_CHECKSUM=$(second_half_checksum $ORIG2) - export ZEROS_CHECKSUM=$(dd if=/dev/zero bs=$HALFRECORDSIZE count=1 | sha256digest) + export ZEROS_CHECKSUM=$(dd if=/dev/zero bs=$HALFRECORDSIZE count=1 2>/dev/null | sha256digest) export FIRST_HALF_CHECKSUM="" export SECOND_HALF_CHECKSUM="" } @@ -210,6 +210,8 @@ function bclone_corner_cases_test typeset -r dstdir=$2 typeset limit=$3 typeset -i count=0 + typeset oused + typeset osaved if [[ $srcdir != "count" ]]; then if [[ -n "$limit" ]]; then @@ -217,6 +219,11 @@ function bclone_corner_cases_test limit=$(random_int_between 1 $total_count $((limit*2)) | sort -nu | head -n $limit | xargs) fi bclone_corner_cases_init $srcdir $dstdir + + # Save current block cloning stats for later use. + sync_pool $TESTPOOL + oused=$(get_pool_prop bcloneused $TESTPOOL) + osaved=$(get_pool_prop bclonesaved $TESTPOOL) fi # @@ -285,21 +292,24 @@ function bclone_corner_cases_test overwrite_clone "$second_overwrite" if checksum_compare $read_after; then - log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after" + log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / second_overwrite: $second_overwrite / read_after: $read_after" else - log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after" + log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / second_overwrite: $second_overwrite / read_after: $read_after" fi log_must zpool export $TESTPOOL log_must zpool import $TESTPOOL if checksum_compare "yes"; then - log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after / read_next_txg" + log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / second_overwrite: $second_overwrite / read_after: $read_after / read_next_txg" else - log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after / read_next_txg" + log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / second_overwrite: $second_overwrite / read_after: $read_after / read_next_txg" fi rm -f "$CLONE" + sync_pool $TESTPOOL + verify_pool_prop_eq bcloneused $oused + verify_pool_prop_eq bclonesaved $osaved done done done