Skip to content

Commit becab0a

Browse files
committed
Tighten up 'zpool remove' space check
The available space check in spa_vdev_remove_top_check() is intended to verify there is enough available space on the other devices before starting the removal process. This is obviously a good idea but the current check can significantly overestimate the space requirements and often prevent removal when it clearly should be possible. This change reworks the check to use the per-vdev vs_stats. This is sufficiently accurate and is convenient because it allows a direct comparison to the allocated space. If using dsl_dir_space_available() then the available space of the device being removed is also included in the return value and must somehow be accounted for. Additionally, we reduce the slop space requirement to be inline with with the capacity of the pool after the device has been removed. This way if a large device is accidentally added to a small pool the vastly increased slop requirement of the larger pool won't prevent removal. For example, it was previously possible that even newly added empty vdevs couldn't be removed due to the increased slop requirement. This was particularly unfortunate since one of the main motivations for this feature was to allow such mistakes to be corrected. Lastly it's worth mentioning that by allowing the removal to start with close to the minimum required free space it is more likely that an active pool close to capacity may fail the removal process. This failure case has always been possible since we can't know what new data will be written during the removal process. It is correctly handled and there's no danager to the pool. Signed-off-by: Brian Behlendorf <[email protected]> Issue #11356
1 parent 808e681 commit becab0a

File tree

8 files changed

+97
-19
lines changed

8 files changed

+97
-19
lines changed

include/sys/spa.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,7 @@ extern uint64_t spa_freeze_txg(spa_t *spa);
10411041
extern uint64_t spa_get_worst_case_asize(spa_t *spa, uint64_t lsize);
10421042
extern uint64_t spa_get_dspace(spa_t *spa);
10431043
extern uint64_t spa_get_checkpoint_space(spa_t *spa);
1044-
extern uint64_t spa_get_slop_space(spa_t *spa);
1044+
extern uint64_t spa_get_slop_space(spa_t *spa, int64_t);
10451045
extern void spa_update_dspace(spa_t *spa);
10461046
extern uint64_t spa_version(spa_t *spa);
10471047
extern boolean_t spa_deflate(spa_t *spa);

module/zfs/dsl_pool.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,7 @@ dsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy)
861861

862862
space = spa_get_dspace(spa)
863863
- spa_get_checkpoint_space(spa) - spa_deferred_frees;
864-
resv = spa_get_slop_space(spa);
864+
resv = spa_get_slop_space(spa, 0);
865865

866866
switch (slop_policy) {
867867
case ZFS_SPACE_CHECK_NORMAL:

module/zfs/metaslab.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4261,7 +4261,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
42614261

42624262
uint64_t free_space = metaslab_class_get_space(spa_normal_class(spa)) -
42634263
metaslab_class_get_alloc(spa_normal_class(spa));
4264-
if (free_space <= spa_get_slop_space(spa) || vd->vdev_removing) {
4264+
if (free_space <= spa_get_slop_space(spa, 0) || vd->vdev_removing) {
42654265
defer_allowed = B_FALSE;
42664266
}
42674267

module/zfs/spa_misc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1783,9 +1783,9 @@ spa_get_worst_case_asize(spa_t *spa, uint64_t lsize)
17831783
* See the comment above spa_slop_shift for details.
17841784
*/
17851785
uint64_t
1786-
spa_get_slop_space(spa_t *spa)
1786+
spa_get_slop_space(spa_t *spa, int64_t adjustment)
17871787
{
1788-
uint64_t space = spa_get_dspace(spa);
1788+
uint64_t space = spa_get_dspace(spa) + adjustment;
17891789
return (MAX(space >> spa_slop_shift, MIN(space >> 1, spa_min_slop)));
17901790
}
17911791

module/zfs/vdev_removal.c

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1995,19 +1995,30 @@ spa_vdev_remove_top_check(vdev_t *vd)
19951995
if (available < vd->vdev_stat.vs_alloc)
19961996
return (SET_ERROR(ENOSPC));
19971997
} else {
1998-
/* available space in the pool's normal class */
1999-
uint64_t available = dsl_dir_space_available(
2000-
spa->spa_dsl_pool->dp_root_dir, NULL, 0, B_TRUE);
2001-
if (available <
2002-
vd->vdev_stat.vs_dspace + spa_get_slop_space(spa)) {
2003-
/*
2004-
* This is a normal device. There has to be enough free
2005-
* space to remove the device and leave double the
2006-
* "slop" space (i.e. we must leave at least 3% of the
2007-
* pool free, in addition to the normal slop space).
2008-
*/
1998+
vdev_stat_t *rs = &spa->spa_root_vdev->vdev_stat;
1999+
vdev_stat_t *vs = &vd->vdev_stat;
2000+
2001+
/*
2002+
* This is a normal device. There has to be enough free
2003+
* space on the other pool devices to copy the allocated
2004+
* space from the device being removed plus double the
2005+
* "slop" space (i.e. we must leave at least 3% of the
2006+
* pool free, in addition to the normal slop space).
2007+
*/
2008+
uint64_t available = (rs->vs_dspace - rs->vs_alloc) -
2009+
(vs->vs_dspace - vs->vs_alloc);
2010+
2011+
/*
2012+
* Calculate the "slop" space requirement based on the pool
2013+
* capacity after the device has been removed. This helps
2014+
* avoid the case where removal is prevented because a large
2015+
* vdev is added to a small pool resulting in a vastly
2016+
* inflated slop requirement.
2017+
*/
2018+
uint64_t slop = spa_get_slop_space(spa, -vs->vs_dspace);
2019+
2020+
if (available < vd->vdev_stat.vs_alloc + 2 * slop)
20092021
return (SET_ERROR(ENOSPC));
2010-
}
20112022
}
20122023

20132024
/*

tests/runfiles/common.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,7 @@ tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space',
756756
'removal_with_send_recv', 'removal_with_snapshot',
757757
'removal_with_write', 'removal_with_zdb', 'remove_expanded',
758758
'remove_mirror', 'remove_mirror_sanity', 'remove_raidz',
759-
'remove_indirect', 'remove_attach_mirror']
759+
'remove_indirect', 'remove_attach_mirror', 'remove_unbalanced']
760760
tags = ['functional', 'removal']
761761

762762
[tests/functional/rename_dirs]

tests/zfs-tests/tests/functional/removal/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ dist_pkgdata_SCRIPTS = \
3030
removal_with_snapshot.ksh removal_with_write.ksh \
3131
removal_with_zdb.ksh remove_mirror.ksh remove_mirror_sanity.ksh \
3232
remove_raidz.ksh remove_expanded.ksh remove_indirect.ksh \
33-
remove_attach_mirror.ksh
33+
remove_attach_mirror.ksh remove_unbalanced.ksh
3434

3535
dist_pkgdata_DATA = \
3636
removal.kshlib
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/bin/ksh -p
2+
#
3+
# CDDL HEADER START
4+
#
5+
# This file and its contents are supplied under the terms of the
6+
# Common Development and Distribution License ("CDDL"), version 1.0.
7+
# You may only use this file in accordance with the terms of version
8+
# 1.0 of the CDDL.
9+
#
10+
# A full copy of the text of the CDDL should have accompanied this
11+
# source. A copy of the CDDL is also available via the Internet at
12+
# http://www.illumos.org/license/CDDL.
13+
#
14+
# CDDL HEADER END
15+
#
16+
17+
#
18+
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
19+
#
20+
21+
. $STF_SUITE/include/libtest.shlib
22+
. $STF_SUITE/tests/functional/removal/removal.kshlib
23+
24+
#
25+
# DESCRIPTION:
26+
# Device removal is possible for highly unbalanced vdevs.
27+
#
28+
# STRATEGY:
29+
# 1. Create a pool with unbalanced removable devices
30+
# 2. Remove the large top-level device
31+
#
32+
33+
verify_runnable "global"
34+
35+
function cleanup
36+
{
37+
destroy_pool $TESTPOOL
38+
log_must rm -f $TEST_BASE_DIR/device-{1,2,3,4}
39+
}
40+
41+
log_assert "Device removal is possible for highly unbalanced vdevs."
42+
log_onexit cleanup
43+
44+
# 1. Create a pool with unbalanced removable devices
45+
truncate -s 1G $TEST_BASE_DIR/device-{1,2}
46+
truncate -s 1T $TEST_BASE_DIR/device-{3,4}
47+
zpool create $TESTPOOL mirror $TEST_BASE_DIR/device-{1,2} \
48+
mirror $TEST_BASE_DIR/device-{3,4}
49+
50+
WORDS_FILE1="/usr/dict/words"
51+
WORDS_FILE2="/usr/share/dict/words"
52+
FILE_CONTENTS="Leeloo Dallas mul-ti-pass."
53+
54+
if [[ -f $WORDS_FILE1 ]]; then
55+
log_must cp $WORDS_FILE1 $TESTDIR
56+
elif [[ -f $WORDS_FILE2 ]]; then
57+
log_must cp $WORDS_FILE2 $TESTDIR
58+
else
59+
echo $FILE_CONTENTS >$TESTDIR/$TESTFILE0
60+
log_must [ "x$(<$TESTDIR/$TESTFILE0)" = "x$FILE_CONTENTS" ]
61+
fi
62+
63+
# 2. Remove the large top-level device
64+
log_must zpool remove $TESTPOOL mirror-1
65+
log_must wait_for_removal $TESTPOOL
66+
67+
log_pass "Device removal is possible for highly unbalanced vdevs"

0 commit comments

Comments
 (0)