Skip to content

Commit 13cda05

Browse files
committed
Tighten up 'zpool remove' space check
The available space check in spa_vdev_remove_top_check() is intended to verify there is enough available space on the other devices before starting the removal process. This is obviously a good idea but the current check can significantly overestimate the space requirements and often prevent removal when it clearly should be possible. This change reworks the check slightly. First, we reduce the slop space requirement to be inline with with the capacity of the pool after the device has been removed. This way if a large device is accidentally added to a small pool the vastly increased slop requirement of the larger pool won't prevent removal. For example, it was previously possible that even newly added empty vdevs couldn't be removed due to the increased slop requirement. This was particularly unfortunate since one of the main motivations for this feature was to allow such mistakes to be corrected. Second, we handle the case of very small pools where the minimum slop size of 128M represents a large fraction (>25%) of the total pool capacity. In this case, it's reasonable to reduce the extra slop requirement. Lastly it's worth mentioning that by allowing the removal to start with close to the minimum required free space it is more likely that an active pool close to capacity may fail the removal process. This failure case has always been possible since we can't know what new data will be written during the removal process. It is correctly handled and there's no danger to the pool. Signed-off-by: Brian Behlendorf <[email protected]> Issue #11356
1 parent 06346cc commit 13cda05

File tree

9 files changed

+157
-20
lines changed

9 files changed

+157
-20
lines changed

include/sys/spa.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,7 @@ extern uint64_t spa_freeze_txg(spa_t *spa);
10411041
extern uint64_t spa_get_worst_case_asize(spa_t *spa, uint64_t lsize);
10421042
extern uint64_t spa_get_dspace(spa_t *spa);
10431043
extern uint64_t spa_get_checkpoint_space(spa_t *spa);
1044-
extern uint64_t spa_get_slop_space(spa_t *spa);
1044+
extern uint64_t spa_get_slop_space(spa_t *spa, int64_t);
10451045
extern void spa_update_dspace(spa_t *spa);
10461046
extern uint64_t spa_version(spa_t *spa);
10471047
extern boolean_t spa_deflate(spa_t *spa);

module/zfs/dsl_pool.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,7 @@ dsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy)
861861

862862
space = spa_get_dspace(spa)
863863
- spa_get_checkpoint_space(spa) - spa_deferred_frees;
864-
resv = spa_get_slop_space(spa);
864+
resv = spa_get_slop_space(spa, 0);
865865

866866
switch (slop_policy) {
867867
case ZFS_SPACE_CHECK_NORMAL:

module/zfs/metaslab.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4261,7 +4261,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
42614261

42624262
uint64_t free_space = metaslab_class_get_space(spa_normal_class(spa)) -
42634263
metaslab_class_get_alloc(spa_normal_class(spa));
4264-
if (free_space <= spa_get_slop_space(spa) || vd->vdev_removing) {
4264+
if (free_space <= spa_get_slop_space(spa, 0) || vd->vdev_removing) {
42654265
defer_allowed = B_FALSE;
42664266
}
42674267

module/zfs/spa_misc.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1778,14 +1778,16 @@ spa_get_worst_case_asize(spa_t *spa, uint64_t lsize)
17781778
/*
17791779
* Return the amount of slop space in bytes. It is 1/32 of the pool (3.2%),
17801780
* or at least 128MB, unless that would cause it to be more than half the
1781-
* pool size.
1781+
* pool size. An adjustment may be passed to apply the calculation to a
1782+
* modified pool size. For example, in the case of top-level device removal
1783+
* where the total pool capacity will be reduced.
17821784
*
17831785
* See the comment above spa_slop_shift for details.
17841786
*/
17851787
uint64_t
1786-
spa_get_slop_space(spa_t *spa)
1788+
spa_get_slop_space(spa_t *spa, int64_t adjustment)
17871789
{
1788-
uint64_t space = spa_get_dspace(spa);
1790+
uint64_t space = spa_get_dspace(spa) + adjustment;
17891791
return (MAX(space >> spa_slop_shift, MIN(space >> 1, spa_min_slop)));
17901792
}
17911793

module/zfs/vdev_removal.c

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1995,19 +1995,44 @@ spa_vdev_remove_top_check(vdev_t *vd)
19951995
if (available < vd->vdev_stat.vs_alloc)
19961996
return (SET_ERROR(ENOSPC));
19971997
} else {
1998-
/* available space in the pool's normal class */
1999-
uint64_t available = dsl_dir_space_available(
2000-
spa->spa_dsl_pool->dp_root_dir, NULL, 0, B_TRUE);
2001-
if (available <
2002-
vd->vdev_stat.vs_dspace + spa_get_slop_space(spa)) {
2003-
/*
2004-
* This is a normal device. There has to be enough free
2005-
* space to remove the device and leave double the
2006-
* "slop" space (i.e. we must leave at least 3% of the
2007-
* pool free, in addition to the normal slop space).
2008-
*/
1998+
vdev_stat_t *rs = &spa->spa_root_vdev->vdev_stat;
1999+
vdev_stat_t *vs = &vd->vdev_stat;
2000+
2001+
/*
2002+
* This is a normal device. There has to be enough free
2003+
* space in the pool's normal class to copy the allocated
2004+
* space from the device being removed, this will account
2005+
* for both reserved and "slop" space. Any available
2006+
* capacity on the device being removed is deducted from
2007+
* the total available pool capacity.
2008+
*/
2009+
int64_t available = dsl_dir_space_available(
2010+
spa->spa_dsl_pool->dp_root_dir, NULL, 0, B_TRUE) -
2011+
(vs->vs_dspace - vs->vs_alloc);
2012+
2013+
/*
2014+
* Calculate the "slop" space requirement based on the pool
2015+
* capacity after the device has been removed. This helps
2016+
* avoid the case where removal is prevented because a large
2017+
* vdev is added to a small pool resulting in a vastly
2018+
* inflated slop requirement. Furthermore, if the "slop"
2019+
* space exceeds a quarter of the total pool capacity then
2020+
* the pool must be tiny and this requirement is reduced.
2021+
*/
2022+
int64_t slop = spa_get_slop_space(spa, -vs->vs_dspace);
2023+
int64_t factor = 4;
2024+
2025+
if (slop > (rs->vs_dspace - vs->vs_dspace) / 4)
2026+
factor = 1;
2027+
2028+
/*
2029+
* There has to be enough free space to remove the device
2030+
* and leave double the "slop" space (i.e. we must leave
2031+
* at least 3% of the pool free, in addition to the normal
2032+
* slop space).
2033+
*/
2034+
if (available < (int64_t)vs->vs_alloc + (factor * slop / 2))
20092035
return (SET_ERROR(ENOSPC));
2010-
}
20112036
}
20122037

20132038
/*

tests/runfiles/common.run

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,8 @@ tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space',
756756
'removal_with_send_recv', 'removal_with_snapshot',
757757
'removal_with_write', 'removal_with_zdb', 'remove_expanded',
758758
'remove_mirror', 'remove_mirror_sanity', 'remove_raidz',
759-
'remove_indirect', 'remove_attach_mirror']
759+
'remove_indirect', 'remove_attach_mirror', 'remove_minimum',
760+
'remove_unbalanced']
760761
tags = ['functional', 'removal']
761762

762763
[tests/functional/rename_dirs]

tests/zfs-tests/tests/functional/removal/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ dist_pkgdata_SCRIPTS = \
3030
removal_with_snapshot.ksh removal_with_write.ksh \
3131
removal_with_zdb.ksh remove_mirror.ksh remove_mirror_sanity.ksh \
3232
remove_raidz.ksh remove_expanded.ksh remove_indirect.ksh \
33-
remove_attach_mirror.ksh
33+
remove_attach_mirror.ksh remove_minimum.ksh remove_unbalanced.ksh
3434

3535
dist_pkgdata_DATA = \
3636
removal.kshlib
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/bin/ksh -p
2+
#
3+
# CDDL HEADER START
4+
#
5+
# This file and its contents are supplied under the terms of the
6+
# Common Development and Distribution License ("CDDL"), version 1.0.
7+
# You may only use this file in accordance with the terms of version
8+
# 1.0 of the CDDL.
9+
#
10+
# A full copy of the text of the CDDL should have accompanied this
11+
# source. A copy of the CDDL is also available via the Internet at
12+
# http://www.illumos.org/license/CDDL.
13+
#
14+
# CDDL HEADER END
15+
#
16+
17+
#
18+
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
19+
#
20+
21+
. $STF_SUITE/include/libtest.shlib
22+
. $STF_SUITE/tests/functional/removal/removal.kshlib
23+
24+
#
25+
# DESCRIPTION:
26+
# Device removal is possible for minimum sized vdevs.
27+
#
28+
# STRATEGY:
29+
# 1. Create a pool with minimum sized removable devices
30+
# 2. Remove a top-level device
31+
#
32+
33+
verify_runnable "global"
34+
35+
function cleanup
36+
{
37+
default_cleanup_noexit
38+
log_must rm -f $TEST_BASE_DIR/device-{1,2}
39+
}
40+
41+
log_assert "Device removal is possible for minimum sized vdevs."
42+
log_onexit cleanup
43+
44+
# 1. Create a pool with minimum sized removable devices
45+
log_must truncate -s $MINVDEVSIZE $TEST_BASE_DIR/device-{1,2}
46+
log_must default_setup_noexit "$TEST_BASE_DIR/device-1 $TEST_BASE_DIR/device-2"
47+
48+
log_must dd if=/dev/urandom of=$TESTDIR/$TESTFILE0 bs=1M count=32
49+
50+
# 2. Remove a top-level device
51+
log_must zpool remove $TESTPOOL $TEST_BASE_DIR/device-1
52+
log_must wait_for_removal $TESTPOOL
53+
54+
log_pass "Device removal is possible for minimum sized vdevs"
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/bin/ksh -p
2+
#
3+
# CDDL HEADER START
4+
#
5+
# This file and its contents are supplied under the terms of the
6+
# Common Development and Distribution License ("CDDL"), version 1.0.
7+
# You may only use this file in accordance with the terms of version
8+
# 1.0 of the CDDL.
9+
#
10+
# A full copy of the text of the CDDL should have accompanied this
11+
# source. A copy of the CDDL is also available via the Internet at
12+
# http://www.illumos.org/license/CDDL.
13+
#
14+
# CDDL HEADER END
15+
#
16+
17+
#
18+
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
19+
#
20+
21+
. $STF_SUITE/include/libtest.shlib
22+
. $STF_SUITE/tests/functional/removal/removal.kshlib
23+
24+
#
25+
# DESCRIPTION:
26+
# Device removal is possible for highly unbalanced vdevs.
27+
#
28+
# STRATEGY:
29+
# 1. Create a pool with unbalanced removable devices
30+
# 2. Remove the large top-level device
31+
#
32+
33+
verify_runnable "global"
34+
35+
function cleanup
36+
{
37+
default_cleanup_noexit
38+
log_must rm -f $TEST_BASE_DIR/device-{1,2}
39+
}
40+
41+
log_assert "Device removal is possible for highly unbalanced vdevs."
42+
log_onexit cleanup
43+
44+
# 1. Create a pool with unbalanced removable devices
45+
log_must truncate -s 10G $TEST_BASE_DIR/device-1
46+
log_must truncate -s 1T $TEST_BASE_DIR/device-2
47+
log_must default_setup_noexit "$TEST_BASE_DIR/device-1 $TEST_BASE_DIR/device-2"
48+
49+
log_must dd if=/dev/urandom of=$TESTDIR/$TESTFILE0 bs=1M count=32
50+
51+
# 2. Remove the large top-level device
52+
log_must zpool remove $TESTPOOL $TEST_BASE_DIR/device-1
53+
log_must wait_for_removal $TESTPOOL
54+
55+
log_pass "Device removal is possible for highly unbalanced vdevs"

0 commit comments

Comments
 (0)