Skip to content

Commit d57777b

Browse files
committed
Fix "Detach spare vdev in case if resilvering does not happen"
A spare vdev should detach from the pool when a disk is reinserted. However, spare detachment depends on the completion of a resilver, and if no resilver is scheduled, the spare vdev remains attached to the pool until the next resilver. When a ZFS pool contains several disks (25+ mirror), a resilver does not always happen when a disk is reinserted. With this patch, the spare vdev is manually detached from the pool when no resilver occurs. It has been tested on both Linux and FreeBSD. Signed-off-by: Ameer Hamza <[email protected]>
1 parent 1038f87 commit d57777b

File tree

6 files changed

+32
-10
lines changed

6 files changed

+32
-10
lines changed

cmd/ztest.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3456,7 +3456,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
34563456
* of devices that have pending state changes.
34573457
*/
34583458
if (ztest_random(2) == 0)
3459-
(void) vdev_online(spa, guid, 0, NULL);
3459+
(void) vdev_online(spa, guid, 0, NULL, NULL);
34603460

34613461
error = spa_vdev_remove(spa, guid, B_FALSE);
34623462

@@ -3935,7 +3935,7 @@ online_vdev(vdev_t *vd, void *arg)
39353935

39363936
/* Calling vdev_online will initialize the new metaslabs */
39373937
spa_config_exit(spa, SCL_STATE, spa);
3938-
error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate);
3938+
error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate, NULL);
39393939
spa_config_enter(spa, SCL_STATE, spa, RW_READER);
39403940

39413941
/*
@@ -6215,7 +6215,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
62156215
* aux_vdev removal.
62166216
*/
62176217
mutex_enter(&ztest_vdev_lock);
6218-
(void) vdev_online(spa, guid0, 0, NULL);
6218+
(void) vdev_online(spa, guid0, 0, NULL, NULL);
62196219
mutex_exit(&ztest_vdev_lock);
62206220
}
62216221
}

include/sys/vdev.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ vdev_gang_header_asize(vdev_t *vd)
146146
extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux);
147147
extern int vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux);
148148
extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
149-
vdev_state_t *);
149+
vdev_state_t *, uint64_t *spare_guid);
150150
extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
151151
extern int vdev_remove_wanted(spa_t *spa, uint64_t guid);
152152
extern void vdev_clear(spa_t *spa, vdev_t *vd);

module/os/freebsd/zfs/vdev_geom.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ vdev_geom_resize(struct g_consumer *cp)
166166
spa = vd->vdev_spa;
167167
if (!spa->spa_autoexpand)
168168
continue;
169-
vdev_online(spa, vd->vdev_guid, ZFS_ONLINE_EXPAND, NULL);
169+
vdev_online(spa, vd->vdev_guid, ZFS_ONLINE_EXPAND, NULL, NULL);
170170
}
171171
}
172172

module/zfs/spa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7058,7 +7058,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
70587058
* Detach a device from a mirror or replacing vdev.
70597059
*
70607060
* If 'replace_done' is specified, only detach if the parent
7061-
* is a replacing vdev.
7061+
* is a replacing or a spare vdev.
70627062
*/
70637063
int
70647064
spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)

module/zfs/vdev.c

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4102,11 +4102,13 @@ vdev_remove_wanted(spa_t *spa, uint64_t guid)
41024102
* events are generated if the device fails to open.
41034103
*/
41044104
int
4105-
vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
4105+
vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate,
4106+
uint64_t *spare_guid)
41064107
{
41074108
vdev_t *vd, *tvd, *pvd, *rvd = spa->spa_root_vdev;
41084109
boolean_t wasoffline;
41094110
vdev_state_t oldstate;
4111+
boolean_t parent_is_spare = B_FALSE;
41104112

41114113
spa_vdev_state_enter(spa, SCL_NONE);
41124114

@@ -4146,8 +4148,10 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
41464148
if ((flags & ZFS_ONLINE_UNSPARE) &&
41474149
!vdev_is_dead(vd) && vd->vdev_parent &&
41484150
vd->vdev_parent->vdev_ops == &vdev_spare_ops &&
4149-
vd->vdev_parent->vdev_child[0] == vd)
4151+
vd->vdev_parent->vdev_child[0] == vd) {
41504152
vd->vdev_unspare = B_TRUE;
4153+
parent_is_spare = B_TRUE;
4154+
}
41514155

41524156
if ((flags & ZFS_ONLINE_EXPAND) || spa->spa_autoexpand) {
41534157

@@ -4183,9 +4187,23 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
41834187

41844188
if (wasoffline ||
41854189
(oldstate < VDEV_STATE_DEGRADED &&
4186-
vd->vdev_state >= VDEV_STATE_DEGRADED))
4190+
vd->vdev_state >= VDEV_STATE_DEGRADED)) {
41874191
spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_ONLINE);
41884192

4193+
/*
4194+
* If no resilver or rebuild is running or scheduled, report
4195+
* the spare vdev's guid so that the caller can detach it.
4196+
*/
4197+
if (spare_guid && !dsl_scan_resilvering(spa->spa_dsl_pool) &&
4198+
!dsl_scan_resilver_scheduled(spa->spa_dsl_pool) &&
4199+
!vdev_rebuild_active(tvd) && parent_is_spare &&
4200+
vd->vdev_parent->vdev_child[1] &&
4201+
vd->vdev_parent->vdev_child[1]->vdev_isspare) {
4202+
*spare_guid =
4203+
vd->vdev_parent->vdev_child[1]->vdev_guid;
4204+
}
4205+
}
4206+
41894207
return (spa_vdev_state_exit(spa, vd, 0));
41904208
}
41914209

module/zfs/zfs_ioctl.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1882,12 +1882,16 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
18821882
spa_t *spa;
18831883
int error;
18841884
vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1885+
uint64_t spare_guid = 0;
18851886

18861887
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
18871888
return (error);
18881889
switch (zc->zc_cookie) {
18891890
case VDEV_STATE_ONLINE:
1890-
error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1891+
error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate,
1892+
&spare_guid);
1893+
if (error == 0 && spare_guid != 0)
1894+
spa_vdev_detach(spa, spare_guid, 0, B_FALSE);
18911895
break;
18921896

18931897
case VDEV_STATE_OFFLINE:

0 commit comments

Comments
 (0)