Skip to content

Commit 307f747

Browse files
Ryan Moeller authored and behlendorf committed
ZED: Match added disk by pool/vdev GUID if found (#12217)
This enables ZED to auto-online vdevs that are not wholedisk managed by ZFS. Signed-off-by: Ryan Moeller <[email protected]> Reviewed-by: Don Brady <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Tony Hutter <[email protected]>
1 parent ec6a6e8 commit 307f747

File tree

6 files changed

+137
-10
lines changed

6 files changed

+137
-10
lines changed

cmd/zed/agents/zfs_mod.c

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,27 @@ devid_iter(const char *devid, zfs_process_func_t func, boolean_t is_slice)
640640
return (data.dd_found);
641641
}
642642

643+
/*
644+
* Given a device guid, find any vdevs with a matching guid.
645+
*/
646+
static boolean_t
647+
guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid,
648+
zfs_process_func_t func, boolean_t is_slice)
649+
{
650+
dev_data_t data = { 0 };
651+
652+
data.dd_func = func;
653+
data.dd_found = B_FALSE;
654+
data.dd_pool_guid = pool_guid;
655+
data.dd_vdev_guid = vdev_guid;
656+
data.dd_islabeled = is_slice;
657+
data.dd_new_devid = devid;
658+
659+
(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
660+
661+
return (data.dd_found);
662+
}
663+
643664
/*
644665
* Handle an EC_DEV_ADD.ESC_DISK event.
645666
*
@@ -663,15 +684,18 @@ static int
663684
zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
664685
{
665686
char *devpath = NULL, *devid;
687+
uint64_t pool_guid = 0, vdev_guid = 0;
666688
boolean_t is_slice;
667689

668690
/*
669-
* Expecting a devid string and an optional physical location
691+
* Expecting a devid string and an optional physical location and guid
670692
*/
671693
if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0)
672694
return (-1);
673695

674696
(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
697+
(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
698+
(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
675699

676700
is_slice = (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0);
677701

@@ -682,12 +706,16 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
682706
* Iterate over all vdevs looking for a match in the following order:
683707
* 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
684708
* 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
685-
*
686-
* For disks, we only want to pay attention to vdevs marked as whole
687-
* disks or are a multipath device.
709+
* 3. ZPOOL_CONFIG_GUID (identifies unique vdev).
688710
*/
689-
if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL)
690-
(void) devphys_iter(devpath, devid, zfs_process_add, is_slice);
711+
if (devid_iter(devid, zfs_process_add, is_slice))
712+
return (0);
713+
if (devpath != NULL && devphys_iter(devpath, devid, zfs_process_add,
714+
is_slice))
715+
return (0);
716+
if (vdev_guid != 0)
717+
(void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add,
718+
is_slice);
691719

692720
return (0);
693721
}

cmd/zed/zed_disk_event.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
7272
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
7373
if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
7474
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
75+
/*
 * nvlist_lookup_boolean() returns 0 when the named boolean pair is
 * present and an errno value otherwise, so presence is tested with
 * "== 0".  Comparing against B_TRUE never matched, which meant this
 * line was never logged.
 */
if (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0)
	zed_log_msg(LOG_INFO, "\t%s: B_TRUE", DEV_IS_PART);
7577
if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
7678
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
7779
if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)

tests/runfiles/linux.run

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,11 @@ tests = ['fallocate_prealloc', 'fallocate_punch-hole']
9898
tags = ['functional', 'fallocate']
9999

100100
[tests/functional/fault:Linux]
101-
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos',
102-
'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_multiple',
103-
'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
104-
'decompress_fault', 'scrub_after_resilver', 'zpool_status_-s']
101+
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
102+
'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos',
103+
'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared',
104+
'decrypt_fault', 'decompress_fault', 'scrub_after_resilver',
105+
'zpool_status_-s']
105106
tags = ['functional', 'fault']
106107

107108
[tests/functional/features/large_dnode:Linux]

tests/test-runner/bin/zts-report.py.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ if os.environ.get('CI') == 'true':
323323
'cli_root/zpool_split/zpool_split_wholedisk': ['SKIP', ci_reason],
324324
'fault/auto_offline_001_pos': ['SKIP', ci_reason],
325325
'fault/auto_online_001_pos': ['SKIP', ci_reason],
326+
'fault/auto_online_002_pos': ['SKIP', ci_reason],
326327
'fault/auto_replace_001_pos': ['SKIP', ci_reason],
327328
'fault/auto_spare_ashift': ['SKIP', ci_reason],
328329
'fault/auto_spare_shared': ['SKIP', ci_reason],

tests/zfs-tests/tests/functional/fault/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ dist_pkgdata_SCRIPTS = \
44
cleanup.ksh \
55
auto_offline_001_pos.ksh \
66
auto_online_001_pos.ksh \
7+
auto_online_002_pos.ksh \
78
auto_replace_001_pos.ksh \
89
auto_spare_001_pos.ksh \
910
auto_spare_002_pos.ksh \
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/bin/ksh -p
2+
#
3+
# CDDL HEADER START
4+
#
5+
# The contents of this file are subject to the terms of the
6+
# Common Development and Distribution License (the "License").
7+
# You may not use this file except in compliance with the License.
8+
#
9+
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10+
# or http://www.opensolaris.org/os/licensing.
11+
# See the License for the specific language governing permissions
12+
# and limitations under the License.
13+
#
14+
# When distributing Covered Code, include this CDDL HEADER in each
15+
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16+
# If applicable, add the following below this CDDL HEADER, with the
17+
# fields enclosed by brackets "[]" replaced with your own identifying
18+
# information: Portions Copyright [yyyy] [name of copyright owner]
19+
#
20+
# CDDL HEADER END
21+
#
22+
# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
23+
# Copyright (c) 2019 by Delphix. All rights reserved.
24+
# Portions Copyright 2021 iXsystems, Inc.
25+
#
26+
27+
. $STF_SUITE/include/libtest.shlib
28+
. $STF_SUITE/tests/functional/fault/fault.cfg
29+
30+
#
31+
# DESCRIPTION:
32+
# Testing Fault Management Agent ZED Logic - Automated Auto-Online Test.
33+
# Now with partitioned vdevs.
34+
#
35+
# STRATEGY:
36+
# 1. Partition a scsi_debug device for simulating removal
37+
# 2. Create a pool
38+
# 3. Offline disk
39+
# 4. ZED polls for an event change for online disk to be automatically
40+
# added back to the pool.
41+
#
42+
verify_runnable "both"
43+
44+
#
# Tear-down routine: destroy ${TESTPOOL} if it still exists, then
# call unload_scsi_debug.
#
function cleanup
{
	if poolexists ${TESTPOOL}; then
		destroy_pool ${TESTPOOL}
	fi
	unload_scsi_debug
}
49+
50+
log_assert "Testing automated auto-online FMA test with partitioned vdev"

log_onexit cleanup

# Load a scsi_debug device, clear any ZFS label on it, and create a single
# partition; the pool is built on that partition rather than the whole disk.
load_scsi_debug ${SDSIZE} ${SDHOSTS} ${SDTGTS} ${SDLUNS} '512b'
SDDEVICE=$(get_debug_device)
zpool labelclear -f ${SDDEVICE}
partition_disk ${SDSIZE} ${SDDEVICE} 1
part=${SDDEVICE}1
# Record the SCSI host now so the disk can be re-inserted after removal.
host=$(get_scsi_host ${SDDEVICE})

# Wait for the partition's device node before creating the raidz1 pool.
block_device_wait /dev/${part}
log_must zpool create -f ${TESTPOOL} raidz1 ${part} ${DISKS}

# Put some data in the pool.
log_must mkfile ${FSIZE} /${TESTPOOL}/data

# Remove the scsi_debug disk; the pool must report a degraded state.
remove_disk ${SDDEVICE}
if ! check_state ${TESTPOOL} "" "degraded"; then
	log_fail "${TESTPOOL} is not degraded"
fi

# Clear zpool events before the disk comes back.
log_must zpool events -c

# Re-insert the disk on its original host.
insert_disk ${SDDEVICE} ${host}

# Poll once per second until the pool reports resilvered, failing the test
# once the counter reaches MAXTIMEOUT.
log_note "Delay for ZED auto-online"
typeset -i timeout=0
while ! is_pool_resilvered ${TESTPOOL}; do
	((timeout++ == MAXTIMEOUT)) && log_fail "Timeout occurred"
	sleep 1
done
log_note "Auto-online of ${SDDEVICE} is complete"

# Confirm the pool is back online after a short pause.
sleep 1
if ! check_state ${TESTPOOL} "" "online"; then
	log_fail "${TESTPOOL} is not back online"
fi

log_must zpool destroy ${TESTPOOL}

log_pass "Auto-online with partitioned vdev test successful"

0 commit comments

Comments
 (0)