Skip to content

Commit a77eac0

Browse files
committed
Add 'zpool status -e' flag to see unhealthy vdevs
When very large pools are present, it can be laborious to find out why a pool is degraded and/or where an unhealthy vdev is. This option filters out vdevs that are ONLINE and have no errors, to make it easier to see where the issues are. The root and the parents of unhealthy vdevs will always be printed.

Testing: ZFS errors and drive failures for multiple vdevs were simulated with zinject.

Sample vdev listings with the '-e' option:

- All vdevs healthy

  NAME          STATE     READ WRITE CKSUM
  iron5         ONLINE       0     0     0

- ZFS errors

  NAME          STATE     READ WRITE CKSUM
  iron5         ONLINE       0     0     0
    raidz2-5    ONLINE       1     0     0
      L23       ONLINE       1     0     0
      L24       ONLINE       1     0     0
      L37       ONLINE       1     0     0

- Vdev faulted

  NAME          STATE     READ WRITE CKSUM
  iron5         DEGRADED     0     0     0
    raidz2-6    DEGRADED     0     0     0
      L67       FAULTED      0     0     0  too many errors

- Vdev faults and data errors

  NAME          STATE     READ WRITE CKSUM
  iron5         DEGRADED     0     0     0
    raidz2-1    DEGRADED     0     0     0
      L2        FAULTED      0     0     0  too many errors
    raidz2-5    ONLINE       1     0     0
      L23       ONLINE       1     0     0
      L24       ONLINE       1     0     0
      L37       ONLINE       1     0     0
    raidz2-6    DEGRADED     0     0     0
      L67       FAULTED      0     0     0  too many errors

- Vdev missing

  NAME          STATE     READ WRITE CKSUM
  iron5         DEGRADED     0     0     0
    raidz2-6    DEGRADED     0     0     0
      L67       UNAVAIL      3     1     0

- Slow devices when -s provided with -e

  NAME          STATE     READ WRITE CKSUM  SLOW
  iron5         DEGRADED     0     0     0     -
    raidz2-5    DEGRADED     0     0     0     -
      L10       FAULTED      0     0     0     0  external device fault
      L51       ONLINE       0     0     0    14

Signed-off-by: Cameron Harr <[email protected]>
1 parent 2e6b3c4 commit a77eac0

File tree

2 files changed

+57
-5
lines changed

2 files changed

+57
-5
lines changed

cmd/zpool/zpool_main.c

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2161,6 +2161,7 @@ typedef struct status_cbdata {
21612161
boolean_t cb_explain;
21622162
boolean_t cb_first;
21632163
boolean_t cb_dedup_stats;
2164+
boolean_t cb_print_unhealthy;
21642165
boolean_t cb_print_status;
21652166
boolean_t cb_print_slow_ios;
21662167
boolean_t cb_print_vdev_init;
@@ -2357,6 +2358,35 @@ health_str_to_color(const char *health)
23572358
return (NULL);
23582359
}
23592360

2361+
/*
2362+
* Called for each leaf vdev. Returns 0 if the vdev is healthy.
* NOTE: despite the name, a nonzero return (1) means the vdev is
* unhealthy; the '-e' pruning in print_status_config() skips a vdev
* only when for_each_vdev_in_nvlist() with this callback yields 0.
2363+
* A vdev is unhealthy if any of the following are true:
2364+
* 1) there are read, write, or checksum errors,
2365+
* 2) its state is not ONLINE, or
2366+
* 3) slow IO reporting was requested (-s) and there are slow IOs.
* A vdev whose ZPOOL_CONFIG_VDEV_STATS cannot be looked up is also
* reported as unhealthy (1).
*
* hdl_data - unused; present only to match the callback signature.
* nv       - vdev config nvlist from which the vdev stats are read.
* data     - status_cbdata_t *; only cb_print_slow_ios is consulted.
2367+
*/
2368+
static int
2369+
vdev_is_healthy(void *hdl_data, nvlist_t *nv, void *data)
2370+
{
2371+
status_cbdata_t *cb = data;
2372+
vdev_stat_t *vs;
2373+
/* Element count filled in by the lookup; not otherwise used here. */
2374+
uint_t vsc;
/* Explicitly mark the handle argument as intentionally unused. */
2374+
(void) hdl_data;
2375+
/* Without stats we cannot prove health, so report unhealthy. */
2376+
if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
2377+
(uint64_t **)&vs, &vsc) != 0)
2378+
return (1);
2379+
/* Any error counter or a state other than HEALTHY marks it unhealthy. */
2380+
if (vs->vs_checksum_errors || vs->vs_read_errors ||
2381+
vs->vs_write_errors || vs->vs_state != VDEV_STATE_HEALTHY)
2382+
return (1);
2383+
/* Slow IOs only count against the vdev when -s was requested. */
2384+
if (cb->cb_print_slow_ios && vs->vs_slow_ios)
2385+
return (1);
2386+
2387+
return (0);
2388+
}
2389+
23602390
/*
23612391
* Print out configuration state as requested by status_callback.
23622392
*/
@@ -2375,7 +2405,8 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
23752405
const char *state;
23762406
const char *type;
23772407
const char *path = NULL;
2378-
const char *rcolor = NULL, *wcolor = NULL, *ccolor = NULL;
2408+
const char *rcolor = NULL, *wcolor = NULL, *ccolor = NULL,
2409+
*scolor = NULL;
23792410

23802411
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
23812412
&child, &children) != 0)
@@ -2402,6 +2433,15 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
24022433
state = gettext("AVAIL");
24032434
}
24042435

2436+
/*
2437+
* If '-e' is specified then top-level vdevs and their children
2438+
* can be pruned if all of their leaves are healthy.
2439+
*/
2440+
if (cb->cb_print_unhealthy && depth > 0 &&
2441+
for_each_vdev_in_nvlist(nv, vdev_is_healthy, cb) == 0) {
2442+
return;
2443+
}
2444+
24052445
printf_color(health_str_to_color(state),
24062446
"\t%*s%-*s %-8s", depth, "", cb->cb_namewidth - depth,
24072447
name, state);
@@ -2416,6 +2456,9 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
24162456
if (vs->vs_checksum_errors)
24172457
ccolor = ANSI_RED;
24182458

2459+
if (vs->vs_slow_ios)
2460+
scolor = ANSI_BLUE;
2461+
24192462
if (cb->cb_literal) {
24202463
fputc(' ', stdout);
24212464
printf_color(rcolor, "%5llu",
@@ -2448,9 +2491,10 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
24482491
}
24492492

24502493
if (cb->cb_literal)
2451-
printf(" %5llu", (u_longlong_t)vs->vs_slow_ios);
2494+
printf_color(scolor, " %5llu",
2495+
(u_longlong_t)vs->vs_slow_ios);
24522496
else
2453-
printf(" %5s", rbuf);
2497+
printf_color(scolor, " %5s", rbuf);
24542498
}
24552499
if (cb->cb_print_power) {
24562500
if (children == 0) {
@@ -9106,9 +9150,11 @@ status_callback(zpool_handle_t *zhp, void *data)
91069150
(void) printf(gettext(
91079151
"errors: No known data errors\n"));
91089152
} else if (!cbp->cb_verbose) {
9153+
color_start(ANSI_RED);
91099154
(void) printf(gettext("errors: %llu data "
91109155
"errors, use '-v' for a list\n"),
91119156
(u_longlong_t)nerr);
9157+
color_end();
91129158
} else {
91139159
print_error_log(zhp);
91149160
}
@@ -9129,6 +9175,7 @@ status_callback(zpool_handle_t *zhp, void *data)
91299175
* [pool] [interval [count]]
91309176
*
91319177
* -c CMD For each vdev, run command CMD
9178+
* -e Display only unhealthy vdevs
91329179
* -i Display vdev initialization status.
91339180
* -g Display guid for individual vdev name.
91349181
* -L Follow links when resolving vdev path name.
@@ -9160,7 +9207,7 @@ zpool_do_status(int argc, char **argv)
91609207
};
91619208

91629209
/* check options */
9163-
while ((c = getopt_long(argc, argv, "c:igLpPsvxDtT:", long_options,
9210+
while ((c = getopt_long(argc, argv, "c:eigLpPsvxDtT:", long_options,
91649211
NULL)) != -1) {
91659212
switch (c) {
91669213
case 'c':
@@ -9187,6 +9234,9 @@ zpool_do_status(int argc, char **argv)
91879234
}
91889235
cmd = optarg;
91899236
break;
9237+
case 'e':
9238+
cb.cb_print_unhealthy = B_TRUE;
9239+
break;
91909240
case 'i':
91919241
cb.cb_print_vdev_init = B_TRUE;
91929242
break;

man/man8/zpool-status.8

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
.Sh SYNOPSIS
3737
.Nm zpool
3838
.Cm status
39-
.Op Fl DigLpPstvx
39+
.Op Fl DeigLpPstvx
4040
.Op Fl T Sy u Ns | Ns Sy d
4141
.Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns
4242
.Oo Ar pool Oc Ns
@@ -69,6 +69,8 @@ See the
6969
option of
7070
.Nm zpool Cm iostat
7171
for complete details.
72+
.It Fl e
73+
Only show unhealthy vdevs (not-ONLINE or with errors).
If
.Fl s
is also specified, vdevs with slow IOs are treated as unhealthy as well.
7274
.It Fl i
7375
Display vdev initialization status.
7476
.It Fl g

0 commit comments

Comments
 (0)