Skip to content

Commit 8afac5d

Browse files
Don Brady authored and behlendorf committed
Avoid posting duplicate zpool events
Duplicate io and checksum ereport events can make things appear worse than they are. Ideally the zpool events and the corresponding vdev stat error counts in a zpool status should be for unique errors -- not the same error being counted over and over. This can be demonstrated in a simple example. With a single bad block in a datafile and just 5 reads of the file we end up with a degraded vdev, even though there is only one unique error in the pool. The proposed solution to the above issue is to eliminate duplicates when posting events and when updating vdev error stats. We now save recent error events of interest when posting events so that we can easily check for duplicates when posting an error. Reviewed-by: Brad Lewis <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Don Brady <[email protected]> Closes #10861
1 parent bd72426 commit 8afac5d

File tree

22 files changed

+799
-79
lines changed

22 files changed

+799
-79
lines changed

include/sys/fm/fs/zfs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
* Use is subject to license terms.
2424
*/
2525

26+
/*
27+
* Copyright (c) 2020 by Delphix. All rights reserved.
28+
*/
29+
2630
#ifndef _SYS_FM_FS_ZFS_H
2731
#define _SYS_FM_FS_ZFS_H
2832

@@ -88,6 +92,7 @@ extern "C" {
8892
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size"
8993
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS "zio_flags"
9094
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE "zio_stage"
95+
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY "zio_priority"
9196
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE "zio_pipeline"
9297
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY "zio_delay"
9398
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP "zio_timestamp"

include/sys/fm/util.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ extern int zfs_zevent_seek(zfs_zevent_t *, uint64_t);
104104
extern void zfs_zevent_init(zfs_zevent_t **);
105105
extern void zfs_zevent_destroy(zfs_zevent_t *);
106106

107+
extern void zfs_zevent_track_duplicate(void);
108+
extern void zfs_ereport_init(void);
109+
extern void zfs_ereport_fini(void);
107110
#else
108111

109112
static inline void fm_init(void) { }

include/sys/spa.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23-
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
23+
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
2424
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
2525
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
2626
* Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -1145,10 +1145,10 @@ extern const char *spa_state_to_name(spa_t *spa);
11451145
struct zbookmark_phys;
11461146
extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
11471147
extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
1148-
const zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset,
1149-
uint64_t length);
1148+
const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);
11501149
extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,
11511150
zio_t *zio);
1151+
extern void zfs_ereport_taskq_fini(void);
11521152
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
11531153
const char *name, nvlist_t *aux);
11541154
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);

include/sys/zio.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2424
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25-
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
25+
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
2626
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
2727
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
2828
* Copyright 2016 Toomas Soome <[email protected]>
@@ -680,7 +680,7 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
680680
/*
681681
* Checksum ereport functions
682682
*/
683-
extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
683+
extern int zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
684684
const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset,
685685
uint64_t length, void *arg, struct zio_bad_cksum *info);
686686
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,

man/man5/zfs-module-parameters.5

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
'\" te
22
.\" Copyright (c) 2013 by Turbo Fredriksson <[email protected]>. All rights reserved.
3-
.\" Copyright (c) 2019 by Delphix. All rights reserved.
3+
.\" Copyright (c) 2019, 2020 by Delphix. All rights reserved.
44
.\" Copyright (c) 2019 Datto Inc.
55
.\" The contents of this file are subject to the terms of the Common Development
66
.\" and Distribution License (the "License"). You may not use this file except
@@ -3656,6 +3656,27 @@ Default value: \fB0\fR.
36563656

36573657
.sp
36583658
.ne 2
3659+
.na
3660+
\fBzfs_zevent_retain_max\fR (int)
3661+
.ad
3662+
.RS 12n
3663+
Maximum recent zevent records to retain for duplicate checking. Setting
3664+
this value to zero disables duplicate detection.
3665+
.sp
3666+
Default value: \fB2000\fR.
3667+
.RE
3668+
3669+
.sp
3670+
.ne 2
3671+
.na
3672+
\fBzfs_zevent_retain_expire_secs\fR (int)
3673+
.ad
3674+
.RS 12n
3675+
Lifespan for a recent ereport that was retained for duplicate checking.
3676+
.sp
3677+
Default value: \fB900\fR.
3678+
.RE
3679+
36593680
.na
36603681
\fBzfs_zil_clean_taskq_maxalloc\fR (int)
36613682
.ad

module/zfs/arc.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2323
* Copyright (c) 2018, Joyent, Inc.
24-
* Copyright (c) 2011, 2019, Delphix. All rights reserved.
24+
* Copyright (c) 2011, 2020, Delphix. All rights reserved.
2525
* Copyright (c) 2014, Saso Kiselkov. All rights reserved.
2626
* Copyright (c) 2017, Nexenta Systems, Inc. All rights reserved.
2727
* Copyright (c) 2019, loli10K <[email protected]>. All rights reserved.
@@ -2188,7 +2188,7 @@ arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
21882188
ret = SET_ERROR(EIO);
21892189
spa_log_error(spa, zb);
21902190
(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
2191-
spa, NULL, zb, NULL, 0, 0);
2191+
spa, NULL, zb, NULL, 0);
21922192
}
21932193

21942194
return (ret);
@@ -5654,7 +5654,7 @@ arc_read_done(zio_t *zio)
56545654
spa_log_error(zio->io_spa, &acb->acb_zb);
56555655
(void) zfs_ereport_post(
56565656
FM_EREPORT_ZFS_AUTHENTICATION,
5657-
zio->io_spa, NULL, &acb->acb_zb, zio, 0, 0);
5657+
zio->io_spa, NULL, &acb->acb_zb, zio, 0);
56585658
}
56595659
}
56605660

@@ -5931,7 +5931,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
59315931
spa_log_error(spa, zb);
59325932
(void) zfs_ereport_post(
59335933
FM_EREPORT_ZFS_AUTHENTICATION,
5934-
spa, NULL, zb, NULL, 0, 0);
5934+
spa, NULL, zb, NULL, 0);
59355935
}
59365936
}
59375937
if (rc != 0) {

module/zfs/fm.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,15 @@ struct erpt_kstat {
104104
kstat_named_t erpt_set_failed; /* num erpt set failures */
105105
kstat_named_t fmri_set_failed; /* num fmri set failures */
106106
kstat_named_t payload_set_failed; /* num payload set failures */
107+
kstat_named_t erpt_duplicates; /* num duplicate erpts */
107108
};
108109

109110
static struct erpt_kstat erpt_kstat_data = {
110111
{ "erpt-dropped", KSTAT_DATA_UINT64 },
111112
{ "erpt-set-failed", KSTAT_DATA_UINT64 },
112113
{ "fmri-set-failed", KSTAT_DATA_UINT64 },
113-
{ "payload-set-failed", KSTAT_DATA_UINT64 }
114+
{ "payload-set-failed", KSTAT_DATA_UINT64 },
115+
{ "erpt-duplicates", KSTAT_DATA_UINT64 }
114116
};
115117

116118
kstat_t *fm_ksp;
@@ -568,6 +570,12 @@ zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
568570
return (error);
569571
}
570572

573+
void
574+
zfs_zevent_track_duplicate(void)
575+
{
576+
atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
577+
}
578+
571579
static int
572580
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
573581
{
@@ -1633,13 +1641,17 @@ fm_init(void)
16331641
list_create(&zevent_list, sizeof (zevent_t),
16341642
offsetof(zevent_t, ev_node));
16351643
cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
1644+
1645+
zfs_ereport_init();
16361646
}
16371647

16381648
void
16391649
fm_fini(void)
16401650
{
16411651
int count;
16421652

1653+
zfs_ereport_fini();
1654+
16431655
zfs_zevent_drain_all(&count);
16441656

16451657
mutex_enter(&zevent_lock);

module/zfs/spa.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24-
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
24+
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
2525
* Copyright (c) 2018, Nexenta Systems, Inc. All rights reserved.
2626
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
2727
* Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -2868,7 +2868,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
28682868
}
28692869
if (error != EBADF) {
28702870
(void) zfs_ereport_post(ereport, spa,
2871-
NULL, NULL, NULL, 0, 0);
2871+
NULL, NULL, NULL, 0);
28722872
}
28732873
}
28742874
spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;

module/zfs/spa_config.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2424
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25-
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
25+
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
2626
* Copyright 2017 Joyent, Inc.
2727
*/
2828

@@ -316,7 +316,7 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
316316
if (target->spa_ccw_fail_time == 0) {
317317
(void) zfs_ereport_post(
318318
FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
319-
target, NULL, NULL, NULL, 0, 0);
319+
target, NULL, NULL, NULL, 0);
320320
}
321321
target->spa_ccw_fail_time = gethrtime();
322322
spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);

module/zfs/vdev.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,7 +1481,7 @@ vdev_probe_done(zio_t *zio)
14811481
ASSERT(zio->io_error != 0);
14821482
vdev_dbgmsg(vd, "failed probe");
14831483
(void) zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
1484-
spa, vd, NULL, NULL, 0, 0);
1484+
spa, vd, NULL, NULL, 0);
14851485
zio->io_error = SET_ERROR(ENXIO);
14861486
}
14871487

@@ -1862,11 +1862,10 @@ vdev_open(vdev_t *vd)
18621862
vd->vdev_ops->vdev_op_leaf) {
18631863
(void) zfs_ereport_post(
18641864
FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT,
1865-
spa, vd, NULL, NULL, 0, 0);
1865+
spa, vd, NULL, NULL, 0);
18661866
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
18671867
VDEV_AUX_BAD_LABEL);
18681868
return (SET_ERROR(EDOM));
1869-
18701869
}
18711870
vd->vdev_max_asize = max_asize;
18721871
}
@@ -4759,7 +4758,7 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
47594758
}
47604759

47614760
(void) zfs_ereport_post(class, spa, vd, NULL, NULL,
4762-
save_state, 0);
4761+
save_state);
47634762
}
47644763

47654764
/* Erase any notion of persistent removed state */

module/zfs/vdev_indirect.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
/*
1717
* Copyright (c) 2014, 2017 by Delphix. All rights reserved.
1818
* Copyright (c) 2019, loli10K <[email protected]>. All rights reserved.
19-
* Copyright (c) 2014, 2019 by Delphix. All rights reserved.
19+
* Copyright (c) 2014, 2020 by Delphix. All rights reserved.
2020
*/
2121

2222
#include <sys/zfs_context.h>
@@ -1473,13 +1473,14 @@ vdev_indirect_all_checksum_errors(zio_t *zio)
14731473

14741474
vdev_t *vd = ic->ic_vdev;
14751475

1476-
mutex_enter(&vd->vdev_stat_lock);
1477-
vd->vdev_stat.vs_checksum_errors++;
1478-
mutex_exit(&vd->vdev_stat_lock);
1479-
1480-
(void) zfs_ereport_post_checksum(zio->io_spa, vd,
1476+
int ret = zfs_ereport_post_checksum(zio->io_spa, vd,
14811477
NULL, zio, is->is_target_offset, is->is_size,
14821478
NULL, NULL, NULL);
1479+
if (ret != EALREADY) {
1480+
mutex_enter(&vd->vdev_stat_lock);
1481+
vd->vdev_stat.vs_checksum_errors++;
1482+
mutex_exit(&vd->vdev_stat_lock);
1483+
}
14831484
}
14841485
}
14851486
}

module/zfs/vdev_raidz.c

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24-
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
24+
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
2525
* Copyright (c) 2016 Gvozden Nešković. All rights reserved.
2626
*/
2727

@@ -1790,16 +1790,17 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
17901790
zio_bad_cksum_t zbc;
17911791
raidz_map_t *rm = zio->io_vsd;
17921792

1793-
mutex_enter(&vd->vdev_stat_lock);
1794-
vd->vdev_stat.vs_checksum_errors++;
1795-
mutex_exit(&vd->vdev_stat_lock);
1796-
17971793
zbc.zbc_has_cksum = 0;
17981794
zbc.zbc_injected = rm->rm_ecksuminjected;
17991795

1800-
(void) zfs_ereport_post_checksum(zio->io_spa, vd,
1796+
int ret = zfs_ereport_post_checksum(zio->io_spa, vd,
18011797
&zio->io_bookmark, zio, rc->rc_offset, rc->rc_size,
18021798
rc->rc_abd, bad_data, &zbc);
1799+
if (ret != EALREADY) {
1800+
mutex_enter(&vd->vdev_stat_lock);
1801+
vd->vdev_stat.vs_checksum_errors++;
1802+
mutex_exit(&vd->vdev_stat_lock);
1803+
}
18031804
}
18041805
}
18051806

@@ -2279,21 +2280,21 @@ vdev_raidz_io_done(zio_t *zio)
22792280
vdev_t *cvd;
22802281
rc = &rm->rm_col[c];
22812282
cvd = vd->vdev_child[rc->rc_devidx];
2282-
if (rc->rc_error == 0) {
2283-
zio_bad_cksum_t zbc;
2284-
zbc.zbc_has_cksum = 0;
2285-
zbc.zbc_injected =
2286-
rm->rm_ecksuminjected;
2283+
if (rc->rc_error != 0)
2284+
continue;
22872285

2286+
zio_bad_cksum_t zbc;
2287+
zbc.zbc_has_cksum = 0;
2288+
zbc.zbc_injected = rm->rm_ecksuminjected;
2289+
2290+
int ret = zfs_ereport_start_checksum(
2291+
zio->io_spa, cvd, &zio->io_bookmark, zio,
2292+
rc->rc_offset, rc->rc_size,
2293+
(void *)(uintptr_t)c, &zbc);
2294+
if (ret != EALREADY) {
22882295
mutex_enter(&cvd->vdev_stat_lock);
22892296
cvd->vdev_stat.vs_checksum_errors++;
22902297
mutex_exit(&cvd->vdev_stat_lock);
2291-
2292-
zfs_ereport_start_checksum(
2293-
zio->io_spa, cvd,
2294-
&zio->io_bookmark, zio,
2295-
rc->rc_offset, rc->rc_size,
2296-
(void *)(uintptr_t)c, &zbc);
22972298
}
22982299
}
22992300
}

0 commit comments

Comments
 (0)