
Commit ae9e059

amotin authored and andrewc12 committed
Reduce ZIO io_lock contention on sorted scrub
During a sorted scrub, multiple threads (one per vdev) issue many ZIOs at the same time, all using the same scn->scn_zio_root ZIO as parent. This causes heavy contention on that single ZIO's global lock. Improve the situation by introducing per-queue null ZIOs, children of the root ZIO, and using them as proxy parents instead.

For a 12-SSD pool storing 1.5TB of 4KB blocks on an 80-core system, this dramatically reduces lock contention and cuts scrub time from 21 minutes down to 12.5, while the actual read stages (not scan) are about 3x faster, reaching 100K blocks per second per vdev.

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored-By: iXsystems, Inc.
Closes openzfs#13553
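The mechanism behind the fix is generic: attaching a child ZIO means taking the parent's lock (the io_lock named in the commit title), so thousands of concurrent zio_read()s against one shared root serialize on one mutex. Below is a minimal standalone pthreads model of that effect and of the per-proxy fix; node_t, add_child(), worker() and the thread/iteration counts are hypothetical stand-ins for illustration, not ZFS code.

/* cc -pthread model.c */
#include <pthread.h>
#include <stdio.h>

/* Stands in for a zio_t: a lock plus a child count. */
typedef struct node {
        pthread_mutex_t lock;   /* models a ZIO's io_lock */
        long children;          /* models the parent/child links */
} node_t;

static node_t root = { PTHREAD_MUTEX_INITIALIZER, 0 };

/* Registering a child takes the parent's lock. */
static void
add_child(node_t *parent)
{
        pthread_mutex_lock(&parent->lock);
        parent->children++;
        pthread_mutex_unlock(&parent->lock);
}

/* One worker per top-level vdev, as in sorted scrub. */
static void *
worker(void *arg)
{
        node_t proxy;

        (void) arg;
        pthread_mutex_init(&proxy.lock, NULL);
        proxy.children = 0;
        add_child(&root);               /* the proxy is a child of the root */

        for (int i = 0; i < 1000000; i++)
                add_child(&proxy);      /* before the fix: add_child(&root) */

        pthread_mutex_destroy(&proxy.lock);
        return (NULL);
}

int
main(void)
{
        pthread_t tid[12];

        for (int i = 0; i < 12; i++)
                pthread_create(&tid[i], NULL, worker, NULL);
        for (int i = 0; i < 12; i++)
                pthread_join(tid[i], NULL);
        printf("root has %ld direct children\n", root.children);
        return (0);
}

With the loop body swapped back to add_child(&root), all twelve threads pound a single mutex; with the proxy, the shared lock is taken only once per worker, which is the shape of the change below.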
1 parent b52139b commit ae9e059

File tree

1 file changed (+15, -4 lines)

module/zfs/dsl_scan.c

Lines changed: 15 additions & 4 deletions
@@ -280,6 +280,7 @@ typedef struct scan_io {
 struct dsl_scan_io_queue {
         dsl_scan_t *q_scn; /* associated dsl_scan_t */
         vdev_t *q_vd; /* top-level vdev that this queue represents */
+        zio_t *q_zio; /* scn_zio_root child for waiting on IO */
 
         /* trees used for sorting I/Os and extents of I/Os */
         range_tree_t *q_exts_by_addr;
@@ -3036,15 +3037,19 @@ scan_io_queues_run_one(void *arg)
         dsl_scan_io_queue_t *queue = arg;
         kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
         boolean_t suspended = B_FALSE;
-        range_seg_t *rs = NULL;
-        scan_io_t *sio = NULL;
+        range_seg_t *rs;
+        scan_io_t *sio;
+        zio_t *zio;
         list_t sio_list;
 
         ASSERT(queue->q_scn->scn_is_sorted);
 
         list_create(&sio_list, sizeof (scan_io_t),
             offsetof(scan_io_t, sio_nodes.sio_list_node));
+        zio = zio_null(queue->q_scn->scn_zio_root, queue->q_scn->scn_dp->dp_spa,
+            NULL, NULL, NULL, ZIO_FLAG_CANFAIL);
         mutex_enter(q_lock);
+        queue->q_zio = zio;
 
         /* Calculate maximum in-flight bytes for this vdev. */
         queue->q_maxinflight_bytes = MAX(1, zfs_scan_vdev_limit *
@@ -3111,7 +3116,9 @@ scan_io_queues_run_one(void *arg)
                 scan_io_queue_insert_impl(queue, sio);
         }
 
+        queue->q_zio = NULL;
         mutex_exit(q_lock);
+        zio_nowait(zio);
         list_destroy(&sio_list);
 }
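Condensed from the two hunks above, the proxy's lifetime in scan_io_queues_run_one() is as follows (abridged; the elided middle is the queue's normal I/O issue loop):

        zio = zio_null(queue->q_scn->scn_zio_root, queue->q_scn->scn_dp->dp_spa,
            NULL, NULL, NULL, ZIO_FLAG_CANFAIL); /* child of the scan root */
        mutex_enter(q_lock);
        queue->q_zio = zio;     /* published under q_lock for scan_exec_io() */
        /* ... issue this queue's scrub reads, parented to queue->q_zio ... */
        queue->q_zio = NULL;    /* stop handing out the proxy */
        mutex_exit(q_lock);
        zio_nowait(zio);        /* scn_zio_root still waits on it as a child */

The proxy is created before taking q_lock and handed off with zio_nowait() after dropping it, so the only work done on the shared root per queue run is attaching and completing one null child.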

@@ -4076,6 +4083,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
         dsl_scan_t *scn = dp->dp_scan;
         size_t size = BP_GET_PSIZE(bp);
         abd_t *data = abd_alloc_for_io(size, B_FALSE);
+        zio_t *pio;
 
         if (queue == NULL) {
                 ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
@@ -4084,6 +4092,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
                         cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
                 spa->spa_scrub_inflight += BP_GET_PSIZE(bp);
                 mutex_exit(&spa->spa_scrub_lock);
+                pio = scn->scn_zio_root;
         } else {
                 kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
 
@@ -4092,12 +4101,14 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
                 while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes)
                         cv_wait(&queue->q_zio_cv, q_lock);
                 queue->q_inflight_bytes += BP_GET_PSIZE(bp);
+                pio = queue->q_zio;
                 mutex_exit(q_lock);
         }
 
+        ASSERT(pio != NULL);
         count_block(scn, dp->dp_blkstats, bp);
-        zio_nowait(zio_read(scn->scn_zio_root, spa, bp, data, size,
-            dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
+        zio_nowait(zio_read(pio, spa, bp, data, size, dsl_scan_scrub_done,
+            queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
 }
 
 /*
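The net effect on the issue path, condensed from the scan_exec_io() hunks above: the read's parent is now chosen per call, falling back to the shared root only for the unqueued (unsorted) case.

        zio_t *pio = (queue == NULL) ? scn->scn_zio_root : queue->q_zio;

        ASSERT(pio != NULL);
        count_block(scn, dp->dp_blkstats, bp);
        zio_nowait(zio_read(pio, spa, bp, data, size, dsl_scan_scrub_done,
            queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));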
