Skip to content

Commit 5ab1936

Browse files
committed
Reduce ZIO io_lock contention on sorted scrub.
During sorted scrub, multiple threads (one per vdev) issue many ZIOs at the same time, all using the same scn->scn_zio_root ZIO as their parent. This causes huge lock contention on the single global lock on that ZIO. Improve it by introducing per-queue null ZIOs, which are children of that root ZIO, and using them as proxy parents instead. For a 12-SSD pool storing 1.5TB of 4KB blocks on an 80-core system, this dramatically reduces lock contention and cuts scrub time from 21 minutes down to 12.5, while the actual read stages (not scan) are about 3x faster, reaching 100K blocks per second per vdev. Signed-off-by: Alexander Motin <[email protected]> Sponsored-By: iXsystems, Inc.
1 parent 482505f commit 5ab1936

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

module/zfs/dsl_scan.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ typedef struct scan_io {
280280
struct dsl_scan_io_queue {
281281
dsl_scan_t *q_scn; /* associated dsl_scan_t */
282282
vdev_t *q_vd; /* top-level vdev that this queue represents */
283+
zio_t *q_zio; /* scn_zio_root child for waiting on IO */
283284

284285
/* trees used for sorting I/Os and extents of I/Os */
285286
range_tree_t *q_exts_by_addr;
@@ -3044,6 +3045,8 @@ scan_io_queues_run_one(void *arg)
30443045

30453046
list_create(&sio_list, sizeof (scan_io_t),
30463047
offsetof(scan_io_t, sio_nodes.sio_list_node));
3048+
queue->q_zio = zio_null(queue->q_scn->scn_zio_root,
3049+
queue->q_scn->scn_dp->dp_spa, NULL, NULL, NULL, ZIO_FLAG_CANFAIL);
30473050
mutex_enter(q_lock);
30483051

30493052
/* Calculate maximum in-flight bytes for this vdev. */
@@ -3113,6 +3116,8 @@ scan_io_queues_run_one(void *arg)
31133116

31143117
mutex_exit(q_lock);
31153118
list_destroy(&sio_list);
3119+
zio_nowait(queue->q_zio);
3120+
queue->q_zio = NULL;
31163121
}
31173122

31183123
/*
@@ -4076,6 +4081,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
40764081
dsl_scan_t *scn = dp->dp_scan;
40774082
size_t size = BP_GET_PSIZE(bp);
40784083
abd_t *data = abd_alloc_for_io(size, B_FALSE);
4084+
zio_t *pio;
40794085

40804086
if (queue == NULL) {
40814087
ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
@@ -4084,6 +4090,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
40844090
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
40854091
spa->spa_scrub_inflight += BP_GET_PSIZE(bp);
40864092
mutex_exit(&spa->spa_scrub_lock);
4093+
pio = scn->scn_zio_root;
40874094
} else {
40884095
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
40894096

@@ -4093,11 +4100,13 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
40934100
cv_wait(&queue->q_zio_cv, q_lock);
40944101
queue->q_inflight_bytes += BP_GET_PSIZE(bp);
40954102
mutex_exit(q_lock);
4103+
pio = queue->q_zio;
4104+
ASSERT(pio != NULL);
40964105
}
40974106

40984107
count_block(scn, dp->dp_blkstats, bp);
4099-
zio_nowait(zio_read(scn->scn_zio_root, spa, bp, data, size,
4100-
dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
4108+
zio_nowait(zio_read(pio, spa, bp, data, size, dsl_scan_scrub_done,
4109+
queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
41014110
}
41024111

41034112
/*

0 commit comments

Comments
 (0)