
Improve scrub maxinflight_bytes math. #12046

Merged 1 commit on May 27, 2021
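In short, as the diff below shows: the recursive dsl_scan_count_leaves() is replaced by dsl_scan_count_data_disks(), which walks only the main-pool top-level vdevs and credits each with vdev_get_ndisks() minus vdev_get_nparity(), so the scrub/resilver in-flight byte limits scale with data disks rather than with every readable leaf. The documented default of zfs_scan_vdev_limit is also corrected in the man page, and the assertions in scan_exec_io() are moved so each branch checks the limit it actually uses.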
2 changes: 1 addition & 1 deletion man/man5/zfs-module-parameters.5
@@ -3326,7 +3326,7 @@ Default value: \fB0\fR.
 Maximum amount of data that can be concurrently issued at once for scrubs and
 resilvers per leaf device, given in bytes.
 .sp
-Default value: \fB41943040\fR.
+Default value: \fB4194304\fR.
 .RE

 .sp
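The man-page fix is a single dropped digit, but worth spelling out: 4194304 bytes is 4 MiB (4 << 20), the default of the zfs_scan_vdev_limit module parameter, while the old text claimed ten times that. A trivial userspace check of the arithmetic (not part of the PR):

#include <assert.h>

int main(void)
{
	unsigned long long zfs_scan_vdev_limit = 4194304ULL;	/* documented default */
	assert(zfs_scan_vdev_limit == (4ULL << 20));	/* 4 MiB; old text said 41943040, i.e. 40 MiB */
	return (0);
}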
40 changes: 15 additions & 25 deletions module/zfs/dsl_scan.c
@@ -126,7 +126,7 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj,
 static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg);
 static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj);
 static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx);
-static uint64_t dsl_scan_count_leaves(vdev_t *vd);
+static uint64_t dsl_scan_count_data_disks(vdev_t *vd);

 extern int zfs_vdev_async_write_active_min_dirty_percent;

@@ -451,7 +451,7 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
	 * phase are done per top-level vdev and are handled separately.
	 */
	scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
-	    dsl_scan_count_leaves(spa->spa_root_vdev), 1ULL << 20);
+	    dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20);

	avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
	    offsetof(scan_ds_t, sds_node));
@@ -2759,22 +2759,16 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
 }

 static uint64_t
-dsl_scan_count_leaves(vdev_t *vd)
+dsl_scan_count_data_disks(vdev_t *rvd)
 {
	uint64_t i, leaves = 0;

-	/* we only count leaves that belong to the main pool and are readable */
-	if (vd->vdev_islog || vd->vdev_isspare ||
-	    vd->vdev_isl2cache || !vdev_readable(vd))
-		return (0);
-
-	if (vd->vdev_ops->vdev_op_leaf)
-		return (1);
-
-	for (i = 0; i < vd->vdev_children; i++) {
-		leaves += dsl_scan_count_leaves(vd->vdev_child[i]);
+	for (i = 0; i < rvd->vdev_children; i++) {
+		vdev_t *vd = rvd->vdev_child[i];
+		if (vd->vdev_islog || vd->vdev_isspare || vd->vdev_isl2cache)
+			continue;
+		leaves += vdev_get_ndisks(vd) - vdev_get_nparity(vd);
	}

	return (leaves);
 }
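The counting change is the core of the PR: the old code recursed to every readable leaf, so an 8-disk raidz2 contributed 8 to the limit; the new code stops at the top-level vdevs, skips log, spare, and l2cache devices, and credits each remaining vdev with its data disks only, so the same raidz2 contributes 6. It also drops the old !vdev_readable() test, so the count no longer changes with individual device state. A standalone userspace sketch of the new logic, with simplified stand-in fields for the vdev flags and the vdev_get_ndisks()/vdev_get_nparity() accessors:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct toplevel {			/* stand-in for a top-level vdev_t */
	int islog, isspare, isl2cache;
	uint64_t ndisks, nparity;	/* vdev_get_ndisks() / vdev_get_nparity() */
};

static uint64_t
count_data_disks(const struct toplevel *tvd, size_t children)
{
	uint64_t leaves = 0;

	for (size_t i = 0; i < children; i++) {
		if (tvd[i].islog || tvd[i].isspare || tvd[i].isl2cache)
			continue;
		leaves += tvd[i].ndisks - tvd[i].nparity;
	}
	return (leaves);
}

int main(void)
{
	struct toplevel pool[] = {
		{ 0, 0, 0, 8, 2 },	/* raidz2: 8 disks, 2 parity -> 6 data */
		{ 1, 0, 0, 2, 0 },	/* log mirror: skipped */
	};
	printf("%llu\n", (unsigned long long)count_data_disks(pool, 2));	/* prints 6 */
	return (0);
}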

@@ -3017,18 +3011,16 @@ scan_io_queues_run_one(void *arg)
	range_seg_t *rs = NULL;
	scan_io_t *sio = NULL;
	list_t sio_list;
-	uint64_t bytes_per_leaf = zfs_scan_vdev_limit;
-	uint64_t nr_leaves = dsl_scan_count_leaves(queue->q_vd);

	ASSERT(queue->q_scn->scn_is_sorted);

	list_create(&sio_list, sizeof (scan_io_t),
	    offsetof(scan_io_t, sio_nodes.sio_list_node));
	mutex_enter(q_lock);

-	/* calculate maximum in-flight bytes for this txg (min 1MB) */
-	queue->q_maxinflight_bytes =
-	    MAX(nr_leaves * bytes_per_leaf, 1ULL << 20);
+	/* Calculate maximum in-flight bytes for this vdev. */
+	queue->q_maxinflight_bytes = MAX(1, zfs_scan_vdev_limit *
+	    (vdev_get_ndisks(queue->q_vd) - vdev_get_nparity(queue->q_vd)));

	/* reset per-queue scan statistics for this txg */
	queue->q_total_seg_size_this_txg = 0;
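Besides switching to the data-disk count, the per-queue formula changes its floor: the old code clamped the result to at least 1 MiB (1ULL << 20), while MAX(1, ...) here only guards against a limit of zero. A worked example under assumed values, a 10-wide raidz2 top-level vdev and the default 4 MiB zfs_scan_vdev_limit:

#include <stdio.h>
#include <stdint.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

int main(void)
{
	uint64_t zfs_scan_vdev_limit = 4194304;	/* 4 MiB default */
	uint64_t ndisks = 10, nparity = 2;	/* assumed raidz2 layout */
	uint64_t q_max = MAX(1, zfs_scan_vdev_limit * (ndisks - nparity));

	printf("%llu\n", (unsigned long long)q_max);	/* 33554432 = 32 MiB */
	return (0);
}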
@@ -3665,16 +3657,14 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
		/* Need to scan metadata for more blocks to scrub */
		dsl_scan_phys_t *scnp = &scn->scn_phys;
		taskqid_t prefetch_tqid;
-		uint64_t bytes_per_leaf = zfs_scan_vdev_limit;
-		uint64_t nr_leaves = dsl_scan_count_leaves(spa->spa_root_vdev);

		/*
		 * Recalculate the max number of in-flight bytes for pool-wide
		 * scanning operations (minimum 1MB). Limits for the issuing
		 * phase are done per top-level vdev and are handled separately.
		 */
-		scn->scn_maxinflight_bytes =
-		    MAX(nr_leaves * bytes_per_leaf, 1ULL << 20);
+		scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
+		    dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20);

		if (scnp->scn_ddt_bookmark.ddb_class <=
		    scnp->scn_ddt_class_max) {
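Carrying the example pool through (six data disks): this pool-wide ceiling works out to MAX(6 × 4 MiB, 1 MiB) = 24 MiB. Unlike the per-queue computation in scan_io_queues_run_one() above, the 1 MiB floor is kept here, matching the identical expression in dsl_scan_init().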
@@ -4050,9 +4040,8 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
	size_t size = BP_GET_PSIZE(bp);
	abd_t *data = abd_alloc_for_io(size, B_FALSE);

-	ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
-
	if (queue == NULL) {
+		ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
		mutex_enter(&spa->spa_scrub_lock);
		while (spa->spa_scrub_inflight >= scn->scn_maxinflight_bytes)
			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
@@ -4061,6 +4050,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
	} else {
		kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;

+		ASSERT3U(queue->q_maxinflight_bytes, >, 0);
		mutex_enter(q_lock);
		while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes)
			cv_wait(&queue->q_zio_cv, q_lock);
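Moving the assertions into the branches makes each one check the limit that branch actually consults: the pool-wide scn_maxinflight_bytes when there is no per-vdev queue, and q_maxinflight_bytes otherwise. Both branches implement the same condvar throttle, which a zero limit would deadlock, hence the assertions. A userspace pthreads sketch of the pattern (hypothetical names, not from the PR):

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t q_zio_cv = PTHREAD_COND_INITIALIZER;
static uint64_t inflight_bytes;			/* bytes currently issued */
static uint64_t maxinflight_bytes = 32 << 20;	/* per-queue cap, e.g. 32 MiB */

static void
issue_io(uint64_t size)
{
	pthread_mutex_lock(&q_lock);
	while (inflight_bytes >= maxinflight_bytes)	/* would block forever if the cap were 0 */
		pthread_cond_wait(&q_zio_cv, &q_lock);
	inflight_bytes += size;
	pthread_mutex_unlock(&q_lock);
	/* ... hand the I/O to the issuing pipeline here ... */
}

static void
io_done(uint64_t size)
{
	pthread_mutex_lock(&q_lock);
	inflight_bytes -= size;
	pthread_cond_broadcast(&q_zio_cv);	/* wake throttled issuers */
	pthread_mutex_unlock(&q_lock);
}

int main(void)
{
	issue_io(1 << 20);
	io_done(1 << 20);
	return (0);
}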