Improve speculative prefetcher for block cloning #16814

Merged · 1 commit · Dec 4, 2024
module/zfs/dmu_zfetch.c (29 additions, 5 deletions)
@@ -63,8 +63,8 @@ static unsigned int zfetch_min_distance = 4 * 1024 * 1024;
 /* max bytes to prefetch per stream (default 64MB) */
 unsigned int	zfetch_max_distance = 64 * 1024 * 1024;
 #endif
-/* max bytes to prefetch indirects for per stream (default 64MB) */
-unsigned int	zfetch_max_idistance = 64 * 1024 * 1024;
+/* max bytes to prefetch indirects for per stream (default 128MB) */
+unsigned int	zfetch_max_idistance = 128 * 1024 * 1024;
 /* max request reorder distance within a stream (default 16MB) */
 unsigned int	zfetch_max_reorder = 16 * 1024 * 1024;
 /* Max log2 fraction of holes in a stream */
@@ -472,6 +472,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
 	zstream_t *zs;
 	spa_t *spa = zf->zf_dnode->dn_objset->os_spa;
 	zfs_prefetch_type_t os_prefetch = zf->zf_dnode->dn_objset->os_prefetch;
+	int64_t ipf_start, ipf_end;
 
 	if (zfs_prefetch_disable || os_prefetch == ZFS_PREFETCH_NONE)
 		return (NULL);
@@ -571,13 +572,13 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
 	 * This access is not part of any existing stream. Create a new
 	 * stream for it unless we are at the end of file.
 	 */
+	ASSERT0P(zs);
 	if (end_blkid < maxblkid)
 		dmu_zfetch_stream_create(zf, end_blkid);
 	mutex_exit(&zf->zf_lock);
-	if (!have_lock)
-		rw_exit(&zf->zf_dnode->dn_struct_rwlock);
 	ZFETCHSTAT_BUMP(zfetchstat_misses);
-	return (NULL);
+	ipf_start = 0;
+	goto prescient;
 
 hit:
 	nblks = dmu_zfetch_hit(zs, nblks);
@@ -650,6 +651,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
 	pf_nblks = zs->zs_ipf_dist >> dbs;
 	if (zs->zs_ipf_start < zs->zs_pf_end)
 		zs->zs_ipf_start = zs->zs_pf_end;
+	ipf_start = zs->zs_ipf_end;
 	if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks)
 		zs->zs_ipf_end = zs->zs_pf_end + pf_nblks;

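The snapshot ipf_start = zs->zs_ipf_end taken above records where the stream's indirect prefetch range ended before this access extends it; the prescient path in the next hunk issues indirects only from that point, so nothing already scheduled by the stream is issued twice. A tiny sketch with invented numbers:

/*
 * Hedged sketch of the ipf_start snapshot (all values invented): the
 * prescient loop later covers only [old zs_ipf_end, end_blkid), i.e.
 * indirects this access needs that the stream has not scheduled yet.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t zs_ipf_end = 2048;	/* stream covered indirects to here */
	uint64_t end_blkid = 5120;	/* a large clone outruns the stream */

	uint64_t ipf_start = zs_ipf_end;	/* snapshot before extending */
	if (ipf_start < end_blkid)
		printf("prescient range (L0 units): [%llu, %llu)\n",
		    (unsigned long long)ipf_start,
		    (unsigned long long)end_blkid);
	else
		printf("stream already ahead; no prescient I/O\n");
	return (0);
}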
@@ -658,8 +660,30 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
 	zfs_refcount_add(&zs->zs_callers, NULL);
 	mutex_exit(&zf->zf_lock);
 
+prescient:
+	/*
+	 * Prefetch the following indirect blocks for this access to reduce
+	 * dbuf_hold() sync read delays in dmu_buf_hold_array_by_dnode().
+	 * This covers the gap during the first couple accesses when we can
+	 * not predict the future yet, but know what is needed right now.
+	 * This should be very rare for reads/writes to need more than one
+	 * indirect, but more useful for cloning due to much bigger accesses.
+	 */
+	ipf_start = MAX(ipf_start, blkid + 1);
+	int epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
+	ipf_start = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
+	ipf_end = P2ROUNDUP(end_blkid, 1 << epbs) >> epbs;
+
+	int issued = 0;
+	for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) {
+		issued += dbuf_prefetch(zf->zf_dnode, 1, iblk,
+		    ZIO_PRIORITY_SYNC_READ, ARC_FLAG_PRESCIENT_PREFETCH);
+	}
+
 	if (!have_lock)
 		rw_exit(&zf->zf_dnode->dn_struct_rwlock);
+	if (issued)
+		ZFETCHSTAT_ADD(zfetchstat_io_issued, issued);
 	return (zs);
 }
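The core of the new prescient: path is mapping the L0 data-block range [blkid, end_blkid) to the level-1 indirect blocks holding its block pointers. Below is a hedged, self-contained sketch of that math; P2ROUNDUP matches the ZFS sysmacros.h definition, while the 128KB indirect-block size and the sample request are assumptions:

/*
 * Hedged userspace sketch of the prescient range math above. Maps the
 * L0 block range [blkid, end_blkid) to the L1 indirect indices that
 * would be passed to dbuf_prefetch(dn, 1, iblk, ...).
 */
#include <stdio.h>
#include <stdint.h>

#define	SPA_BLKPTRSHIFT	7	/* 128-byte block pointers */
#define	P2ROUNDUP(x, align)	((((x) - 1) | ((align) - 1)) + 1)

int
main(void)
{
	int64_t indblkshift = 17;	/* assumed 128KB indirect blocks */
	int64_t epbs = indblkshift - SPA_BLKPTRSHIFT; /* 10: 1024 L0s per L1 */

	/* Assumed example: a clone of L0 blocks [1000, 3100), no stream. */
	int64_t blkid = 1000, end_blkid = 3100;

	/* Skip the L1 covering blkid itself: holding blkid reads it anyway. */
	int64_t ipf_start = blkid + 1;
	ipf_start = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;	/* -> 1 */
	int64_t ipf_end = P2ROUNDUP(end_blkid, 1 << epbs) >> epbs;	/* -> 4 */

	for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++)
		printf("dbuf_prefetch() L1 #%lld\n", (long long)iblk);
	/* Prints L1 #1, #2, #3; L1 #0 is read when blkid itself is held. */
	return (0);
}

Under these assumptions the three L1 reads go out asynchronously up front rather than surfacing as synchronous dbuf_hold() reads inside dmu_buf_hold_array_by_dnode(), which is exactly the delay the comment in the hunk above describes.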
