
Commit 1786825

markjdb authored and behlendorf committed
Grab the rangelock unconditionally in zfs_getpages()
As a deadlock avoidance measure, zfs_getpages() would only try to
acquire a rangelock, falling back to a single-page read if this was not
possible.  However, this is incompatible with direct I/O.

Instead, release the busy lock before trying to acquire the rangelock
in blocking mode.  This means that it's possible for the page to be
replaced, so we have to re-lookup.

Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Brian Atkinson <[email protected]>
Signed-off-by: Mark Johnston <[email protected]>
Closes #16643
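In outline, the new fallback path works as follows (a condensed, non-verbatim sketch of the second hunk below; the block-size retry loop and error handling are omitted, and off/len stand for the rounded rangelock bounds computed in the real code):

	lr = zfs_rangelock_tryenter(&zp->z_rangelock, off, len, RL_READER);
	if (lr == NULL) {
		/*
		 * Couldn't take the rangelock without blocking: drop the
		 * pages' busy locks first so that a writer stuck in
		 * update_pages() can make progress, then block.
		 */
		for (int i = 0; i < count; i++)
			vm_page_xunbusy(ma[i]);

		lr = zfs_rangelock_enter(&zp->z_rangelock, off, len,
		    RL_READER);

		/*
		 * The pages may have been freed or replaced while unbusied,
		 * so look them up (and re-busy them) again.
		 */
		zfs_vmobject_wlock(object);
		(void) vm_page_grab_pages(object, OFF_TO_IDX(start),
		    VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_ZERO,
		    ma, count);
		zfs_vmobject_wunlock(object);
	}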
1 parent 25eb538 commit 1786825

File tree: 1 file changed, +51 -17 lines changed


module/os/freebsd/zfs/zfs_vnops_os.c

Lines changed: 51 additions & 17 deletions
@@ -3930,6 +3930,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
 		return (zfs_vm_pagerret_error);
 
+	object = ma[0]->object;
 	start = IDX_TO_OFF(ma[0]->pindex);
 	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
 
@@ -3938,33 +3939,47 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	 * Note that we need to handle the case of the block size growing.
 	 */
 	for (;;) {
+		uint64_t len;
+
 		blksz = zp->z_blksz;
+		len = roundup(end, blksz) - rounddown(start, blksz);
+
 		lr = zfs_rangelock_tryenter(&zp->z_rangelock,
-		    rounddown(start, blksz),
-		    roundup(end, blksz) - rounddown(start, blksz), RL_READER);
+		    rounddown(start, blksz), len, RL_READER);
 		if (lr == NULL) {
-			if (rahead != NULL) {
-				*rahead = 0;
-				rahead = NULL;
-			}
-			if (rbehind != NULL) {
-				*rbehind = 0;
-				rbehind = NULL;
+			/*
+			 * Avoid a deadlock with update_pages().  We need to
+			 * hold the range lock when copying from the DMU, so
+			 * give up the busy lock to allow update_pages() to
+			 * proceed.  We might need to allocate new pages, which
+			 * isn't quite right since this allocation isn't subject
+			 * to the page fault handler's OOM logic, but this is
+			 * the best we can do for now.
+			 */
+			for (int i = 0; i < count; i++) {
+				ASSERT(vm_page_none_valid(ma[i]));
+				vm_page_xunbusy(ma[i]);
 			}
-			break;
+
+			lr = zfs_rangelock_enter(&zp->z_rangelock,
+			    rounddown(start, blksz), len, RL_READER);
+
+			zfs_vmobject_wlock(object);
+			(void) vm_page_grab_pages(object, OFF_TO_IDX(start),
+			    VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_ZERO,
+			    ma, count);
+			zfs_vmobject_wunlock(object);
 		}
 		if (blksz == zp->z_blksz)
 			break;
 		zfs_rangelock_exit(lr);
 	}
 
-	object = ma[0]->object;
 	zfs_vmobject_wlock(object);
 	obj_size = object->un_pager.vnp.vnp_size;
 	zfs_vmobject_wunlock(object);
 	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
-		if (lr != NULL)
-			zfs_rangelock_exit(lr);
+		zfs_rangelock_exit(lr);
 		zfs_exit(zfsvfs, FTAG);
 		return (zfs_vm_pagerret_bad);
 	}
@@ -3989,11 +4004,30 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	 * ZFS will panic if we request DMU to read beyond the end of the last
 	 * allocated block.
 	 */
-	error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
-	    &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));
+	for (int i = 0; i < count; i++) {
+		int count1, j, last_size;
 
-	if (lr != NULL)
-		zfs_rangelock_exit(lr);
+		if (vm_page_any_valid(ma[i])) {
+			ASSERT(vm_page_all_valid(ma[i]));
+			continue;
+		}
+		for (j = i + 1; j < count; j++) {
+			if (vm_page_any_valid(ma[j])) {
+				ASSERT(vm_page_all_valid(ma[j]));
+				break;
+			}
+		}
+		count1 = j - i;
+		last_size = j == count ?
+		    MIN(end, obj_size) - (end - PAGE_SIZE) : PAGE_SIZE;
+		error = dmu_read_pages(zfsvfs->z_os, zp->z_id, &ma[i], count1,
+		    i == 0 ? &pgsin_b : NULL, j == count ? &pgsin_a : NULL,
+		    last_size);
+		if (error != 0)
+			break;
+	}
+
+	zfs_rangelock_exit(lr);
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 
 	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
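Because the pages are unbusied and re-grabbed in the fallback path above, some of them may already be valid by the time the read loop runs (for example, filled by a concurrent writer through update_pages()), so the single dmu_read_pages() call is replaced by one call per run of still-invalid pages. A simplified model of that run splitting, with a hypothetical page_valid() predicate and read_run() helper standing in for vm_page_any_valid() and dmu_read_pages():

	for (int i = 0; i < count; i++) {
		if (page_valid(ma[i]))
			continue;	/* already filled by a writer */
		int j = i + 1;		/* find the end of the invalid run */
		while (j < count && !page_valid(ma[j]))
			j++;
		/*
		 * One read per run; read-behind only makes sense for a run
		 * that starts the array, read-ahead for one that ends it.
		 */
		read_run(&ma[i], j - i, i == 0, j == count);
		i = j;			/* resume scanning after the run */
	}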
