Skip to content

Commit 0759e5e

Browse files
committed
FreeBSD: zfs_putpages: don't undirty pages until after write completes
zfs_putpages() would put the entire range of pages onto the ZIL, then return VM_PAGER_OK for each page to the kernel. However, an associated zil_commit() or txg sync had not happened at this point, so the write may not actually be on disk. So, we rework it to use a ZIL commit callback, and do the post-write work of undirtying the page and signaling completion there. We return VM_PAGER_PEND to the kernel instead so it knows that we will take care of it. We change from a single zfs_log_write() to cover the whole range, to multiple calls, each for a single page. This is because if zfs_log_write() receives a write too large to fit into a single ZIL block, it will split it into multiple writes, but each will receive the same callback and argument. This will lead to the callback being called multiple times, but we don't which pages/range the call is for, and so can't clean them up. This way is not ideal, but is simple and correct, and so is a good start. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Signed-off-by: Rob Norris <[email protected]>
1 parent 46b82de commit 0759e5e

File tree

3 files changed

+45
-9
lines changed

3 files changed

+45
-9
lines changed

include/os/freebsd/spl/sys/vm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
extern const int zfs_vm_pagerret_bad;
3636
extern const int zfs_vm_pagerret_error;
3737
extern const int zfs_vm_pagerret_ok;
38+
extern const int zfs_vm_pagerret_pend;
39+
extern const int zfs_vm_pagerret_again;
3840
extern const int zfs_vm_pagerput_sync;
3941
extern const int zfs_vm_pagerput_inval;
4042

module/os/freebsd/spl/spl_vm.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
const int zfs_vm_pagerret_bad = VM_PAGER_BAD;
4444
const int zfs_vm_pagerret_error = VM_PAGER_ERROR;
4545
const int zfs_vm_pagerret_ok = VM_PAGER_OK;
46+
const int zfs_vm_pagerret_pend = VM_PAGER_PEND;
47+
const int zfs_vm_pagerret_again = VM_PAGER_AGAIN;
4648
const int zfs_vm_pagerput_sync = VM_PAGER_PUT_SYNC;
4749
const int zfs_vm_pagerput_inval = VM_PAGER_PUT_INVAL;
4850

module/os/freebsd/zfs/zfs_vnops_os.c

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
2626
* Copyright (c) 2014 Integros [integros.com]
2727
* Copyright 2017 Nexenta Systems, Inc.
28+
* Copyright (c) 2025, Klara, Inc.
2829
*/
2930

3031
/* Portions Copyright 2007 Jeremy Teo */
@@ -4084,6 +4085,15 @@ zfs_freebsd_getpages(struct vop_getpages_args *ap)
40844085
ap->a_rahead));
40854086
}
40864087

4088+
static void
4089+
zfs_putpage_commit_cb(void *arg)
4090+
{
4091+
vm_page_t pp = arg;
4092+
vm_page_undirty(pp);
4093+
vm_page_sunbusy(pp);
4094+
vm_object_pip_wakeup(pp->object);
4095+
}
4096+
40874097
static int
40884098
zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
40894099
int *rtvals)
@@ -4185,10 +4195,12 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
41854195
}
41864196

41874197
if (zp->z_blksz < PAGE_SIZE) {
4188-
for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4189-
tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4198+
vm_ooffset_t woff = off;
4199+
size_t wlen = len;
4200+
for (i = 0; wlen > 0; woff += tocopy, wlen -= tocopy, i++) {
4201+
tocopy = MIN(PAGE_SIZE, wlen);
41904202
va = zfs_map_page(ma[i], &sf);
4191-
dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4203+
dmu_write(zfsvfs->z_os, zp->z_id, woff, tocopy, va, tx);
41924204
zfs_unmap_page(sf);
41934205
}
41944206
} else {
@@ -4209,19 +4221,39 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
42094221
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
42104222
err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
42114223
ASSERT0(err);
4224+
42124225
/*
4213-
* XXX we should be passing a callback to undirty
4214-
* but that would make the locking messier
4226+
* Loop over the pages and load them onto the ZIL. Each page
4227+
* gets a separate log write so we can get the correct page
4228+
* pointer into the callback.
42154229
*/
4216-
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4217-
len, commit, B_FALSE, NULL, NULL);
4230+
vm_ooffset_t pgoff = off;
4231+
size_t pgrem = len;
42184232

42194233
zfs_vmobject_wlock(object);
42204234
for (i = 0; i < ncount; i++) {
4221-
rtvals[i] = zfs_vm_pagerret_ok;
4222-
vm_page_undirty(ma[i]);
4235+
ASSERT3U(pgrem, >, 0);
4236+
size_t pglen = MIN(PAGE_SIZE, pgrem);
4237+
4238+
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp,
4239+
pgoff, pglen, commit, B_FALSE,
4240+
zfs_putpage_commit_cb, ma[i]);
4241+
4242+
pgoff += pglen;
4243+
pgrem -= pglen;
4244+
4245+
/*
4246+
* Inform the kernel that we will take care of page
4247+
* cleanup and signalling once it's written out.
4248+
*/
4249+
rtvals[i] = zfs_vm_pagerret_pend;
42234250
}
4251+
4252+
ASSERT3U(pgoff, ==, off+len);
4253+
ASSERT0(pgrem);
4254+
42244255
zfs_vmobject_wunlock(object);
4256+
42254257
VM_CNT_INC(v_vnodeout);
42264258
VM_CNT_ADD(v_vnodepgsout, ncount);
42274259
}

0 commit comments

Comments
 (0)