Skip to content

Commit ab2110e

Browse files
problame authored and tonyhutter committed
zfs_vnops: make zfs_get_data OS-independent
Move zfs_get_data() in to platform-independent code. The only platform-specific aspect of it is the way we release an inode (Linux) / vnode_t (FreeBSD). I am not aware of a platform that could be supported by ZFS that couldn't implement zfs_rele_async itself. Its sibling zvol_get_data already is platform-independent. Reviewed-by: Ryan Moeller <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Christian Schwarz <[email protected]> Closes #10979
1 parent a909190 commit ab2110e

File tree

7 files changed

+179
-329
lines changed

7 files changed

+179
-329
lines changed

include/os/freebsd/zfs/sys/zfs_znode_impl.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,6 @@ extern void zfs_tstamp_update_setup_ext(struct znode *,
173173
uint_t, uint64_t [2], uint64_t [2], boolean_t have_tx);
174174
extern void zfs_znode_free(struct znode *);
175175

176-
extern zil_get_data_t zfs_get_data;
177176
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
178177
extern int zfsfstype;
179178

include/os/linux/zfs/sys/zfs_znode_impl.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,6 @@ extern caddr_t zfs_map_page(page_t *, enum seg_rw);
173173
extern void zfs_unmap_page(page_t *, caddr_t);
174174
#endif /* HAVE_UIO_RW */
175175

176-
extern zil_get_data_t zfs_get_data;
177176
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
178177
extern int zfsfstype;
179178

include/sys/zfs_vnops.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,17 @@ extern int mappedread(znode_t *, int, uio_t *);
3939
extern int mappedread_sf(znode_t *, int, uio_t *);
4040
extern void update_pages(znode_t *, int64_t, int, objset_t *);
4141

42+
/*
43+
* Platform code that asynchronously drops zp's inode / vnode_t.
44+
*
45+
* Asynchronous dropping ensures that the caller will never drop the
46+
* last reference on an inode / vnode_t in the current context.
47+
* Doing so while holding open a tx could result in a deadlock if
48+
* the platform calls into filesystem again in the implementation
49+
* of inode / vnode_t dropping (e.g. call from iput_final()).
50+
*/
51+
extern void zfs_zrele_async(znode_t *zp);
52+
53+
extern zil_get_data_t zfs_get_data;
54+
4255
#endif

module/os/freebsd/zfs/zfs_vfsops.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <sys/mount.h>
4343
#include <sys/cmn_err.h>
4444
#include <sys/zfs_znode.h>
45+
#include <sys/zfs_vnops.h>
4546
#include <sys/zfs_dir.h>
4647
#include <sys/zil.h>
4748
#include <sys/fs/zfs.h>

module/os/freebsd/zfs/zfs_vnops_os.c

Lines changed: 6 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
/* Portions Copyright 2007 Jeremy Teo */
3030
/* Portions Copyright 2010 Robert Milkowski */
3131

32+
3233
#include <sys/types.h>
3334
#include <sys/param.h>
3435
#include <sys/time.h>
@@ -669,163 +670,13 @@ zfs_write_simple(znode_t *zp, const void *data, size_t len,
669670
return (error);
670671
}
671672

672-
static void
673-
zfs_get_done(zgd_t *zgd, int error)
674-
{
675-
znode_t *zp = zgd->zgd_private;
676-
objset_t *os = zp->z_zfsvfs->z_os;
677-
678-
if (zgd->zgd_db)
679-
dmu_buf_rele(zgd->zgd_db, zgd);
680-
681-
zfs_rangelock_exit(zgd->zgd_lr);
682-
683-
/*
684-
* Release the vnode asynchronously as we currently have the
685-
* txg stopped from syncing.
686-
*/
687-
VN_RELE_ASYNC(ZTOV(zp), dsl_pool_zrele_taskq(dmu_objset_pool(os)));
688-
689-
kmem_free(zgd, sizeof (zgd_t));
690-
}
691-
692-
#ifdef ZFS_DEBUG
693-
static int zil_fault_io = 0;
694-
#endif
695-
696-
/*
697-
* Get data to generate a TX_WRITE intent log record.
698-
*/
699-
int
700-
zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
673+
void
674+
zfs_zrele_async(znode_t *zp)
701675
{
702-
zfsvfs_t *zfsvfs = arg;
703-
objset_t *os = zfsvfs->z_os;
704-
znode_t *zp;
705-
uint64_t object = lr->lr_foid;
706-
uint64_t offset = lr->lr_offset;
707-
uint64_t size = lr->lr_length;
708-
dmu_buf_t *db;
709-
zgd_t *zgd;
710-
int error = 0;
711-
712-
ASSERT3P(lwb, !=, NULL);
713-
ASSERT3P(zio, !=, NULL);
714-
ASSERT3U(size, !=, 0);
715-
716-
/*
717-
* Nothing to do if the file has been removed
718-
*/
719-
if (zfs_zget(zfsvfs, object, &zp) != 0)
720-
return (SET_ERROR(ENOENT));
721-
if (zp->z_unlinked) {
722-
/*
723-
* Release the vnode asynchronously as we currently have the
724-
* txg stopped from syncing.
725-
*/
726-
VN_RELE_ASYNC(ZTOV(zp),
727-
dsl_pool_zrele_taskq(dmu_objset_pool(os)));
728-
return (SET_ERROR(ENOENT));
729-
}
730-
731-
zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
732-
zgd->zgd_lwb = lwb;
733-
zgd->zgd_private = zp;
734-
735-
/*
736-
* Write records come in two flavors: immediate and indirect.
737-
* For small writes it's cheaper to store the data with the
738-
* log record (immediate); for large writes it's cheaper to
739-
* sync the data and get a pointer to it (indirect) so that
740-
* we don't have to write the data twice.
741-
*/
742-
if (buf != NULL) { /* immediate write */
743-
zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset,
744-
size, RL_READER);
745-
/* test for truncation needs to be done while range locked */
746-
if (offset >= zp->z_size) {
747-
error = SET_ERROR(ENOENT);
748-
} else {
749-
error = dmu_read(os, object, offset, size, buf,
750-
DMU_READ_NO_PREFETCH);
751-
}
752-
ASSERT(error == 0 || error == ENOENT);
753-
} else { /* indirect write */
754-
/*
755-
* Have to lock the whole block to ensure when it's
756-
* written out and its checksum is being calculated
757-
* that no one can change the data. We need to re-check
758-
* blocksize after we get the lock in case it's changed!
759-
*/
760-
for (;;) {
761-
uint64_t blkoff;
762-
size = zp->z_blksz;
763-
blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
764-
offset -= blkoff;
765-
zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock,
766-
offset, size, RL_READER);
767-
if (zp->z_blksz == size)
768-
break;
769-
offset += blkoff;
770-
zfs_rangelock_exit(zgd->zgd_lr);
771-
}
772-
/* test for truncation needs to be done while range locked */
773-
if (lr->lr_offset >= zp->z_size)
774-
error = SET_ERROR(ENOENT);
775-
#ifdef ZFS_DEBUG
776-
if (zil_fault_io) {
777-
error = SET_ERROR(EIO);
778-
zil_fault_io = 0;
779-
}
780-
#endif
781-
if (error == 0)
782-
error = dmu_buf_hold(os, object, offset, zgd, &db,
783-
DMU_READ_NO_PREFETCH);
784-
785-
if (error == 0) {
786-
blkptr_t *bp = &lr->lr_blkptr;
787-
788-
zgd->zgd_db = db;
789-
zgd->zgd_bp = bp;
790-
791-
ASSERT(db->db_offset == offset);
792-
ASSERT(db->db_size == size);
793-
794-
error = dmu_sync(zio, lr->lr_common.lrc_txg,
795-
zfs_get_done, zgd);
796-
ASSERT(error || lr->lr_length <= size);
797-
798-
/*
799-
* On success, we need to wait for the write I/O
800-
* initiated by dmu_sync() to complete before we can
801-
* release this dbuf. We will finish everything up
802-
* in the zfs_get_done() callback.
803-
*/
804-
if (error == 0)
805-
return (0);
806-
807-
if (error == EALREADY) {
808-
lr->lr_common.lrc_txtype = TX_WRITE2;
809-
/*
810-
* TX_WRITE2 relies on the data previously
811-
* written by the TX_WRITE that caused
812-
* EALREADY. We zero out the BP because
813-
* it is the old, currently-on-disk BP,
814-
* so there's no need to zio_flush() its
815-
* vdevs (flushing would needlesly hurt
816-
* performance, and doesn't work on
817-
* indirect vdevs).
818-
*/
819-
zgd->zgd_bp = NULL;
820-
BP_ZERO(bp);
821-
error = 0;
822-
}
823-
}
824-
}
825-
826-
zfs_get_done(zgd, error);
676+
vnode_t *vp = ZTOV(zp);
677+
objset_t *os = ITOZSB(vp)->z_os;
827678

828-
return (error);
679+
VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
829680
}
830681

831682
static int

0 commit comments

Comments (0)