Skip to content

Commit 4299712

Browse files
cyphar authored and Ryan Moeller committed
zfs_rename: support RENAME_* flags
Implement support for Linux's RENAME_* flags (for renameat2). Aside from being quite useful for userspace (providing race-free ways to exchange paths and implement mv --no-clobber), they are used by overlayfs and are thus required in order to use overlayfs-on-ZFS.

In order for us to represent the new renameat2(2) flags in the ZIL, we create two new transaction types for the two flags which need transactional-level support (RENAME_EXCHANGE and RENAME_WHITEOUT). RENAME_NOREPLACE does not need any ZIL support because we know that if the operation succeeded before creating the ZIL entry, there was no file to be clobbered and thus it can be treated as a regular TX_RENAME.

Cc: Pavel Snajdr <[email protected]>
Signed-off-by: Aleksa Sarai <[email protected]>
1 parent d524f3c commit 4299712

File tree

33 files changed

+929
-71
lines changed

33 files changed

+929
-71
lines changed

AUTHORS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ CONTRIBUTORS:
2020
Alec Salazar <[email protected]>
2121
Alejandro R. Sedeño <[email protected]>
2222
Alek Pinchuk <[email protected]>
23+
Aleksa Sarai <[email protected]>
2324
Alex Braunegg <[email protected]>
2425
Alex McWhirter <[email protected]>
2526
Alex Reece <[email protected]>
@@ -236,6 +237,7 @@ CONTRIBUTORS:
236237
Paul Dagnelie <[email protected]>
237238
Paul Zuchowski <[email protected]>
238239
Pavel Boldin <[email protected]>
240+
Pavel Snajdr <[email protected]>
239241
Pavel Zakharov <[email protected]>
240242
Pawel Jakub Dawidek <[email protected]>
241243
Pedro Giffuni <[email protected]>

cmd/zdb/zdb_il.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,14 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, const void *arg)
128128
(void) printf("%ssdoid %llu, tdoid %llu\n", tab_prefix,
129129
(u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
130130
(void) printf("%ssrc %s tgt %s\n", tab_prefix, snm, tnm);
131+
switch (txtype) {
132+
case TX_RENAME_EXCHANGE:
133+
(void) printf("%sflags RENAME_EXCHANGE\n", tab_prefix);
134+
break;
135+
case TX_RENAME_WHITEOUT:
136+
(void) printf("%sflags RENAME_WHITEOUT\n", tab_prefix);
137+
break;
138+
}
131139
}
132140

133141
static int
@@ -330,6 +338,8 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
330338
{.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE2 "},
331339
{.zri_print = zil_prt_rec_setsaxattr,
332340
.zri_name = "TX_SETSAXATTR "},
341+
{.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_EXCHANGE "},
342+
{.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_WHITEOUT "},
333343
};
334344

335345
static int

cmd/ztest.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2368,6 +2368,8 @@ static zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
23682368
NULL, /* TX_MKDIR_ACL_ATTR */
23692369
NULL, /* TX_WRITE2 */
23702370
NULL, /* TX_SETSAXATTR */
2371+
NULL, /* TX_RENAME_EXCHANGE */
2372+
NULL, /* TX_RENAME_WHITEOUT */
23712373
};
23722374

23732375
/*

config/kernel-rename.m4

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,28 @@
11
AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [
2+
dnl #
3+
dnl # 3.15 (to 4.9) API change,
4+
dnl #
5+
dnl # A new version of iops->rename() was added (rename2) that takes a flag
6+
dnl # argument (to support renameat2). However this separate function was
7+
dnl # merged back into iops->rename() in Linux 4.9.
8+
dnl #
9+
ZFS_LINUX_TEST_SRC([inode_operations_rename2], [
10+
#include <linux/fs.h>
11+
int rename2_fn(struct inode *sip, struct dentry *sdp,
12+
struct inode *tip, struct dentry *tdp,
13+
unsigned int flags) { return 0; }
14+
15+
static const struct inode_operations
16+
iops __attribute__ ((unused)) = {
17+
.rename2 = rename2_fn,
18+
};
19+
],[])
20+
221
dnl #
322
dnl # 4.9 API change,
4-
dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants
5-
dnl # flags.
23+
dnl #
24+
dnl # iops->rename2() merged into iops->rename(), and iops->rename() now
25+
dnl # wants flags.
626
dnl #
727
ZFS_LINUX_TEST_SRC([inode_operations_rename_flags], [
828
#include <linux/fs.h>
@@ -16,11 +36,29 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [
1636
};
1737
],[])
1838
39+
dnl #
40+
dnl # EL7 compatibility
41+
dnl #
42+
dnl # EL7 has backported renameat2 support, but it's done by defining a
43+
dnl # separate iops wrapper structure that takes the .renameat2 function.
44+
dnl #
45+
ZFS_LINUX_TEST_SRC([dir_inode_operations_wrapper_rename2], [
46+
#include <linux/fs.h>
47+
int rename2_fn(struct inode *sip, struct dentry *sdp,
48+
struct inode *tip, struct dentry *tdp,
49+
unsigned int flags) { return 0; }
50+
51+
static const struct inode_operations_wrapper
52+
iops __attribute__ ((unused)) = {
53+
.rename2 = rename2_fn,
54+
};
55+
],[])
56+
1957
dnl #
2058
dnl # 5.12 API change,
2159
dnl #
22-
dnl # Linux 5.12 introduced passing struct user_namespace* as the first argument
23-
dnl # of the rename() and other inode_operations members.
60+
dnl # Linux 5.12 introduced passing struct user_namespace* as the first
61+
dnl # argument of the rename() and other inode_operations members.
2462
dnl #
2563
ZFS_LINUX_TEST_SRC([inode_operations_rename_userns], [
2664
#include <linux/fs.h>
@@ -44,13 +82,30 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME], [
4482
],[
4583
AC_MSG_RESULT(no)
4684
47-
AC_MSG_CHECKING([whether iop->rename() wants flags])
48-
ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
85+
AC_MSG_CHECKING([whether iops->rename2() exists])
86+
ZFS_LINUX_TEST_RESULT([inode_operations_rename2], [
4987
AC_MSG_RESULT(yes)
50-
AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
51-
[iops->rename() wants flags])
88+
AC_DEFINE(HAVE_RENAME2, 1, [iops->rename2() exists])
5289
],[
5390
AC_MSG_RESULT(no)
91+
92+
AC_MSG_CHECKING([whether iops->rename() wants flags])
93+
ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
94+
AC_MSG_RESULT(yes)
95+
AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
96+
[iops->rename() wants flags])
97+
],[
98+
AC_MSG_RESULT(no)
99+
100+
AC_MSG_CHECKING([whether struct inode_operations_wrapper takes .rename2()])
101+
ZFS_LINUX_TEST_RESULT([dir_inode_operations_wrapper_rename2], [
102+
AC_MSG_RESULT(yes)
103+
AC_DEFINE(HAVE_RENAME2_OPERATIONS_WRAPPER, 1,
104+
[struct inode_operations_wrapper takes .rename2()])
105+
],[
106+
AC_MSG_RESULT(no)
107+
])
108+
])
54109
])
55110
])
56111
])

include/os/freebsd/zfs/sys/zfs_vnops_os.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ extern int zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd,
4141
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr,
4242
zuserns_t *mnt_ns);
4343
extern int zfs_rename(znode_t *sdzp, const char *snm, znode_t *tdzp,
44-
const char *tnm, cred_t *cr, int flags, zuserns_t *mnt_ns);
44+
const char *tnm, cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap,
45+
zuserns_t *mnt_ns);
4546
extern int zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
4647
const char *link, znode_t **zpp, cred_t *cr, int flags, zuserns_t *mnt_ns);
4748
extern int zfs_link(znode_t *tdzp, znode_t *sp,

include/os/linux/kernel/linux/vfs_compat.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,19 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid)
324324
ip->i_gid = make_kgid(kcred->user_ns, gid);
325325
}
326326

327+
/*
328+
* 3.15 API change
* Each flag below is defined only when the headers included so far have not
* already defined it, so these are fallbacks and never override an existing
* definition.
329+
*/
330+
#ifndef RENAME_NOREPLACE
331+
#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */
332+
#endif
333+
#ifndef RENAME_EXCHANGE
334+
#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
335+
#endif
336+
#ifndef RENAME_WHITEOUT
337+
#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
338+
#endif
339+
327340
/*
328341
* 4.9 API change
329342
*/

include/os/linux/spl/sys/sysmacros.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,16 @@ extern uint32_t zone_get_hostid(void *zone);
120120
extern void spl_setup(void);
121121
extern void spl_cleanup(void);
122122

123+
/*
124+
* Only handles the first 4096 majors and first 256 minors. We don't have a
125+
* libc for the kernel module so we define this inline.
* The result packs the low 12 bits of major into bits 8-19 and the low 8 bits
* of minor into bits 0-7; any higher bits of either argument are masked off.
126+
*/
127+
static inline dev_t
128+
makedev(unsigned int major, unsigned int minor)
129+
{
130+
return ((major & 0xFFF) << 8) | (minor & 0xFF);
131+
}
132+
123133
#define highbit(x) __fls(x)
124134
#define lowbit(x) __ffs(x)
125135

include/os/linux/zfs/sys/zfs_vnops_os.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip,
6161
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr,
6262
zuserns_t *mnt_ns);
6363
extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
64-
char *tnm, cred_t *cr, int flags, zuserns_t *mnt_ns);
64+
char *tnm, cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap,
65+
zuserns_t *mnt_ns);
6566
extern int zfs_symlink(znode_t *dzp, char *name, vattr_t *vap,
6667
char *link, znode_t **zpp, cred_t *cr, int flags, zuserns_t *mnt_ns);
6768
extern int zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr);

include/os/linux/zfs/sys/zpl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@ extern void zpl_vap_init(vattr_t *vap, struct inode *dir,
4242
umode_t mode, cred_t *cr, zuserns_t *mnt_ns);
4343

4444
extern const struct inode_operations zpl_inode_operations;
45+
#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER
46+
extern const struct inode_operations_wrapper zpl_dir_inode_operations;
47+
#else
4548
extern const struct inode_operations zpl_dir_inode_operations;
49+
#endif
4650
extern const struct inode_operations zpl_symlink_inode_operations;
4751
extern const struct inode_operations zpl_special_inode_operations;
4852

include/sys/zfs_znode.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,12 @@ extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
299299
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
300300
znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
301301
znode_t *szp);
302+
extern void zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx,
303+
uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
304+
const char *dname, znode_t *szp);
305+
extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx,
306+
uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
307+
const char *dname, znode_t *szp, znode_t *wzp);
302308
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
303309
znode_t *zp, offset_t off, ssize_t len, int ioflag,
304310
zil_callback_t callback, void *callback_data);

include/sys/zil.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,9 @@ typedef enum zil_create {
164164
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
165165
#define TX_WRITE2 20 /* dmu_sync EALREADY write */
166166
#define TX_SETSAXATTR 21 /* Set sa xattrs on file */
167-
#define TX_MAX_TYPE 22 /* Max transaction type */
167+
#define TX_RENAME_EXCHANGE 22 /* Atomic swap via renameat2 */
168+
#define TX_RENAME_WHITEOUT 23 /* Atomic whiteout via renameat2 */
169+
#define TX_MAX_TYPE 24 /* Max transaction type */
168170

169171
/*
170172
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
@@ -317,6 +319,19 @@ typedef struct {
317319
/* 2 strings: names of source and destination follow this */
318320
} lr_rename_t;
319321

322+
/*
 * Rename log record extended with whiteout details: it embeds the common
 * lr_rename_t and appends the whiteout object's id, mode, uid, gid,
 * generation, creation time and rdev.
 * NOTE(review): presumably the record written for TX_RENAME_WHITEOUT --
 * confirm against the code that logs and replays it.
 */
typedef struct {
323+
lr_rename_t lr_rename; /* common rename portion */
324+
/* members related to the whiteout file (based on lr_create_t) */
325+
uint64_t lr_wfoid; /* obj id of the new whiteout file */
326+
uint64_t lr_wmode; /* mode of object */
327+
uint64_t lr_wuid; /* uid of whiteout */
328+
uint64_t lr_wgid; /* gid of whiteout */
329+
uint64_t lr_wgen; /* generation (txg of creation) */
330+
uint64_t lr_wcrtime[2]; /* creation time */
331+
uint64_t lr_wrdev; /* always makedev(0, 0) */
332+
/* 2 strings: names of source and destination follow this */
333+
} lr_rename_whiteout_t;
334+
320335
typedef struct {
321336
lr_t lr_common; /* common portion of log record */
322337
uint64_t lr_foid; /* file object to write */

module/os/freebsd/zfs/zfs_vnops_os.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3420,14 +3420,17 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
34203420

34213421
int
34223422
zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
3423-
cred_t *cr, int flags, zuserns_t *mnt_ns)
3423+
cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zuserns_t *mnt_ns)
34243424
{
34253425
struct componentname scn, tcn;
34263426
vnode_t *sdvp, *tdvp;
34273427
vnode_t *svp, *tvp;
34283428
int error;
34293429
svp = tvp = NULL;
34303430

3431+
if (rflags != 0 || wo_vap != NULL)
3432+
return (SET_ERROR(EINVAL));
3433+
34313434
sdvp = ZTOV(sdzp);
34323435
tdvp = ZTOV(tdzp);
34333436
error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);

module/os/linux/zfs/zfs_dir.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,8 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
10351035
}
10361036

10371037
/* The only error is !zfs_dirempty() and we checked earlier. */
1038-
ASSERT3U(zfs_drop_nlink_locked(zp, tx, &unlinked), ==, 0);
1038+
error = zfs_drop_nlink_locked(zp, tx, &unlinked);
1039+
ASSERT3U(error, ==, 0);
10391040
mutex_exit(&zp->z_lock);
10401041
} else {
10411042
error = zfs_dropname(dl, zp, dzp, tx, flag);

0 commit comments

Comments
 (0)