Skip to content

Commit 93766fb

Browse files
dhowells authored and Al Viro committed
vfs: syscall: Add fsmount() to create a mount for a superblock
Provide a system call by which a filesystem opened with fsopen() and configured by a series of fsconfig() calls can have a detached mount object created for it. This mount object can then be attached to the VFS mount hierarchy using move_mount() by passing the returned file descriptor as the from directory fd.

The system call looks like:

    int mfd = fsmount(int fsfd, unsigned int flags, unsigned int attr_flags);

where fsfd is the file descriptor returned by fsopen(). flags can be 0 or FSMOUNT_CLOEXEC. attr_flags is a bitwise-OR of the following flags:

    MOUNT_ATTR_RDONLY          Mount read-only
    MOUNT_ATTR_NOSUID          Ignore suid and sgid bits
    MOUNT_ATTR_NODEV           Disallow access to device special files
    MOUNT_ATTR_NOEXEC          Disallow program execution
    MOUNT_ATTR__ATIME          Setting on how atime should be updated
      MOUNT_ATTR_RELATIME      - Update atime relative to mtime/ctime
      MOUNT_ATTR_NOATIME       - Do not update access times
      MOUNT_ATTR_STRICTATIME   - Always perform atime updates
    MOUNT_ATTR_NODIRATIME      Do not update directory access times

In the event that fsmount() fails, it may be possible to get an error message by calling read() on fsfd. If no message is available, ENODATA will be reported.

Signed-off-by: David Howells <[email protected]>
cc: [email protected]
Signed-off-by: Al Viro <[email protected]>
1 parent ecdab15 commit 93766fb

File tree

5 files changed

+165
-4
lines changed

5 files changed

+165
-4
lines changed

arch/x86/entry/syscalls/syscall_32.tbl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,8 @@
402402
388 i386 move_mount sys_move_mount __ia32_sys_move_mount
403403
389 i386 fsopen sys_fsopen __ia32_sys_fsopen
404404
390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
405-
# don't use numbers 391 through 392, add new calls at the end
405+
391 i386 fsmount sys_fsmount __ia32_sys_fsmount
406+
# don't use number 392, add new calls at the end
406407
393 i386 semget sys_semget __ia32_sys_semget
407408
394 i386 semctl sys_semctl __ia32_compat_sys_semctl
408409
395 i386 shmget sys_shmget __ia32_sys_shmget

arch/x86/entry/syscalls/syscall_64.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@
347347
336 common move_mount __x64_sys_move_mount
348348
337 common fsopen __x64_sys_fsopen
349349
338 common fsconfig __x64_sys_fsconfig
350+
339 common fsmount __x64_sys_fsmount
350351
# don't use numbers 387 through 423, add new calls after the last
351352
# 'common' entry
352353
424 common pidfd_send_signal __x64_sys_pidfd_send_signal

fs/namespace.c

Lines changed: 143 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3334,9 +3334,149 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
33343334
}
33353335

33363336
/*
3337-
* Move a mount from one place to another.
3338-
* In combination with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be
3339-
* used to copy a mount subtree.
3337+
* Create a kernel mount representation for a new, prepared superblock
3338+
* (specified by fs_fd) and attach to an open_tree-like file descriptor.
3339+
*/
3340+
SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
3341+
unsigned int, attr_flags)
3342+
{
3343+
struct mnt_namespace *ns;
3344+
struct fs_context *fc;
3345+
struct file *file;
3346+
struct path newmount;
3347+
struct mount *mnt;
3348+
struct fd f;
3349+
unsigned int mnt_flags = 0;
3350+
long ret;
3351+
3352+
if (!may_mount())
3353+
return -EPERM;
3354+
3355+
if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
3356+
return -EINVAL;
3357+
3358+
if (attr_flags & ~(MOUNT_ATTR_RDONLY |
3359+
MOUNT_ATTR_NOSUID |
3360+
MOUNT_ATTR_NODEV |
3361+
MOUNT_ATTR_NOEXEC |
3362+
MOUNT_ATTR__ATIME |
3363+
MOUNT_ATTR_NODIRATIME))
3364+
return -EINVAL;
3365+
3366+
if (attr_flags & MOUNT_ATTR_RDONLY)
3367+
mnt_flags |= MNT_READONLY;
3368+
if (attr_flags & MOUNT_ATTR_NOSUID)
3369+
mnt_flags |= MNT_NOSUID;
3370+
if (attr_flags & MOUNT_ATTR_NODEV)
3371+
mnt_flags |= MNT_NODEV;
3372+
if (attr_flags & MOUNT_ATTR_NOEXEC)
3373+
mnt_flags |= MNT_NOEXEC;
3374+
if (attr_flags & MOUNT_ATTR_NODIRATIME)
3375+
mnt_flags |= MNT_NODIRATIME;
3376+
3377+
switch (attr_flags & MOUNT_ATTR__ATIME) {
3378+
case MOUNT_ATTR_STRICTATIME:
3379+
break;
3380+
case MOUNT_ATTR_NOATIME:
3381+
mnt_flags |= MNT_NOATIME;
3382+
break;
3383+
case MOUNT_ATTR_RELATIME:
3384+
mnt_flags |= MNT_RELATIME;
3385+
break;
3386+
default:
3387+
return -EINVAL;
3388+
}
3389+
3390+
f = fdget(fs_fd);
3391+
if (!f.file)
3392+
return -EBADF;
3393+
3394+
ret = -EINVAL;
3395+
if (f.file->f_op != &fscontext_fops)
3396+
goto err_fsfd;
3397+
3398+
fc = f.file->private_data;
3399+
3400+
ret = mutex_lock_interruptible(&fc->uapi_mutex);
3401+
if (ret < 0)
3402+
goto err_fsfd;
3403+
3404+
/* There must be a valid superblock or we can't mount it */
3405+
ret = -EINVAL;
3406+
if (!fc->root)
3407+
goto err_unlock;
3408+
3409+
ret = -EPERM;
3410+
if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
3411+
pr_warn("VFS: Mount too revealing\n");
3412+
goto err_unlock;
3413+
}
3414+
3415+
ret = -EBUSY;
3416+
if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
3417+
goto err_unlock;
3418+
3419+
ret = -EPERM;
3420+
if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
3421+
goto err_unlock;
3422+
3423+
newmount.mnt = vfs_create_mount(fc);
3424+
if (IS_ERR(newmount.mnt)) {
3425+
ret = PTR_ERR(newmount.mnt);
3426+
goto err_unlock;
3427+
}
3428+
newmount.dentry = dget(fc->root);
3429+
newmount.mnt->mnt_flags = mnt_flags;
3430+
3431+
/* We've done the mount bit - now move the file context into more or
3432+
* less the same state as if we'd done an fspick(). We don't want to
3433+
* do any memory allocation or anything like that at this point as we
3434+
* don't want to have to handle any errors incurred.
3435+
*/
3436+
vfs_clean_context(fc);
3437+
3438+
ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
3439+
if (IS_ERR(ns)) {
3440+
ret = PTR_ERR(ns);
3441+
goto err_path;
3442+
}
3443+
mnt = real_mount(newmount.mnt);
3444+
mnt->mnt_ns = ns;
3445+
ns->root = mnt;
3446+
ns->mounts = 1;
3447+
list_add(&mnt->mnt_list, &ns->list);
3448+
3449+
/* Attach to an apparent O_PATH fd with a note that we need to unmount
3450+
* it, not just simply put it.
3451+
*/
3452+
file = dentry_open(&newmount, O_PATH, fc->cred);
3453+
if (IS_ERR(file)) {
3454+
dissolve_on_fput(newmount.mnt);
3455+
ret = PTR_ERR(file);
3456+
goto err_path;
3457+
}
3458+
file->f_mode |= FMODE_NEED_UNMOUNT;
3459+
3460+
ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
3461+
if (ret >= 0)
3462+
fd_install(ret, file);
3463+
else
3464+
fput(file);
3465+
3466+
err_path:
3467+
path_put(&newmount);
3468+
err_unlock:
3469+
mutex_unlock(&fc->uapi_mutex);
3470+
err_fsfd:
3471+
fdput(f);
3472+
return ret;
3473+
}
3474+
3475+
/*
3476+
* Move a mount from one place to another. In combination with
3477+
* fsopen()/fsmount() this is used to install a new mount and in combination
3478+
* with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
3479+
* a mount subtree.
33403480
*
33413481
* Note the flags value is a combination of MOVE_MOUNT_* flags.
33423482
*/

include/linux/syscalls.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,7 @@ asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path,
992992
asmlinkage long sys_fsopen(const char __user *fs_name, unsigned int flags);
993993
asmlinkage long sys_fsconfig(int fs_fd, unsigned int cmd, const char __user *key,
994994
const void __user *value, int aux);
995+
/* flags: 0 or FSMOUNT_CLOEXEC; attr_flags: mask of MOUNT_ATTR_* values. */
asmlinkage long sys_fsmount(int fs_fd, unsigned int flags, unsigned int attr_flags);
995996
asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
996997
siginfo_t __user *info,
997998
unsigned int flags);

include/uapi/linux/mount.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,22 @@ enum fsconfig_command {
9191
FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
9292
};
9393

94+
/*
95+
* fsmount() flags.
96+
*/
97+
#define FSMOUNT_CLOEXEC 0x00000001
98+
99+
/*
100+
* Mount attributes.
101+
*/
102+
#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */
103+
#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */
104+
#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */
105+
#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */
106+
#define MOUNT_ATTR__ATIME 0x00000070 /* Mask of the multi-valued field selecting how atime is updated; holds one of the three values below */
107+
#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime (the field's zero value, so selected when no atime bit is set) */
108+
#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times */
109+
#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
110+
#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times (separate bit, outside the MOUNT_ATTR__ATIME field) */
111+
94112
#endif /* _UAPI_LINUX_MOUNT_H */

0 commit comments

Comments
 (0)