Skip to content

Commit 5f8e752

Browse files
author
Umer Saleem
committed
Fix inconsistent mount options for ZFS root
While mounting the ZFS root during boot from initrd on Linux distributions, the mount applet from busybox is effectively used, which executes the mount system call directly. This skips the ZFS helper mount.zfs, which checks and enables the mount options specified in the dataset properties. As a result, datasets mounted during boot from initrd do not have the correct mount options as specified in the ZFS dataset properties. There has been an attempt to use mount.zfs in the zfs initrd script, which is responsible for mounting the ZFS root filesystem (PR#13305). This was later reverted (PR#14908) after discovering that using mount.zfs breaks mounting of snapshots on root (/); other child datasets of root have the same issue (Issue#9461). This happens because switching from busybox mount to mount.zfs correctly parses the mount options but also adds 'mntpoint=/root' to the mount options, which is then prepended to the snapshot mountpoint in '.zfs/snapshot'. '/root' is the directory on Debian with initramfs-tools where the root filesystem is mounted before pivot_root. When the Linux runtime is reached, trying to access the snapshots on root results in automounting the snapshot on '/root/.zfs/*', which fails. This commit attempts to fix the automounting of snapshots on root while using mount.zfs in the initrd script. Since the mountpoint of a dataset is stored in the vfs_mntpoint field, we can check whether the current mountpoint of the dataset and vfs_mntpoint are the same. If they are not the same, the vfs_mntpoint field is reset to the current mountpoint. This fixes the mountpoints of the root dataset and its children in their respective vfs_mntpoint fields when we try to access the snapshots of the root dataset or its children. With the correct mountpoint for the root dataset and its children stored in vfs_mntpoint, all snapshots of the root dataset are mounted correctly and become accessible. This fix comes into play only if the current process that is trying to access the snapshots is not in a chroot context.
The Linux kernel API used to convert a struct path into a character string (d_path) returns the complete path for the given struct path. It works in a chroot environment as well and returns the correct path from the original filesystem root. However, d_path fails to return the complete path if any directory from the original root filesystem is mounted with the --bind or --rbind flag inside the chroot environment. In this case, if we try to access the snapshot from outside the chroot environment, d_path returns the path correctly, i.e. it returns the correct path to the directory that is mounted with the --bind flag. Inside the chroot environment, however, it only returns the path inside the chroot. For now, to my understanding, there is no better way to obtain the complete path as a string that also handles the case where directories from the root filesystem are mounted with --bind or --rbind on another path which the user will later chroot into. So this fix is enabled only if the current process trying to access the snapshot is not in a chroot context. With the snapshot issue fixed for the root filesystem, using mount.zfs in the ZFS initrd script mounts the datasets with the correct mount options. Signed-off-by: Umer Saleem <[email protected]>
1 parent e0bf43d commit 5f8e752

File tree

4 files changed

+106
-10
lines changed

4 files changed

+106
-10
lines changed

contrib/initramfs/scripts/zfs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ mount_fs()
344344

345345
# Need the _original_ datasets mountpoint!
346346
mountpoint=$(get_fs_value "$fs" mountpoint)
347-
ZFS_CMD="mount -o zfsutil -t zfs"
347+
ZFS_CMD="mount.zfs -o zfsutil"
348348
if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
349349
# Can't use the mountpoint property. Might be one of our
350350
# clones. Check the 'org.zol:mountpoint' property set in
@@ -359,9 +359,8 @@ mount_fs()
359359
# isn't the root fs.
360360
return 0
361361
fi
362-
# Don't use mount.zfs -o zfsutils for legacy mountpoint
363362
if [ "$mountpoint" = "legacy" ]; then
364-
ZFS_CMD="mount -t zfs"
363+
ZFS_CMD="mount.zfs"
365364
fi
366365
# Last hail-mary: Hope 'rootmnt' is set!
367366
mountpoint=""

include/os/linux/zfs/sys/zfs_vfsops_os.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ typedef struct vfs {
6969
boolean_t vfs_do_relatime;
7070
boolean_t vfs_nbmand;
7171
boolean_t vfs_do_nbmand;
72+
kmutex_t vfs_mntpt_lock;
7273
} vfs_t;
7374

7475
typedef struct zfs_mnt {

module/os/linux/zfs/zfs_ctldir.c

Lines changed: 99 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -767,9 +767,6 @@ zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
767767
uint64_t id, pos = 0;
768768
int error = 0;
769769

770-
if (zfsvfs->z_vfs->vfs_mntpoint == NULL)
771-
return (SET_ERROR(ENOENT));
772-
773770
cookie = spl_fstrans_mark();
774771
snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
775772

@@ -786,8 +783,14 @@ zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
786783
break;
787784
}
788785

789-
snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
790-
zfsvfs->z_vfs->vfs_mntpoint, snapname);
786+
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
787+
if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
788+
snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
789+
zfsvfs->z_vfs->vfs_mntpoint, snapname);
790+
} else
791+
error = SET_ERROR(ENOENT);
792+
mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
793+
791794
out:
792795
kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
793796
spl_fstrans_unmark(cookie);
@@ -1049,6 +1052,61 @@ exportfs_flush(void)
10491052
(void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
10501053
}
10511054

1055+
/*
1056+
* Returns the path in char format for given struct path. Uses
1057+
* d_path exported by kernel to convert struct path to char
1058+
* format. Returns the correct path for mountpoints and chroot
1059+
* environments.
1060+
*
1061+
* If chroot environment has directories that are mounted with
1062+
* --bind or --rbind flag, d_path returns the complete path inside
1063+
* chroot environment but does not return the absolute path, i.e.
1064+
* the path to chroot environment is missing.
1065+
*/
1066+
static int
1067+
get_root_path(struct path *path, char *buff, int len)
1068+
{
1069+
char *path_buffer, *path_ptr;
1070+
int error = 0;
1071+
1072+
path_get(path);
1073+
path_buffer = kmem_zalloc(len, KM_SLEEP);
1074+
path_ptr = d_path(path, path_buffer, len);
1075+
if (IS_ERR(path_ptr))
1076+
error = SET_ERROR(-PTR_ERR(path_ptr));
1077+
else
1078+
strcpy(buff, path_ptr);
1079+
1080+
kmem_free(path_buffer, len);
1081+
path_put(path);
1082+
return (error);
1083+
}
1084+
1085+
/*
1086+
* Returns if the current process root is chrooted or not. Linux
1087+
* kernel exposes the task_struct for current process and init.
1088+
* Since init process root points to actual root filesystem when
1089+
* Linux runtime is reached, we can compare the current process
1090+
* root with init process root to determine if root of the current
1091+
* process is different from init, which can reliably determine if
1092+
* current process is in chroot context or not.
1093+
*/
1094+
static int
1095+
is_current_chrooted(void)
1096+
{
1097+
struct task_struct *curr = current, *global = &init_task;
1098+
struct path cr_root, gl_root;
1099+
1100+
get_fs_root(global->fs, &gl_root);
1101+
get_fs_root(curr->fs, &cr_root);
1102+
int chrooted = !path_equal(&cr_root, &gl_root);
1103+
1104+
path_put(&cr_root);
1105+
path_put(&gl_root);
1106+
1107+
return (chrooted);
1108+
}
1109+
10521110
/*
10531111
* Attempt to unmount a snapshot by making a call to user space.
10541112
* There is no assurance that this can or will succeed, is just a
@@ -1123,14 +1181,50 @@ zfsctl_snapshot_mount(struct path *path, int flags)
11231181
if (error)
11241182
goto error;
11251183

1184+
if (is_current_chrooted() == 0) {
1185+
/*
1186+
* Current process is not in chroot context
1187+
*/
1188+
1189+
char *m = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1190+
struct path mnt_path;
1191+
mnt_path.mnt = path->mnt;
1192+
mnt_path.dentry = path->mnt->mnt_root;
1193+
1194+
/*
1195+
* Get path to current mountpoint
1196+
*/
1197+
error = get_root_path(&mnt_path, m, MAXPATHLEN);
1198+
if (error != 0) {
1199+
kmem_free(m, MAXPATHLEN);
1200+
goto error;
1201+
}
1202+
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
1203+
if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
1204+
/*
1205+
* If current mnountpoint and vfs_mntpoint are not same,
1206+
* store current mountpoint in vfs_mntpoint.
1207+
*/
1208+
if (strcmp(zfsvfs->z_vfs->vfs_mntpoint, m) != 0) {
1209+
kmem_strfree(zfsvfs->z_vfs->vfs_mntpoint);
1210+
zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
1211+
}
1212+
} else
1213+
zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
1214+
mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
1215+
kmem_free(m, MAXPATHLEN);
1216+
}
1217+
11261218
/*
11271219
* Construct a mount point path from sb of the ctldir inode and dirent
11281220
* name, instead of from d_path(), so that chroot'd process doesn't fail
11291221
* on mount.zfs(8).
11301222
*/
1223+
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
11311224
snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s",
11321225
zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "",
11331226
dname(dentry));
1227+
mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
11341228

11351229
snprintf(options, 7, "%s",
11361230
zfs_snapshot_no_setuid ? "nosuid" : "suid");

module/os/linux/zfs/zfs_vfsops.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ zfsvfs_vfs_free(vfs_t *vfsp)
115115
if (vfsp != NULL) {
116116
if (vfsp->vfs_mntpoint != NULL)
117117
kmem_strfree(vfsp->vfs_mntpoint);
118-
118+
mutex_destroy(&vfsp->vfs_mntpt_lock);
119119
kmem_free(vfsp, sizeof (vfs_t));
120120
}
121121
}
@@ -197,10 +197,11 @@ zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)
197197
vfsp->vfs_do_nbmand = B_TRUE;
198198
break;
199199
case TOKEN_MNTPOINT:
200+
if (vfsp->vfs_mntpoint != NULL)
201+
kmem_strfree(vfsp->vfs_mntpoint);
200202
vfsp->vfs_mntpoint = match_strdup(&args[0]);
201203
if (vfsp->vfs_mntpoint == NULL)
202204
return (SET_ERROR(ENOMEM));
203-
204205
break;
205206
default:
206207
break;
@@ -219,6 +220,7 @@ zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)
219220
int error;
220221

221222
tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
223+
mutex_init(&tmp_vfsp->vfs_mntpt_lock, NULL, MUTEX_DEFAULT, NULL);
222224

223225
if (mntopts != NULL) {
224226
substring_t args[MAX_OPT_ARGS];

0 commit comments

Comments
 (0)