Skip to content

Commit a822e9a

Browse files
robnbehlendorf
authored andcommitted
zpl_sync_fs: work around kernels that ignore sync_fs errors
If the kernel will honour our error returns, use them. If not, fool it by setting a writeback error on the superblock, if available. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Paul Dagnelie <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Alexander Motin <[email protected]> Signed-off-by: Rob Norris <[email protected]> Closes #17420
1 parent 3b0af7f commit a822e9a

File tree

3 files changed

+85
-1
lines changed

3 files changed

+85
-1
lines changed

config/kernel-sb-wb-err.m4

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# dnl
2+
# dnl 5.8 (735e4ae5ba28) introduced a superblock scoped errseq_t to use to
3+
# dnl record writeback errors for syncfs() to return. Up until 5.17, when
4+
# dnl sync_fs errors were returned directly, this is the only way for us to
5+
# dnl report an error from syncfs().
6+
# dnl
7+
AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR], [
8+
ZFS_LINUX_TEST_SRC([super_block_s_wb_err], [
9+
#include <linux/fs.h>
10+
11+
static const struct super_block
12+
sb __attribute__ ((unused)) = {
13+
.s_wb_err = 0,
14+
};
15+
],[])
16+
])
17+
18+
AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR], [
19+
AC_MSG_CHECKING([whether super_block has s_wb_err])
20+
ZFS_LINUX_TEST_RESULT([super_block_s_wb_err], [
21+
AC_MSG_RESULT(yes)
22+
AC_DEFINE(HAVE_SUPER_BLOCK_S_WB_ERR, 1,
23+
[have super_block s_wb_err])
24+
],[
25+
AC_MSG_RESULT(no)
26+
])
27+
])

config/kernel.m4

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
131131
ZFS_AC_KERNEL_SRC_FILE
132132
ZFS_AC_KERNEL_SRC_PIN_USER_PAGES
133133
ZFS_AC_KERNEL_SRC_TIMER
134+
ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR
134135
case "$host_cpu" in
135136
powerpc*)
136137
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -246,6 +247,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
246247
ZFS_AC_KERNEL_FILE
247248
ZFS_AC_KERNEL_PIN_USER_PAGES
248249
ZFS_AC_KERNEL_TIMER
250+
ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR
249251
case "$host_cpu" in
250252
powerpc*)
251253
ZFS_AC_KERNEL_CPU_HAS_FEATURE

module/os/linux/zfs/zpl_super.c

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <sys/zfs_ctldir.h>
3232
#include <sys/zpl.h>
3333
#include <linux/iversion.h>
34+
#include <linux/version.h>
3435

3536

3637
static struct inode *
@@ -105,6 +106,42 @@ zpl_put_super(struct super_block *sb)
105106
ASSERT3S(error, <=, 0);
106107
}
107108

109+
/*
110+
* zfs_sync() is the underlying implementation for the sync(2) and syncfs(2)
111+
* syscalls, via sb->s_op->sync_fs().
112+
*
113+
* Before kernel 5.17 (torvalds/linux@5679897eb104), syncfs() ->
114+
* sync_filesystem() would ignore the return from sync_fs(), instead only
115+
* considing the error from syncing the underlying block device (sb->s_dev).
116+
* Since OpenZFS doesn't _have_ an underlying block device, there's no way for
117+
* us to report a sync directly.
118+
*
119+
* However, in 5.8 (torvalds/linux@735e4ae5ba28) the superblock gained an extra
120+
* error store `s_wb_err`, to carry errors seen on page writeback since the
121+
* last call to syncfs(). If sync_filesystem() does not return an error, any
122+
* existing writeback error on the superblock will be used instead (and cleared
123+
* either way). We don't use this (page writeback is a different thing for us),
124+
* so for 5.8-5.17 we can use that instead to get syncfs() to return the error.
125+
*
126+
* Before 5.8, we have no other good options - no matter what happens, the
127+
* userspace program will be told the call has succeeded, and so we must make
128+
* it so, Therefore, when we are asked to wait for sync to complete (wait ==
129+
* 1), if zfs_sync() has returned an error we have no choice but to block,
130+
* regardless of the reason.
131+
*
132+
* The 5.17 change was backported to the 5.10, 5.15 and 5.16 series, and likely
133+
* to some vendor kernels. Meanwhile, s_wb_err is still in use in 6.15 (the
134+
* mainline Linux series at time of writing), and has likely been backported to
135+
* vendor kernels before 5.8. We don't really want to use a workaround when we
136+
* don't have to, but we can't really detect whether or not sync_filesystem()
137+
* will return our errors (without a difficult runtime test anyway). So, we use
138+
* a static version check: any kernel reporting its version as 5.17+ will use a
139+
* direct error return, otherwise, we'll either use s_wb_err if it was detected
140+
* at configure (5.8-5.16 + vendor backports). If it's unavailable, we will
141+
* block to ensure the correct semantics.
142+
*
143+
* See https://github.com/openzfs/zfs/issues/17416 for further discussion.
144+
*/
108145
static int
109146
zpl_sync_fs(struct super_block *sb, int wait)
110147
{
@@ -115,10 +152,28 @@ zpl_sync_fs(struct super_block *sb, int wait)
115152
crhold(cr);
116153
cookie = spl_fstrans_mark();
117154
error = -zfs_sync(sb, wait, cr);
155+
156+
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0)
157+
#ifdef HAVE_SUPER_BLOCK_S_WB_ERR
158+
if (error && wait)
159+
errseq_set(&sb->s_wb_err, error);
160+
#else
161+
if (error && wait) {
162+
zfsvfs_t *zfsvfs = sb->s_fs_info;
163+
ASSERT3P(zfsvfs, !=, NULL);
164+
if (zfs_enter(zfsvfs, FTAG) == 0) {
165+
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
166+
zfs_exit(zfsvfs, FTAG);
167+
error = 0;
168+
}
169+
}
170+
#endif
171+
#endif /* < 5.17.0 */
172+
118173
spl_fstrans_unmark(cookie);
119174
crfree(cr);
120-
ASSERT3S(error, <=, 0);
121175

176+
ASSERT3S(error, <=, 0);
122177
return (error);
123178
}
124179

0 commit comments

Comments
 (0)