Skip to content

Commit 5fb1a0b

Browse files
robn and FedorUporovVstack
authored and committed
Linux/vnops: implement STATX_DIOALIGN
This statx(2) mask returns the alignment restrictions for O_DIRECT access on the given file. We're expected to return both memory and IO alignment. For memory, it's always PAGE_SIZE. For IO, we return the current block size for the file, which is the required alignment for an arbitrary block, and for the first block we'll fall back to the ARC when necessary, so it should always work. Sponsored-by: https://despairlabs.com/sponsor/ Reviewed-by: Alexander Motin <[email protected]> Signed-off-by: Rob Norris <[email protected]> Closes openzfs#16972
1 parent de75705 commit 5fb1a0b

File tree

11 files changed

+548
-2
lines changed

11 files changed

+548
-2
lines changed

include/sys/zfs_vnops.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23+
* Copyright (c) 2025, Rob Norris <[email protected]>
2324
*/
2425

2526
#ifndef _SYS_FS_ZFS_VNOPS_H
@@ -42,6 +43,8 @@ extern int zfs_clone_range_replay(znode_t *, uint64_t, uint64_t, uint64_t,
4243
extern int zfs_getsecattr(znode_t *, vsecattr_t *, int, cred_t *);
4344
extern int zfs_setsecattr(znode_t *, vsecattr_t *, int, cred_t *);
4445

46+
extern int zfs_get_direct_alignment(znode_t *, uint64_t *);
47+
4548
extern int mappedread(znode_t *, int, zfs_uio_t *);
4649
extern int mappedread_sf(znode_t *, int, zfs_uio_t *);
4750
extern void update_pages(znode_t *, int64_t, int, objset_t *);

module/os/linux/zfs/zpl_inode.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
/*
2222
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
2323
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
24+
* Copyright (c) 2025, Rob Norris <[email protected]>
2425
*/
2526

2627

@@ -30,6 +31,7 @@
3031
#include <sys/zfs_vnops.h>
3132
#include <sys/zfs_znode.h>
3233
#include <sys/dmu_objset.h>
34+
#include <sys/spa_impl.h>
3335
#include <sys/vfs.h>
3436
#include <sys/zpl.h>
3537
#include <sys/file.h>
@@ -490,6 +492,17 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
490492
}
491493
#endif
492494

495+
#ifdef STATX_DIOALIGN
496+
if (request_mask & STATX_DIOALIGN) {
497+
uint64_t align;
498+
if (zfs_get_direct_alignment(zp, &align) == 0) {
499+
stat->dio_mem_align = PAGE_SIZE;
500+
stat->dio_offset_align = align;
501+
stat->result_mask |= STATX_DIOALIGN;
502+
}
503+
}
504+
#endif
505+
493506
#ifdef STATX_ATTR_IMMUTABLE
494507
if (zp->z_pflags & ZFS_IMMUTABLE)
495508
stat->attributes |= STATX_ATTR_IMMUTABLE;

module/zfs/zfs_vnops.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
2626
* Copyright 2017 Nexenta Systems, Inc.
2727
* Copyright (c) 2021, 2022 by Pawel Jakub Dawidek
28+
* Copyright (c) 2025, Rob Norris <[email protected]>
2829
*/
2930

3031
/* Portions Copyright 2007 Jeremy Teo */
@@ -1083,6 +1084,44 @@ zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
10831084
return (error);
10841085
}
10851086

1087+
/*
1088+
* Get the optimal alignment to ensure direct IO can be performed without
1089+
* incurring any RMW penalty on write (never less than PAGE_SIZE). If direct
1090+
* IO is not enabled for this file, returns EOPNOTSUPP; *alignp is not set.
1091+
*/
1092+
int
1093+
zfs_get_direct_alignment(znode_t *zp, uint64_t *alignp)
1094+
{
1095+
zfsvfs_t *zfsvfs = ZTOZSB(zp);
1096+
1097+
if (!zfs_dio_enabled || zfsvfs->z_os->os_direct == ZFS_DIRECT_DISABLED)
1098+
return (SET_ERROR(EOPNOTSUPP));
1099+
1100+
/*
1101+
* If the file has multiple blocks, then its block size is fixed
1102+
* forever, and so is the ideal alignment.
1103+
*
1104+
* If however it only has a single block, then we want to return the
1105+
* max block size it could possibly grow to (ie, the dataset
1106+
* recordsize). We do this so that a program querying alignment
1107+
* immediately after the file is created gets a value that won't change
1108+
* once the file has grown into the second block and beyond.
1109+
*
1110+
* Because we don't have a count of blocks easily available here, we
1111+
* check if the apparent file size is no larger than its current block
1112+
* size (meaning, the file hasn't yet grown past its current block
1113+
* size) and then, check if the block size is smaller than the dataset
1114+
* maximum (meaning, if the file grew past the current block size, the
1115+
* block size would be increased).
1116+
*/
1117+
if (zp->z_size <= zp->z_blksz && zp->z_blksz < zfsvfs->z_max_blksz)
1118+
*alignp = MAX(zfsvfs->z_max_blksz, PAGE_SIZE);
1119+
else
1120+
*alignp = MAX(zp->z_blksz, PAGE_SIZE);
1121+
1122+
return (0);
1123+
}
1124+
10861125
#ifdef ZFS_DEBUG
10871126
static int zil_fault_io = 0;
10881127
#endif

tests/runfiles/common.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -981,7 +981,7 @@ tests = ['sparse_001_pos']
981981
tags = ['functional', 'sparse']
982982

983983
[tests/functional/stat]
984-
tests = ['stat_001_pos']
984+
tests = ['stat_001_pos', 'statx_dioalign']
985985
tags = ['functional', 'stat']
986986

987987
[tests/functional/suid]

tests/test-runner/bin/zts-report.py.in

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ idmap_reason = 'Idmapped mount needs kernel 5.12+'
128128
#
129129
cfr_reason = 'Kernel copy_file_range support required'
130130

131+
#
132+
# Some statx fields are not supported by all kernels
133+
#
134+
statx_reason = 'Needed statx(2) field not supported on this kernel'
135+
131136
if sys.platform.startswith('freebsd'):
132137
cfr_cross_reason = 'copy_file_range(2) cross-filesystem needs FreeBSD 14+'
133138
else:
@@ -293,7 +298,8 @@ if sys.platform.startswith('freebsd'):
293298
'block_cloning/block_cloning_cross_enc_dataset':
294299
['SKIP', cfr_cross_reason],
295300
'block_cloning/block_cloning_copyfilerange_cross_dataset':
296-
['SKIP', cfr_cross_reason]
301+
['SKIP', cfr_cross_reason],
302+
'stat/statx_dioalign': ['SKIP', 'na_reason'],
297303
})
298304
elif sys.platform.startswith('linux'):
299305
maybe.update({
@@ -361,6 +367,7 @@ elif sys.platform.startswith('linux'):
361367
'mmp/mmp_active_import': ['FAIL', known_reason],
362368
'mmp/mmp_exported_import': ['FAIL', known_reason],
363369
'mmp/mmp_inactive_import': ['FAIL', known_reason],
370+
'stat/statx_dioalign': ['SKIP', 'statx_reason'],
364371
})
365372

366373

tests/zfs-tests/cmd/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
/rename_dir
3737
/rm_lnkcnt_zero_file
3838
/send_doall
39+
/statx
3940
/stride_dd
4041
/threadsappend
4142
/user_ns_exec
@@ -54,3 +55,4 @@
5455
/skein_test
5556
/sha2_test
5657
/idmap_util
58+
/statx

tests/zfs-tests/cmd/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ if BUILD_LINUX
126126
scripts_zfs_tests_bin_PROGRAMS += %D%/getversion
127127
scripts_zfs_tests_bin_PROGRAMS += %D%/user_ns_exec
128128
scripts_zfs_tests_bin_PROGRAMS += %D%/renameat2
129+
scripts_zfs_tests_bin_PROGRAMS += %D%/statx
129130
scripts_zfs_tests_bin_PROGRAMS += %D%/xattrtest
130131
scripts_zfs_tests_bin_PROGRAMS += %D%/zed_fd_spill-zedlet
131132
scripts_zfs_tests_bin_PROGRAMS += %D%/idmap_util

0 commit comments

Comments
 (0)