Skip to content

Commit e1c8744

Browse files
committed
Linux/vnops: implement STATX_DIOALIGN
This statx(2) mask returns the alignment restrictions for O_DIRECT access on the given file. We're expected to return both memory and IO alignment. For memory, it's always PAGE_SIZE. For IO, we return the current block size for the file, which is the required alignment for an arbitrary block, and for the first block we'll fall back to the ARC when necessary, so it should always work. Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris <[email protected]>
1 parent 049ca7a commit e1c8744

File tree

3 files changed

+55
-0
lines changed

3 files changed

+55
-0
lines changed

include/sys/zfs_vnops.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23+
* Copyright (c) 2025, Rob Norris <[email protected]>
2324
*/
2425

2526
#ifndef _SYS_FS_ZFS_VNOPS_H
@@ -42,6 +43,8 @@ extern int zfs_clone_range_replay(znode_t *, uint64_t, uint64_t, uint64_t,
4243
extern int zfs_getsecattr(znode_t *, vsecattr_t *, int, cred_t *);
4344
extern int zfs_setsecattr(znode_t *, vsecattr_t *, int, cred_t *);
4445

46+
extern int zfs_get_direct_alignment(znode_t *, uint64_t *);
47+
4548
extern int mappedread(znode_t *, int, zfs_uio_t *);
4649
extern int mappedread_sf(znode_t *, int, zfs_uio_t *);
4750
extern void update_pages(znode_t *, int64_t, int, objset_t *);

module/os/linux/zfs/zpl_inode.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
/*
2222
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
2323
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
24+
* Copyright (c) 2025, Rob Norris <[email protected]>
2425
*/
2526

2627

@@ -30,6 +31,7 @@
3031
#include <sys/zfs_vnops.h>
3132
#include <sys/zfs_znode.h>
3233
#include <sys/dmu_objset.h>
34+
#include <sys/spa_impl.h>
3335
#include <sys/vfs.h>
3436
#include <sys/zpl.h>
3537
#include <sys/file.h>
@@ -490,6 +492,17 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
490492
}
491493
#endif
492494

495+
#ifdef STATX_DIOALIGN
496+
if (request_mask & STATX_DIOALIGN) {
497+
uint64_t align;
498+
if (zfs_get_direct_alignment(zp, &align) == 0) {
499+
stat->dio_mem_align = PAGE_SIZE;
500+
stat->dio_offset_align = align;
501+
stat->result_mask |= STATX_DIOALIGN;
502+
}
503+
}
504+
#endif
505+
493506
#ifdef STATX_ATTR_IMMUTABLE
494507
if (zp->z_pflags & ZFS_IMMUTABLE)
495508
stat->attributes |= STATX_ATTR_IMMUTABLE;

module/zfs/zfs_vnops.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
2626
* Copyright 2017 Nexenta Systems, Inc.
2727
* Copyright (c) 2021, 2022 by Pawel Jakub Dawidek
28+
* Copyright (c) 2025, Rob Norris <[email protected]>
2829
*/
2930

3031
/* Portions Copyright 2007 Jeremy Teo */
@@ -1083,6 +1084,44 @@ zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
10831084
return (error);
10841085
}
10851086

1087+
/*
1088+
* Get the optimal alignment to ensure direct IO can be performed without
1089+
* incurring any RMW penalty on write. If direct IO is not enabled for this
1090+
* file, returns an error.
1091+
*/
1092+
int
1093+
zfs_get_direct_alignment(znode_t *zp, uint64_t *alignp)
1094+
{
1095+
zfsvfs_t *zfsvfs = ZTOZSB(zp);
1096+
1097+
if (!zfs_dio_enabled || zfsvfs->z_os->os_direct == ZFS_DIRECT_DISABLED)
1098+
return (SET_ERROR(EOPNOTSUPP));
1099+
1100+
/*
1101+
* If the file has multiple blocks, then its block size is fixed
1102+
* forever, and so is the ideal alignment.
1103+
*
1104+
* If however it only has a single block, then we want to return the
1105+
* max block size it could possibly grow to (ie, the dataset
1106+
* recordsize). We do this so that a program querying alignment
1107+
* immediately after the file is created gets a value that won't change
1108+
* once the file has grown into the second block and beyond.
1109+
*
1110+
* Because we don't have a count of blocks easily available here, we
1111+
* check if the apparent file size is no larger than its current block
1112+
* size (meaning, the file hasn't yet grown into the current block
1113+
* size) and then, check if the block size is smaller than the dataset
1114+
* maximum (meaning, if the file grew past the current block size, the
1115+
* block size would be increased).
1116+
*/
1117+
if (zp->z_size <= zp->z_blksz && zp->z_blksz < zfsvfs->z_max_blksz)
1118+
*alignp = MAX(zfsvfs->z_max_blksz, PAGE_SIZE);
1119+
else
1120+
*alignp = MAX(zp->z_blksz, PAGE_SIZE);
1121+
1122+
return (0);
1123+
}
1124+
10861125
#ifdef ZFS_DEBUG
10871126
static int zil_fault_io = 0;
10881127
#endif

0 commit comments

Comments
 (0)