[PATCH v3 03/15] fs/bdev: Add atomic write support info to statx

From: John Garry
Date: Wed Jan 24 2024 - 06:46:09 EST


From: Prasad Singamsetty <prasad.singamsetty@xxxxxxxxxx>

Extend statx system call to return additional info for atomic write support
support if the specified file is a block device.

Signed-off-by: Prasad Singamsetty <prasad.singamsetty@xxxxxxxxxx>
Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx>
---
block/bdev.c | 37 +++++++++++++++++++++---------
fs/stat.c | 47 +++++++++++++++++++++++++++++++++------
include/linux/blkdev.h | 5 +++--
include/linux/fs.h | 3 +++
include/linux/stat.h | 3 +++
include/uapi/linux/stat.h | 9 +++++++-
6 files changed, 84 insertions(+), 20 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index e9f1b12bd75c..2185084ffc89 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1116,24 +1116,41 @@ void sync_bdevs(bool wait)
iput(old_inode);
}

+#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
+
/*
- * Handle STATX_DIOALIGN for block devices.
- *
- * Note that the inode passed to this is the inode of a block device node file,
- * not the block device's internal inode. Therefore it is *not* valid to use
- * I_BDEV() here; the block device has to be looked up by i_rdev instead.
+ * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
*/
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask)
{
struct block_device *bdev;

- bdev = blkdev_get_no_open(inode->i_rdev);
+ if (!(request_mask & BDEV_STATX_SUPPORTED_MASK))
+ return;
+
+ /*
+ * Note that d_backing_inode() returns the inode of a block device node
+ * file, not the block device's internal inode. Therefore it is *not*
+ * valid to use I_BDEV() here; the block device has to be looked up by
+ * i_rdev instead.
+ */
+ bdev = blkdev_get_no_open(d_backing_inode(dentry)->i_rdev);
if (!bdev)
return;

- stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
- stat->dio_offset_align = bdev_logical_block_size(bdev);
- stat->result_mask |= STATX_DIOALIGN;
+ if (request_mask & STATX_DIOALIGN) {
+ stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
+ stat->dio_offset_align = bdev_logical_block_size(bdev);
+ stat->result_mask |= STATX_DIOALIGN;
+ }
+
+ if (request_mask & STATX_WRITE_ATOMIC) {
+ struct request_queue *bd_queue = bdev->bd_queue;
+
+ generic_fill_statx_atomic_writes(stat,
+ queue_atomic_write_unit_min_bytes(bd_queue),
+ queue_atomic_write_unit_max_bytes(bd_queue));
+ }

blkdev_put_no_open(bdev);
}
diff --git a/fs/stat.c b/fs/stat.c
index 77cdc69eb422..bd0618477702 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -89,6 +89,37 @@ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat)
}
EXPORT_SYMBOL(generic_fill_statx_attr);

+/**
+ * generic_fill_statx_atomic_writes - Fill in the atomic writes statx attributes
+ * @stat: Where to fill in the attribute flags
+ * @unit_min: Minimum supported atomic write length
+ * @unit_max: Maximum supported atomic write length
+ *
+ * Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from
+ * atomic write unit_min and unit_max values.
+ */
+void generic_fill_statx_atomic_writes(struct kstat *stat,
+ unsigned int unit_min,
+ unsigned int unit_max)
+{
+ /* Confirm that the request type is known */
+ stat->result_mask |= STATX_WRITE_ATOMIC;
+
+ /* Confirm that the file attribute type is known */
+ stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC;
+
+ if (unit_min) {
+ stat->atomic_write_unit_min = unit_min;
+ stat->atomic_write_unit_max = unit_max;
+ /* Initially only allow 1x segment */
+ stat->atomic_write_segments_max = 1;
+
+ /* Confirm atomic writes are actually supported */
+ stat->attributes |= STATX_ATTR_WRITE_ATOMIC;
+ }
+}
+EXPORT_SYMBOL(generic_fill_statx_atomic_writes);
+
/**
* vfs_getattr_nosec - getattr without security checks
* @path: file to get attributes from
@@ -259,13 +290,12 @@ static int vfs_statx(int dfd, struct filename *filename, int flags,
stat->attributes |= STATX_ATTR_MOUNT_ROOT;
stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;

- /* Handle STATX_DIOALIGN for block devices. */
- if (request_mask & STATX_DIOALIGN) {
- struct inode *inode = d_backing_inode(path.dentry);
-
- if (S_ISBLK(inode->i_mode))
- bdev_statx_dioalign(inode, stat);
- }
+ /* If this is a block device inode, override the filesystem
+ * attributes with the block device specific parameters
+ * that need to be obtained from the bdev backing inode
+ */
+ if (S_ISBLK(d_backing_inode(path.dentry)->i_mode))
+ bdev_statx(path.dentry, stat, request_mask);

path_put(&path);
if (retry_estale(error, lookup_flags)) {
@@ -658,6 +688,9 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_mnt_id = stat->mnt_id;
tmp.stx_dio_mem_align = stat->dio_mem_align;
tmp.stx_dio_offset_align = stat->dio_offset_align;
+ tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
+ tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
+ tmp.stx_atomic_write_segments_max = stat->atomic_write_segments_max;

return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d5490b988918..443f08239308 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1541,7 +1541,7 @@ int sync_blockdev(struct block_device *bdev);
int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend);
int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
+void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask);
void printk_all_partitions(void);
int __init early_lookup_bdev(const char *pathname, dev_t *dev);
#else
@@ -1559,7 +1559,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
static inline void sync_bdevs(bool wait)
{
}
-static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+static inline void bdev_statx(struct dentry *dentry, struct kstat *stat,
+ u32 request_mask)
{
}
static inline void printk_all_partitions(void)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5966a70495..c316c0a92fff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3164,6 +3164,9 @@ extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
+void generic_fill_statx_atomic_writes(struct kstat *stat,
+ unsigned int unit_min,
+ unsigned int unit_max);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
void __inode_add_bytes(struct inode *inode, loff_t bytes);
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 52150570d37a..2c5e2b8c6559 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -53,6 +53,9 @@ struct kstat {
u32 dio_mem_align;
u32 dio_offset_align;
u64 change_cookie;
+ u32 atomic_write_unit_min;
+ u32 atomic_write_unit_max;
+ u32 atomic_write_segments_max;
};

/* These definitions are internal to the kernel for now. Mainly used by nfsd. */
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 2f2ee82d5517..c0e8e10d1de6 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -127,7 +127,12 @@ struct statx {
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
/* 0xa0 */
- __u64 __spare3[12]; /* Spare space for future expansion */
+ __u32 stx_atomic_write_unit_min;
+ __u32 stx_atomic_write_unit_max;
+ __u32 stx_atomic_write_segments_max;
+ __u32 __spare1;
+ /* 0xb0 */
+ __u64 __spare3[10]; /* Spare space for future expansion */
/* 0x100 */
};

@@ -155,6 +160,7 @@ struct statx {
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
+#define STATX_WRITE_ATOMIC 0x00008000U /* Want/got atomic_write_* fields */

#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */

@@ -190,6 +196,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */


#endif /* _UAPI_LINUX_STAT_H */
--
2.31.1