[PATCH 1/6] fs: iomap: Atomic write support

From: John Garry
Date: Wed Jan 24 2024 - 09:27:56 EST


Add flag IOMAP_ATOMIC_WRITE to indicate to the FS that an atomic write
bio is being created and all the rules there need to be followed.

It is the task of the FS iomap iter callbacks to ensure that the mapping
created adheres to those rules, like size is power-of-2, is at a
naturally-aligned offset, etc. However, checking for a single iovec, i.e.
iter type is ubuf, is done in __iomap_dio_rw().

A write should only produce a single bio, so error when it doesn't.

Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx>
---
fs/iomap/direct-io.c | 21 ++++++++++++++++++++-
fs/iomap/trace.h | 3 ++-
include/linux/iomap.h | 1 +
3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index bcd3f8cf5ea4..25736d01b857 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -275,10 +275,12 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
struct iomap_dio *dio)
{
+ bool atomic_write = iter->flags & IOMAP_ATOMIC;
const struct iomap *iomap = &iter->iomap;
struct inode *inode = iter->inode;
unsigned int fs_block_size = i_blocksize(inode), pad;
loff_t length = iomap_length(iter);
+ const size_t iter_len = iter->len;
loff_t pos = iter->pos;
blk_opf_t bio_opf;
struct bio *bio;
@@ -381,6 +383,9 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
GFP_KERNEL);
bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
bio->bi_ioprio = dio->iocb->ki_ioprio;
+ if (atomic_write)
+ bio->bi_opf |= REQ_ATOMIC;
+
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;

@@ -397,6 +402,12 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
}

n = bio->bi_iter.bi_size;
+ if (atomic_write && n != iter_len) {
+ /* This bio should have covered the complete length */
+ ret = -EINVAL;
+ bio_put(bio);
+ goto out;
+ }
if (dio->flags & IOMAP_DIO_WRITE) {
task_io_account_write(n);
} else {
@@ -554,12 +565,17 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
struct blk_plug plug;
struct iomap_dio *dio;
loff_t ret = 0;
+ bool is_read = iov_iter_rw(iter) == READ;
+ bool atomic_write = (iocb->ki_flags & IOCB_ATOMIC) && !is_read;

trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before);

if (!iomi.len)
return NULL;

+ if (atomic_write && !iter_is_ubuf(iter))
+ return ERR_PTR(-EINVAL);
+
dio = kmalloc(sizeof(*dio), GFP_KERNEL);
if (!dio)
return ERR_PTR(-ENOMEM);
@@ -579,7 +595,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (iocb->ki_flags & IOCB_NOWAIT)
iomi.flags |= IOMAP_NOWAIT;

- if (iov_iter_rw(iter) == READ) {
+ if (is_read) {
/* reads can always complete inline */
dio->flags |= IOMAP_DIO_INLINE_COMP;

@@ -605,6 +621,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
dio->flags |= IOMAP_DIO_CALLER_COMP;

+ if (atomic_write)
+ iomi.flags |= IOMAP_ATOMIC;
+
if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
ret = -EAGAIN;
if (iomi.pos >= dio->i_size ||
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index c16fd55f5595..c95576420bca 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -98,7 +98,8 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
{ IOMAP_REPORT, "REPORT" }, \
{ IOMAP_FAULT, "FAULT" }, \
{ IOMAP_DIRECT, "DIRECT" }, \
- { IOMAP_NOWAIT, "NOWAIT" }
+ { IOMAP_NOWAIT, "NOWAIT" }, \
+ { IOMAP_ATOMIC, "ATOMIC" }

#define IOMAP_F_FLAGS_STRINGS \
{ IOMAP_F_NEW, "NEW" }, \
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 96dd0acbba44..9eac704a0d6f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -178,6 +178,7 @@ struct iomap_folio_ops {
#else
#define IOMAP_DAX 0
#endif /* CONFIG_FS_DAX */
+#define IOMAP_ATOMIC (1 << 9)

struct iomap_ops {
/*
--
2.31.1