Re: [PATCH 3/4] iomap: don't increase i_size if it's not a write operation

From: Darrick J. Wong
Date: Mon Mar 11 2024 - 11:48:37 EST


On Mon, Mar 11, 2024 at 08:22:54PM +0800, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@xxxxxxxxxx>
>
> Increasing i_size in iomap_zero_range() and iomap_unshare_iter() is not
> needed; the caller should handle it. In particular, when truncating a
> partial block, we must not increase i_size beyond the new EOF here. This
> doesn't affect xfs and gfs2 now because they set the new file size after
> zeroing out, so a transient increase in i_size doesn't matter, but it
> will affect ext4 because it sets the file size before truncating.

> At the same time,
> iomap_write_failed() is also not needed for the above two cases, so
> factor them out and move them to iomap_write_iter() and
> iomap_zero_iter().

This change should be a separate patch with its own justification.
Which is, AFAICT, something along the lines of:

"Unsharing and zeroing can only happen within EOF, so there is never a
need to perform post-EOF pagecache truncation if write begin fails."

> Signed-off-by: Zhang Yi <yi.zhang@xxxxxxxxxx>

Doesn't this patch fix a bug in ext4?

> ---
> fs/iomap/buffered-io.c | 59 +++++++++++++++++++++---------------------
> 1 file changed, 30 insertions(+), 29 deletions(-)
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 093c4515b22a..19f91324c690 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -786,7 +786,6 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
>
> out_unlock:
> __iomap_put_folio(iter, pos, 0, folio);
> - iomap_write_failed(iter->inode, pos, len);
>
> return status;
> }
> @@ -838,34 +837,13 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
> size_t copied, struct folio *folio)
> {
> const struct iomap *srcmap = iomap_iter_srcmap(iter);
> - loff_t old_size = iter->inode->i_size;
> - size_t ret;
> -
> - if (srcmap->type == IOMAP_INLINE) {
> - ret = iomap_write_end_inline(iter, folio, pos, copied);
> - } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
> - ret = block_write_end(NULL, iter->inode->i_mapping, pos, len,
> - copied, &folio->page, NULL);
> - } else {
> - ret = __iomap_write_end(iter->inode, pos, len, copied, folio);
> - }
>
> - /*
> - * Update the in-memory inode size after copying the data into the page
> - * cache. It's up to the file system to write the updated size to disk,
> - * preferably after I/O completion so that no stale data is exposed.
> - */
> - if (pos + ret > old_size) {
> - i_size_write(iter->inode, pos + ret);
> - iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
> - }
> - __iomap_put_folio(iter, pos, ret, folio);
> -
> - if (old_size < pos)
> - pagecache_isize_extended(iter->inode, old_size, pos);
> - if (ret < len)
> - iomap_write_failed(iter->inode, pos + ret, len - ret);
> - return ret;
> + if (srcmap->type == IOMAP_INLINE)
> + return iomap_write_end_inline(iter, folio, pos, copied);
> + if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
> + return block_write_end(NULL, iter->inode->i_mapping, pos, len,
> + copied, &folio->page, NULL);
> + return __iomap_write_end(iter->inode, pos, len, copied, folio);
> }
>
> static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> @@ -880,6 +858,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
>
> do {
> struct folio *folio;
> + loff_t old_size;
> size_t offset; /* Offset into folio */
> size_t bytes; /* Bytes to write to folio */
> size_t copied; /* Bytes copied from user */
> @@ -912,8 +891,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> }
>
> status = iomap_write_begin(iter, pos, bytes, &folio);
> - if (unlikely(status))
> + if (unlikely(status)) {
> + iomap_write_failed(iter->inode, pos, bytes);
> break;
> + }
> if (iter->iomap.flags & IOMAP_F_STALE)
> break;
>
> @@ -927,6 +908,24 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
> status = iomap_write_end(iter, pos, bytes, copied, folio);
>
> + /*
> + * Update the in-memory inode size after copying the data into
> + * the page cache. It's up to the file system to write the
> + * updated size to disk, preferably after I/O completion so that
> + * no stale data is exposed.
> + */
> + old_size = iter->inode->i_size;
> + if (pos + status > old_size) {
> + i_size_write(iter->inode, pos + status);
> + iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
> + }
> + __iomap_put_folio(iter, pos, status, folio);

Why is it necessary to hoist the __iomap_put_folio calls from
iomap_write_end into iomap_write_iter, iomap_unshare_iter, and
iomap_zero_iter? None of those functions seem to use it, and it makes
more sense to me that iomap_write_end releases the folio that
iomap_write_begin returned.

--D

> +
> + if (old_size < pos)
> + pagecache_isize_extended(iter->inode, old_size, pos);
> + if (status < bytes)
> + iomap_write_failed(iter->inode, pos + status,
> + bytes - status);
> if (unlikely(copied != status))
> iov_iter_revert(i, copied - status);
>
> @@ -1296,6 +1295,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
> bytes = folio_size(folio) - offset;
>
> bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
> + __iomap_put_folio(iter, pos, bytes, folio);
> if (WARN_ON_ONCE(bytes == 0))
> return -EIO;
>
> @@ -1360,6 +1360,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
> folio_mark_accessed(folio);
>
> bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
> + __iomap_put_folio(iter, pos, bytes, folio);
> if (WARN_ON_ONCE(bytes == 0))
> return -EIO;
>
> --
> 2.39.2
>
>