[PATCH RFC v2 for-6.8/block 15/18] buffer: add a new helper to read sb block

From: Yu Kuai
Date: Mon Dec 11 2023 - 09:14:43 EST


From: Yu Kuai <yukuai3@xxxxxxxxxx>

Unlike __bread_gfp(), ext4 has special handing while reading sb block:

1) __GFP_NOFAIL is not set, and memory allocation can fail;
2) If buffer write failed before, set buffer uptodate and don't read
block from disk;
3) REQ_META is set for all IO, and REQ_PRIO is set for reading xattr;
4) If failed, return error ptr instead of NULL;

This patch add a new helper __bread_gfp2() that will match above 2 and 3(
1 will be used, and 4 will still be encapsulated by ext4), and prepare to
prevent calling mapping_gfp_constraint() directly on bd_inode->i_mapping
in ext4.

Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
---
fs/buffer.c | 68 ++++++++++++++++++++++++++-----------
include/linux/buffer_head.h | 18 +++++++++-
2 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 967f34b70aa8..188bd36c9fea 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1255,16 +1255,19 @@ void __bforget(struct buffer_head *bh)
}
EXPORT_SYMBOL(__bforget);

-static struct buffer_head *__bread_slow(struct buffer_head *bh)
+static struct buffer_head *__bread_slow(struct buffer_head *bh,
+ blk_opf_t op_flags,
+ bool check_write_error)
{
lock_buffer(bh);
- if (buffer_uptodate(bh)) {
+ if (buffer_uptodate(bh) ||
+ (check_write_error && buffer_uptodate_or_error(bh))) {
unlock_buffer(bh);
return bh;
} else {
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, bh);
+ submit_bh(REQ_OP_READ | op_flags, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
@@ -1445,6 +1448,31 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
}
EXPORT_SYMBOL(__breadahead);

+static struct buffer_head *
+bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
+ blk_opf_t op_flags, gfp_t gfp, bool check_write_error)
+{
+ struct buffer_head *bh;
+
+ gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+
+ /*
+ * Prefer looping in the allocator rather than here, at least that
+ * code knows what it's doing.
+ */
+ gfp |= __GFP_NOFAIL;
+
+ bh = bdev_getblk(bdev, block, size, gfp);
+ if (unlikely(!bh))
+ return NULL;
+
+ if (buffer_uptodate(bh) ||
+ (check_write_error && buffer_uptodate_or_error(bh)))
+ return bh;
+
+ return __bread_slow(bh, op_flags, check_write_error);
+}
+
/**
* __bread_gfp() - reads a specified block and returns the bh
* @bdev: the block_device to read from
@@ -1458,27 +1486,27 @@ EXPORT_SYMBOL(__breadahead);
* It returns NULL if the block was unreadable.
*/
struct buffer_head *
-__bread_gfp(struct block_device *bdev, sector_t block,
- unsigned size, gfp_t gfp)
+__bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
+ gfp_t gfp)
{
- struct buffer_head *bh;
-
- gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
-
- /*
- * Prefer looping in the allocator rather than here, at least that
- * code knows what it's doing.
- */
- gfp |= __GFP_NOFAIL;
-
- bh = bdev_getblk(bdev, block, size, gfp);
-
- if (likely(bh) && !buffer_uptodate(bh))
- bh = __bread_slow(bh);
- return bh;
+ return bread_gfp(bdev, block, size, 0, gfp, false);
}
EXPORT_SYMBOL(__bread_gfp);

+/*
+ * This works like __bread_gfp() except:
+ * 1) If buffer write failed before, set buffer uptodate and don't read
+ * block from disk;
+ * 2) Caller can pass in additional op_flags like REQ_META;
+ */
+struct buffer_head *
+__bread_gfp2(struct block_device *bdev, sector_t block, unsigned int size,
+ blk_opf_t op_flags, gfp_t gfp)
+{
+ return bread_gfp(bdev, block, size, op_flags, gfp, true);
+}
+EXPORT_SYMBOL(__bread_gfp2);
+
static void __invalidate_bh_lrus(struct bh_lru *b)
{
int i;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 5f23ee599889..751b2744b4ae 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -171,6 +171,18 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh)
return test_bit_acquire(BH_Uptodate, &bh->b_state);
}

+static __always_inline int buffer_uptodate_or_error(struct buffer_head *bh)
+{
+ /*
+ * If the buffer has the write error flag, data was failed to write
+ * out in the block. In this case, set buffer uptodate to prevent
+ * reading old data.
+ */
+ if (buffer_write_io_error(bh))
+ set_buffer_uptodate(bh);
+ return buffer_uptodate(bh);
+}
+
static inline unsigned long bh_offset(const struct buffer_head *bh)
{
return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1);
@@ -231,7 +243,11 @@ void __brelse(struct buffer_head *);
void __bforget(struct buffer_head *);
void __breadahead(struct block_device *, sector_t block, unsigned int size);
struct buffer_head *__bread_gfp(struct block_device *,
- sector_t block, unsigned size, gfp_t gfp);
+ sector_t block, unsigned int size, gfp_t gfp);
+struct buffer_head *__bread_gfp2(struct block_device *bdev, sector_t block,
+ unsigned int size, blk_opf_t op_flags,
+ gfp_t gfp);
+
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
void unlock_buffer(struct buffer_head *bh);
--
2.39.2