[PATCH 19/19] writeback: simplify writeback iteration

From: Christoph Hellwig
Date: Thu Jan 25 2024 - 04:05:29 EST


Based on the feedback from Jan I've tried to figure out how to
avoid the error magic in the for_each_writeback_folio. This patch
tries to implement this by switching to an open while loop over a
single writeback_iter() function:

while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
...
}

the twist here is that the error value is passed by reference, so that
the iterator can restore it when breaking out of the loop.

Additionally it moves the AOP_WRITEPAGE_ACTIVATE out of the iterator
and into the callers, in preparation for eventually killing it off
with the phase out of write_cache_pages().

To me this form of the loop feels easier to follow, and it has the
added advantage that writeback_iter() can actually be nicely used in
nested loops, which should help with further iterizing the iomap
writeback code.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
fs/iomap/buffered-io.c | 7 +-
include/linux/writeback.h | 11 +--
mm/page-writeback.c | 174 +++++++++++++++++++++-----------------
3 files changed, 102 insertions(+), 90 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 58b3661f5eac9e..1593a783176ca2 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1985,12 +1985,13 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
struct iomap_writepage_ctx *wpc,
const struct iomap_writeback_ops *ops)
{
- struct folio *folio;
- int ret;
+ struct folio *folio = NULL;
+ int ret = 0;

wpc->ops = ops;
- for_each_writeback_folio(mapping, wbc, folio, ret)
+ while ((folio = writeback_iter(mapping, wbc, folio, &ret)))
ret = iomap_do_writepage(folio, wbc, wpc);
+
if (!wpc->ioend)
return ret;
return iomap_submit_ioend(wpc, wpc->ioend, ret);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 2416da933440e2..fc4605627496fc 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -367,15 +367,8 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,

bool wb_over_bg_thresh(struct bdi_writeback *wb);

-struct folio *writeback_iter_init(struct address_space *mapping,
- struct writeback_control *wbc);
-struct folio *writeback_iter_next(struct address_space *mapping,
- struct writeback_control *wbc, struct folio *folio, int error);
-
-#define for_each_writeback_folio(mapping, wbc, folio, error) \
- for (folio = writeback_iter_init(mapping, wbc); \
- folio || ((error = wbc->err), false); \
- folio = writeback_iter_next(mapping, wbc, folio, error))
+struct folio *writeback_iter(struct address_space *mapping,
+ struct writeback_control *wbc, struct folio *folio, int *error);

typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc,
void *data);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2a4b5aee5decd9..9e1cce9be63524 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2360,29 +2360,6 @@ void tag_pages_for_writeback(struct address_space *mapping,
}
EXPORT_SYMBOL(tag_pages_for_writeback);

-static void writeback_finish(struct address_space *mapping,
- struct writeback_control *wbc, pgoff_t done_index)
-{
- folio_batch_release(&wbc->fbatch);
-
- /*
- * For range cyclic writeback we need to remember where we stopped so
- * that we can continue there next time we are called. If we hit the
- * last page and there is more work to be done, wrap back to the start
- * of the file.
- *
- * For non-cyclic writeback we always start looking up at the beginning
- * of the file if we are called again, which can only happen due to
- * -ENOMEM from the file system.
- */
- if (wbc->range_cyclic) {
- if (wbc->err || wbc->nr_to_write <= 0)
- mapping->writeback_index = done_index;
- else
- mapping->writeback_index = 0;
- }
-}
-
static xa_mark_t wbc_to_tag(struct writeback_control *wbc)
{
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
@@ -2442,10 +2419,8 @@ static struct folio *writeback_get_folio(struct address_space *mapping,
filemap_get_folios_tag(mapping, &wbc->index, wbc_end(wbc),
wbc_to_tag(wbc), &wbc->fbatch);
folio = folio_batch_next(&wbc->fbatch);
- if (!folio) {
- writeback_finish(mapping, wbc, 0);
+ if (!folio)
return NULL;
- }
}

folio_lock(folio);
@@ -2458,60 +2433,92 @@ static struct folio *writeback_get_folio(struct address_space *mapping,
return folio;
}

-struct folio *writeback_iter_init(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- if (wbc->range_cyclic)
- wbc->index = mapping->writeback_index; /* prev offset */
- else
- wbc->index = wbc->range_start >> PAGE_SHIFT;
-
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag_pages_for_writeback(mapping, wbc->index, wbc_end(wbc));
-
- wbc->err = 0;
- folio_batch_init(&wbc->fbatch);
- return writeback_get_folio(mapping, wbc);
-}
-
-struct folio *writeback_iter_next(struct address_space *mapping,
- struct writeback_control *wbc, struct folio *folio, int error)
+/**
+ * writepage_iter - iterate folio of a mapping for writeback
+ * @mapping: address space structure to write
+ * @wbc: writeback context
+ * @folio: previously iterated folio (%NULL to start)
+ * @error: in-out pointer for writeback errors (see below)
+ *
+ * This function should be called in a while loop in the ->writepages
+ * implementation and returns the next folio for the writeback operation
+ * described by @wbc on @mapping.
+ *
+ * To start writeback @folio should be passed as NULL, for every following
+ * iteration the folio returned by this function previously should be passed.
+ * @error should contain the error from the previous writeback iteration when
+ * calling writeback_iter.
+ *
+ * Once the writeback described in @wbc has finished, this function will return
+ * %NULL and if there was an error in any iteration restore it to @error.
+ *
+ * Note: callers should not manually break out of the loop using break or goto.
+ */
+struct folio *writeback_iter(struct address_space *mapping,
+ struct writeback_control *wbc, struct folio *folio, int *error)
{
- unsigned long nr = folio_nr_pages(folio);
+ if (folio) {
+ wbc->nr_to_write -= folio_nr_pages(folio);
+ if (*error && !wbc->err)
+ wbc->err = *error;

- wbc->nr_to_write -= nr;
-
- /*
- * Handle the legacy AOP_WRITEPAGE_ACTIVATE magic return value.
- * Eventually all instances should just unlock the folio themselves and
- * return 0;
- */
- if (error == AOP_WRITEPAGE_ACTIVATE) {
- folio_unlock(folio);
- error = 0;
+ /*
+ * For integrity sync we have to keep going until we have
+ * written all the folios we tagged for writeback prior to
+ * entering the writeback loop, even if we run past
+ * wbc->nr_to_write or encounter errors.
+ *
+ * This is because the file system may still have state to clear
+ * for each folio. We'll eventually return the first error
+ * encountered.
+ *
+ * For background writeback just push done_index past this folio
+ * so that we can just restart where we left off and media
+ * errors won't choke writeout for the entire file.
+ */
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ (wbc->err || wbc->nr_to_write <= 0))
+ goto finish;
+ } else {
+ if (wbc->range_cyclic)
+ wbc->index = mapping->writeback_index; /* prev offset */
+ else
+ wbc->index = wbc->range_start >> PAGE_SHIFT;
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+ tag_pages_for_writeback(mapping, wbc->index,
+ wbc_end(wbc));
+ folio_batch_init(&wbc->fbatch);
+ wbc->err = 0;
}

- if (error && !wbc->err)
- wbc->err = error;
+ folio = writeback_get_folio(mapping, wbc);
+ if (!folio)
+ goto finish;
+ return folio;
+
+finish:
+ folio_batch_release(&wbc->fbatch);

/*
- * For integrity sync we have to keep going until we have written all
- * the folios we tagged for writeback prior to entering the writeback
- * loop, even if we run past wbc->nr_to_write or encounter errors.
- * This is because the file system may still have state to clear for
- * each folio. We'll eventually return the first error encountered.
+ * For range cyclic writeback we need to remember where we stopped so
+ * that we can continue there next time we are called. If we hit the
+ * last page and there is more work to be done, wrap back to the start
+ * of the file.
*
- * For background writeback just push done_index past this folio so that
- * we can just restart where we left off and media errors won't choke
- * writeout for the entire file.
+ * For non-cyclic writeback we always start looking up at the beginning
+ * of the file if we are called again, which can only happen due to
+ * -ENOMEM from the file system.
*/
- if (wbc->sync_mode == WB_SYNC_NONE &&
- (wbc->err || wbc->nr_to_write <= 0)) {
- writeback_finish(mapping, wbc, folio->index + nr);
- return NULL;
+ if (wbc->range_cyclic) {
+ WARN_ON_ONCE(wbc->sync_mode != WB_SYNC_NONE);
+ if (wbc->err || wbc->nr_to_write <= 0)
+ mapping->writeback_index =
+ folio->index + folio_nr_pages(folio);
+ else
+ mapping->writeback_index = 0;
}
-
- return writeback_get_folio(mapping, wbc);
+ *error = wbc->err;
+ return NULL;
}

/**
@@ -2549,13 +2556,18 @@ int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data)
{
- struct folio *folio;
- int error;
+ struct folio *folio = NULL;
+ int error = 0;

- for_each_writeback_folio(mapping, wbc, folio, error)
+ while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
error = writepage(folio, wbc, data);
+ if (error == AOP_WRITEPAGE_ACTIVATE) {
+ folio_unlock(folio);
+ error = 0;
+ }
+ }

- return wbc->err;
+ return error;
}
EXPORT_SYMBOL(write_cache_pages);

@@ -2563,13 +2575,17 @@ static int writeback_use_writepage(struct address_space *mapping,
struct writeback_control *wbc)
{
struct blk_plug plug;
- struct folio *folio;
- int err;
+ struct folio *folio = 0;
+ int err = 0;

blk_start_plug(&plug);
- for_each_writeback_folio(mapping, wbc, folio, err) {
+ while ((folio = writeback_iter(mapping, wbc, folio, &err))) {
err = mapping->a_ops->writepage(&folio->page, wbc);
mapping_set_error(mapping, err);
+ if (err == AOP_WRITEPAGE_ACTIVATE) {
+ folio_unlock(folio);
+ err = 0;
+ }
}
blk_finish_plug(&plug);

@@ -2590,6 +2606,8 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
ret = mapping->a_ops->writepages(mapping, wbc);
} else if (mapping->a_ops->writepage) {
ret = writeback_use_writepage(mapping, wbc);
+ if (!ret)
+ ret = wbc->err;
} else {
/* deal with chardevs and other special files */
ret = 0;
--
2.39.2