Re: [PATCH 19/19] writeback: simplify writeback iteration

From: Christoph Hellwig
Date: Wed Jan 31 2024 - 02:15:48 EST


On Tue, Jan 30, 2024 at 10:50:16PM +0100, Jan Kara wrote:
> Well, batch release needs to be only here because if writeback_get_folio()
> returns NULL, the batch has been already released by it.

Indeed.

> So what would be
> duplicated is only the error assignment. But I'm fine with the version in
> the following email and actually somewhat prefer it compared the yet
> another variant you've sent.

So how about another variant, this is closer to your original suggestion.
But I've switched around the ordered of the folio or not branches
from my original patch, and completely reworked and (IMHO) improved the
comments. it replaces patch 19 instead of being incremental to be
readable:

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 58b3661f5eac9e..1593a783176ca2 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1985,12 +1985,13 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
struct iomap_writepage_ctx *wpc,
const struct iomap_writeback_ops *ops)
{
- struct folio *folio;
- int ret;
+ struct folio *folio = NULL;
+ int ret = 0;

wpc->ops = ops;
- for_each_writeback_folio(mapping, wbc, folio, ret)
+ while ((folio = writeback_iter(mapping, wbc, folio, &ret)))
ret = iomap_do_writepage(folio, wbc, wpc);
+
if (!wpc->ioend)
return ret;
return iomap_submit_ioend(wpc, wpc->ioend, ret);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 2416da933440e2..fc4605627496fc 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -367,15 +367,8 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,

bool wb_over_bg_thresh(struct bdi_writeback *wb);

-struct folio *writeback_iter_init(struct address_space *mapping,
- struct writeback_control *wbc);
-struct folio *writeback_iter_next(struct address_space *mapping,
- struct writeback_control *wbc, struct folio *folio, int error);
-
-#define for_each_writeback_folio(mapping, wbc, folio, error) \
- for (folio = writeback_iter_init(mapping, wbc); \
- folio || ((error = wbc->err), false); \
- folio = writeback_iter_next(mapping, wbc, folio, error))
+struct folio *writeback_iter(struct address_space *mapping,
+ struct writeback_control *wbc, struct folio *folio, int *error);

typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc,
void *data);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0763c4353a676a..eefcb00cb7b227 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2360,29 +2360,6 @@ void tag_pages_for_writeback(struct address_space *mapping,
}
EXPORT_SYMBOL(tag_pages_for_writeback);

-static void writeback_finish(struct address_space *mapping,
- struct writeback_control *wbc, pgoff_t done_index)
-{
- folio_batch_release(&wbc->fbatch);
-
- /*
- * For range cyclic writeback we need to remember where we stopped so
- * that we can continue there next time we are called. If we hit the
- * last page and there is more work to be done, wrap back to the start
- * of the file.
- *
- * For non-cyclic writeback we always start looking up at the beginning
- * of the file if we are called again, which can only happen due to
- * -ENOMEM from the file system.
- */
- if (wbc->range_cyclic) {
- if (wbc->err || wbc->nr_to_write <= 0)
- mapping->writeback_index = done_index;
- else
- mapping->writeback_index = 0;
- }
-}
-
static xa_mark_t wbc_to_tag(struct writeback_control *wbc)
{
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
@@ -2442,10 +2419,8 @@ static struct folio *writeback_get_folio(struct address_space *mapping,
filemap_get_folios_tag(mapping, &wbc->index, wbc_end(wbc),
wbc_to_tag(wbc), &wbc->fbatch);
folio = folio_batch_next(&wbc->fbatch);
- if (!folio) {
- writeback_finish(mapping, wbc, 0);
+ if (!folio)
return NULL;
- }
}

folio_lock(folio);
@@ -2458,60 +2433,107 @@ static struct folio *writeback_get_folio(struct address_space *mapping,
return folio;
}

-struct folio *writeback_iter_init(struct address_space *mapping,
- struct writeback_control *wbc)
+/**
+ * writepage_iter - iterate folio of a mapping for writeback
+ * @mapping: address space structure to write
+ * @wbc: writeback context
+ * @folio: previously iterated folio (%NULL to start)
+ * @error: in-out pointer for writeback errors (see below)
+ *
+ * This function should be called in a while loop in the ->writepages
+ * implementation and returns the next folio for the writeback operation
+ * described by @wbc on @mapping.
+ *
+ * To start writeback @folio should be passed as NULL, for every following
+ * iteration the folio returned by this function previously should be passed.
+ * @error should contain the error from the previous writeback iteration when
+ * calling writeback_iter.
+ *
+ * Once the writeback described in @wbc has finished, this function will return
+ * %NULL and if there was an error in any iteration restore it to @error.
+ *
+ * Note: callers should not manually break out of the loop using break or goto.
+ */
+struct folio *writeback_iter(struct address_space *mapping,
+ struct writeback_control *wbc, struct folio *folio, int *error)
{
- if (wbc->range_cyclic)
- wbc->index = mapping->writeback_index; /* prev offset */
- else
- wbc->index = wbc->range_start >> PAGE_SHIFT;
-
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag_pages_for_writeback(mapping, wbc->index, wbc_end(wbc));
-
- wbc->err = 0;
- folio_batch_init(&wbc->fbatch);
- return writeback_get_folio(mapping, wbc);
-}
+ if (!folio) {
+ folio_batch_init(&wbc->fbatch);
+ wbc->err = 0;

-struct folio *writeback_iter_next(struct address_space *mapping,
- struct writeback_control *wbc, struct folio *folio, int error)
-{
- unsigned long nr = folio_nr_pages(folio);
+ /*
+ * For range cyclic writeback we remember where we stopped so
+ * that we can continue where we stopped.
+ *
+ * For non-cyclic writeback we always start at the beginning of
+ * the passed in range.
+ */
+ if (wbc->range_cyclic)
+ wbc->index = mapping->writeback_index;
+ else
+ wbc->index = wbc->range_start >> PAGE_SHIFT;

- wbc->nr_to_write -= nr;
+ /*
+ * To avoid livelocks when other processes dirty new pages, we
+ * first tag pages which should be written back and only then
+ * start writing them.
+ *
+ * For data-integrity sync we have to be careful so that we do
+ * not miss some pages (e.g., because some other process has
+ * cleared the TOWRITE tag we set). The rule we follow is that
+ * TOWRITE tag can be cleared only by the process clearing the
+ * DIRTY tag (and submitting the page for I/O).
+ */
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+ tag_pages_for_writeback(mapping, wbc->index,
+ wbc_end(wbc));
+ } else {
+ wbc->nr_to_write -= folio_nr_pages(folio);

- /*
- * Handle the legacy AOP_WRITEPAGE_ACTIVATE magic return value.
- * Eventually all instances should just unlock the folio themselves and
- * return 0;
- */
- if (error == AOP_WRITEPAGE_ACTIVATE) {
- folio_unlock(folio);
- error = 0;
+ /*
+ * For integrity writeback we have to keep going until we have
+ * written all the folios we tagged for writeback prior to
+ * entering the writeback loop, even if we run past
+ * wbc->nr_to_write or encounter errors, and just stash away
+ * the first error we encounter in wbc->err so that it can
+ * be retrieved on return.
+ *
+ * This is because the file system may still have state to clear
+ * for each folio. We'll eventually return the first error
+ * encountered.
+ */
+ if (wbc->sync_mode == WB_SYNC_ALL) {
+ if (*error && !wbc->err)
+ wbc->err = *error;
+ } else {
+ if (*error || wbc->nr_to_write <= 0)
+ goto done;
+ }
}

- if (error && !wbc->err)
- wbc->err = error;
+ folio = writeback_get_folio(mapping, wbc);
+ if (!folio) {
+ /*
+ * For range cyclic writeback not finding another folios means
+ * that we are at the end of the file. In that case go back
+ * to the start of the file for the next call.
+ */
+ if (wbc->range_cyclic)
+ mapping->writeback_index = 0;

- /*
- * For integrity sync we have to keep going until we have written all
- * the folios we tagged for writeback prior to entering the writeback
- * loop, even if we run past wbc->nr_to_write or encounter errors.
- * This is because the file system may still have state to clear for
- * each folio. We'll eventually return the first error encountered.
- *
- * For background writeback just push done_index past this folio so that
- * we can just restart where we left off and media errors won't choke
- * writeout for the entire file.
- */
- if (wbc->sync_mode == WB_SYNC_NONE &&
- (wbc->err || wbc->nr_to_write <= 0)) {
- writeback_finish(mapping, wbc, folio->index + nr);
- return NULL;
+ /*
+ * Return the first error we encountered (if there was any) to
+ * the caller now that we are done.
+ */
+ *error = wbc->err;
}
+ return folio;

- return writeback_get_folio(mapping, wbc);
+done:
+ if (wbc->range_cyclic)
+ mapping->writeback_index = folio->index + folio_nr_pages(folio);
+ folio_batch_release(&wbc->fbatch);
+ return NULL;
}

/**
@@ -2549,13 +2571,18 @@ int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data)
{
- struct folio *folio;
- int error;
+ struct folio *folio = NULL;
+ int error = 0;

- for_each_writeback_folio(mapping, wbc, folio, error)
+ while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
error = writepage(folio, wbc, data);
+ if (error == AOP_WRITEPAGE_ACTIVATE) {
+ folio_unlock(folio);
+ error = 0;
+ }
+ }

- return wbc->err;
+ return error;
}
EXPORT_SYMBOL(write_cache_pages);

@@ -2563,13 +2590,17 @@ static int writeback_use_writepage(struct address_space *mapping,
struct writeback_control *wbc)
{
struct blk_plug plug;
- struct folio *folio;
- int err;
+ struct folio *folio = NULL;
+ int err = 0;

blk_start_plug(&plug);
- for_each_writeback_folio(mapping, wbc, folio, err) {
+ while ((folio = writeback_iter(mapping, wbc, folio, &err))) {
err = mapping->a_ops->writepage(&folio->page, wbc);
mapping_set_error(mapping, err);
+ if (err == AOP_WRITEPAGE_ACTIVATE) {
+ folio_unlock(folio);
+ err = 0;
+ }
}
blk_finish_plug(&plug);