[RFC PATCH 07/12] netfs: Initiate write request from a dirty region

From: David Howells
Date: Wed Jul 21 2021 - 09:47:36 EST


Handle the initiation of writeback for part of the dirty region list. The
first region on the flush queue is extracted and a write request is set up
to manage it. The pages covered by that region are flipped from dirty to
writeback-in-progress.

The write request is then dispatched to a worker (which currently just logs
a "--- WRITE ---" message to dmesg and then abandons the request).

Notes:

(*) A page may host multiple disjoint dirty regions, each with its own
netfs_dirty_region, and a region may span multiple pages. Dirty
regions are not permitted to overlap, though they may be merged if
they would otherwise overlap.

(*) A page may be involved in multiple simultaneous writebacks. Each one
is managed by a separate netfs_dirty_region and netfs_write_request.

(*) Multiple pages may be required to form a write (for crypto/compression
purposes) and so adjacent non-dirty pages may also get marked for
writeback.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
---
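
For reference, the consumer-side hookup mirrors the afs conversion in this
patch; roughly (the example_* names are illustrative only, and the read-side
ops are unchanged from the earlier read helper patches):

	static void example_init_wreq(struct netfs_write_request *wreq)
	{
		/* Attach per-request state (e.g. credentials) here; the afs
		 * version below is still a placeholder.
		 */
	}

	static const struct netfs_request_ops example_req_ops = {
		/* ... dirty-region and read helper ops as before ... */
		.init_wreq	= example_init_wreq,
	};

	static const struct address_space_operations example_file_aops = {
		.readahead	= netfs_readahead,
		.writepages	= netfs_writepages,
		.releasepage	= netfs_releasepage,
		.invalidatepage	= netfs_invalidatepage,
		/* .writepage, .launder_page etc. stay per-filesystem for now */
	};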

fs/afs/file.c | 128 ++----------------
fs/netfs/Makefile | 1
fs/netfs/internal.h | 16 ++
fs/netfs/objects.c | 78 +++++++++++
fs/netfs/read_helper.c | 34 +++++
fs/netfs/stats.c | 6 +
fs/netfs/write_back.c | 306 ++++++++++++++++++++++++++++++++++++++++++
fs/netfs/xa_iterator.h | 85 ++++++++++++
include/linux/netfs.h | 35 +++++
include/trace/events/netfs.h | 72 ++++++++++
10 files changed, 642 insertions(+), 119 deletions(-)
create mode 100644 fs/netfs/write_back.c
create mode 100644 fs/netfs/xa_iterator.h

diff --git a/fs/afs/file.c b/fs/afs/file.c
index 8400cdf086b6..a6d483fe4e74 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -19,9 +19,6 @@

static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
static int afs_symlink_readpage(struct file *file, struct page *page);
-static void afs_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length);
-static int afs_releasepage(struct page *page, gfp_t gfp_flags);

static ssize_t afs_direct_IO(struct kiocb *iocb, struct iov_iter *iter);

@@ -50,17 +47,17 @@ const struct address_space_operations afs_file_aops = {
.readahead = netfs_readahead,
.set_page_dirty = afs_set_page_dirty,
.launder_page = afs_launder_page,
- .releasepage = afs_releasepage,
- .invalidatepage = afs_invalidatepage,
+ .releasepage = netfs_releasepage,
+ .invalidatepage = netfs_invalidatepage,
.direct_IO = afs_direct_IO,
.writepage = afs_writepage,
- .writepages = afs_writepages,
+ .writepages = netfs_writepages,
};

const struct address_space_operations afs_symlink_aops = {
.readpage = afs_symlink_readpage,
- .releasepage = afs_releasepage,
- .invalidatepage = afs_invalidatepage,
+ .releasepage = netfs_releasepage,
+ .invalidatepage = netfs_invalidatepage,
};

static const struct vm_operations_struct afs_vm_ops = {
@@ -378,6 +375,11 @@ static void afs_free_dirty_region(struct netfs_dirty_region *region)
key_put(region->netfs_priv);
}

+static void afs_init_wreq(struct netfs_write_request *wreq)
+{
+ //wreq->netfs_priv = key_get(afs_file_key(file));
+}
+
static void afs_update_i_size(struct file *file, loff_t new_i_size)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
@@ -400,6 +402,7 @@ const struct netfs_request_ops afs_req_ops = {
.init_dirty_region = afs_init_dirty_region,
.free_dirty_region = afs_free_dirty_region,
.update_i_size = afs_update_i_size,
+ .init_wreq = afs_init_wreq,
};

int afs_write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -408,115 +411,6 @@ int afs_write_inode(struct inode *inode, struct writeback_control *wbc)
return 0;
}

-/*
- * Adjust the dirty region of the page on truncation or full invalidation,
- * getting rid of the markers altogether if the region is entirely invalidated.
- */
-static void afs_invalidate_dirty(struct page *page, unsigned int offset,
- unsigned int length)
-{
- struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
- unsigned long priv;
- unsigned int f, t, end = offset + length;
-
- priv = page_private(page);
-
- /* we clean up only if the entire page is being invalidated */
- if (offset == 0 && length == thp_size(page))
- goto full_invalidate;
-
- /* If the page was dirtied by page_mkwrite(), the PTE stays writable
- * and we don't get another notification to tell us to expand it
- * again.
- */
- if (afs_is_page_dirty_mmapped(priv))
- return;
-
- /* We may need to shorten the dirty region */
- f = afs_page_dirty_from(page, priv);
- t = afs_page_dirty_to(page, priv);
-
- if (t <= offset || f >= end)
- return; /* Doesn't overlap */
-
- if (f < offset && t > end)
- return; /* Splits the dirty region - just absorb it */
-
- if (f >= offset && t <= end)
- goto undirty;
-
- if (f < offset)
- t = offset;
- else
- f = end;
- if (f == t)
- goto undirty;
-
- priv = afs_page_dirty(page, f, t);
- set_page_private(page, priv);
- trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page);
- return;
-
-undirty:
- trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page);
- clear_page_dirty_for_io(page);
-full_invalidate:
- trace_afs_page_dirty(vnode, tracepoint_string("inval"), page);
- detach_page_private(page);
-}
-
-/*
- * invalidate part or all of a page
- * - release a page and clean up its private data if offset is 0 (indicating
- * the entire page)
- */
-static void afs_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
-{
- _enter("{%lu},%u,%u", page->index, offset, length);
-
- BUG_ON(!PageLocked(page));
-
- if (PagePrivate(page))
- afs_invalidate_dirty(page, offset, length);
-
- wait_on_page_fscache(page);
- _leave("");
-}
-
-/*
- * release a page and clean up its private state if it's not busy
- * - return true if the page can now be released, false if not
- */
-static int afs_releasepage(struct page *page, gfp_t gfp_flags)
-{
- struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
-
- _enter("{{%llx:%llu}[%lu],%lx},%x",
- vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
- gfp_flags);
-
- /* deny if page is being written to the cache and the caller hasn't
- * elected to wait */
-#ifdef CONFIG_AFS_FSCACHE
- if (PageFsCache(page)) {
- if (!(gfp_flags & __GFP_DIRECT_RECLAIM) || !(gfp_flags & __GFP_FS))
- return false;
- wait_on_page_fscache(page);
- fscache_note_page_release(afs_vnode_cache(vnode));
- }
-#endif
-
- if (PagePrivate(page)) {
- trace_afs_page_dirty(vnode, tracepoint_string("rel"), page);
- detach_page_private(page);
- }
-
- /* indicate that the page can be released */
- _leave(" = T");
- return 1;
-}
-
/*
* Handle setting up a memory mapping on an AFS file.
*/
diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
index 3e11453ad2c5..a201fd7b22cf 100644
--- a/fs/netfs/Makefile
+++ b/fs/netfs/Makefile
@@ -3,6 +3,7 @@
netfs-y := \
objects.o \
read_helper.o \
+ write_back.o \
write_helper.o
# dio_helper.o

diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 77ceab694348..fe85581d8ac0 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -8,6 +8,7 @@
#include <linux/netfs.h>
#include <linux/fscache.h>
#include <trace/events/netfs.h>
+#include "xa_iterator.h"

#ifdef pr_fmt
#undef pr_fmt
@@ -34,6 +35,19 @@ void netfs_free_dirty_region(struct netfs_i_context *ctx, struct netfs_dirty_reg
void netfs_put_dirty_region(struct netfs_i_context *ctx,
struct netfs_dirty_region *region,
enum netfs_region_trace what);
+struct netfs_write_request *netfs_alloc_write_request(struct address_space *mapping,
+ bool is_dio);
+void netfs_get_write_request(struct netfs_write_request *wreq,
+ enum netfs_wreq_trace what);
+void netfs_free_write_request(struct work_struct *work);
+void netfs_put_write_request(struct netfs_write_request *wreq,
+ bool was_async, enum netfs_wreq_trace what);
+
+static inline void netfs_see_write_request(struct netfs_write_request *wreq,
+ enum netfs_wreq_trace what)
+{
+ trace_netfs_ref_wreq(wreq->debug_id, refcount_read(&wreq->usage), what);
+}

/*
* read_helper.c
@@ -46,6 +60,7 @@ int netfs_prefetch_for_write(struct file *file, struct page *page, loff_t pos, s
/*
* write_helper.c
*/
+void netfs_writeback_worker(struct work_struct *work);
void netfs_flush_region(struct netfs_i_context *ctx,
struct netfs_dirty_region *region,
enum netfs_dirty_trace why);
@@ -74,6 +89,7 @@ extern atomic_t netfs_n_rh_write_failed;
extern atomic_t netfs_n_rh_write_zskip;
extern atomic_t netfs_n_wh_region;
extern atomic_t netfs_n_wh_flush_group;
+extern atomic_t netfs_n_wh_wreq;


static inline void netfs_stat(atomic_t *stat)
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index ba1e052aa352..6e9b2a00076d 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -111,3 +111,81 @@ void netfs_put_dirty_region(struct netfs_i_context *ctx,
netfs_free_dirty_region(ctx, region);
}
}
+
+struct netfs_write_request *netfs_alloc_write_request(struct address_space *mapping,
+ bool is_dio)
+{
+ static atomic_t debug_ids;
+ struct inode *inode = mapping->host;
+ struct netfs_i_context *ctx = netfs_i_context(inode);
+ struct netfs_write_request *wreq;
+
+ wreq = kzalloc(sizeof(struct netfs_write_request), GFP_KERNEL);
+ if (wreq) {
+ wreq->mapping = mapping;
+ wreq->inode = inode;
+ wreq->netfs_ops = ctx->ops;
+ wreq->debug_id = atomic_inc_return(&debug_ids);
+ xa_init(&wreq->buffer);
+ INIT_WORK(&wreq->work, netfs_writeback_worker);
+ refcount_set(&wreq->usage, 1);
+ ctx->ops->init_wreq(wreq);
+ netfs_stat(&netfs_n_wh_wreq);
+ trace_netfs_ref_wreq(wreq->debug_id, 1, netfs_wreq_trace_new);
+ }
+
+ return wreq;
+}
+
+void netfs_get_write_request(struct netfs_write_request *wreq,
+ enum netfs_wreq_trace what)
+{
+ int ref;
+
+ __refcount_inc(&wreq->usage, &ref);
+ trace_netfs_ref_wreq(wreq->debug_id, ref + 1, what);
+}
+
+void netfs_free_write_request(struct work_struct *work)
+{
+ struct netfs_write_request *wreq =
+ container_of(work, struct netfs_write_request, work);
+ struct netfs_i_context *ctx = netfs_i_context(wreq->inode);
+ struct page *page;
+ pgoff_t index;
+
+ if (wreq->netfs_priv)
+ wreq->netfs_ops->cleanup(wreq->mapping, wreq->netfs_priv);
+ trace_netfs_ref_wreq(wreq->debug_id, 0, netfs_wreq_trace_free);
+ if (wreq->cache_resources.ops)
+ wreq->cache_resources.ops->end_operation(&wreq->cache_resources);
+ if (wreq->region)
+ netfs_put_dirty_region(ctx, wreq->region,
+ netfs_region_trace_put_wreq);
+ xa_for_each(&wreq->buffer, index, page) {
+ __free_page(page);
+ }
+ xa_destroy(&wreq->buffer);
+ kfree(wreq);
+ netfs_stat_d(&netfs_n_wh_wreq);
+}
+
+void netfs_put_write_request(struct netfs_write_request *wreq,
+ bool was_async, enum netfs_wreq_trace what)
+{
+ unsigned int debug_id = wreq->debug_id;
+ bool dead;
+ int ref;
+
+ dead = __refcount_dec_and_test(&wreq->usage, &ref);
+ trace_netfs_ref_wreq(debug_id, ref - 1, what);
+ if (dead) {
+ if (was_async) {
+ wreq->work.func = netfs_free_write_request;
+ if (!queue_work(system_unbound_wq, &wreq->work))
+ BUG();
+ } else {
+ netfs_free_write_request(&wreq->work);
+ }
+ }
+}
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index bfcdbbd32f4c..0b771f2f5449 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -1415,3 +1415,37 @@ int netfs_prefetch_for_write(struct file *file, struct page *page,
_leave(" = %d", ret);
return ret;
}
+
+/*
+ * Invalidate part or all of a page
+ * - for now, this just waits for any write to the cache on the page to
+ * complete before the invalidation can go ahead
+ */
+void netfs_invalidatepage(struct page *page, unsigned int offset, unsigned int length)
+{
+ _enter("{%lu},%u,%u", page->index, offset, length);
+
+ wait_on_page_fscache(page);
+}
+EXPORT_SYMBOL(netfs_invalidatepage);
+
+/*
+ * Release a page if it's not still in use (e.g. being written to the cache)
+ * - return true if the page can now be released, false if not
+ */
+int netfs_releasepage(struct page *page, gfp_t gfp_flags)
+{
+ struct netfs_i_context *ctx = netfs_i_context(page->mapping->host);
+
+ kenter("");
+
+ if (PageFsCache(page)) {
+ if (!(gfp_flags & __GFP_DIRECT_RECLAIM) || !(gfp_flags & __GFP_FS))
+ return false;
+ wait_on_page_fscache(page);
+ fscache_note_page_release(ctx->cache);
+ }
+
+ return true;
+}
+EXPORT_SYMBOL(netfs_releasepage);
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index 7c079ca47b5b..ac2510f8cab0 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -29,6 +29,7 @@ atomic_t netfs_n_rh_write_failed;
atomic_t netfs_n_rh_write_zskip;
atomic_t netfs_n_wh_region;
atomic_t netfs_n_wh_flush_group;
+atomic_t netfs_n_wh_wreq;

void netfs_stats_show(struct seq_file *m)
{
@@ -56,8 +57,9 @@ void netfs_stats_show(struct seq_file *m)
atomic_read(&netfs_n_rh_write),
atomic_read(&netfs_n_rh_write_done),
atomic_read(&netfs_n_rh_write_failed));
- seq_printf(m, "WrHelp : R=%u F=%u\n",
+ seq_printf(m, "WrHelp : R=%u F=%u wr=%u\n",
atomic_read(&netfs_n_wh_region),
- atomic_read(&netfs_n_wh_flush_group));
+ atomic_read(&netfs_n_wh_flush_group),
+ atomic_read(&netfs_n_wh_wreq));
}
EXPORT_SYMBOL(netfs_stats_show);
diff --git a/fs/netfs/write_back.c b/fs/netfs/write_back.c
new file mode 100644
index 000000000000..9fcb2ac50ebb
--- /dev/null
+++ b/fs/netfs/write_back.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem high-level write support.
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@xxxxxxxxxx)
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/*
+ * Process a write request.
+ */
+static void netfs_writeback(struct netfs_write_request *wreq)
+{
+ kdebug("--- WRITE ---");
+}
+
+void netfs_writeback_worker(struct work_struct *work)
+{
+ struct netfs_write_request *wreq =
+ container_of(work, struct netfs_write_request, work);
+
+ netfs_see_write_request(wreq, netfs_wreq_trace_see_work);
+ netfs_writeback(wreq);
+ netfs_put_write_request(wreq, false, netfs_wreq_trace_put_work);
+}
+
+/*
+ * Flush some of the dirty queue.
+ */
+static int netfs_flush_dirty(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_range *range,
+ loff_t *next)
+{
+ struct netfs_dirty_region *p, *q;
+ struct netfs_i_context *ctx = netfs_i_context(mapping->host);
+
+ kenter("%llx-%llx", range->start, range->end);
+
+ spin_lock(&ctx->lock);
+
+ /* Scan forwards to find dirty regions containing the suggested start
+ * point.
+ */
+ list_for_each_entry_safe(p, q, &ctx->dirty_regions, dirty_link) {
+ _debug("D=%x %llx-%llx", p->debug_id, p->dirty.start, p->dirty.end);
+ if (p->dirty.end <= range->start)
+ continue;
+ if (p->dirty.start >= range->end)
+ break;
+ if (p->state != NETFS_REGION_IS_DIRTY)
+ continue;
+ if (test_bit(NETFS_REGION_FLUSH_Q, &p->flags))
+ continue;
+
+ netfs_flush_region(ctx, p, netfs_dirty_trace_flush_writepages);
+ }
+
+ spin_unlock(&ctx->lock);
+ return 0;
+}
+
+static int netfs_unlock_pages_iterator(struct page *page)
+{
+ unlock_page(page);
+ put_page(page);
+ return 0;
+}
+
+/*
+ * Unlock all the pages in a range.
+ */
+static void netfs_unlock_pages(struct address_space *mapping,
+ pgoff_t start, pgoff_t end)
+{
+ netfs_iterate_pages(mapping, start, end, netfs_unlock_pages_iterator);
+}
+
+static int netfs_lock_pages_iterator(struct xa_state *xas,
+ struct page *page,
+ struct netfs_write_request *wreq,
+ struct writeback_control *wbc)
+{
+ int ret = 0;
+
+ /* At this point we hold neither the i_pages lock nor the
+ * page lock: the page may be truncated or invalidated
+ * (changing page->mapping to NULL), or even swizzled
+ * back from swapper_space to tmpfs file mapping
+ */
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ xas_pause(xas);
+ rcu_read_unlock();
+ ret = lock_page_killable(page);
+ rcu_read_lock();
+ } else {
+ if (!trylock_page(page))
+ ret = -EBUSY;
+ }
+
+ return ret;
+}
+
+/*
+ * Lock all the pages in a range and add them to the write request.
+ */
+static int netfs_lock_pages(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_write_request *wreq)
+{
+ pgoff_t last = wreq->last;
+ int ret;
+
+ kenter("%lx-%lx", wreq->first, wreq->last);
+ ret = netfs_iterate_get_pages(mapping, wreq->first, wreq->last,
+ netfs_lock_pages_iterator, wreq, wbc);
+ if (ret < 0)
+ goto failed;
+
+ if (wreq->last < last) {
+ kdebug("Some pages missing %lx < %lx", wreq->last, last);
+ ret = -EIO;
+ goto failed;
+ }
+
+ return 0;
+
+failed:
+ netfs_unlock_pages(mapping, wreq->first, wreq->last);
+ return ret;
+}
+
+static int netfs_set_page_writeback(struct page *page)
+{
+ /* Now we need to clear the dirty flags on any page that's not shared
+ * with any other dirty region.
+ */
+ if (!clear_page_dirty_for_io(page))
+ BUG();
+
+ /* We set writeback unconditionally because a page may participate in
+ * more than one simultaneous writeback.
+ */
+ set_page_writeback(page);
+ return 0;
+}
+
+/*
+ * Extract a region to write back.
+ */
+static struct netfs_dirty_region *netfs_extract_dirty_region(
+ struct netfs_i_context *ctx,
+ struct netfs_write_request *wreq)
+{
+ struct netfs_dirty_region *region = NULL, *spare;
+
+ spare = netfs_alloc_dirty_region();
+ if (!spare)
+ return NULL;
+
+ spin_lock(&ctx->lock);
+
+ if (list_empty(&ctx->flush_queue))
+ goto out;
+
+ region = list_first_entry(&ctx->flush_queue,
+ struct netfs_dirty_region, flush_link);
+
+ wreq->region = netfs_get_dirty_region(ctx, region, netfs_region_trace_get_wreq);
+ wreq->start = region->dirty.start;
+ wreq->len = region->dirty.end - region->dirty.start;
+ wreq->first = region->dirty.start / PAGE_SIZE;
+ wreq->last = (region->dirty.end - 1) / PAGE_SIZE;
+
+ /* TODO: Split the region if it's larger than a certain size. This is
+ * tricky as we need to observe page, crypto and compression block
+ * boundaries. The crypto/comp bounds are defined by ctx->bsize, but
+ * we don't know where the page boundaries are.
+ *
+ * All of these boundaries, however, must be pow-of-2 sized and
+ * pow-of-2 aligned, so they never partially overlap
+ */
+
+ smp_store_release(&region->state, NETFS_REGION_IS_FLUSHING);
+ trace_netfs_dirty(ctx, region, NULL, netfs_dirty_trace_flushing);
+ wake_up_var(&region->state);
+ list_del_init(&region->flush_link);
+
+out:
+ spin_unlock(&ctx->lock);
+ netfs_free_dirty_region(ctx, spare);
+ kleave(" = D=%x", region ? region->debug_id : 0);
+ return region;
+}
+
+/*
+ * Schedule a write for the first region on the flush queue.
+ */
+static int netfs_begin_write(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct netfs_write_request *wreq;
+ struct netfs_dirty_region *region;
+ struct netfs_i_context *ctx = netfs_i_context(mapping->host);
+ int ret;
+
+ wreq = netfs_alloc_write_request(mapping, false);
+ if (!wreq)
+ return -ENOMEM;
+
+ ret = 0;
+ region = netfs_extract_dirty_region(ctx, wreq);
+ if (!region)
+ goto error;
+
+ ret = netfs_lock_pages(mapping, wbc, wreq);
+ if (ret < 0)
+ goto error;
+
+ trace_netfs_wreq(wreq);
+
+ netfs_iterate_pages(mapping, wreq->first, wreq->last,
+ netfs_set_page_writeback);
+ netfs_unlock_pages(mapping, wreq->first, wreq->last);
+ iov_iter_xarray(&wreq->source, WRITE, &wreq->mapping->i_pages,
+ wreq->start, wreq->len);
+
+ if (!queue_work(system_unbound_wq, &wreq->work))
+ BUG();
+
+ kleave(" = %lu", wreq->last - wreq->first + 1);
+ return wreq->last - wreq->first + 1;
+
+error:
+ netfs_put_write_request(wreq, wbc->sync_mode != WB_SYNC_NONE,
+ netfs_wreq_trace_put_discard);
+ kleave(" = %d", ret);
+ return ret;
+}
+
+/**
+ * netfs_writepages - Initiate writeback to the server and cache
+ * @mapping: The pagecache to write from
+ * @wbc: Hints from the VM as to what to write
+ *
+ * This is a helper intended to be called directly from a network filesystem's
+ * address space operations table to perform writeback to the server and the
+ * cache.
+ *
+ * We have to be careful as we can end up racing with setattr() truncating the
+ * pagecache since the caller doesn't take a lock here to prevent it.
+ */
+int netfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct netfs_range range;
+ loff_t next;
+ int ret;
+
+ kenter("%lx,%llx-%llx,%u,%c%c%c%c,%u,%u",
+ wbc->nr_to_write,
+ wbc->range_start, wbc->range_end,
+ wbc->sync_mode,
+ wbc->for_kupdate ? 'k' : '-',
+ wbc->for_background ? 'b' : '-',
+ wbc->for_reclaim ? 'r' : '-',
+ wbc->for_sync ? 's' : '-',
+ wbc->tagged_writepages,
+ wbc->range_cyclic);
+
+ //dump_stack();
+
+ if (wbc->range_cyclic) {
+ range.start = mapping->writeback_index * PAGE_SIZE;
+ range.end = ULLONG_MAX;
+ ret = netfs_flush_dirty(mapping, wbc, &range, &next);
+ if (range.start > 0 && wbc->nr_to_write > 0 && ret == 0) {
+ range.start = 0;
+ range.end = mapping->writeback_index * PAGE_SIZE;
+ ret = netfs_flush_dirty(mapping, wbc, &range, &next);
+ }
+ mapping->writeback_index = next / PAGE_SIZE;
+ } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
+ range.start = 0;
+ range.end = ULLONG_MAX;
+ ret = netfs_flush_dirty(mapping, wbc, &range, &next);
+ if (wbc->nr_to_write > 0 && ret == 0)
+ mapping->writeback_index = next;
+ } else {
+ range.start = wbc->range_start;
+ range.end = wbc->range_end + 1;
+ ret = netfs_flush_dirty(mapping, wbc, &range, &next);
+ }
+
+ if (ret == 0)
+ ret = netfs_begin_write(mapping, wbc);
+
+ _leave(" = %d", ret);
+ return ret;
+}
+EXPORT_SYMBOL(netfs_writepages);
diff --git a/fs/netfs/xa_iterator.h b/fs/netfs/xa_iterator.h
new file mode 100644
index 000000000000..3f37827f0f99
--- /dev/null
+++ b/fs/netfs/xa_iterator.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* xarray iterator macros for netfslib.
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@xxxxxxxxxx)
+ */
+
+/*
+ * Iterate over a range of pages. xarray locks are not held over the iterator
+ * function, so it can sleep if necessary. The start and end positions are
+ * updated to indicate the span of pages actually processed.
+ */
+#define netfs_iterate_pages(MAPPING, START, END, ITERATOR, ...) \
+ ({ \
+ unsigned long __it_index; \
+ struct page *page; \
+ pgoff_t __it_start = (START); \
+ pgoff_t __it_end = (END); \
+ pgoff_t __it_tmp; \
+ int ret = 0; \
+ \
+ (END) = __it_start; \
+ xa_for_each_range(&(MAPPING)->i_pages, __it_index, page, \
+ __it_start, __it_end) { \
+ if (xa_is_value(page)) { \
+ ret = -EIO; /* Not a real page. */ \
+ break; \
+ } \
+ if (__it_index < (START)) \
+ (START) = __it_index; \
+ ret = ITERATOR(page, ##__VA_ARGS__); \
+ if (ret < 0) \
+ break; \
+ __it_tmp = __it_index + thp_nr_pages(page) - 1; \
+ if (__it_tmp > (END)) \
+ (END) = __it_tmp; \
+ } \
+ ret; \
+ })
+
+/*
+ * Iterate over a set of pages, getting each one before calling the iteration
+ * function. The iteration function may drop the RCU read lock, but should
+ * call xas_pause() before it does so. The start and end positions are updated
+ * to indicate the span of pages actually processed.
+ */
+#define netfs_iterate_get_pages(MAPPING, START, END, ITERATOR, ...) \
+ ({ \
+ unsigned long __it_index; \
+ struct page *page; \
+ pgoff_t __it_start = (START); \
+ pgoff_t __it_end = (END); \
+ pgoff_t __it_tmp; \
+ int ret = 0; \
+ \
+ XA_STATE(xas, &(MAPPING)->i_pages, __it_start); \
+ (END) = __it_start; \
+ rcu_read_lock(); \
+ for (page = xas_load(&xas); page; page = xas_next_entry(&xas, __it_end)) { \
+ if (xas_retry(&xas, page)) \
+ continue; \
+ if (xa_is_value(page)) \
+ break; \
+ if (!page_cache_get_speculative(page)) { \
+ xas_reset(&xas); \
+ continue; \
+ } \
+ if (unlikely(page != xas_reload(&xas))) { \
+ put_page(page); \
+ xas_reset(&xas); \
+ continue; \
+ } \
+ __it_index = page_index(page); \
+ if (__it_index < (START)) \
+ (START) = __it_index; \
+ ret = ITERATOR(&xas, page, ##__VA_ARGS__); \
+ if (ret < 0) \
+ break; \
+ __it_tmp = __it_index + thp_nr_pages(page) - 1; \
+ if (__it_tmp > (END)) \
+ (END) = __it_tmp; \
+ } \
+ rcu_read_unlock(); \
+ ret; \
+ })
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index fc91711d3178..9f874e7ed45a 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -242,6 +242,35 @@ struct netfs_dirty_region {
refcount_t ref;
};

+/*
+ * Descriptor for a write request. This is used to manage the preparation and
+ * storage of a sequence of dirty data - its compression/encryption and its
+ * writing to one or more servers and the cache.
+ *
+ * The prepared data is buffered here.
+ */
+struct netfs_write_request {
+ struct work_struct work;
+ struct inode *inode; /* The file being accessed */
+ struct address_space *mapping; /* The mapping being accessed */
+ struct netfs_dirty_region *region; /* The region we're writing back */
+ struct netfs_cache_resources cache_resources;
+ struct xarray buffer; /* Buffer for encrypted/compressed data */
+ struct iov_iter source; /* The iterator to be used */
+ struct list_head write_link; /* Link in i_context->write_requests */
+ void *netfs_priv; /* Private data for the netfs */
+ unsigned int debug_id;
+ short error; /* 0 or error that occurred */
+ loff_t i_size; /* Size of the file */
+ loff_t start; /* Start position */
+ size_t len; /* Length of the request */
+ pgoff_t first; /* First page included */
+ pgoff_t last; /* Last page included */
+ refcount_t usage;
+ unsigned long flags;
+ const struct netfs_request_ops *netfs_ops;
+};
+
enum netfs_write_compatibility {
NETFS_WRITES_COMPATIBLE, /* Dirty regions can be directly merged */
NETFS_WRITES_SUPERSEDE, /* Second write can supersede the first without first
@@ -275,6 +304,9 @@ struct netfs_request_ops {
struct netfs_dirty_region *candidate);
bool (*check_compatible_write)(struct netfs_dirty_region *region, struct file *file);
void (*update_i_size)(struct file *file, loff_t i_size);
+
+ /* Write request handling */
+ void (*init_wreq)(struct netfs_write_request *wreq);
};

/*
@@ -324,6 +356,9 @@ extern int netfs_write_begin(struct file *, struct address_space *,
loff_t, unsigned int, unsigned int, struct page **,
void **);
extern ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
+extern int netfs_writepages(struct address_space *mapping, struct writeback_control *wbc);
+extern void netfs_invalidatepage(struct page *page, unsigned int offset, unsigned int length);
+extern int netfs_releasepage(struct page *page, gfp_t gfp_flags);

extern void netfs_subreq_terminated(struct netfs_read_subrequest *, ssize_t, bool);
extern void netfs_stats_show(struct seq_file *);
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 808433e6ddd3..e70abb5033e6 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -63,6 +63,8 @@ enum netfs_dirty_trace {
netfs_dirty_trace_complete,
netfs_dirty_trace_flush_conflict,
netfs_dirty_trace_flush_dsync,
+ netfs_dirty_trace_flush_writepages,
+ netfs_dirty_trace_flushing,
netfs_dirty_trace_merged_back,
netfs_dirty_trace_merged_forw,
netfs_dirty_trace_merged_sub,
@@ -82,11 +84,20 @@ enum netfs_region_trace {
netfs_region_trace_get_wreq,
netfs_region_trace_put_discard,
netfs_region_trace_put_merged,
+ netfs_region_trace_put_wreq,
netfs_region_trace_put_write_iter,
netfs_region_trace_free,
netfs_region_trace_new,
};

+enum netfs_wreq_trace {
+ netfs_wreq_trace_free,
+ netfs_wreq_trace_put_discard,
+ netfs_wreq_trace_put_work,
+ netfs_wreq_trace_see_work,
+ netfs_wreq_trace_new,
+};
+
#endif

#define netfs_read_traces \
@@ -149,6 +160,8 @@ enum netfs_region_trace {
EM(netfs_dirty_trace_complete, "COMPLETE ") \
EM(netfs_dirty_trace_flush_conflict, "FLSH CONFL") \
EM(netfs_dirty_trace_flush_dsync, "FLSH DSYNC") \
+ EM(netfs_dirty_trace_flush_writepages, "WRITEPAGES") \
+ EM(netfs_dirty_trace_flushing, "FLUSHING ") \
EM(netfs_dirty_trace_merged_back, "MERGE BACK") \
EM(netfs_dirty_trace_merged_forw, "MERGE FORW") \
EM(netfs_dirty_trace_merged_sub, "SUBSUMED ") \
@@ -167,10 +180,19 @@ enum netfs_region_trace {
EM(netfs_region_trace_get_wreq, "GET WREQ ") \
EM(netfs_region_trace_put_discard, "PUT DISCARD") \
EM(netfs_region_trace_put_merged, "PUT MERGED ") \
+ EM(netfs_region_trace_put_wreq, "PUT WREQ ") \
EM(netfs_region_trace_put_write_iter, "PUT WRITER ") \
EM(netfs_region_trace_free, "FREE ") \
E_(netfs_region_trace_new, "NEW ")

+#define netfs_wreq_traces \
+ EM(netfs_wreq_trace_free, "FREE ") \
+ EM(netfs_wreq_trace_put_discard, "PUT DISCARD") \
+ EM(netfs_wreq_trace_put_work, "PUT WORK ") \
+ EM(netfs_wreq_trace_see_work, "SEE WORK ") \
+ E_(netfs_wreq_trace_new, "NEW ")
+
+
/*
* Export enum symbols via userspace.
*/
@@ -187,6 +209,7 @@ netfs_failures;
netfs_region_types;
netfs_region_states;
netfs_dirty_traces;
+netfs_wreq_traces;

/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -435,6 +458,55 @@ TRACE_EVENT(netfs_dirty,
)
);

+TRACE_EVENT(netfs_wreq,
+ TP_PROTO(struct netfs_write_request *wreq),
+
+ TP_ARGS(wreq),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, wreq )
+ __field(unsigned int, cookie )
+ __field(loff_t, start )
+ __field(size_t, len )
+ ),
+
+ TP_fast_assign(
+ __entry->wreq = wreq->debug_id;
+ __entry->cookie = wreq->cache_resources.debug_id;
+ __entry->start = wreq->start;
+ __entry->len = wreq->len;
+ ),
+
+ TP_printk("W=%08x c=%08x s=%llx %zx",
+ __entry->wreq,
+ __entry->cookie,
+ __entry->start, __entry->len)
+ );
+
+TRACE_EVENT(netfs_ref_wreq,
+ TP_PROTO(unsigned int wreq_debug_id, int ref,
+ enum netfs_wreq_trace what),
+
+ TP_ARGS(wreq_debug_id, ref, what),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, wreq )
+ __field(int, ref )
+ __field(enum netfs_wreq_trace, what )
+ ),
+
+ TP_fast_assign(
+ __entry->wreq = wreq_debug_id;
+ __entry->ref = ref;
+ __entry->what = what;
+ ),
+
+ TP_printk("W=%08x %s r=%u",
+ __entry->wreq,
+ __print_symbolic(__entry->what, netfs_wreq_traces),
+ __entry->ref)
+ );
+
#endif /* _TRACE_NETFS_H */

/* This part must be outside protection */