Re: [RFC PATCH v2 6/7] mm: zswap: simplify writeback function

From: Domenico Cerasuolo
Date: Fri Jun 09 2023 - 06:32:01 EST


On Wed, Jun 7, 2023 at 11:27 AM Yosry Ahmed <yosryahmed@xxxxxxxxxx> wrote:
>
> On Tue, Jun 6, 2023 at 7:56 AM Domenico Cerasuolo
> <cerasuolodomenico@xxxxxxxxx> wrote:
> >
> > Previously, in zswap, the writeback function was passed to zpool drivers
> > for their usage in calling the writeback operation. However, since the
> > drivers did not possess references to entries and the function was
> > specifically designed to work with handles, the writeback process has
> > been modified to occur directly within zswap. Consequently, this change
> > allows for some simplification of the writeback function, taking into
> > account the updated workflow.
> >
> > Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@xxxxxxxxx>
> > ---
> > mm/zswap.c | 69 ++++++++++++++----------------------------------------
> > 1 file changed, 17 insertions(+), 52 deletions(-)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index 2831bf56b168..ef8604812352 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -250,7 +250,8 @@ static bool zswap_has_pool;
> > pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
> > zpool_get_type((p)->zpool))
> >
> > -static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
> > +static int zswap_writeback_entry(struct zswap_entry *entry, struct zswap_header *zhdr,
> > + struct zswap_tree *tree);
> > static int zswap_pool_get(struct zswap_pool *pool);
> > static void zswap_pool_put(struct zswap_pool *pool);
> >
> > @@ -632,15 +633,21 @@ static int zswap_shrink(struct zswap_pool *pool)
> > }
> > spin_unlock(&tree->lock);
> >
> > - ret = zswap_writeback_entry(pool->zpool, lru_entry->handle);
> > + ret = zswap_writeback_entry(lru_entry, zhdr, tree);
> >
> > spin_lock(&tree->lock);
> > if (ret) {
> > spin_lock(&pool->lru_lock);
> > list_move(&lru_entry->lru, &pool->lru);
> > spin_unlock(&pool->lru_lock);
> > + zswap_entry_put(tree, tree_entry);
> > + } else {
> > + /* free the local reference */
> > + zswap_entry_put(tree, tree_entry);
> > + /* free the entry if it's not been invalidated*/
> > + if (lru_entry == zswap_rb_search(&tree->rbroot, swpoffset))
> > + zswap_entry_put(tree, tree_entry);
>
> The comment that was here about the 2 possible cases was useful imo,
> maybe keep it?

I honestly found it brittle in that we're not interested in the refcount there,
but rather in releasing the base reference that keeps the entry valid.
There's not way to know which refcount value it should be though, consider
that throughout this series the values can be 1 or 0, but also 2 or 1,
depending on where this function is called (zpool_shrink or zswap_shrink).

>
> Also, I am not sure why we need to do a tree search vs. just reading
> the refcount here before the first put. We can even make
> zswap_entry_put() return the refcount after the put to know if we need
> the additional put or not.
>
> Can anyone think of any reason why we need to explicitly search the tree here?

I think that the reasoning here is that refcount > 0 doesn't necessarily mean
that the entry is on the tree. I'm not sure if reading refcount directly here
would cause an issue now, probably not, but I wonder if bugs could be
introduced if the context in which this function is called changes.

>
> > }
> > - zswap_entry_put(tree, tree_entry);
> > spin_unlock(&tree->lock);
> >
> > return ret ? -EAGAIN : 0;
> > @@ -1039,16 +1046,14 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
> > * the swap cache, the compressed version stored by zswap can be
> > * freed.
> > */
> > -static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> > +static int zswap_writeback_entry(struct zswap_entry *entry, struct zswap_header *zhdr,
> > + struct zswap_tree *tree)
> > {
> > - struct zswap_header *zhdr;
> > - swp_entry_t swpentry;
> > - struct zswap_tree *tree;
> > - pgoff_t offset;
> > - struct zswap_entry *entry;
> > + swp_entry_t swpentry = zhdr->swpentry;
> > struct page *page;
> > struct scatterlist input, output;
> > struct crypto_acomp_ctx *acomp_ctx;
> > + struct zpool *pool = entry->pool->zpool;
> >
> > u8 *src, *tmp = NULL;
> > unsigned int dlen;
> > @@ -1063,25 +1068,6 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> > return -ENOMEM;
> > }
> >
> > - /* extract swpentry from data */
> > - zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
> > - swpentry = zhdr->swpentry; /* here */
> > - tree = zswap_trees[swp_type(swpentry)];
> > - offset = swp_offset(swpentry);
> > - zpool_unmap_handle(pool, handle);
> > -
> > - /* find and ref zswap entry */
> > - spin_lock(&tree->lock);
> > - entry = zswap_entry_find_get(&tree->rbroot, offset);
> > - if (!entry) {
> > - /* entry was invalidated */
> > - spin_unlock(&tree->lock);
> > - kfree(tmp);
> > - return 0;
> > - }
> > - spin_unlock(&tree->lock);
> > - BUG_ON(offset != entry->offset);
> > -
> > /* try to allocate swap cache page */
> > switch (zswap_get_swap_cache_page(swpentry, &page)) {
> > case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
> > @@ -1115,12 +1101,12 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> > acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
> > dlen = PAGE_SIZE;
> >
> > - zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
> > + zhdr = zpool_map_handle(pool, entry->handle, ZPOOL_MM_RO);
> > src = (u8 *)zhdr + sizeof(struct zswap_header);
> > if (!zpool_can_sleep_mapped(pool)) {
> > memcpy(tmp, src, entry->length);
> > src = tmp;
> > - zpool_unmap_handle(pool, handle);
> > + zpool_unmap_handle(pool, entry->handle);
> > }
> >
> > mutex_lock(acomp_ctx->mutex);
> > @@ -1135,7 +1121,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> > if (!zpool_can_sleep_mapped(pool))
> > kfree(tmp);
> > else
> > - zpool_unmap_handle(pool, handle);
> > + zpool_unmap_handle(pool, entry->handle);
> >
> > BUG_ON(ret);
> > BUG_ON(dlen != PAGE_SIZE);
> > @@ -1152,23 +1138,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> > put_page(page);
> > zswap_written_back_pages++;
> >
> > - spin_lock(&tree->lock);
> > - /* drop local reference */
> > - zswap_entry_put(tree, entry);
> > -
> > - /*
> > - * There are two possible situations for entry here:
> > - * (1) refcount is 1(normal case), entry is valid and on the tree
> > - * (2) refcount is 0, entry is freed and not on the tree
> > - * because invalidate happened during writeback
> > - * search the tree and free the entry if find entry
> > - */
> > - if (entry == zswap_rb_search(&tree->rbroot, offset))
> > - zswap_entry_put(tree, entry);
> > - spin_unlock(&tree->lock);
> > -
> > return ret;
> > -
> > fail:
> > if (!zpool_can_sleep_mapped(pool))
> > kfree(tmp);
> > @@ -1177,13 +1147,8 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> > * if we get here due to ZSWAP_SWAPCACHE_EXIST
> > * a load may be happening concurrently.
> > * it is safe and okay to not free the entry.
> > - * if we free the entry in the following put
> > * it is also okay to return !0
> > */
> > - spin_lock(&tree->lock);
> > - zswap_entry_put(tree, entry);
> > - spin_unlock(&tree->lock);
> > -
> > return ret;
> > }
> >
> > --
> > 2.34.1
> >