[RFC PATCH 1/7] mm: zswap: add pool shrinking mechanism

From: Domenico Cerasuolo
Date: Mon Jun 05 2023 - 04:54:41 EST


Each zpool driver (zbud, z3fold and zsmalloc) implements its own shrink
function, which is called from zpool_shrink. This commit adds a unified
shrink function to zswap instead. The ultimate goal is to eliminate the
need for zpool_shrink once all zpool implementations have dropped their
shrink code.

To keep every commit in the series functional on its own, this change
only adds the mechanism itself. No modifications are made to the
backends, so functionally there are no immediate changes: the zswap
mechanism will only come into effect once the backends have removed
their shrink code. The subsequent commits make the modifications needed
in the backends.

Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@xxxxxxxxx>
---
mm/zswap.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 78 insertions(+), 5 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index bcb82e09eb64..80d7780bf066 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -159,6 +159,8 @@ struct zswap_pool {
struct work_struct shrink_work;
struct hlist_node node;
char tfm_name[CRYPTO_MAX_ALG_NAME];
+ struct list_head lru;
+ spinlock_t lock;
};

/*
@@ -176,10 +178,12 @@ struct zswap_pool {
* be held while changing the refcount. Since the lock must
* be held, there is no reason to also make refcount atomic.
* length - the length in bytes of the compressed page data. Needed during
- * decompression. For a same value filled page length is 0.
+ * decompression. For a same value filled page length is 0, and both
+ * pool and lru are invalid and must be ignored.
* pool - the zswap_pool the entry's data is in
* handle - zpool allocation handle that stores the compressed page data
* value - value of the same-value filled pages which have same content
+ * lru - handle to the pool's lru used to evict pages.
*/
struct zswap_entry {
struct rb_node rbnode;
@@ -192,6 +196,7 @@ struct zswap_entry {
unsigned long value;
};
struct obj_cgroup *objcg;
+ struct list_head lru;
};

struct zswap_header {
@@ -364,6 +369,9 @@ static void zswap_free_entry(struct zswap_entry *entry)
if (!entry->length)
atomic_dec(&zswap_same_filled_pages);
else {
+ spin_lock(&entry->pool->lock);
+ list_del_init(&entry->lru);
+ spin_unlock(&entry->pool->lock);
zpool_free(entry->pool->zpool, entry->handle);
zswap_pool_put(entry->pool);
}
@@ -584,14 +592,65 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
return NULL;
}

+/* Write back the pool's LRU-tail entry: 0 on success, -EINVAL if LRU is empty, else -EAGAIN. */
+static int zswap_shrink(struct zswap_pool *pool)
+{
+	struct zswap_entry *lru_entry, *tree_entry = NULL;
+	struct zswap_header *zhdr;
+	struct zswap_tree *tree;
+	swp_entry_t swpentry;
+	int ret;
+
+	/* get a reclaimable entry from the tail of the LRU */
+	spin_lock(&pool->lock);
+	if (list_empty(&pool->lru)) {
+		spin_unlock(&pool->lock);
+		return -EINVAL;
+	}
+	lru_entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
+	list_del_init(&lru_entry->lru);
+	/* copy swpentry to the stack while mapped; zhdr is invalid after unmap */
+	zhdr = zpool_map_handle(pool->zpool, lru_entry->handle, ZPOOL_MM_RO);
+	swpentry = zhdr->swpentry;
+	zpool_unmap_handle(pool->zpool, lru_entry->handle);
+	tree = zswap_trees[swp_type(swpentry)];
+	spin_unlock(&pool->lock);
+
+	/* hold a reference from tree so it won't be freed during writeback */
+	spin_lock(&tree->lock);
+	tree_entry = zswap_entry_find_get(&tree->rbroot, swp_offset(swpentry));
+	if (tree_entry != lru_entry) {
+		if (tree_entry)
+			zswap_entry_put(tree, tree_entry);
+		spin_unlock(&tree->lock);
+		return -EAGAIN;
+	}
+	spin_unlock(&tree->lock);
+	ret = zswap_writeback_entry(pool->zpool, lru_entry->handle);
+	/* on failure, requeue the entry at the LRU head before dropping our ref */
+	spin_lock(&tree->lock);
+	if (ret) {
+		spin_lock(&pool->lock);
+		list_move(&lru_entry->lru, &pool->lru);
+		spin_unlock(&pool->lock);
+	}
+	zswap_entry_put(tree, tree_entry);
+	spin_unlock(&tree->lock);
+	return ret ? -EAGAIN : 0;
+}
+
static void shrink_worker(struct work_struct *w)
{
struct zswap_pool *pool = container_of(w, typeof(*pool),
shrink_work);
int ret, failures = 0;

+ /* zpool_evictable will be removed once all 3 backends have migrated*/
do {
- ret = zpool_shrink(pool->zpool, 1, NULL);
+ if (zpool_evictable(pool->zpool))
+ ret = zpool_shrink(pool->zpool, 1, NULL);
+ else
+ ret = zswap_shrink(pool);
if (ret) {
zswap_reject_reclaim_fail++;
if (ret != -EAGAIN)
@@ -655,6 +714,8 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
*/
kref_init(&pool->kref);
INIT_LIST_HEAD(&pool->list);
+ INIT_LIST_HEAD(&pool->lru);
+ spin_lock_init(&pool->lock);
INIT_WORK(&pool->shrink_work, shrink_worker);

zswap_pool_debug("created", pool);
@@ -1270,7 +1331,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
}

/* store */
- hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
+ hlen = sizeof(zhdr);
gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
if (zpool_malloc_support_movable(entry->pool->zpool))
gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
@@ -1313,6 +1374,13 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
zswap_entry_put(tree, dupentry);
}
} while (ret == -EEXIST);
+ INIT_LIST_HEAD(&entry->lru);
+ /* zpool_evictable will be removed once all 3 backends have migrated*/
+ if (entry->length && !zpool_evictable(entry->pool->zpool)) {
+ spin_lock(&entry->pool->lock);
+ list_add(&entry->lru, &entry->pool->lru);
+ spin_unlock(&entry->pool->lock);
+ }
spin_unlock(&tree->lock);

/* update stats */
@@ -1384,8 +1452,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
/* decompress */
dlen = PAGE_SIZE;
src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
- if (zpool_evictable(entry->pool->zpool))
- src += sizeof(struct zswap_header);
+ src += sizeof(struct zswap_header);

if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
memcpy(tmp, src, entry->length);
@@ -1415,6 +1482,12 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
freeentry:
spin_lock(&tree->lock);
zswap_entry_put(tree, entry);
+ /* zpool_evictable will be removed once all 3 backends have migrated*/
+ if (entry->length && !zpool_evictable(entry->pool->zpool)) {
+ spin_lock(&entry->pool->lock);
+ list_move(&entry->lru, &entry->pool->lru);
+ spin_unlock(&entry->pool->lock);
+ }
spin_unlock(&tree->lock);

return ret;
--
2.34.1