[PATCH 3/4] mm: memcg: implement unbalance proactive reclaim

From: Huan Yang
Date: Wed Nov 08 2023 - 01:59:49 EST


This patch add swappiness arg into proactive reclaim interface.
User can type "bytes swappiness" into memory.reclaim to control
reclaim tendency.
Specially, user can type below to only reclaim anon folios:
`"100M" 200 > memory.reclaim`
Or, type below to only reclaim file folios:
`100M 1 > memory.reclaim`

User can only type key into memory.reclaim to keep original use.

Signed-off-by: Huan Yang <link@xxxxxxxx>
---
Documentation/admin-guide/cgroup-v2.rst | 16 ++++---
include/linux/swap.h | 2 +
mm/memcontrol.c | 55 +++++++++++++++++++++----
mm/vmscan.c | 2 +
4 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index c153d0c75f34..5a471ac7f0c3 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1275,16 +1275,22 @@ PAGE_SIZE multiple when read back.
This is a simple interface to trigger memory reclaim in the
target cgroup.

- This file accepts a single key, the number of bytes to reclaim.
- No nested keys are currently supported.
+ This file accepts a few key, the number of bytes to reclaim.
+ Few nested keys are currently supported.

Example::

echo "1G" > memory.reclaim

- The interface can be later extended with nested keys to
- configure the reclaim behavior. For example, specify the
- type of memory to reclaim from (anon, file, ..).
+ The interface extended with nested keys to configure the
+ reclaim behavior. For example, specify the swappiness of
+ memory to reclaim from (anon, file, ..).
+
+ Example::
+
+ echo "1G" 200 > memory.reclaim (only reclaim anon)
+ echo "1G" 0 > memory.reclaim (only reclaim file)
+ echo "1G" 1 > memory.reclaim (only reclaim file)

Please note that the kernel can over or under reclaim from
the target cgroup. If less bytes are reclaimed than the
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3ba146ae7cf5..a024194301d4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -407,6 +407,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,

#define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
+#define MEMCG_RECLAIM_ANON (1 << 3)
+#define MEMCG_RECLAIM_FILE (1 << 4)
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index be2ad117515e..a0e460abd41c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6953,19 +6953,47 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
unsigned int reclaim_options;
+ int swappiness = -1, org_swappiness, n;
+ char *tmpbuf;
int err;

- buf = strstrip(buf);
- err = page_counter_memparse(buf, "", &nr_to_reclaim);
+ tmpbuf = kvzalloc(nbytes, GFP_KERNEL);
+ if (unlikely(!tmpbuf))
+ return -ENOMEM;
+
+ buf = skip_spaces(buf);
+ n = sscanf(buf, "%s %d", tmpbuf, &swappiness);
+ if (n < 1) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ if (n == 2 && (swappiness > 200 || swappiness < 0)) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ err = page_counter_memparse(tmpbuf, "", &nr_to_reclaim);
if (err)
- return err;
+ goto out_free;

reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
+ if (swappiness != -1) {
+ org_swappiness = memcg->swappiness;
+ memcg->swappiness = swappiness;
+ if (swappiness == 200)
+ reclaim_options |= MEMCG_RECLAIM_ANON;
+ else if (swappiness == 0 || swappiness == 1)
+ reclaim_options |= MEMCG_RECLAIM_FILE;
+ }
+
while (nr_reclaimed < nr_to_reclaim) {
unsigned long reclaimed;

- if (signal_pending(current))
- return -EINTR;
+ if (signal_pending(current)) {
+ err = -EINTR;
+ goto out;
+ }

/*
* This is the final attempt, drain percpu lru caches in the
@@ -6979,13 +7007,26 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
GFP_KERNEL, reclaim_options);

- if (!reclaimed && !nr_retries--)
- return -EAGAIN;
+ if (!reclaimed && !nr_retries--) {
+ err = -EAGAIN;
+ goto out;
+ }

nr_reclaimed += reclaimed;
}

+ if (swappiness != -1)
+ memcg->swappiness = org_swappiness;
+
return nbytes;
+
+out:
+ if (swappiness != -1)
+ memcg->swappiness = org_swappiness;
+
+out_free:
+ kvfree(tmpbuf);
+ return err;
}

static struct cftype memory_files[] = {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9243a1f0d606..f4221ec833db 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6505,6 +6505,8 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.may_unmap = 1,
.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
+ .unbalance_anon = !!(reclaim_options & MEMCG_RECLAIM_ANON),
+ .unbalance_file = !!(reclaim_options & MEMCG_RECLAIM_FILE),
};
/*
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
--
2.34.1