[PATCH 4/4] mm: memcg: apply proactive reclaim into cgroupv1

From: Huan Yang
Date: Wed Nov 08 2023 - 02:00:05 EST


For android use, apply proactive reclaim into cgroupv1

Signed-off-by: Huan Yang <link@xxxxxxxx>
---
.../admin-guide/cgroup-v1/memory.rst | 38 +++-
mm/memcontrol.c | 170 +++++++++---------
2 files changed, 124 insertions(+), 84 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index ca7d9402f6be..600bf26a470a 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -90,6 +90,7 @@ Brief summary of control files.
This knob is deprecated and shouldn't be
used.
memory.oom_control set/show oom controls.
+ memory.reclaim proactive reclaim.
memory.numa_stat show the number of memory usage per numa
node
memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel
@@ -972,7 +973,42 @@ Test:
(Expect a bunch of notifications, and eventually, the oom-killer will
trigger.)

-12. TODO
+12. Proactive Reclaim
+=====================
+memory.reclaim A write-only nested-keyed file which exists for all cgroups.
+
+This is a simple interface to trigger memory reclaim in the
+target cgroup.
+
+This file accepts the number of bytes to reclaim, optionally followed
+by a swappiness value (0 to 200). No other keys are currently supported.
+
+Example::
+
+ echo "1G" > memory.reclaim
+
+The interface accepts an optional swappiness value to configure the
+reclaim behavior, i.e. to select which type of memory to reclaim
+from (anon, file, ..).
+
+Example::
+
+ echo "1G" 200 > memory.reclaim (only reclaim anon)
+ echo "1G" 0 > memory.reclaim (only reclaim file)
+ echo "1G" 1 > memory.reclaim (only reclaim file)
+
+Please note that the kernel can over or under reclaim from
+the target cgroup. If less bytes are reclaimed than the
+specified amount, -EAGAIN is returned.
+
+Please note that the proactive reclaim (triggered by this
+interface) is not meant to indicate memory pressure on the
+memory cgroup. Therefore socket memory balancing triggered by
+the memory reclaim normally is not exercised in this case.
+This means that the networking layer will not adapt based on
+reclaim induced by memory.reclaim.
+
+13. TODO
========

1. Make per-cgroup scanner reclaim not-shared pages first
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a0e460abd41c..03de3387d714 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5209,6 +5209,89 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p)

static int memory_stat_show(struct seq_file *m, void *v);

+/*
+ * Write handler for memory.reclaim: proactively reclaim from this memcg.
+ * Input is "<size> [swappiness]"; an optional swappiness must be in 0..200
+ * and temporarily replaces memcg->swappiness while reclaim runs.
+ * Returns nbytes on success, otherwise -ENOMEM, -EINVAL, -EINTR or -EAGAIN
+ * (or the error reported by page_counter_memparse()).
+ */
+static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
+			      size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
+	unsigned long nr_to_reclaim, nr_reclaimed = 0;
+	unsigned int reclaim_options;
+	int swappiness = -1, org_swappiness = 0, n;
+	char *tmpbuf;
+	int err = 0;
+
+	/* %s may copy all nbytes characters plus a NUL, so reserve one extra. */
+	tmpbuf = kvzalloc(nbytes + 1, GFP_KERNEL);
+	if (unlikely(!tmpbuf))
+		return -ENOMEM;
+
+	buf = skip_spaces(buf);
+	n = sscanf(buf, "%s %d", tmpbuf, &swappiness);
+	if (n < 1 || (n == 2 && (swappiness > 200 || swappiness < 0))) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	err = page_counter_memparse(tmpbuf, "", &nr_to_reclaim);
+	if (err)
+		goto out_free;
+
+	reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
+	/* Swappiness 200 requests anon-only reclaim; 0 or 1 requests file-only. */
+	if (swappiness != -1) {
+		org_swappiness = memcg->swappiness;
+		memcg->swappiness = swappiness;
+		if (swappiness == 200)
+			reclaim_options |= MEMCG_RECLAIM_ANON;
+		else if (swappiness == 0 || swappiness == 1)
+			reclaim_options |= MEMCG_RECLAIM_FILE;
+	}
+
+	while (nr_reclaimed < nr_to_reclaim) {
+		unsigned long reclaimed;
+
+		if (signal_pending(current)) {
+			err = -EINTR;
+			break;
+		}
+
+		/*
+		 * This is the final attempt, drain percpu lru caches in the
+		 * hope of introducing more evictable pages for
+		 * try_to_free_mem_cgroup_pages().
+		 */
+		if (!nr_retries)
+			lru_add_drain_all();
+
+		reclaimed = try_to_free_mem_cgroup_pages(memcg,
+					min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
+					GFP_KERNEL, reclaim_options);
+
+		if (!reclaimed && !nr_retries--) {
+			err = -EAGAIN;
+			break;
+		}
+
+		nr_reclaimed += reclaimed;
+	}
+
+	/* Undo the temporary swappiness override on success and failure alike. */
+	if (swappiness != -1)
+		memcg->swappiness = org_swappiness;
+
+	/* Common exit: tmpbuf is released on success as well as on error. */
+out_free:
+	kvfree(tmpbuf);
+	return err ? err : nbytes;
+}
+
static struct cftype mem_cgroup_legacy_files[] = {
{
.name = "usage_in_bytes",
@@ -5272,6 +5355,10 @@ static struct cftype mem_cgroup_legacy_files[] = {
.seq_show = mem_cgroup_oom_control_read,
.write_u64 = mem_cgroup_oom_control_write,
},
+ {
+ .name = "reclaim",
+ .write = memory_reclaim,
+ },
{
.name = "pressure_level",
.seq_show = mem_cgroup_dummy_seq_show,
@@ -6946,89 +7033,6 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}

-static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
- size_t nbytes, loff_t off)
-{
- struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
- unsigned int nr_retries = MAX_RECLAIM_RETRIES;
- unsigned long nr_to_reclaim, nr_reclaimed = 0;
- unsigned int reclaim_options;
- int swappiness = -1, org_swappiness, n;
- char *tmpbuf;
- int err;
-
- tmpbuf = kvzalloc(nbytes, GFP_KERNEL);
- if (unlikely(!tmpbuf))
- return -ENOMEM;
-
- buf = skip_spaces(buf);
- n = sscanf(buf, "%s %d", tmpbuf, &swappiness);
- if (n < 1) {
- err = -EINVAL;
- goto out_free;
- }
-
- if (n == 2 && (swappiness > 200 || swappiness < 0)) {
- err = -EINVAL;
- goto out_free;
- }
-
- err = page_counter_memparse(tmpbuf, "", &nr_to_reclaim);
- if (err)
- goto out_free;
-
- reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
- if (swappiness != -1) {
- org_swappiness = memcg->swappiness;
- memcg->swappiness = swappiness;
- if (swappiness == 200)
- reclaim_options |= MEMCG_RECLAIM_ANON;
- else if (swappiness == 0 || swappiness == 1)
- reclaim_options |= MEMCG_RECLAIM_FILE;
- }
-
- while (nr_reclaimed < nr_to_reclaim) {
- unsigned long reclaimed;
-
- if (signal_pending(current)) {
- err = -EINTR;
- goto out;
- }
-
- /*
- * This is the final attempt, drain percpu lru caches in the
- * hope of introducing more evictable pages for
- * try_to_free_mem_cgroup_pages().
- */
- if (!nr_retries)
- lru_add_drain_all();
-
- reclaimed = try_to_free_mem_cgroup_pages(memcg,
- min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
- GFP_KERNEL, reclaim_options);
-
- if (!reclaimed && !nr_retries--) {
- err = -EAGAIN;
- goto out;
- }
-
- nr_reclaimed += reclaimed;
- }
-
- if (swappiness != -1)
- memcg->swappiness = org_swappiness;
-
- return nbytes;
-
-out:
- if (swappiness != -1)
- memcg->swappiness = org_swappiness;
-
-out_free:
- kvfree(tmpbuf);
- return err;
-}
-
static struct cftype memory_files[] = {
{
.name = "current",
--
2.34.1