[RFC v1 4/4] kernel/fork.c: process_mmput_async: stop OOM while freeing memory

From: Claudio Imbrenda
Date: Thu Nov 11 2021 - 04:51:19 EST


This patch implements a simple OOM notifier that holds off the OOM
killer while an mm is being reclaimed asynchronously via the
process_mmput_async syscall.

Tested on s390x.

Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
---
kernel/fork.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)

diff --git a/kernel/fork.c b/kernel/fork.c
index 0da39b76005c..7279209eb69c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -118,6 +118,11 @@
*/
#define MAX_THREADS FUTEX_TID_MASK

+/*
+ * Priority of the notifier_block that process_mmput_async registers as an OOM notifier
+ */
+#define PROCESS_MMPUT_ASYNC_OOM_NOTIFY_PRIORITY 70
+
/*
* Protected counters by write_lock_irq(&tasklist_lock)
*/
@@ -3203,13 +3208,27 @@ int sysctl_max_threads(struct ctl_table *table, int write,
return 0;
}

+/* OOM notifier: keep the OOM killer from triggering during asynchronous mm cleanup */
+static int mmput_async_oom_notifier(struct notifier_block *nb, unsigned long dummy, void *parm)
+{
+ /*
+ * The speed at which pages are being freed is unknown, so progress is
+ * faked by bumping the counter behind parm (nb and dummy are unused).
+ * NOTE(review): comment said "at least one", code adds PAGE_SIZE.
+ */
+ *(unsigned long *)parm += PAGE_SIZE;
+ return NOTIFY_OK;
+}
+
SYSCALL_DEFINE2(process_mmput_async, int, pidfd, unsigned int, flags)
{
#ifdef CONFIG_MMU
+ struct notifier_block oom_nb;
struct mm_struct *mm = NULL;
struct task_struct *task;
unsigned int tmp;
struct pid *pid;
+ int r;

if (flags)
return -EINVAL;
@@ -3280,8 +3299,17 @@ SYSCALL_DEFINE2(process_mmput_async, int, pidfd, unsigned int, flags)
if (atomic_read(&mm->mm_users))
panic("mm_users not 0 but trying to __mmput anyway!");

+ /*
+ * Register an OOM notifier, to stop the OOM while we are
+ * asynchronously freeing the mm.
+ */
+ oom_nb.priority = PROCESS_MMPUT_ASYNC_OOM_NOTIFY_PRIORITY;
+ oom_nb.notifier_call = mmput_async_oom_notifier;
+ r = register_oom_notifier(&oom_nb);
/* Do the actual work */
__mmput(mm);
+ if (!r)
+ unregister_oom_notifier(&oom_nb);
/* And put the extra reference taken above */
mmdrop(mm);

--
2.31.1