[tip: sched/core] sched: Move mmdrop to RCU on RT

From: tip-bot2 for Thomas Gleixner
Date: Fri Oct 01 2021 - 11:06:43 EST


The following commit has been merged into the sched/core branch of tip:

Commit-ID: df89544263cd98ffcef1318b3bf18509b9420c8a
Gitweb: https://git.kernel.org/tip/df89544263cd98ffcef1318b3bf18509b9420c8a
Author: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
AuthorDate: Tue, 28 Sep 2021 14:24:32 +02:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Fri, 01 Oct 2021 13:58:06 +02:00

sched: Move mmdrop to RCU on RT

mmdrop() is invoked from finish_task_switch() by the incoming task to drop
the mm which was handed over by the previous task. mmdrop() can be quite
expensive which prevents an incoming real-time task from getting useful
work done.

Provide mmdrop_sched() which maps to mmdrop() on !RT kernels. On RT kernels
it delagates the eventually required invocation of __mmdrop() to RCU.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/20210928122411.648582026@xxxxxxxxxxxxx
---
include/linux/mm_types.h | 4 ++++
include/linux/sched/mm.h | 29 +++++++++++++++++++++++++++++
kernel/sched/core.c | 2 +-
3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8f0fb62..09a2885 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -12,6 +12,7 @@
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/uprobes.h>
+#include <linux/rcupdate.h>
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
#include <linux/seqlock.h>
@@ -567,6 +568,9 @@ struct mm_struct {
bool tlb_flush_batched;
#endif
struct uprobes_state uprobes_state;
+#ifdef CONFIG_PREEMPT_RT
+ struct rcu_head delayed_drop;
+#endif
#ifdef CONFIG_HUGETLB_PAGE
atomic_long_t hugetlb_usage;
#endif
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index e24b1fe..0d81060 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,35 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}

+#ifdef CONFIG_PREEMPT_RT
+/*
+ * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is
+ * by far the least expensive way to do that.
+ */
+static inline void __mmdrop_delayed(struct rcu_head *rhp)
+{
+ struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
+
+ __mmdrop(mm);
+}
+
+/*
+ * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
+ * kernels via RCU.
+ */
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+ /* Provides a full memory barrier. See mmdrop() */
+ if (atomic_dec_and_test(&mm->mm_count))
+ call_rcu(&mm->delayed_drop, __mmdrop_delayed);
+}
+#else
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+ mmdrop(mm);
+}
+#endif
+
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
* @mm: The address space to pin.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b36b5d7..bb70a07 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4773,7 +4773,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
*/
if (mm) {
membarrier_mm_sync_core_before_usermode(mm);
- mmdrop(mm);
+ mmdrop_sched(mm);
}
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)