[RFC PATCH 2/2] sched/membarrier: Use serialized smp_call_function APIs

From: Mathieu Desnoyers
Date: Wed Mar 13 2024 - 16:56:41 EST


Use the serialized smp_call_function APIs to issue IPIs, thus limiting
the rate at which IPIs can be generated for each CPU.

Limiting the rate of IPIs at the smp_call_function level ensures that
various mechanisms cannot be combined to overwhelm a CPU with IPIs.
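
The serialized wrappers themselves are introduced by patch 1/2 of this
series (not included in this mail). As a rough sketch of the intent only,
assuming a single global mutex for simplicity (the actual implementation
may well serialize differently, e.g. per-CPU, see the note below):

/*
 * Sketch only: the real wrappers are added by patch 1/2. Would live in
 * kernel/smp.c and need <linux/smp.h> and <linux/cleanup.h>.
 */
static DEFINE_MUTEX(smp_call_serialize_mutex);

/* Serialize IPI generation so callers cannot flood a CPU with IPIs. */
void smp_call_function_single_serialize(int cpu, smp_call_func_t func,
                                        void *info, int wait)
{
        guard(mutex)(&smp_call_serialize_mutex);
        smp_call_function_single(cpu, func, info, wait);
}

void smp_call_function_many_serialize(const struct cpumask *mask,
                                      smp_call_func_t func, void *info,
                                      bool wait)
{
        guard(mutex)(&smp_call_serialize_mutex);
        /* smp_call_function_many() requires preemption to be disabled. */
        preempt_disable();
        smp_call_function_many(mask, func, info, wait);
        preempt_enable();
}

void on_each_cpu_mask_serialize(const struct cpumask *mask,
                                smp_call_func_t func, void *info, bool wait)
{
        guard(mutex)(&smp_call_serialize_mutex);
        on_each_cpu_mask(mask, func, info, wait);
}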

This allows removing the IPI serialization mutex introduced by commit
944d5fe50f3f ("sched/membarrier: reduce the ability to hammer on
sys_membarrier"), which restores the scalability of
MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ targeting a specific CPU with
MEMBARRIER_CMD_FLAG_CPU. Google tcmalloc uses this combination for
cross-CPU operations.
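
For reference, this is roughly how such a targeted rseq fence is issued
from userspace (illustrative only; the rseq_fence_cpu() helper name is
made up and this is not tcmalloc's actual code; it assumes the process
registered with MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ first):

#define _GNU_SOURCE
#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

static int membarrier(int cmd, unsigned int flags, int cpu_id)
{
        return syscall(__NR_membarrier, cmd, flags, cpu_id);
}

/*
 * Fence rseq critical sections on @cpu only, instead of IPIing every
 * CPU currently running a thread of this process.
 */
static int rseq_fence_cpu(int cpu)
{
        return membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
                          MEMBARRIER_CMD_FLAG_CPU, cpu);
}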

[ I do not have numbers justifying the benefit of moving to a per-CPU
mutex for MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ targeting a specific
CPU. Perhaps the Google folks using this have benchmarks that can
provide those numbers? ]

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Cc: Boqun Feng <boqun.feng@xxxxxxxxx>
Cc: Andrew Hunter <ahh@xxxxxxxxxx>
Cc: Maged Michael <maged.michael@xxxxxxxxx>
Cc: gromer@xxxxxxxxxx
Cc: Avi Kivity <avi@xxxxxxxxxxxx>
Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
Cc: Paul Mackerras <paulus@xxxxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Peter Oskolkov <posk@xxxxxxxxxx>
---
kernel/sched/membarrier.c | 24 +++++++-----------------
1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 4e715b9b278e..368afd35c1de 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -162,9 +162,6 @@
| MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \
| MEMBARRIER_CMD_GET_REGISTRATIONS)

-static DEFINE_MUTEX(membarrier_ipi_mutex);
-#define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex)
-
static void ipi_mb(void *info)
{
smp_mb(); /* IPIs should be serializing but paranoid. */
@@ -262,7 +259,6 @@ static int membarrier_global_expedited(void)
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;

- SERIALIZE_IPI();
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
@@ -295,9 +291,7 @@ static int membarrier_global_expedited(void)
}
rcu_read_unlock();

- preempt_disable();
- smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
- preempt_enable();
+ smp_call_function_many_serialize(tmpmask, ipi_mb, NULL, 1);

free_cpumask_var(tmpmask);
cpus_read_unlock();
@@ -351,7 +345,6 @@ static int membarrier_private_expedited(int flags, int cpu_id)
if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;

- SERIALIZE_IPI();
cpus_read_lock();

if (cpu_id >= 0) {
@@ -382,10 +375,10 @@ static int membarrier_private_expedited(int flags, int cpu_id)

if (cpu_id >= 0) {
/*
- * smp_call_function_single() will call ipi_func() if cpu_id
- * is the calling CPU.
+ * smp_call_function_single_serialize() will call
+ * ipi_func() if cpu_id is the calling CPU.
*/
- smp_call_function_single(cpu_id, ipi_func, NULL, 1);
+ smp_call_function_single_serialize(cpu_id, ipi_func, NULL, 1);
} else {
/*
* For regular membarrier, we can save a few cycles by
@@ -405,11 +398,9 @@ static int membarrier_private_expedited(int flags, int cpu_id)
* rseq critical section.
*/
if (flags != MEMBARRIER_FLAG_SYNC_CORE) {
- preempt_disable();
- smp_call_function_many(tmpmask, ipi_func, NULL, true);
- preempt_enable();
+ smp_call_function_many_serialize(tmpmask, ipi_func, NULL, true);
} else {
- on_each_cpu_mask(tmpmask, ipi_func, NULL, true);
+ on_each_cpu_mask_serialize(tmpmask, ipi_func, NULL, true);
}
}

@@ -465,7 +456,6 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
* between threads which are users of @mm has its membarrier state
* updated.
*/
- SERIALIZE_IPI();
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
@@ -478,7 +468,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
}
rcu_read_unlock();

- on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true);
+ on_each_cpu_mask_serialize(tmpmask, ipi_sync_rq_state, mm, true);

free_cpumask_var(tmpmask);
cpus_read_unlock();
--
2.39.2