[tip: x86/cache] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow

From: tip-bot2 for James Morse
Date: Mon Feb 19 2024 - 13:39:56 EST


The following commit has been merged into the x86/cache branch of tip:

Commit-ID: a4846aaf39455fe69fce3522b385319383666eef
Gitweb: https://git.kernel.org/tip/a4846aaf39455fe69fce3522b385319383666eef
Author: James Morse <james.morse@xxxxxxx>
AuthorDate: Tue, 13 Feb 2024 18:44:26
Committer: Borislav Petkov (AMD) <bp@xxxxxxxxx>
CommitterDate: Fri, 16 Feb 2024 19:18:32 +01:00

x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow

The limbo and overflow code picks a CPU to use from the domain's list of online
CPUs. Work is then scheduled on these CPUs to maintain the limbo list and any
counters that may overflow.

cpumask_any() may pick a CPU that is marked nohz_full, which will either
penalise the work that CPU was dedicated to, or delay the processing of limbo
list or counters that may overflow. Perhaps indefinitely. Delaying the overflow
handling will skew the bandwidth values calculated by mba_sc, which expects to
be called once a second.

Add cpumask_any_housekeeping() as a replacement for cpumask_any() that prefers
housekeeping CPUs. This helper will still return a nohz_full CPU if that is the
only option. The CPU to use is re-evaluated each time the limbo/overflow work
runs. This ensures the work will move off a nohz_full CPU once a housekeeping
CPU is available.

Signed-off-by: James Morse <james.morse@xxxxxxx>
Signed-off-by: Borislav Petkov (AMD) <bp@xxxxxxxxx>
Reviewed-by: Shaopeng Tan <tan.shaopeng@xxxxxxxxxxx>
Reviewed-by: Reinette Chatre <reinette.chatre@xxxxxxxxx>
Reviewed-by: Babu Moger <babu.moger@xxxxxxx>
Tested-by: Shaopeng Tan <tan.shaopeng@xxxxxxxxxxx>
Tested-by: Peter Newman <peternewman@xxxxxxxxxx>
Tested-by: Babu Moger <babu.moger@xxxxxxx>
Tested-by: Carl Worth <carl@xxxxxxxxxxxxxxxxxxxxxx> # arm64
Link: https://lore.kernel.org/r/20240213184438.16675-13-james.morse@xxxxxxx
Signed-off-by: Borislav Petkov (AMD) <bp@xxxxxxxxx>
---
arch/x86/kernel/cpu/resctrl/internal.h | 24 ++++++++++++++++++++++++
arch/x86/kernel/cpu/resctrl/monitor.c | 20 +++++++++++++-------
2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index b7b9d92..81f5de9 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -7,6 +7,7 @@
#include <linux/kernfs.h>
#include <linux/fs_context.h>
#include <linux/jump_label.h>
+#include <linux/tick.h>

#include <asm/resctrl.h>

@@ -55,6 +56,29 @@
/* Max event bits supported */
#define MAX_EVT_CONFIG_BITS GENMASK(6, 0)

+/**
+ * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
+ * aren't marked nohz_full
+ * @mask: The mask to pick a CPU from.
+ *
+ * Returns a CPU in @mask. If there are housekeeping CPUs that don't use
+ * nohz_full, these are preferred.
+ */
+static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
+{
+ unsigned int cpu, hk_cpu;
+
+ cpu = cpumask_any(mask);
+ if (!tick_nohz_full_cpu(cpu))
+ return cpu;
+
+ hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
+ if (hk_cpu < nr_cpu_ids)
+ cpu = hk_cpu;
+
+ return cpu;
+}
+
struct rdt_fs_context {
struct kernfs_fs_context kfc;
bool enable_cdpl2;
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 101f1b1..38f85e5 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -761,7 +761,6 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
void cqm_handle_limbo(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
- int cpu = smp_processor_id();
struct rdt_domain *d;

mutex_lock(&rdtgroup_mutex);
@@ -770,8 +769,11 @@ void cqm_handle_limbo(struct work_struct *work)

__check_limbo(d, false);

- if (has_busy_rmid(d))
- schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
+ if (has_busy_rmid(d)) {
+ d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask);
+ schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
+ delay);
+ }

mutex_unlock(&rdtgroup_mutex);
}
@@ -781,7 +783,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
unsigned long delay = msecs_to_jiffies(delay_ms);
int cpu;

- cpu = cpumask_any(&dom->cpu_mask);
+ cpu = cpumask_any_housekeeping(&dom->cpu_mask);
dom->cqm_work_cpu = cpu;

schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
@@ -791,7 +793,6 @@ void mbm_handle_overflow(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
struct rdtgroup *prgrp, *crgrp;
- int cpu = smp_processor_id();
struct list_head *head;
struct rdt_resource *r;
struct rdt_domain *d;
@@ -815,7 +816,12 @@ void mbm_handle_overflow(struct work_struct *work)
update_mba_bw(prgrp, d);
}

- schedule_delayed_work_on(cpu, &d->mbm_over, delay);
+ /*
+ * Re-check for housekeeping CPUs. This allows the overflow handler to
+ * move off a nohz_full CPU quickly.
+ */
+ d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask);
+ schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);

out_unlock:
mutex_unlock(&rdtgroup_mutex);
@@ -828,7 +834,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)

if (!static_branch_likely(&rdt_mon_enable_key))
return;
- cpu = cpumask_any(&dom->cpu_mask);
+ cpu = cpumask_any_housekeeping(&dom->cpu_mask);
dom->mbm_work_cpu = cpu;
schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
}