Re: [PATCH v5 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu

From: Reinette Chatre
Date: Wed Aug 09 2023 - 18:39:33 EST


Hi James,

On 7/28/2023 9:42 AM, James Morse wrote:
> When a CPU is taken offline resctrl may need to move the overflow or
> limbo handlers to run on a different CPU.
>
> Once the offline callbacks have been split, cqm_setup_limbo_handler()
> will be called while the CPU that is going offline is still present
> in the cpu_mask.
>
> Pass the CPU to exclude to cqm_setup_limbo_handler() and
> mbm_setup_overflow_handler(). These functions can use a variant of
> cpumask_any_but() when selecting the CPU. -1 is used to indicate no CPUs
> need excluding.
>
> A subsequent patch moves these calls to be before CPUs have been removed,
> so this exclude_cpus behaviour is temporary.
>
> Tested-by: Shaopeng Tan <tan.shaopeng@xxxxxxxxxxx>
> Signed-off-by: James Morse <james.morse@xxxxxxx>
> ---
> Changes since v2:
> * Rephrased a comment to avoid a two letter bad-word. (we)
> * Avoid assigning mbm_work_cpu if the domain is going to be free()d
> * Added cpumask_any_housekeeping_but(), I dislike the name
>
> Changes since v3:
> * Marked an explanatory comment as temporary as the subsequent patch is
> no longer adjacent.
>
> Changes since v4:
> * Check against RESCTRL_PICK_ANY_CPU instead of -1.
> * Leave cqm_work_cpu as nr_cpu_ids when no CPU is available.
> * Made cpumask_any_housekeeping_but() more readable.
> ---
> arch/x86/kernel/cpu/resctrl/core.c | 8 +++--
> arch/x86/kernel/cpu/resctrl/internal.h | 36 ++++++++++++++++++++--
> arch/x86/kernel/cpu/resctrl/monitor.c | 42 +++++++++++++++++++++-----
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 ++--
> include/linux/resctrl.h | 2 ++
> 5 files changed, 81 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index a694563d3929..d39572a0a3cd 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -582,12 +582,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
> if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
> if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
> cancel_delayed_work(&d->mbm_over);
> - mbm_setup_overflow_handler(d, 0);
> + /*
> + * temporary: exclude_cpu=-1 as this CPU has already
> + * been removed by cpumask_clear_cpu()d
> + */
> + mbm_setup_overflow_handler(d, 0, RESCTRL_PICK_ANY_CPU);
> }
> if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
> has_busy_rmid(d)) {
> cancel_delayed_work(&d->cqm_limbo);
> - cqm_setup_limbo_handler(d, 0);
> + cqm_setup_limbo_handler(d, 0, RESCTRL_PICK_ANY_CPU);
> }
> }
> }
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index f99e0a1f39c8..655418c23c0e 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -79,6 +79,36 @@ static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
> return cpu;
> }
>
> +/**
> + * cpumask_any_housekeeping_but() - Chose any cpu in @mask, preferring those

cpu -> CPU

> + * that aren't marked nohz_full, excluding
> + * the provided CPU
> + * @mask: The mask to pick a CPU from.
> + * @exclude_cpu:The CPU to avoid picking.
> + *
> + * Returns a CPU from @mask, but not @exclude_cpus. If there are housekeeping

exclude_cpus -> exclude_cpu

> + * CPUs that don't use nohz_full, these are preferred.
> + * Returns >= nr_cpu_ids if no CPUs are available.
> + */
> +static inline unsigned int
> +cpumask_any_housekeeping_but(const struct cpumask *mask, int exclude_cpu)
> +{
> + unsigned int cpu, hk_cpu;
> +
> + cpu = cpumask_any_but(mask, exclude_cpu);
> + if (!tick_nohz_full_cpu(cpu))
> + return cpu;
> +
> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
> + if (hk_cpu == exclude_cpu)
> + hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);
> +
> + if (hk_cpu < nr_cpu_ids)
> + cpu = hk_cpu;
> +
> + return cpu;
> +}
> +
> struct rdt_fs_context {
> struct kernfs_fs_context kfc;
> bool enable_cdpl2;
> @@ -564,11 +594,13 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
> struct rdt_domain *d, struct rdtgroup *rdtgrp,
> int evtid, int first);
> void mbm_setup_overflow_handler(struct rdt_domain *dom,
> - unsigned long delay_ms);
> + unsigned long delay_ms,
> + int exclude_cpu);
> void mbm_handle_overflow(struct work_struct *work);
> void __init intel_rdt_mbm_apply_quirk(void);
> bool is_mba_sc(struct rdt_resource *r);
> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
> + int exclude_cpu);
> void cqm_handle_limbo(struct work_struct *work);
> bool has_busy_rmid(struct rdt_domain *d);
> void __check_limbo(struct rdt_domain *d, bool force_free);
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index c0b1ad8d8f6d..471cdc4e4eae 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -493,7 +493,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> * setup up the limbo worker.
> */
> if (!has_busy_rmid(d))
> - cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
> + cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
> + RESCTRL_PICK_ANY_CPU);
> set_bit(idx, d->rmid_busy_llc);
> entry->busy++;
> }
> @@ -816,15 +817,28 @@ void cqm_handle_limbo(struct work_struct *work)
> mutex_unlock(&rdtgroup_mutex);
> }
>
> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
> +/**
> + * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
> + * domain.
> + * @delay_ms: How far in the future the handler should run.
> + * @exclude_cpu: Which CPU the handler should not run on,
> + * RESCTRL_PICK_ANY_CPU to pick any CPU.
> + */
> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
> + int exclude_cpu)
> {
> unsigned long delay = msecs_to_jiffies(delay_ms);
> int cpu;
>
> - cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> + if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
> + cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> + else
> + cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
> + exclude_cpu);

Requiring callers to do this check seems unnecessary and complicates the
code. Can cpumask_any_housekeeping_but() instead be made slightly smarter
so that it handles the case where exclude_cpu == RESCTRL_PICK_ANY_CPU?

There also appears to be some duplication between
cpumask_any_housekeeping() and cpumask_any_housekeeping_but().

> dom->cqm_work_cpu = cpu;
>
> - schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> + if (cpu < nr_cpu_ids)
> + schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> }
>
> void mbm_handle_overflow(struct work_struct *work)
> @@ -870,7 +884,15 @@ void mbm_handle_overflow(struct work_struct *work)
> mutex_unlock(&rdtgroup_mutex);
> }
>
> -void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
> +/**
> + * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
> + * domain.
> + * @delay_ms: How far in the future the handler should run.
> + * @exclude_cpu: Which CPU the handler should not run on,
> + * RESCTRL_PICK_ANY_CPU to pick any CPU.
> + */
> +void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
> + int exclude_cpu)
> {
> unsigned long delay = msecs_to_jiffies(delay_ms);
> int cpu;
> @@ -881,9 +903,15 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
> */
> if (!resctrl_mounted || !resctrl_arch_mon_capable())
> return;
> - cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> + if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
> + cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> + else
> + cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
> + exclude_cpu);
> dom->mbm_work_cpu = cpu;
> - schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
> +
> + if (cpu < nr_cpu_ids)
> + schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
> }
>

Reinette