Re: [PATCH v6 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu

From: Reinette Chatre
Date: Tue Oct 03 2023 - 17:23:07 EST


Hi James,

On 9/14/2023 10:21 AM, James Morse wrote:

...

> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index c54fa86e4ef9..bd7f60bf49fe 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -60,11 +60,15 @@
> * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
> * aren't marked nohz_full
> * @mask: The mask to pick a CPU from.
> + * @exclude_cpu:The CPU to avoid picking.
> *
> - * Returns a CPU in @mask. If there are housekeeping CPUs that don't use
> - * nohz_full, these are preferred.
> + * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
> + * CPUs that don't use nohz_full, these are preferred. Pass
> + * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
> + * Returns >= nr_cpu_ids if no CPUs are available.

It may be helpful to add that the function can only fail if exclude_cpu is
*not* RESCTRL_PICK_ANY_CPU. That helps to understand the sparse error checking.

> */
> -static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
> +static inline unsigned int
> +cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
> {
> unsigned int cpu, hk_cpu;
>
> @@ -73,6 +77,9 @@ static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
> return cpu;
>

It is not obvious from this hunk but I cannot see how this would work
on a system without any nohz_full CPUs.

At this point the function looks like:

cpu = cpumask_any(mask);
if (!tick_nohz_full_cpu(cpu))
return cpu;

I expected exclude_cpu to be taken into account. If I understand correctly
exclude_cpu can be picked by cpumask_any() and as long as it is not
a nohz_full CPU it would be returned.


> hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
> + if (hk_cpu == exclude_cpu)
> + hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);
> +
> if (hk_cpu < nr_cpu_ids)
> cpu = hk_cpu;
>
> @@ -565,11 +572,13 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
> struct rdt_domain *d, struct rdtgroup *rdtgrp,
> int evtid, int first);
> void mbm_setup_overflow_handler(struct rdt_domain *dom,
> - unsigned long delay_ms);
> + unsigned long delay_ms,
> + int exclude_cpu);
> void mbm_handle_overflow(struct work_struct *work);
> void __init intel_rdt_mbm_apply_quirk(void);
> bool is_mba_sc(struct rdt_resource *r);
> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
> + int exclude_cpu);
> void cqm_handle_limbo(struct work_struct *work);
> bool has_busy_rmid(struct rdt_domain *d);
> void __check_limbo(struct rdt_domain *d, bool force_free);
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index 9c6d4b0970e2..208e46ba7368 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -480,7 +480,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> * setup up the limbo worker.
> */
> if (!has_busy_rmid(d))
> - cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
> + cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
> + RESCTRL_PICK_ANY_CPU);
> set_bit(idx, d->rmid_busy_llc);
> entry->busy++;
> }
> @@ -807,22 +808,31 @@ void cqm_handle_limbo(struct work_struct *work)
> __check_limbo(d, false);
>
> if (has_busy_rmid(d)) {
> - cpu = cpumask_any_housekeeping(&d->cpu_mask);
> + cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
> schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
> }
>
> mutex_unlock(&rdtgroup_mutex);
> }
>
> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
> +/**
> + * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
> + * domain.
> + * @delay_ms: How far in the future the handler should run.
> + * @exclude_cpu: Which CPU the handler should not run on,
> + * RESCTRL_PICK_ANY_CPU to pick any CPU.
> + */

arch/x86/kernel/cpu/resctrl/monitor.c:824: info: Scanning doc for function cqm_setup_limbo_handler
arch/x86/kernel/cpu/resctrl/monitor.c:832: warning: Function parameter or member 'dom' not described in 'cqm_setup_limbo_handler'


> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
> + int exclude_cpu)
> {
> unsigned long delay = msecs_to_jiffies(delay_ms);
> int cpu;
>
> - cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> + cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
> dom->cqm_work_cpu = cpu;
>
> - schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> + if (cpu < nr_cpu_ids)
> + schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> }
>
> void mbm_handle_overflow(struct work_struct *work)
> @@ -861,14 +871,22 @@ void mbm_handle_overflow(struct work_struct *work)
> * Re-check for housekeeping CPUs. This allows the overflow handler to
> * move off a nohz_full CPU quickly.
> */
> - cpu = cpumask_any_housekeeping(&d->cpu_mask);
> + cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
> schedule_delayed_work_on(cpu, &d->mbm_over, delay);
>
> out_unlock:
> mutex_unlock(&rdtgroup_mutex);
> }
>
> -void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
> +/**
> + * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
> + * domain.
> + * @delay_ms: How far in the future the handler should run.
> + * @exclude_cpu: Which CPU the handler should not run on,
> + * RESCTRL_PICK_ANY_CPU to pick any CPU.
> + */

arch/x86/kernel/cpu/resctrl/monitor.c:887: info: Scanning doc for function mbm_setup_overflow_handler
arch/x86/kernel/cpu/resctrl/monitor.c:895: warning: Function parameter or member 'dom' not described in 'mbm_setup_overflow_handler'


> +void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
> + int exclude_cpu)
> {
> unsigned long delay = msecs_to_jiffies(delay_ms);
> int cpu;
> @@ -879,9 +897,11 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
> */
> if (!resctrl_mounted || !resctrl_arch_mon_capable())
> return;
> - cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> + cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
> dom->mbm_work_cpu = cpu;
> - schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
> +
> + if (cpu < nr_cpu_ids)
> + schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
> }
>
> static int dom_data_init(struct rdt_resource *r)
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 0c609cdfe7e5..49f100c73838 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -2552,7 +2552,8 @@ static int rdt_get_tree(struct fs_context *fc)
> if (is_mbm_enabled()) {
> r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> list_for_each_entry(dom, &r->domains, list)
> - mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
> + mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
> + RESCTRL_PICK_ANY_CPU);
> }
>
> goto out;
> @@ -3850,7 +3851,8 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
>
> if (is_mbm_enabled()) {
> INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
> - mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
> + mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
> + RESCTRL_PICK_ANY_CPU);
> }
>
> if (is_llc_occupancy_enabled())
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 9d5f75a4e192..0888d1975161 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -10,6 +10,8 @@
> #define RESCTRL_RESERVED_CLOSID 0
> #define RESCTRL_RESERVED_RMID 0
>
> +#define RESCTRL_PICK_ANY_CPU -1
> +
> #ifdef CONFIG_PROC_CPU_RESCTRL
>
> int proc_resctrl_show(struct seq_file *m,

Reinette