Re: [PATCH v4 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu

From: Reinette Chatre
Date: Thu Jun 15 2023 - 18:26:03 EST


Hi James,

On 5/25/2023 11:02 AM, James Morse wrote:

...

> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 021a8956518c..9cba8fc405b9 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -79,6 +79,37 @@ static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
> return cpu;
> }
>
> +/**
> + * cpumask_any_housekeeping_but() - Chose any cpu in @mask, preferring those
> + * that aren't marked nohz_full, excluding
> + * the provided CPU
> + * @mask: The mask to pick a CPU from.
> + * @exclude_cpu:The CPU to avoid picking.
> + *
> + * Returns a CPU from @mask, but not @but. If there are housekeeping CPUs that

"but not @exclude_cpu"

> + * don't use nohz_full, these are preferred.
> + * Returns >= nr_cpu_ids if no CPUs are available.
> + */
> +static inline unsigned int
> +cpumask_any_housekeeping_but(const struct cpumask *mask, int exclude_cpu)
> +{
> + int cpu, hk_cpu;

Should these be unsigned int?

> +
> + cpu = cpumask_any_but(mask, exclude_cpu);
> + if (tick_nohz_full_cpu(cpu)) {
> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
> + if (hk_cpu == exclude_cpu) {
> + hk_cpu = cpumask_nth_andnot(1, mask,
> + tick_nohz_full_mask);
> + }
> +

These braces are not necessary. If they are added to help readability then
perhaps the indentation can be reduced by using an earlier:

if (!tick_nohz_full_cpu(cpu))
return cpu;


> + if (hk_cpu < nr_cpu_ids)
> + cpu = hk_cpu;
> + }
> +
> + return cpu;
> +}
> +
> struct rdt_fs_context {
> struct kernfs_fs_context kfc;
> bool enable_cdpl2;
> @@ -564,11 +595,13 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
> struct rdt_domain *d, struct rdtgroup *rdtgrp,
> int evtid, int first);
> void mbm_setup_overflow_handler(struct rdt_domain *dom,
> - unsigned long delay_ms);
> + unsigned long delay_ms,
> + int exclude_cpu);
> void mbm_handle_overflow(struct work_struct *work);
> void __init intel_rdt_mbm_apply_quirk(void);
> bool is_mba_sc(struct rdt_resource *r);
> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
> + int exclude_cpu);
> void cqm_handle_limbo(struct work_struct *work);
> bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
> void __check_limbo(struct rdt_domain *d, bool force_free);
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index ced933694f60..ae02185f3354 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -485,7 +485,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> * setup up the limbo worker.
> */
> if (!has_busy_rmid(r, d))
> - cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
> + cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL, -1);

Should this -1 be RESCTRL_PICK_ANY_CPU?

> set_bit(idx, d->rmid_busy_llc);
> entry->busy++;
> }
> @@ -810,15 +810,28 @@ void cqm_handle_limbo(struct work_struct *work)
> mutex_unlock(&rdtgroup_mutex);
> }
>
> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
> +/**
> + * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
> + * domain.
> + * @delay_ms: How far in the future the handler should run.
> + * @exclude_cpu: Which CPU the handler should not run on, -1 to pick any CPU.

Should -1 be RESCTRL_PICK_ANY_CPU?

> + */
> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
> + int exclude_cpu)
> {
> unsigned long delay = msecs_to_jiffies(delay_ms);
> int cpu;
>
> - cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> - dom->cqm_work_cpu = cpu;
> + if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
> + cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> + else
> + cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
> + exclude_cpu);
>
> - schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> + if (cpu < nr_cpu_ids) {
> + dom->cqm_work_cpu = cpu;

Should cqm_work_cpu not perhaps be set to nr_cpu_ids on failure? If it keeps
pointing to the CPU that ran the worker previously there may be unexpected behavior.

Note the different behavior between cqm_setup_limbo_handler() and
mbm_setup_overflow_handler() in this regard.

> + schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> + }
> }
>
> void mbm_handle_overflow(struct work_struct *work)
> @@ -864,7 +877,14 @@ void mbm_handle_overflow(struct work_struct *work)
> mutex_unlock(&rdtgroup_mutex);
> }
>
> -void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
> +/**
> + * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
> + * domain.
> + * @delay_ms: How far in the future the handler should run.
> + * @exclude_cpu: Which CPU the handler should not run on, -1 to pick any CPU.

Should -1 be RESCTRL_PICK_ANY_CPU?

> + */
> +void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
> + int exclude_cpu)
> {
> unsigned long delay = msecs_to_jiffies(delay_ms);
> int cpu;
> @@ -875,9 +895,15 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
> */
> if (!resctrl_mounted || !resctrl_arch_mon_capable())
> return;
> - cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> + if (exclude_cpu == -1)

Same here: should this -1 be RESCTRL_PICK_ANY_CPU?

> + cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> + else
> + cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
> + exclude_cpu);
> dom->mbm_work_cpu = cpu;
> - schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
> +
> + if (cpu < nr_cpu_ids)
> + schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
> }
>

...

> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index ecd41762d61a..089b91133e5e 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -9,6 +9,9 @@
> /* CLOSID value used by the default control group */
> #define RESCTRL_RESERVED_CLOSID 0
>
> +/* Indicates no CPU needs to be excluded */

This comment seems to just be a rewrite of the macro name.

> +#define RESCTRL_PICK_ANY_CPU -1
> +
> #ifdef CONFIG_PROC_CPU_RESCTRL
>
> int proc_resctrl_show(struct seq_file *m,

Reinette