Re: [PATCH v12 7/8] x86/resctrl: Sub NUMA Cluster detection and enable

From: Tony Luck
Date: Thu Nov 30 2023 - 17:43:25 EST


On Thu, Nov 30, 2023 at 01:47:10PM -0800, Reinette Chatre wrote:
> Hi Tony,
> > diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> > index 3293ab4c58b0..85f8a1b3feaf 100644
> > --- a/arch/x86/kernel/cpu/resctrl/core.c
> > +++ b/arch/x86/kernel/cpu/resctrl/core.c
> > @@ -1056,12 +1056,13 @@ static __init int snc_get_config(void)
> > unsigned long *node_caches;
> > int mem_only_nodes = 0;
> > int cpu, node, ret;
> > + int cache_id;
> > int num_l3_caches;
>
> Please do maintain reverse fir tree order.

Fixed.

>
> >
> > if (!x86_match_cpu(snc_cpu_ids))
> > return 1;
>
> I understand and welcome this change as motivated by robustness. Apart
> from that, with this being a model specific feature for this particular
> group of systems, it is not clear to me in which scenarios this could
> run on a system where a present CPU does not have access to L3 cache.

Agreed that on these systems there should always be an L3 cache. Should
I drop the check for "-1"?

> >
> > - node_caches = bitmap_zalloc(nr_node_ids, GFP_KERNEL);
> > + node_caches = bitmap_zalloc(num_online_cpus(), GFP_KERNEL);
>
> Please do take care to take the new bitmap size into account in all
> places. From what I can tell there is a later bitmap_weight() call that
> still uses nr_node_ids as the size.

Oops. I was also using num_online_cpus() before cpus_read_lock(), so
things could theoretically change before the bitmap_weight() call.
I switched to using num_present_cpus() in both places.
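
To illustrate the intent (this is only a sketch, not the posted patch, and
"bitmap_size" is just an illustrative local variable): the CPU count is read
once and that same value is used for both the allocation and the later
bitmap_weight() call, so the two can never disagree:

	unsigned int bitmap_size = num_present_cpus();

	node_caches = bitmap_zalloc(bitmap_size, GFP_KERNEL);
	if (!node_caches)
		return 1;

	/* ... set a bit for each node's L3 cache id ... */

	num_l3_caches = bitmap_weight(node_caches, bitmap_size);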

> > if (!node_caches)
> > return 1;
> >
> > @@ -1072,10 +1073,13 @@ static __init int snc_get_config(void)
> >
> > for_each_node(node) {
> > cpu = cpumask_first(cpumask_of_node(node));
> > - if (cpu < nr_cpu_ids)
> > - set_bit(get_cpu_cacheinfo_id(cpu, 3), node_caches);
> > - else
> > + if (cpu < nr_cpu_ids) {
> > + cache_id = get_cpu_cacheinfo_id(cpu, 3);
> > + if (cache_id != -1)
> > + set_bit(cache_id, node_caches);
> > + } else {
> > mem_only_nodes++;
> > + }
> > }
> > cpus_read_unlock();
> >
>
> Could this code be made even more robust by checking the computed
> snc_nodes_per_l3_cache against the limited set of actually possible
> values? Forcing it to 1 if something went wrong?

Added a couple of extra sanity checks. See updated incremental patch
below.

-Tony


diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 3293ab4c58b0..3684c6bf8224 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -1057,11 +1057,12 @@ static __init int snc_get_config(void)
int mem_only_nodes = 0;
int cpu, node, ret;
int num_l3_caches;
+ int cache_id;

if (!x86_match_cpu(snc_cpu_ids))
return 1;

- node_caches = bitmap_zalloc(nr_node_ids, GFP_KERNEL);
+ node_caches = bitmap_zalloc(num_present_cpus(), GFP_KERNEL);
if (!node_caches)
return 1;

@@ -1072,23 +1073,39 @@ static __init int snc_get_config(void)

for_each_node(node) {
cpu = cpumask_first(cpumask_of_node(node));
- if (cpu < nr_cpu_ids)
- set_bit(get_cpu_cacheinfo_id(cpu, 3), node_caches);
- else
+ if (cpu < nr_cpu_ids) {
+ cache_id = get_cpu_cacheinfo_id(cpu, 3);
+ if (cache_id != -1)
+ set_bit(cache_id, node_caches);
+ } else {
mem_only_nodes++;
+ }
}
cpus_read_unlock();

- num_l3_caches = bitmap_weight(node_caches, nr_node_ids);
+ num_l3_caches = bitmap_weight(node_caches, num_present_cpus());
kfree(node_caches);

if (!num_l3_caches)
return 1;

+ /* sanity check #1: Number of CPU nodes must be a multiple of num_l3_caches */
+ if ((nr_node_ids - mem_only_nodes) % num_l3_caches)
+ return 1;
+
ret = (nr_node_ids - mem_only_nodes) / num_l3_caches;

- if (ret > 1)
+ /* sanity check #2: Only valid results are 1, 2, 4 */
+ switch (ret) {
+ case 1:
+ break;
+ case 2:
+ case 4:
rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_NODE;
+ break;
+ default:
+ return 1;
+ }

return ret;
}
--
2.41.0