[patch 16/30] x86/topology: Add a mechanism to track topology via APIC IDs

From: Thomas Gleixner
Date: Tue Feb 13 2024 - 16:18:33 EST


From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

Topology on X86 is determined by the registered APIC IDs and the
segmentation information retrieved from CPUID. Depending on the granularity
of the provided CPUID information the most fine grained scheme looks like
this according to Intel terminology:

[PKG][DIEGRP][DIE][TILE][MODULE][CORE][THREAD]

Not enumerated domain levels consume 0 bits in the APIC ID. This allows to
provide a consistent view at the topology and determine other information
precisely like the number of cores in a package on hybrid systems, where
the existing assumption that number or cores == number of threads / threads
per core does not hold.

Provide per domain level bitmaps which record the APIC ID split into the
domain levels to make later evaluation of domain level specific information
simple. This allows to calculate e.g. the logical IDs without any further
extra logic.

Contrary to the existing registration mechanism this records disabled CPUs,
which are subject to later hotplug as well. That's useful for boot time
sizing of package or die dependent allocations without using heuristics.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>


---
arch/x86/kernel/cpu/topology.c | 48 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 46 insertions(+), 2 deletions(-)
---

--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -1,5 +1,27 @@
// SPDX-License-Identifier: GPL-2.0-only
-
+/*
+ * CPU/APIC topology
+ *
+ * The APIC IDs describe the system topology in multiple domain levels.
+ * The CPUID topology parser provides the information which part of the
+ * APIC ID is associated to the individual levels:
+ *
+ * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD]
+ *
+ * The root space contains the package (socket) IDs.
+ *
+ * Not enumerated levels consume 0 bits space, but conceptually they are
+ * always represented. If e.g. only CORE and THREAD levels are enumerated
+ * then the DIE, MODULE and TILE have the same physical ID as the PACKAGE.
+ *
+ * If SMT is not supported, then the THREAD domain is still used. It then
+ * has the same physical ID as the CORE domain and is the only child of
+ * the core domain.
+ *
+ * This allows a unified view on the system independent of the enumerated
+ * domain levels without requiring any conditionals in the code.
+ */
+#define pr_fmt(fmt) "CPU topo: " fmt
#include <linux/cpu.h>

#include <xen/xen.h>
@@ -9,6 +31,8 @@
#include <asm/mpspec.h>
#include <asm/smp.h>

+#include "cpu.h"
+
/*
* Map cpu index to physical APIC ID
*/
@@ -23,6 +47,9 @@ DECLARE_BITMAP(phys_cpu_present_map, MAX
/* Used for CPU number allocation and parallel CPU bringup */
u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, };

+/* Bitmaps to mark registered APICs at each topology domain */
+static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
+
/*
* Keep track of assigned, disabled and rejected CPUs. Present assigned
* with 1 as CPU #0 is reserved for the boot CPU.
@@ -39,6 +66,8 @@ static struct {
.real_bsp_apic_id = BAD_APICID,
};

+#define domain_weight(_dom) bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC)
+
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
return phys_id == (u64)cpuid_to_apicid[cpu];
@@ -81,6 +110,17 @@ early_initcall(smp_init_primary_thread_m
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif

+/*
+ * Convert the APIC ID to a domain level ID by masking out the low bits
+ * below the domain level @dom.
+ */
+static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom)
+{
+ if (dom == TOPO_SMT_DOMAIN)
+ return apicid;
+ return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]);
+}
+
static int topo_lookup_cpuid(u32 apic_id)
{
int i;
@@ -151,7 +191,7 @@ static __init bool check_for_real_bsp(u3

static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
- int cpu;
+ int cpu, dom;

if (present) {
set_bit(apic_id, phys_cpu_present_map);
@@ -170,6 +210,10 @@ static __init void topo_register_apic(u3
} else {
topo_info.nr_disabled_cpus++;
}
+
+ /* Register present and possible CPUs in the domain maps */
+ for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
+ set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}

/**