[PATCH] x86/numa: Map NUMA node to CPUs as per DeviceTree

From: Saurabh Sengar
Date: Tue Mar 12 2024 - 13:43:37 EST


Currently, when booting via DeviceTree, x86 code applies the default
mapping of CPUs to NUMA nodes, which is wrong. This can cause incorrect
mapping and a WARN on an SMT-enabled system, like below:

[0.417551] ------------[ cut here ]------------
[0.417551] Saurabh sched: CPU #1's smt-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency.
[0.417551] WARNING: CPU: 1 PID: 0 at topology_sane.isra.0+0x5c/0x6d
[0.417551] Modules linked in:
[0.417551] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.1.71-microsoft-hcl+ #4
[0.417551] RIP: 0010:topology_sane.isra.0+0x5c/0x6d
[0.417551] Code: 41 39 dc 74 27 80 3d 32 ae 2d 00 00 75 1e 41 89 d9 45 89 e0 44 89 d6 48 c7 c7 00 a6 4a 88 c6 05 19 ae 2d 00 01 e8 6e 1f cb ff <0f> 0b 41 39 dc 5b 41 5c 0f 94 c0 5d c3 cc cc cc cc 55 48 8b 05 05
[0.417551] RSP: 0000:ffffc9000013feb0 EFLAGS: 00010086
[0.417551] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[0.417551] RDX: 0000000000000003 RSI: 0000000000000086 RDI: 00000000ffffffff
[0.417551] RBP: ffffc9000013fec0 R08: ffffffff88778160 R09: ffffffff88778160
[0.417551] R10: ffff888227fe26da R11: ffff888227fe26c1 R12: 0000000000000001
[0.417551] R13: 0000000000000000 R14: ffff888216415040 R15: 0000000000000000
[0.417551] FS: 0000000000000000(0000) GS:ffff888216400000(0000) knlGS:0000000000000000
[0.417551] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[0.417551] CR2: 0000000000000000 CR3: 0000000208809001 CR4: 0000000000330ea0
[0.417551] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[0.417551] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
[0.417551] Call Trace:
[0.417551] <TASK>
[0.417551] ? show_regs.cold+0x1a/0x1f
[0.417551] ? __warn+0x6e/0xc0
[0.417551] ? report_bug+0x101/0x1a0
[0.417551] ? handle_bug+0x40/0x70
[0.417551] ? exc_invalid_op+0x19/0x70
[0.417551] ? asm_exc_invalid_op+0x1b/0x20
[0.417551] ? topology_sane.isra.0+0x5c/0x6d
[0.417551] match_smt+0xf6/0xfc
[0.417551] set_cpu_sibling_map.cold+0x24f/0x512
[0.417551] start_secondary+0x5c/0x110
[0.417551] secondary_startup_64_no_verify+0xcd/0xdb
[0.417551] </TASK>
[0.417551] ---[ end trace 0000000000000000 ]---

Add the correct mapping of CPUs to NUMA nodes, as described by the
DeviceTree, to fix this issue.

Signed-off-by: Saurabh Sengar <ssengar@xxxxxxxxxxxxxxxxxxx>
---
arch/x86/mm/numa.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 65e9a6e..9dacf60 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -601,6 +601,23 @@ static void __init numa_init_array(void)
}
}

+/* Assign DT CPU nodes, numbered in enumeration order, to their NUMA nodes. */
+static void __init of_parse_and_init_cpus(void)
+{
+	struct device_node *dn;
+	int cpuid = 0;	/* running index: n-th CPU node visited gets id n */
+
+	for_each_of_cpu_node(dn) {
+		if (cpuid >= NR_CPUS) {
+			pr_warn("NR_CPUS too small for %d cpuid\n", cpuid);
+			of_node_put(dn);	/* iterator holds a ref on dn; drop it on early exit */
+			return;
+		}
+		numa_set_node(cpuid, of_node_to_nid(dn));
+		cpuid++;
+	}
+}
+
static int __init numa_init(int (*init_func)(void))
{
int i;
@@ -645,6 +662,9 @@ static int __init numa_init(int (*init_func)(void))
if (ret < 0)
return ret;

+ if (acpi_disabled)
+ of_parse_and_init_cpus();
+
for (i = 0; i < nr_cpu_ids; i++) {
int nid = early_cpu_to_node(i);

--
1.8.3.1