[tip: ras/core] x86/mce/amd: Make threshold bank setting hotplug robust

From: tip-bot2 for Thomas Gleixner
Date: Wed Apr 15 2020 - 05:53:24 EST


The following commit has been merged into the ras/core branch of tip:

Commit-ID: a037f3ca0ea0a660e3f961431095a88674b8f3c4
Gitweb: https://git.kernel.org/tip/a037f3ca0ea0a660e3f961431095a88674b8f3c4
Author: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
AuthorDate: Tue, 31 Mar 2020 13:16:44 +02:00
Committer: Borislav Petkov <bp@xxxxxxx>
CommitterDate: Tue, 14 Apr 2020 15:50:19 +02:00

x86/mce/amd: Make threshold bank setting hotplug robust

Handle the cases when the CPU goes offline before the bank
setting/reading happens.

[ bp: Write commit message. ]

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Link: https://lkml.kernel.org/r/20200403161943.1458-8-bp@xxxxxxxxx
---
arch/x86/kernel/cpu/mce/amd.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 16e7aea..15c87b8 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -386,6 +386,10 @@ static void threshold_restart_bank(void *_tr)
struct thresh_restart *tr = _tr;
u32 hi, lo;

+ /* sysfs write might race against an offline operation */
+ if (this_cpu_read(threshold_banks))
+ return;
+
rdmsr(tr->b->address, lo, hi);

if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
@@ -1085,7 +1089,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
memset(&tr, 0, sizeof(tr));
tr.b = b;

- smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
+ if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
+ return -ENODEV;

return size;
}
@@ -1109,7 +1114,8 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
b->threshold_limit = new;
tr.b = b;

- smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
+ if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
+ return -ENODEV;

return size;
}
@@ -1118,7 +1124,9 @@ static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
u32 lo, hi;

- rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);
+ /* CPU might be offline by now */
+ if (rdmsr_on_cpu(b->cpu, b->address, &lo, &hi))
+ return -ENODEV;

return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
(THRESHOLD_MAX - b->threshold_limit)));