[PATCH 24/25] x86/mcheck: Move CPU_ONLINE to hotplug state machine

From: Sebastian Andrzej Siewior
Date: Thu Nov 03 2016 - 10:51:17 EST


This callback is still partly asymmetric, since the counterpart of
mce_device_create() is still done in the CPU_DEAD notifier case.

On failure we don't undo what mce_device_create() did, _but_ that will
happen once we move CPU_DEAD to the state machine.

Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: linux-edac@xxxxxxxxxxxxxxx
Cc: x86@xxxxxxxxxx
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/x86/kernel/cpu/mcheck/mce.c | 61 ++++++++++++++++------------------------
1 file changed, 24 insertions(+), 37 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 596a7128a46b..b1770ebcb8de 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2487,18 +2487,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
unsigned int cpu = (unsigned long)hcpu;

switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- mce_device_create(cpu);
- if (threshold_cpu_callback_online) {
- int ret;
-
- ret = threshold_cpu_callback_online(cpu);
- if (ret) {
- mce_device_remove(cpu);
- return NOTIFY_BAD;
- }
- }
- break;
case CPU_DEAD:
if (threshold_cpu_callback_dead)
threshold_cpu_callback_dead(cpu);
@@ -2514,6 +2502,22 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_OK;
}

+static int mce_cpu_online(unsigned int cpu)
+{
+ int ret;
+
+ mce_device_create(cpu);
+ if (!threshold_cpu_callback_online)
+ return 0;
+
+ ret = threshold_cpu_callback_online(cpu);
+ if (ret) {
+ mce_device_remove(cpu);
+ return ret;
+ }
+ return 0;
+}
+
static int mce_cpu_down_dying(unsigned int cpu)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
@@ -2547,8 +2551,8 @@ static __init void mce_init_banks(void)

static __init int mcheck_init_device(void)
{
+ enum cpuhp_state hp_online;
int err;
- int i = 0;

if (!mce_available(&boot_cpu_data)) {
err = -EIO;
@@ -2580,22 +2584,13 @@ static __init int mcheck_init_device(void)
mcheck_cpu_starting, mce_cpu_down_dying);
if (err)
goto err_init_pool;
+ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online",
+ mce_cpu_online, NULL);
+ if (err < 0)
+ goto err_hp_online;
+ hp_online = err;

cpu_notifier_register_begin();
- for_each_online_cpu(i) {
- err = mce_device_create(i);
- if (err) {
- /*
- * Register notifier anyway (and do not unreg it) so
- * that we don't leave undeleted timers, see notifier
- * callback above.
- */
- __register_hotcpu_notifier(&mce_cpu_notifier);
- cpu_notifier_register_done();
- goto err_device_create;
- }
- }
-
__register_hotcpu_notifier(&mce_cpu_notifier);
cpu_notifier_register_done();

@@ -2610,17 +2605,9 @@ static __init int mcheck_init_device(void)

err_register:
unregister_syscore_ops(&mce_syscore_ops);
+ cpuhp_remove_state(hp_online);

-err_device_create:
- /*
- * We didn't keep track of which devices were created above, but
- * even if we had, the set of online cpus might have changed.
- * Play safe and remove for every possible cpu, since
- * mce_device_remove() will do the right thing.
- */
- for_each_possible_cpu(i)
- mce_device_remove(i);
-
+err_hp_online:
cpuhp_remove_state(CPUHP_AP_X86_MCE_STARTING);

err_init_pool:
--
2.10.2