[GIT pull] cpu hotplug updates for 4.6

From: Thomas Gleixner
Date: Mon Mar 14 2016 - 07:32:53 EST


Linus,

please pull the latest smp-hotplug-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git smp-hotplug-for-linus

This is the first part of the ongoing cpu hotplug rework:

- Initial implementation of the state machine

- Runs all online and prepare down callbacks on the plugged cpu and not on
some random processor

- Replaces busy loop waiting with completions

- Adds tracepoints so the states can be followed

Further information is here:

http://lkml.kernel.org/r/20160226164321.657646833@xxxxxxxxxxxxx

Thanks,

tglx

------------------>
Thomas Gleixner (23):
cpu/hotplug: Restructure FROZEN state handling
cpu/hotplug: Restructure cpu_up code
cpu/hotplug: Split out cpu down functions
cpu/hotplug: Add tracepoints
cpu/hotplug: Convert to a state machine for the control processor
cpu/hotplug: Convert the hotplugged cpu work to a state machine
cpu/hotplug: Hand in target state to _cpu_up/down
cpu/hotplug: Add sysfs state interface
cpu/hotplug: Make target state writeable
cpu/hotplug: Implement setup/removal interface
cpu/hotplug: Move scheduler cpu_online notifier to hotplug core
cpu/hotplug: Unpark smpboot threads from the state machine
cpu/hotplug: Split out the state walk into functions
cpu/hotplug: Create hotplug threads
cpu/hotplug: Move online calls to hotplugged cpu
arch/hotplug: Call into idle with a proper state
cpu/hotplug: Let upcoming cpu bring itself fully up
cpu/hotplug: Make wait for dead cpu completion based
rcu: Make CPU_DYING_IDLE an explicit call
cpu/hotplug: Plug death reporting race
cpu/hotplug: Remove redundant state check
cpu/hotplug: Fix smpboot thread ordering
cpu/hotplug: Document states better


arch/alpha/kernel/smp.c | 2 +-
arch/arc/kernel/smp.c | 2 +-
arch/arm/kernel/smp.c | 2 +-
arch/arm64/kernel/smp.c | 2 +-
arch/blackfin/mach-common/smp.c | 2 +-
arch/hexagon/kernel/smp.c | 2 +-
arch/ia64/kernel/smpboot.c | 2 +-
arch/m32r/kernel/smpboot.c | 2 +-
arch/metag/kernel/smp.c | 2 +-
arch/mips/kernel/smp.c | 2 +-
arch/mn10300/kernel/smp.c | 2 +-
arch/parisc/kernel/smp.c | 2 +-
arch/powerpc/kernel/smp.c | 2 +-
arch/s390/kernel/smp.c | 2 +-
arch/sh/kernel/smp.c | 2 +-
arch/sparc/kernel/smp_32.c | 2 +-
arch/sparc/kernel/smp_64.c | 2 +-
arch/tile/kernel/smpboot.c | 2 +-
arch/x86/kernel/smpboot.c | 2 +-
arch/x86/xen/smp.c | 2 +-
arch/xtensa/kernel/smp.c | 2 +-
include/linux/cpu.h | 27 +-
include/linux/cpuhotplug.h | 93 ++++
include/linux/notifier.h | 2 +
include/linux/rcupdate.h | 4 +-
include/trace/events/cpuhp.h | 66 +++
init/main.c | 16 +-
kernel/cpu.c | 1162 ++++++++++++++++++++++++++++++++++-----
kernel/rcu/tree.c | 70 +--
kernel/sched/core.c | 10 -
kernel/sched/idle.c | 9 +-
kernel/smp.c | 1 +
kernel/smpboot.c | 6 +-
kernel/smpboot.h | 6 +-
lib/Kconfig.debug | 13 +
35 files changed, 1291 insertions(+), 236 deletions(-)
create mode 100644 include/linux/cpuhotplug.h
create mode 100644 include/trace/events/cpuhp.h

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 2f24447fef92..46bf263c3153 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -168,7 +168,7 @@ smp_callin(void)
cpuid, current, current->active_mm));

preempt_disable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 424e937da5c8..4cb3add77c75 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -142,7 +142,7 @@ void start_kernel_secondary(void)

local_irq_enable();
preempt_disable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/*
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 37312f6749f3..baee70267f29 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -409,7 +409,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51b2c2e..460765799c64 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -195,7 +195,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 0030e21cfceb..23c4ef5f8bdc 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -333,7 +333,7 @@ void secondary_start_kernel(void)

/* We are done with local CPU inits, unblock the boot CPU. */
set_cpu_online(cpu, true);
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

void __init smp_prepare_boot_cpu(void)
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index ff759f26b96a..983bae7d2665 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -180,7 +180,7 @@ void start_secondary(void)

local_irq_enable();

- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}


diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 0e76fad27975..74fe317477e6 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -454,7 +454,7 @@ start_secondary (void *unused)
preempt_disable();
smp_callin();

- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}

diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index a468467542f4..f98d2f6519d6 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -432,7 +432,7 @@ int __init start_secondary(void *unused)
*/
local_flush_tlb_all();

- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}

diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index c3c6f0864881..bad13232de51 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -396,7 +396,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index bd4385a8e6e8..f2112a8ddf15 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -191,7 +191,7 @@ asmlinkage void start_secondary(void)
WARN_ON_ONCE(!irqs_disabled());
mp_ops->smp_finish();

- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

static void stop_this_cpu(void *dummy)
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index f984193718b1..426173c4b0b9 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -675,7 +675,7 @@ int __init start_secondary(void *unused)
#ifdef CONFIG_GENERIC_CLOCKEVENTS
init_clockevents();
#endif
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}

diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 52e85973a283..c2a9cc55a62f 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -305,7 +305,7 @@ void __init smp_callin(void)

local_irq_enable(); /* Interrupts have been off until now */

- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

/* NOTREACHED */
panic("smp_callin() AAAAaaaaahhhh....\n");
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec2058d2d..cc13d4c83291 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -727,7 +727,7 @@ void start_secondary(void *unused)

local_irq_enable();

- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

BUG();
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3c65a8eae34d..40a6b4f9c36c 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid)
set_cpu_online(smp_processor_id(), true);
inc_irq_stat(CPU_RST);
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/* Upping and downing of CPUs */
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index de6be008fc01..13f633add29a 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -203,7 +203,7 @@ asmlinkage void start_secondary(void)
set_cpu_online(cpu, true);
per_cpu(cpu_state, cpu) = CPU_ONLINE;

- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

extern struct {
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index b3a5d81b20f0..fb30e7c6a5b1 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -364,7 +364,7 @@ static void sparc_start_secondary(void *arg)
local_irq_enable();

wmb();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

/* We should never reach here! */
BUG();
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 19cd08d18672..8a6151a628ce 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -134,7 +134,7 @@ void smp_callin(void)

local_irq_enable();

- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

void cpu_panic(void)
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 20d52a98e171..6c0abaacec33 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -208,7 +208,7 @@ void online_secondary(void)
/* Set up tile-timer clock-event device on this cpu */
setup_tile_timer();

- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

int __cpu_up(unsigned int cpu, struct task_struct *tidle)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 24d57f77b3c1..293b22a7ab02 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -248,7 +248,7 @@ static void notrace start_secondary(void *unused)
x86_cpuinit.setup_percpu_clockev();

wmb();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

void __init smp_store_boot_cpu_info(void)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3f4ebf0261f2..3c6d17fd423a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -112,7 +112,7 @@ asmlinkage __visible void cpu_bringup_and_idle(int cpu)
xen_pvh_secondary_vcpu_init(cpu);
#endif
cpu_bringup();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

static void xen_smp_intr_free(unsigned int cpu)
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index 4d02e38514f5..fc4ad21a5ed4 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -157,7 +157,7 @@ void secondary_start_kernel(void)

complete(&cpu_running);

- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

static void mx_cpu_start(void *p)
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d2ca8c38f9c4..f9b1fab4388a 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -16,6 +16,7 @@
#include <linux/node.h>
#include <linux/compiler.h>
#include <linux/cpumask.h>
+#include <linux/cpuhotplug.h>

struct device;
struct device_node;
@@ -27,6 +28,9 @@ struct cpu {
struct device dev;
};

+extern void boot_cpu_init(void);
+extern void boot_cpu_state_init(void);
+
extern int register_cpu(struct cpu *cpu, int num);
extern struct device *get_cpu_device(unsigned cpu);
extern bool cpu_is_hotpluggable(unsigned cpu);
@@ -74,7 +78,7 @@ enum {
/* migration should happen before other stuff but after perf */
CPU_PRI_PERF = 20,
CPU_PRI_MIGRATION = 10,
- CPU_PRI_SMPBOOT = 9,
+
/* bring up workqueues before normal notifiers and down after */
CPU_PRI_WORKQUEUE_UP = 5,
CPU_PRI_WORKQUEUE_DOWN = -5,
@@ -97,9 +101,7 @@ enum {
* Called on the new cpu, just before
* enabling interrupts. Must not sleep,
* must not fail */
-#define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached
- * idle loop. */
-#define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly,
+#define CPU_BROKEN 0x000B /* CPU (unsigned)v did not die properly,
* perhaps due to preemption. */

/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
@@ -118,6 +120,7 @@ enum {


#ifdef CONFIG_SMP
+extern bool cpuhp_tasks_frozen;
/* Need to know about CPUs going up/down? */
#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
#define cpu_notifier(fn, pri) { \
@@ -167,7 +170,6 @@ static inline void __unregister_cpu_notifier(struct notifier_block *nb)
}
#endif

-void smpboot_thread_init(void);
int cpu_up(unsigned int cpu);
void notify_cpu_starting(unsigned int cpu);
extern void cpu_maps_update_begin(void);
@@ -177,6 +179,7 @@ extern void cpu_maps_update_done(void);
#define cpu_notifier_register_done cpu_maps_update_done

#else /* CONFIG_SMP */
+#define cpuhp_tasks_frozen 0

#define cpu_notifier(fn, pri) do { (void)(fn); } while (0)
#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0)
@@ -215,10 +218,6 @@ static inline void cpu_notifier_register_done(void)
{
}

-static inline void smpboot_thread_init(void)
-{
-}
-
#endif /* CONFIG_SMP */
extern struct bus_type cpu_subsys;

@@ -265,11 +264,6 @@ static inline int disable_nonboot_cpus(void) { return 0; }
static inline void enable_nonboot_cpus(void) {}
#endif /* !CONFIG_PM_SLEEP_SMP */

-enum cpuhp_state {
- CPUHP_OFFLINE,
- CPUHP_ONLINE,
-};
-
void cpu_startup_entry(enum cpuhp_state state);

void cpu_idle_poll_ctrl(bool enable);
@@ -280,14 +274,15 @@ void arch_cpu_idle_enter(void);
void arch_cpu_idle_exit(void);
void arch_cpu_idle_dead(void);

-DECLARE_PER_CPU(bool, cpu_dead_idle);
-
int cpu_report_state(int cpu);
int cpu_check_up_prepare(int cpu);
void cpu_set_state_online(int cpu);
#ifdef CONFIG_HOTPLUG_CPU
bool cpu_wait_death(unsigned int cpu, int seconds);
bool cpu_report_death(void);
+void cpuhp_report_idle_dead(void);
+#else
+static inline void cpuhp_report_idle_dead(void) { }
#endif /* #ifdef CONFIG_HOTPLUG_CPU */

#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
new file mode 100644
index 000000000000..5d68e15e46b7
--- /dev/null
+++ b/include/linux/cpuhotplug.h
@@ -0,0 +1,93 @@
+#ifndef __CPUHOTPLUG_H
+#define __CPUHOTPLUG_H
+
+enum cpuhp_state {
+ CPUHP_OFFLINE,
+ CPUHP_CREATE_THREADS,
+ CPUHP_NOTIFY_PREPARE,
+ CPUHP_BRINGUP_CPU,
+ CPUHP_AP_IDLE_DEAD,
+ CPUHP_AP_OFFLINE,
+ CPUHP_AP_NOTIFY_STARTING,
+ CPUHP_AP_ONLINE,
+ CPUHP_TEARDOWN_CPU,
+ CPUHP_AP_ONLINE_IDLE,
+ CPUHP_AP_SMPBOOT_THREADS,
+ CPUHP_AP_NOTIFY_ONLINE,
+ CPUHP_AP_ONLINE_DYN,
+ CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
+ CPUHP_ONLINE,
+};
+
+int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu));
+
+/**
+ * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
+ * @state: The state for which the calls are installed
+ * @name: Name of the callback (will be used in debug output)
+ * @startup: startup callback function
+ * @teardown: teardown callback function
+ *
+ * Installs the callback functions and invokes the startup callback on
+ * the present cpus which have already reached the @state.
+ */
+static inline int cpuhp_setup_state(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+{
+ return __cpuhp_setup_state(state, name, true, startup, teardown);
+}
+
+/**
+ * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
+ * callbacks
+ * @state: The state for which the calls are installed
+ * @name: Name of the callback.
+ * @startup: startup callback function
+ * @teardown: teardown callback function
+ *
+ * Same as @cpuhp_setup_state except that no calls are executed are invoked
+ * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n.
+ */
+static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+{
+ return __cpuhp_setup_state(state, name, false, startup, teardown);
+}
+
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
+
+/**
+ * cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown
+ * @state: The state for which the calls are removed
+ *
+ * Removes the callback functions and invokes the teardown callback on
+ * the present cpus which have already reached the @state.
+ */
+static inline void cpuhp_remove_state(enum cpuhp_state state)
+{
+ __cpuhp_remove_state(state, true);
+}
+
+/**
+ * cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking
+ * teardown
+ * @state: The state for which the calls are removed
+ */
+static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
+{
+ __cpuhp_remove_state(state, false);
+}
+
+#ifdef CONFIG_SMP
+void cpuhp_online_idle(enum cpuhp_state state);
+#else
+static inline void cpuhp_online_idle(enum cpuhp_state state) { }
+#endif
+
+#endif
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index d14a4c362465..4149868de4e6 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -47,6 +47,8 @@
* runtime initialization.
*/

+struct notifier_block;
+
typedef int (*notifier_fn_t)(struct notifier_block *nb,
unsigned long action, void *data);

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 14e6f47ee16f..fc46fe3ea259 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -332,9 +332,7 @@ void rcu_init(void);
void rcu_sched_qs(void);
void rcu_bh_qs(void);
void rcu_check_callbacks(int user);
-struct notifier_block;
-int rcu_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu);
+void rcu_report_dead(unsigned int cpu);

#ifndef CONFIG_TINY_RCU
void rcu_end_inkernel_boot(void);
diff --git a/include/trace/events/cpuhp.h b/include/trace/events/cpuhp.h
new file mode 100644
index 000000000000..a72bd93ec7e5
--- /dev/null
+++ b/include/trace/events/cpuhp.h
@@ -0,0 +1,66 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cpuhp
+
+#if !defined(_TRACE_CPUHP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CPUHP_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(cpuhp_enter,
+
+ TP_PROTO(unsigned int cpu,
+ int target,
+ int idx,
+ int (*fun)(unsigned int)),
+
+ TP_ARGS(cpu, target, idx, fun),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, cpu )
+ __field( int, target )
+ __field( int, idx )
+ __field( void *, fun )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->target = target;
+ __entry->idx = idx;
+ __entry->fun = fun;
+ ),
+
+ TP_printk("cpu: %04u target: %3d step: %3d (%pf)",
+ __entry->cpu, __entry->target, __entry->idx, __entry->fun)
+);
+
+TRACE_EVENT(cpuhp_exit,
+
+ TP_PROTO(unsigned int cpu,
+ int state,
+ int idx,
+ int ret),
+
+ TP_ARGS(cpu, state, idx, ret),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, cpu )
+ __field( int, state )
+ __field( int, idx )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->state = state;
+ __entry->idx = idx;
+ __entry->ret = ret;
+ ),
+
+ TP_printk(" cpu: %04u state: %3d step: %3d ret: %d",
+ __entry->cpu, __entry->state, __entry->idx, __entry->ret)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/init/main.c b/init/main.c
index 58c9e374704b..55563fd36be3 100644
--- a/init/main.c
+++ b/init/main.c
@@ -388,7 +388,6 @@ static noinline void __init_refok rest_init(void)
int pid;

rcu_scheduler_starting();
- smpboot_thread_init();
/*
* We need to spawn init first so that it obtains pid 1, however
* the init task will end up wanting to create kthreads, which, if
@@ -452,20 +451,6 @@ void __init parse_early_param(void)
done = 1;
}

-/*
- * Activate the first processor.
- */
-
-static void __init boot_cpu_init(void)
-{
- int cpu = smp_processor_id();
- /* Mark the boot cpu "present", "online" etc for SMP and UP case */
- set_cpu_online(cpu, true);
- set_cpu_active(cpu, true);
- set_cpu_present(cpu, true);
- set_cpu_possible(cpu, true);
-}
-
void __init __weak smp_setup_processor_id(void)
{
}
@@ -530,6 +515,7 @@ asmlinkage __visible void __init start_kernel(void)
setup_command_line(command_line);
setup_nr_cpu_ids();
setup_per_cpu_areas();
+ boot_cpu_state_init();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */

build_all_zonelists(NULL, NULL);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5b9d39633ce9..6ea42e8da861 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -22,13 +22,88 @@
#include <linux/lockdep.h>
#include <linux/tick.h>
#include <linux/irq.h>
+#include <linux/smpboot.h>
+
#include <trace/events/power.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/cpuhp.h>

#include "smpboot.h"

+/**
+ * cpuhp_cpu_state - Per cpu hotplug state storage
+ * @state: The current cpu state
+ * @target: The target state
+ * @thread: Pointer to the hotplug thread
+ * @should_run: Thread should execute
+ * @cb_stat: The state for a single callback (install/uninstall)
+ * @cb: Single callback function (install/uninstall)
+ * @result: Result of the operation
+ * @done: Signal completion to the issuer of the task
+ */
+struct cpuhp_cpu_state {
+ enum cpuhp_state state;
+ enum cpuhp_state target;
+#ifdef CONFIG_SMP
+ struct task_struct *thread;
+ bool should_run;
+ enum cpuhp_state cb_state;
+ int (*cb)(unsigned int cpu);
+ int result;
+ struct completion done;
+#endif
+};
+
+static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+
+/**
+ * cpuhp_step - Hotplug state machine step
+ * @name: Name of the step
+ * @startup: Startup function of the step
+ * @teardown: Teardown function of the step
+ * @skip_onerr: Do not invoke the functions on error rollback
+ * Will go away once the notifiers are gone
+ * @cant_stop: Bringup/teardown can't be stopped at this step
+ */
+struct cpuhp_step {
+ const char *name;
+ int (*startup)(unsigned int cpu);
+ int (*teardown)(unsigned int cpu);
+ bool skip_onerr;
+ bool cant_stop;
+};
+
+static DEFINE_MUTEX(cpuhp_state_mutex);
+static struct cpuhp_step cpuhp_bp_states[];
+static struct cpuhp_step cpuhp_ap_states[];
+
+/**
+ * cpuhp_invoke_callback _ Invoke the callbacks for a given state
+ * @cpu: The cpu for which the callback should be invoked
+ * @step: The step in the state machine
+ * @cb: The callback function to invoke
+ *
+ * Called from cpu hotplug and from the state register machinery
+ */
+static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
+ int (*cb)(unsigned int))
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int ret = 0;
+
+ if (cb) {
+ trace_cpuhp_enter(cpu, st->target, step, cb);
+ ret = cb(cpu);
+ trace_cpuhp_exit(cpu, st->state, step, ret);
+ }
+ return ret;
+}
+
#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
+bool cpuhp_tasks_frozen;
+EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);

/*
* The following two APIs (cpu_maps_update_begin/done) must be used when
@@ -207,31 +282,281 @@ int __register_cpu_notifier(struct notifier_block *nb)
return raw_notifier_chain_register(&cpu_chain, nb);
}

-static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
+static int __cpu_notify(unsigned long val, unsigned int cpu, int nr_to_call,
int *nr_calls)
{
+ unsigned long mod = cpuhp_tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ void *hcpu = (void *)(long)cpu;
+
int ret;

- ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
+ ret = __raw_notifier_call_chain(&cpu_chain, val | mod, hcpu, nr_to_call,
nr_calls);

return notifier_to_errno(ret);
}

-static int cpu_notify(unsigned long val, void *v)
+static int cpu_notify(unsigned long val, unsigned int cpu)
{
- return __cpu_notify(val, v, -1, NULL);
+ return __cpu_notify(val, cpu, -1, NULL);
}

-#ifdef CONFIG_HOTPLUG_CPU
+/* Notifier wrappers for transitioning to state machine */
+static int notify_prepare(unsigned int cpu)
+{
+ int nr_calls = 0;
+ int ret;
+
+ ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, &nr_calls);
+ if (ret) {
+ nr_calls--;
+ printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
+ __func__, cpu);
+ __cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL);
+ }
+ return ret;
+}
+
+static int notify_online(unsigned int cpu)
+{
+ cpu_notify(CPU_ONLINE, cpu);
+ return 0;
+}
+
+static int notify_starting(unsigned int cpu)
+{
+ cpu_notify(CPU_STARTING, cpu);
+ return 0;
+}
+
+static int bringup_wait_for_ap(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+ wait_for_completion(&st->done);
+ return st->result;
+}
+
+static int bringup_cpu(unsigned int cpu)
+{
+ struct task_struct *idle = idle_thread_get(cpu);
+ int ret;
+
+ /* Arch-specific enabling code. */
+ ret = __cpu_up(cpu, idle);
+ if (ret) {
+ cpu_notify(CPU_UP_CANCELED, cpu);
+ return ret;
+ }
+ ret = bringup_wait_for_ap(cpu);
+ BUG_ON(!cpu_online(cpu));
+ return ret;
+}
+
+/*
+ * Hotplug state machine related functions
+ */
+static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st,
+ struct cpuhp_step *steps)
+{
+ for (st->state++; st->state < st->target; st->state++) {
+ struct cpuhp_step *step = steps + st->state;
+
+ if (!step->skip_onerr)
+ cpuhp_invoke_callback(cpu, st->state, step->startup);
+ }
+}
+
+static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+ struct cpuhp_step *steps, enum cpuhp_state target)
+{
+ enum cpuhp_state prev_state = st->state;
+ int ret = 0;
+
+ for (; st->state > target; st->state--) {
+ struct cpuhp_step *step = steps + st->state;
+
+ ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
+ if (ret) {
+ st->target = prev_state;
+ undo_cpu_down(cpu, st, steps);
+ break;
+ }
+ }
+ return ret;
+}
+
+static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st,
+ struct cpuhp_step *steps)
+{
+ for (st->state--; st->state > st->target; st->state--) {
+ struct cpuhp_step *step = steps + st->state;
+
+ if (!step->skip_onerr)
+ cpuhp_invoke_callback(cpu, st->state, step->teardown);
+ }
+}
+
+static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+ struct cpuhp_step *steps, enum cpuhp_state target)
+{
+ enum cpuhp_state prev_state = st->state;
+ int ret = 0;
+
+ while (st->state < target) {
+ struct cpuhp_step *step;
+
+ st->state++;
+ step = steps + st->state;
+ ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
+ if (ret) {
+ st->target = prev_state;
+ undo_cpu_up(cpu, st, steps);
+ break;
+ }
+ }
+ return ret;
+}
+
+/*
+ * The cpu hotplug threads manage the bringup and teardown of the cpus
+ */
+static void cpuhp_create(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+ init_completion(&st->done);
+}
+
+static int cpuhp_should_run(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+
+ return st->should_run;
+}
+
+/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
+static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+ enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
+
+ return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target);
+}
+
+/* Execute the online startup callbacks. Used to be CPU_ONLINE */
+static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+ return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target);
+}
+
+/*
+ * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
+ * callbacks when a state gets [un]installed at runtime.
+ */
+static void cpuhp_thread_fun(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+ int ret = 0;
+
+ /*
+ * Paired with the mb() in cpuhp_kick_ap_work and
+ * cpuhp_invoke_ap_callback, so the work set is consistent visible.
+ */
+ smp_mb();
+ if (!st->should_run)
+ return;
+
+ st->should_run = false;
+
+ /* Single callback invocation for [un]install ? */
+ if (st->cb) {
+ if (st->cb_state < CPUHP_AP_ONLINE) {
+ local_irq_disable();
+ ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+ local_irq_enable();
+ } else {
+ ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+ }
+ } else {
+ /* Cannot happen .... */
+ BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+
+ /* Regular hotplug work */
+ if (st->state < st->target)
+ ret = cpuhp_ap_online(cpu, st);
+ else if (st->state > st->target)
+ ret = cpuhp_ap_offline(cpu, st);
+ }
+ st->result = ret;
+ complete(&st->done);
+}

-static void cpu_notify_nofail(unsigned long val, void *v)
+/* Invoke a single callback on a remote cpu */
+static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state,
+ int (*cb)(unsigned int))
{
- BUG_ON(cpu_notify(val, v));
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+ if (!cpu_online(cpu))
+ return 0;
+
+ st->cb_state = state;
+ st->cb = cb;
+ /*
+ * Make sure the above stores are visible before should_run becomes
+ * true. Paired with the mb() above in cpuhp_thread_fun()
+ */
+ smp_mb();
+ st->should_run = true;
+ wake_up_process(st->thread);
+ wait_for_completion(&st->done);
+ return st->result;
}
+
+/* Regular hotplug invocation of the AP hotplug thread */
+static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
+{
+ st->result = 0;
+ st->cb = NULL;
+ /*
+ * Make sure the above stores are visible before should_run becomes
+ * true. Paired with the mb() above in cpuhp_thread_fun()
+ */
+ smp_mb();
+ st->should_run = true;
+ wake_up_process(st->thread);
+}
+
+static int cpuhp_kick_ap_work(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ enum cpuhp_state state = st->state;
+
+ trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
+ __cpuhp_kick_ap_work(st);
+ wait_for_completion(&st->done);
+ trace_cpuhp_exit(cpu, st->state, state, st->result);
+ return st->result;
+}
+
+static struct smp_hotplug_thread cpuhp_threads = {
+ .store = &cpuhp_state.thread,
+ .create = &cpuhp_create,
+ .thread_should_run = cpuhp_should_run,
+ .thread_fn = cpuhp_thread_fun,
+ .thread_comm = "cpuhp/%u",
+ .selfparking = true,
+};
+
+void __init cpuhp_threads_init(void)
+{
+ BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
+ kthread_unpark(this_cpu_read(cpuhp_state.thread));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
EXPORT_SYMBOL(register_cpu_notifier);
EXPORT_SYMBOL(__register_cpu_notifier);
-
void unregister_cpu_notifier(struct notifier_block *nb)
{
cpu_maps_update_begin();
@@ -311,57 +636,60 @@ static inline void check_for_tasks(int dead_cpu)
read_unlock(&tasklist_lock);
}

-struct take_cpu_down_param {
- unsigned long mod;
- void *hcpu;
-};
+static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
+{
+ BUG_ON(cpu_notify(val, cpu));
+}
+
+static int notify_down_prepare(unsigned int cpu)
+{
+ int err, nr_calls = 0;
+
+ err = __cpu_notify(CPU_DOWN_PREPARE, cpu, -1, &nr_calls);
+ if (err) {
+ nr_calls--;
+ __cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL);
+ pr_warn("%s: attempt to take down CPU %u failed\n",
+ __func__, cpu);
+ }
+ return err;
+}
+
+static int notify_dying(unsigned int cpu)
+{
+ cpu_notify(CPU_DYING, cpu);
+ return 0;
+}

/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
- struct take_cpu_down_param *param = _param;
- int err;
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+ enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
+ int err, cpu = smp_processor_id();

/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
return err;

- cpu_notify(CPU_DYING | param->mod, param->hcpu);
+ /* Invoke the former CPU_DYING callbacks */
+ for (; st->state > target; st->state--) {
+ struct cpuhp_step *step = cpuhp_ap_states + st->state;
+
+ cpuhp_invoke_callback(cpu, st->state, step->teardown);
+ }
/* Give up timekeeping duties */
tick_handover_do_timer();
/* Park the stopper thread */
- stop_machine_park((long)param->hcpu);
+ stop_machine_park(cpu);
return 0;
}

-/* Requires cpu_add_remove_lock to be held */
-static int _cpu_down(unsigned int cpu, int tasks_frozen)
+static int takedown_cpu(unsigned int cpu)
{
- int err, nr_calls = 0;
- void *hcpu = (void *)(long)cpu;
- unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
- struct take_cpu_down_param tcd_param = {
- .mod = mod,
- .hcpu = hcpu,
- };
-
- if (num_online_cpus() == 1)
- return -EBUSY;
-
- if (!cpu_online(cpu))
- return -EINVAL;
-
- cpu_hotplug_begin();
-
- err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
- if (err) {
- nr_calls--;
- __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
- pr_warn("%s: attempt to take down CPU %u failed\n",
- __func__, cpu);
- goto out_release;
- }
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int err;

/*
* By now we've cleared cpu_active_mask, wait for all preempt-disabled
@@ -378,6 +706,8 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
else
synchronize_rcu();

+ /* Park the smpboot threads */
+ kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
smpboot_park_threads(cpu);

/*
@@ -389,12 +719,12 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
- err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
/* CPU didn't die: tell everyone. Can't complain. */
- cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
+ cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
irq_unlock_sparse();
- goto out_release;
+ return err;
}
BUG_ON(cpu_online(cpu));

@@ -405,10 +735,8 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
*
* Wait for the stop thread to go away.
*/
- while (!per_cpu(cpu_dead_idle, cpu))
- cpu_relax();
- smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
- per_cpu(cpu_dead_idle, cpu) = false;
+ wait_for_completion(&st->done);
+ BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);

/* Interrupts are moved away from the dying cpu, reenable alloc/free */
irq_unlock_sparse();
@@ -417,20 +745,104 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
/* This actually kills the CPU. */
__cpu_die(cpu);

- /* CPU is completely dead: tell everyone. Too late to complain. */
tick_cleanup_dead_cpu(cpu);
- cpu_notify_nofail(CPU_DEAD | mod, hcpu);
+ return 0;
+}

+static int notify_dead(unsigned int cpu)
+{
+ cpu_notify_nofail(CPU_DEAD, cpu);
check_for_tasks(cpu);
+ return 0;
+}

-out_release:
+static void cpuhp_complete_idle_dead(void *arg)
+{
+ struct cpuhp_cpu_state *st = arg;
+
+ complete(&st->done);
+}
+
+void cpuhp_report_idle_dead(void)
+{
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+
+ BUG_ON(st->state != CPUHP_AP_OFFLINE);
+ rcu_report_dead(smp_processor_id());
+ st->state = CPUHP_AP_IDLE_DEAD;
+ /*
+ * We cannot call complete after rcu_report_dead() so we delegate it
+ * to an online cpu.
+ */
+ smp_call_function_single(cpumask_first(cpu_online_mask),
+ cpuhp_complete_idle_dead, st, 0);
+}
+
+#else
+#define notify_down_prepare NULL
+#define takedown_cpu NULL
+#define notify_dead NULL
+#define notify_dying NULL
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Requires cpu_add_remove_lock to be held */
+static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
+ enum cpuhp_state target)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int prev_state, ret = 0;
+ bool hasdied = false;
+
+ if (num_online_cpus() == 1)
+ return -EBUSY;
+
+ if (!cpu_present(cpu))
+ return -EINVAL;
+
+ cpu_hotplug_begin();
+
+ cpuhp_tasks_frozen = tasks_frozen;
+
+ prev_state = st->state;
+ st->target = target;
+ /*
+ * If the current CPU state is in the range of the AP hotplug thread,
+ * then we need to kick the thread.
+ */
+ if (st->state > CPUHP_TEARDOWN_CPU) {
+ ret = cpuhp_kick_ap_work(cpu);
+ /*
+ * The AP side has done the error rollback already. Just
+ * return the error code..
+ */
+ if (ret)
+ goto out;
+
+ /*
+ * We might have stopped still in the range of the AP hotplug
+ * thread. Nothing to do anymore.
+ */
+ if (st->state > CPUHP_TEARDOWN_CPU)
+ goto out;
+ }
+ /*
+ * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
+ * to do the further cleanups.
+ */
+ ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+
+ hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
+out:
cpu_hotplug_done();
- if (!err)
- cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
- return err;
+ /* This post dead nonsense must die */
+ if (!ret && hasdied)
+ cpu_notify_nofail(CPU_POST_DEAD, cpu);
+ return ret;
}

-int cpu_down(unsigned int cpu)
+static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
{
int err;

@@ -441,100 +853,131 @@ int cpu_down(unsigned int cpu)
goto out;
}

- err = _cpu_down(cpu, 0);
+ err = _cpu_down(cpu, 0, target);

out:
cpu_maps_update_done();
return err;
}
+int cpu_down(unsigned int cpu)
+{
+ return do_cpu_down(cpu, CPUHP_OFFLINE);
+}
EXPORT_SYMBOL(cpu_down);
#endif /*CONFIG_HOTPLUG_CPU*/

-/*
- * Unpark per-CPU smpboot kthreads at CPU-online time.
+/**
+ * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ * @cpu: cpu that just started
+ *
+ * This function calls the cpu_chain notifiers with CPU_STARTING.
+ * It must be called by the arch code on the new cpu, before the new cpu
+ * enables interrupts and before the "boot" cpu returns from __cpu_up().
*/
-static int smpboot_thread_call(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+void notify_cpu_starting(unsigned int cpu)
{
- int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);

- case CPU_DOWN_FAILED:
- case CPU_ONLINE:
- smpboot_unpark_threads(cpu);
- break;
+ while (st->state < target) {
+ struct cpuhp_step *step;

- default:
- break;
+ st->state++;
+ step = cpuhp_ap_states + st->state;
+ cpuhp_invoke_callback(cpu, st->state, step->startup);
}
-
- return NOTIFY_OK;
}

-static struct notifier_block smpboot_thread_notifier = {
- .notifier_call = smpboot_thread_call,
- .priority = CPU_PRI_SMPBOOT,
-};
-
-void smpboot_thread_init(void)
+/*
+ * Called from the idle task. We need to set active here, so we can kick off
+ * the stopper thread and unpark the smpboot threads. If the target state is
+ * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the
+ * cpu further.
+ */
+void cpuhp_online_idle(enum cpuhp_state state)
{
- register_cpu_notifier(&smpboot_thread_notifier);
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+ unsigned int cpu = smp_processor_id();
+
+ /* Happens for the boot cpu */
+ if (state != CPUHP_AP_ONLINE_IDLE)
+ return;
+
+ st->state = CPUHP_AP_ONLINE_IDLE;
+
+ /* The cpu is marked online, set it active now */
+ set_cpu_active(cpu, true);
+ /* Unpark the stopper thread and the hotplug thread of this cpu */
+ stop_machine_unpark(cpu);
+ kthread_unpark(st->thread);
+
+ /* Should we go further up ? */
+ if (st->target > CPUHP_AP_ONLINE_IDLE)
+ __cpuhp_kick_ap_work(st);
+ else
+ complete(&st->done);
}

/* Requires cpu_add_remove_lock to be held */
-static int _cpu_up(unsigned int cpu, int tasks_frozen)
+static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{
- int ret, nr_calls = 0;
- void *hcpu = (void *)(long)cpu;
- unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
struct task_struct *idle;
+ int ret = 0;

cpu_hotplug_begin();

- if (cpu_online(cpu) || !cpu_present(cpu)) {
+ if (!cpu_present(cpu)) {
ret = -EINVAL;
goto out;
}

- idle = idle_thread_get(cpu);
- if (IS_ERR(idle)) {
- ret = PTR_ERR(idle);
- goto out;
- }
-
- ret = smpboot_create_threads(cpu);
- if (ret)
+ /*
+ * The caller of do_cpu_up might have raced with another
+ * caller. Ignore it for now.
+ */
+ if (st->state >= target)
goto out;

- ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
- if (ret) {
- nr_calls--;
- pr_warn("%s: attempt to bring up CPU %u failed\n",
- __func__, cpu);
- goto out_notify;
+ if (st->state == CPUHP_OFFLINE) {
+ /* Let it fail before we try to bring the cpu up */
+ idle = idle_thread_get(cpu);
+ if (IS_ERR(idle)) {
+ ret = PTR_ERR(idle);
+ goto out;
+ }
}

- /* Arch-specific enabling code. */
- ret = __cpu_up(cpu, idle);
-
- if (ret != 0)
- goto out_notify;
- BUG_ON(!cpu_online(cpu));
+ cpuhp_tasks_frozen = tasks_frozen;

- /* Now call notifier in preparation. */
- cpu_notify(CPU_ONLINE | mod, hcpu);
+ st->target = target;
+ /*
+ * If the current CPU state is in the range of the AP hotplug thread,
+ * then we need to kick the thread once more.
+ */
+ if (st->state > CPUHP_BRINGUP_CPU) {
+ ret = cpuhp_kick_ap_work(cpu);
+ /*
+ * The AP side has done the error rollback already. Just
+ * return the error code..
+ */
+ if (ret)
+ goto out;
+ }

-out_notify:
- if (ret != 0)
- __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
+ /*
+ * Try to reach the target state. We max out on the BP at
+ * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
+ * responsible for bringing it up to the target state.
+ */
+ target = min((int)target, CPUHP_BRINGUP_CPU);
+ ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target);
out:
cpu_hotplug_done();
-
return ret;
}

-int cpu_up(unsigned int cpu)
+static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{
int err = 0;

@@ -558,12 +1001,16 @@ int cpu_up(unsigned int cpu)
goto out;
}

- err = _cpu_up(cpu, 0);
-
+ err = _cpu_up(cpu, 0, target);
out:
cpu_maps_update_done();
return err;
}
+
+int cpu_up(unsigned int cpu)
+{
+ return do_cpu_up(cpu, CPUHP_ONLINE);
+}
EXPORT_SYMBOL_GPL(cpu_up);

#ifdef CONFIG_PM_SLEEP_SMP
@@ -586,7 +1033,7 @@ int disable_nonboot_cpus(void)
if (cpu == first_cpu)
continue;
trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
- error = _cpu_down(cpu, 1);
+ error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
if (!error)
cpumask_set_cpu(cpu, frozen_cpus);
@@ -636,7 +1083,7 @@ void enable_nonboot_cpus(void)

for_each_cpu(cpu, frozen_cpus) {
trace_suspend_resume(TPS("CPU_ON"), cpu, true);
- error = _cpu_up(cpu, 1);
+ error = _cpu_up(cpu, 1, CPUHP_ONLINE);
trace_suspend_resume(TPS("CPU_ON"), cpu, false);
if (!error) {
pr_info("CPU%d is up\n", cpu);
@@ -709,26 +1156,463 @@ core_initcall(cpu_hotplug_pm_sync_init);

#endif /* CONFIG_PM_SLEEP_SMP */

+#endif /* CONFIG_SMP */
+
+/* Boot processor state steps */
+static struct cpuhp_step cpuhp_bp_states[] = {
+ [CPUHP_OFFLINE] = {
+ .name = "offline",
+ .startup = NULL,
+ .teardown = NULL,
+ },
+#ifdef CONFIG_SMP
+ [CPUHP_CREATE_THREADS]= {
+ .name = "threads:create",
+ .startup = smpboot_create_threads,
+ .teardown = NULL,
+ .cant_stop = true,
+ },
+ /*
+ * Preparatory and dead notifiers. Will be replaced once the notifiers
+ * are converted to states.
+ */
+ [CPUHP_NOTIFY_PREPARE] = {
+ .name = "notify:prepare",
+ .startup = notify_prepare,
+ .teardown = notify_dead,
+ .skip_onerr = true,
+ .cant_stop = true,
+ },
+ /* Kicks the plugged cpu into life */
+ [CPUHP_BRINGUP_CPU] = {
+ .name = "cpu:bringup",
+ .startup = bringup_cpu,
+ .teardown = NULL,
+ .cant_stop = true,
+ },
+ /*
+ * Handled on controll processor until the plugged processor manages
+ * this itself.
+ */
+ [CPUHP_TEARDOWN_CPU] = {
+ .name = "cpu:teardown",
+ .startup = NULL,
+ .teardown = takedown_cpu,
+ .cant_stop = true,
+ },
+#endif
+};
+
+/* Application processor state steps */
+static struct cpuhp_step cpuhp_ap_states[] = {
+#ifdef CONFIG_SMP
+ /* Final state before CPU kills itself */
+ [CPUHP_AP_IDLE_DEAD] = {
+ .name = "idle:dead",
+ },
+ /*
+ * Last state before CPU enters the idle loop to die. Transient state
+ * for synchronization.
+ */
+ [CPUHP_AP_OFFLINE] = {
+ .name = "ap:offline",
+ .cant_stop = true,
+ },
+ /*
+ * Low level startup/teardown notifiers. Run with interrupts
+ * disabled. Will be removed once the notifiers are converted to
+ * states.
+ */
+ [CPUHP_AP_NOTIFY_STARTING] = {
+ .name = "notify:starting",
+ .startup = notify_starting,
+ .teardown = notify_dying,
+ .skip_onerr = true,
+ .cant_stop = true,
+ },
+ /* Entry state on starting. Interrupts enabled from here on. Transient
+ * state for synchronsization */
+ [CPUHP_AP_ONLINE] = {
+ .name = "ap:online",
+ },
+ /* Handle smpboot threads park/unpark */
+ [CPUHP_AP_SMPBOOT_THREADS] = {
+ .name = "smpboot:threads",
+ .startup = smpboot_unpark_threads,
+ .teardown = NULL,
+ },
+ /*
+ * Online/down_prepare notifiers. Will be removed once the notifiers
+ * are converted to states.
+ */
+ [CPUHP_AP_NOTIFY_ONLINE] = {
+ .name = "notify:online",
+ .startup = notify_online,
+ .teardown = notify_down_prepare,
+ },
+#endif
+ /*
+ * The dynamically registered state space is here
+ */
+
+ /* CPU is fully up and running. */
+ [CPUHP_ONLINE] = {
+ .name = "online",
+ .startup = NULL,
+ .teardown = NULL,
+ },
+};
+
+/* Sanity check for callbacks */
+static int cpuhp_cb_check(enum cpuhp_state state)
+{
+ if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
+ return -EINVAL;
+ return 0;
+}
+
+static bool cpuhp_is_ap_state(enum cpuhp_state state)
+{
+ /*
+ * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
+ * purposes as that state is handled explicitely in cpu_down.
+ */
+ return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
+}
+
+static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
+{
+ struct cpuhp_step *sp;
+
+ sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
+ return sp + state;
+}
+
+static void cpuhp_store_callbacks(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+{
+ /* (Un)Install the callbacks for further cpu hotplug operations */
+ struct cpuhp_step *sp;
+
+ mutex_lock(&cpuhp_state_mutex);
+ sp = cpuhp_get_step(state);
+ sp->startup = startup;
+ sp->teardown = teardown;
+ sp->name = name;
+ mutex_unlock(&cpuhp_state_mutex);
+}
+
+static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
+{
+ return cpuhp_get_step(state)->teardown;
+}
+
+/*
+ * Call the startup/teardown function for a step either on the AP or
+ * on the current CPU.
+ */
+static int cpuhp_issue_call(int cpu, enum cpuhp_state state,
+ int (*cb)(unsigned int), bool bringup)
+{
+ int ret;
+
+ if (!cb)
+ return 0;
+ /*
+ * The non AP bound callbacks can fail on bringup. On teardown
+ * e.g. module removal we crash for now.
+ */
+#ifdef CONFIG_SMP
+ if (cpuhp_is_ap_state(state))
+ ret = cpuhp_invoke_ap_callback(cpu, state, cb);
+ else
+ ret = cpuhp_invoke_callback(cpu, state, cb);
+#else
+ ret = cpuhp_invoke_callback(cpu, state, cb);
+#endif
+ BUG_ON(ret && !bringup);
+ return ret;
+}
+
+/*
+ * Called from __cpuhp_setup_state on a recoverable failure.
+ *
+ * Note: The teardown callbacks for rollback are not allowed to fail!
+ */
+static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
+ int (*teardown)(unsigned int cpu))
+{
+ int cpu;
+
+ if (!teardown)
+ return;
+
+ /* Roll back the already executed steps on the other cpus */
+ for_each_present_cpu(cpu) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int cpustate = st->state;
+
+ if (cpu >= failedcpu)
+ break;
+
+ /* Did we invoke the startup call on that cpu ? */
+ if (cpustate >= state)
+ cpuhp_issue_call(cpu, state, teardown, false);
+ }
+}
+
+/*
+ * Returns a free for dynamic slot assignment of the Online state. The states
+ * are protected by the cpuhp_slot_states mutex and an empty slot is identified
+ * by having no name assigned.
+ */
+static int cpuhp_reserve_state(enum cpuhp_state state)
+{
+ enum cpuhp_state i;
+
+ mutex_lock(&cpuhp_state_mutex);
+ for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) {
+ if (cpuhp_ap_states[i].name)
+ continue;
+
+ cpuhp_ap_states[i].name = "Reserved";
+ mutex_unlock(&cpuhp_state_mutex);
+ return i;
+ }
+ mutex_unlock(&cpuhp_state_mutex);
+ WARN(1, "No more dynamic states available for CPU hotplug\n");
+ return -ENOSPC;
+}
+
/**
- * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
- * @cpu: cpu that just started
+ * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
+ * @state: The state to setup
+ * @invoke: If true, the startup function is invoked for cpus where
+ * cpu state >= @state
+ * @startup: startup callback function
+ * @teardown: teardown callback function
*
- * This function calls the cpu_chain notifiers with CPU_STARTING.
- * It must be called by the arch code on the new cpu, before the new cpu
- * enables interrupts and before the "boot" cpu returns from __cpu_up().
+ * Returns 0 if successful, otherwise a proper error code
*/
-void notify_cpu_starting(unsigned int cpu)
+int __cpuhp_setup_state(enum cpuhp_state state,
+ const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
{
- unsigned long val = CPU_STARTING;
+ int cpu, ret = 0;
+ int dyn_state = 0;

-#ifdef CONFIG_PM_SLEEP_SMP
- if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
- val = CPU_STARTING_FROZEN;
-#endif /* CONFIG_PM_SLEEP_SMP */
- cpu_notify(val, (void *)(long)cpu);
+ if (cpuhp_cb_check(state) || !name)
+ return -EINVAL;
+
+ get_online_cpus();
+
+ /* currently assignments for the ONLINE state are possible */
+ if (state == CPUHP_AP_ONLINE_DYN) {
+ dyn_state = 1;
+ ret = cpuhp_reserve_state(state);
+ if (ret < 0)
+ goto out;
+ state = ret;
+ }
+
+ cpuhp_store_callbacks(state, name, startup, teardown);
+
+ if (!invoke || !startup)
+ goto out;
+
+ /*
+ * Try to call the startup callback for each present cpu
+ * depending on the hotplug state of the cpu.
+ */
+ for_each_present_cpu(cpu) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int cpustate = st->state;
+
+ if (cpustate < state)
+ continue;
+
+ ret = cpuhp_issue_call(cpu, state, startup, true);
+ if (ret) {
+ cpuhp_rollback_install(cpu, state, teardown);
+ cpuhp_store_callbacks(state, NULL, NULL, NULL);
+ goto out;
+ }
+ }
+out:
+ put_online_cpus();
+ if (!ret && dyn_state)
+ return state;
+ return ret;
}
+EXPORT_SYMBOL(__cpuhp_setup_state);

-#endif /* CONFIG_SMP */
+/**
+ * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
+ * @state: The state to remove
+ * @invoke: If true, the teardown function is invoked for cpus where
+ * cpu state >= @state
+ *
+ * The teardown callback is currently not allowed to fail. Think
+ * about module removal!
+ */
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+{
+ int (*teardown)(unsigned int cpu) = cpuhp_get_teardown_cb(state);
+ int cpu;
+
+ BUG_ON(cpuhp_cb_check(state));
+
+ get_online_cpus();
+
+ if (!invoke || !teardown)
+ goto remove;
+
+ /*
+ * Call the teardown callback for each present cpu depending
+ * on the hotplug state of the cpu. This function is not
+ * allowed to fail currently!
+ */
+ for_each_present_cpu(cpu) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int cpustate = st->state;
+
+ if (cpustate >= state)
+ cpuhp_issue_call(cpu, state, teardown, false);
+ }
+remove:
+ cpuhp_store_callbacks(state, NULL, NULL, NULL);
+ put_online_cpus();
+}
+EXPORT_SYMBOL(__cpuhp_remove_state);
+
+#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
+static ssize_t show_cpuhp_state(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+ return sprintf(buf, "%d\n", st->state);
+}
+static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
+
+static ssize_t write_cpuhp_target(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+ struct cpuhp_step *sp;
+ int target, ret;
+
+ ret = kstrtoint(buf, 10, &target);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
+ if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
+ return -EINVAL;
+#else
+ if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
+ return -EINVAL;
+#endif
+
+ ret = lock_device_hotplug_sysfs();
+ if (ret)
+ return ret;
+
+ mutex_lock(&cpuhp_state_mutex);
+ sp = cpuhp_get_step(target);
+ ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
+ mutex_unlock(&cpuhp_state_mutex);
+ if (ret)
+ return ret;
+
+ if (st->state < target)
+ ret = do_cpu_up(dev->id, target);
+ else
+ ret = do_cpu_down(dev->id, target);
+
+ unlock_device_hotplug();
+ return ret ? ret : count;
+}
+
+static ssize_t show_cpuhp_target(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+ return sprintf(buf, "%d\n", st->target);
+}
+static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
+
+static struct attribute *cpuhp_cpu_attrs[] = {
+ &dev_attr_state.attr,
+ &dev_attr_target.attr,
+ NULL
+};
+
+static struct attribute_group cpuhp_cpu_attr_group = {
+ .attrs = cpuhp_cpu_attrs,
+ .name = "hotplug",
+ NULL
+};
+
+static ssize_t show_cpuhp_states(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ ssize_t cur, res = 0;
+ int i;
+
+ mutex_lock(&cpuhp_state_mutex);
+ for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
+ struct cpuhp_step *sp = cpuhp_get_step(i);
+
+ if (sp->name) {
+ cur = sprintf(buf, "%3d: %s\n", i, sp->name);
+ buf += cur;
+ res += cur;
+ }
+ }
+ mutex_unlock(&cpuhp_state_mutex);
+ return res;
+}
+static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
+
+static struct attribute *cpuhp_cpu_root_attrs[] = {
+ &dev_attr_states.attr,
+ NULL
+};
+
+static struct attribute_group cpuhp_cpu_root_attr_group = {
+ .attrs = cpuhp_cpu_root_attrs,
+ .name = "hotplug",
+ NULL
+};
+
+static int __init cpuhp_sysfs_init(void)
+{
+ int cpu, ret;
+
+ ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
+ &cpuhp_cpu_root_attr_group);
+ if (ret)
+ return ret;
+
+ for_each_possible_cpu(cpu) {
+ struct device *dev = get_cpu_device(cpu);
+
+ if (!dev)
+ continue;
+ ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+device_initcall(cpuhp_sysfs_init);
+#endif

/*
* cpu_bit_bitmap[] is a special, "compressed" data structure that
@@ -789,3 +1673,25 @@ void init_cpu_online(const struct cpumask *src)
{
cpumask_copy(&__cpu_online_mask, src);
}
+
+/*
+ * Activate the first processor.
+ */
+void __init boot_cpu_init(void)
+{
+ int cpu = smp_processor_id();
+
+ /* Mark the boot cpu "present", "online" etc for SMP and UP case */
+ set_cpu_online(cpu, true);
+ set_cpu_active(cpu, true);
+ set_cpu_present(cpu, true);
+ set_cpu_possible(cpu, true);
+}
+
+/*
+ * Must be called _AFTER_ setting up the per_cpu areas
+ */
+void __init boot_cpu_state_init(void)
+{
+ per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
+}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e41dd4131f7a..85b41341272e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2607,28 +2607,6 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
}

/*
- * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
- * function. We now remove it from the rcu_node tree's ->qsmaskinit
- * bit masks.
- */
-static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
-{
- unsigned long flags;
- unsigned long mask;
- struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
- struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
-
- if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
- return;
-
- /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
- mask = rdp->grpmask;
- raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
- rnp->qsmaskinitnext &= ~mask;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-/*
* The CPU has been completely removed, and some other CPU is reporting
* this fact from process context. Do the remainder of the cleanup,
* including orphaning the outgoing CPU's RCU callbacks, and also
@@ -4247,6 +4225,43 @@ static void rcu_prepare_cpu(int cpu)
rcu_init_percpu_data(cpu, rsp);
}

+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
+ * function. We now remove it from the rcu_node tree's ->qsmaskinit
+ * bit masks.
+ */
+static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
+{
+ unsigned long flags;
+ unsigned long mask;
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+ struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
+
+ if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
+ return;
+
+ /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
+ mask = rdp->grpmask;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
+ rnp->qsmaskinitnext &= ~mask;
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+void rcu_report_dead(unsigned int cpu)
+{
+ struct rcu_state *rsp;
+
+ /* QS for any half-done expedited RCU-sched GP. */
+ preempt_disable();
+ rcu_report_exp_rdp(&rcu_sched_state,
+ this_cpu_ptr(rcu_sched_state.rda), true);
+ preempt_enable();
+ for_each_rcu_flavor(rsp)
+ rcu_cleanup_dying_idle_cpu(cpu, rsp);
+}
+#endif
+
/*
* Handle CPU online/offline notification events.
*/
@@ -4278,17 +4293,6 @@ int rcu_cpu_notify(struct notifier_block *self,
for_each_rcu_flavor(rsp)
rcu_cleanup_dying_cpu(rsp);
break;
- case CPU_DYING_IDLE:
- /* QS for any half-done expedited RCU-sched GP. */
- preempt_disable();
- rcu_report_exp_rdp(&rcu_sched_state,
- this_cpu_ptr(rcu_sched_state.rda), true);
- preempt_enable();
-
- for_each_rcu_flavor(rsp) {
- rcu_cleanup_dying_idle_cpu(cpu, rsp);
- }
- break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
case CPU_UP_CANCELED:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9503d590e5ef..626646396ca0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5692,16 +5692,6 @@ static int sched_cpu_active(struct notifier_block *nfb,
set_cpu_rq_start_time();
return NOTIFY_OK;

- case CPU_ONLINE:
- /*
- * At this point a starting CPU has marked itself as online via
- * set_cpu_online(). But it might not yet have marked itself
- * as active, which is essential from here on.
- */
- set_cpu_active(cpu, true);
- stop_machine_unpark(cpu);
- return NOTIFY_OK;
-
case CPU_DOWN_FAILED:
set_cpu_active(cpu, true);
return NOTIFY_OK;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 544a7133cbd1..bd12c6c714ec 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
+#include <linux/cpuhotplug.h>
#include <linux/tick.h>
#include <linux/mm.h>
#include <linux/stackprotector.h>
@@ -193,8 +194,6 @@ exit_idle:
rcu_idle_exit();
}

-DEFINE_PER_CPU(bool, cpu_dead_idle);
-
/*
* Generic idle loop implementation
*
@@ -221,10 +220,7 @@ static void cpu_idle_loop(void)
rmb();

if (cpu_is_offline(smp_processor_id())) {
- rcu_cpu_notify(NULL, CPU_DYING_IDLE,
- (void *)(long)smp_processor_id());
- smp_mb(); /* all activity before dead. */
- this_cpu_write(cpu_dead_idle, true);
+ cpuhp_report_idle_dead();
arch_cpu_idle_dead();
}

@@ -291,5 +287,6 @@ void cpu_startup_entry(enum cpuhp_state state)
boot_init_stack_canary();
#endif
arch_cpu_idle_prepare();
+ cpuhp_online_idle(state);
cpu_idle_loop();
}
diff --git a/kernel/smp.c b/kernel/smp.c
index d903c02223af..822ffb1ada3f 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -569,6 +569,7 @@ void __init smp_init(void)
unsigned int cpu;

idle_threads_init();
+ cpuhp_threads_init();

/* FIXME: This should be done in userspace --RR */
for_each_present_cpu(cpu) {
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index d264f59bff56..13bc43d1fb22 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -226,7 +226,7 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp
kthread_unpark(tsk);
}

-void smpboot_unpark_threads(unsigned int cpu)
+int smpboot_unpark_threads(unsigned int cpu)
{
struct smp_hotplug_thread *cur;

@@ -235,6 +235,7 @@ void smpboot_unpark_threads(unsigned int cpu)
if (cpumask_test_cpu(cpu, cur->cpumask))
smpboot_unpark_thread(cur, cpu);
mutex_unlock(&smpboot_threads_lock);
+ return 0;
}

static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
@@ -245,7 +246,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
kthread_park(tsk);
}

-void smpboot_park_threads(unsigned int cpu)
+int smpboot_park_threads(unsigned int cpu)
{
struct smp_hotplug_thread *cur;

@@ -253,6 +254,7 @@ void smpboot_park_threads(unsigned int cpu)
list_for_each_entry_reverse(cur, &hotplug_threads, list)
smpboot_park_thread(cur, cpu);
mutex_unlock(&smpboot_threads_lock);
+ return 0;
}

static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
diff --git a/kernel/smpboot.h b/kernel/smpboot.h
index 72415a0eb955..485b81cfab34 100644
--- a/kernel/smpboot.h
+++ b/kernel/smpboot.h
@@ -14,7 +14,9 @@ static inline void idle_threads_init(void) { }
#endif

int smpboot_create_threads(unsigned int cpu);
-void smpboot_park_threads(unsigned int cpu);
-void smpboot_unpark_threads(unsigned int cpu);
+int smpboot_park_threads(unsigned int cpu);
+int smpboot_unpark_threads(unsigned int cpu);
+
+void __init cpuhp_threads_init(void);

#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8bfd1aca7a3d..f28f7fad452f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1442,6 +1442,19 @@ config DEBUG_BLOCK_EXT_DEVT

Say N if you are unsure.

+config CPU_HOTPLUG_STATE_CONTROL
+ bool "Enable CPU hotplug state control"
+ depends on DEBUG_KERNEL
+ depends on HOTPLUG_CPU
+ default n
+ help
+ Allows to write steps between "offline" and "online" to the CPUs
+ sysfs target file so states can be stepped granular. This is a debug
+ option for now as the hotplug machinery cannot be stopped and
+ restarted at arbitrary points yet.
+
+ Say N if your are unsure.
+
config NOTIFIER_ERROR_INJECTION
tristate "Notifier error injection"
depends on DEBUG_KERNEL