[PATCH 6/10] i386 implementation of cpu bulk removal

From: Shaohua Li
Date: Mon May 08 2006 - 01:47:19 EST



i386-specific implementation of CPU bulk removal: add the config option and
make __cpu_disable()/__cpu_die() take a cpumask_t so that several CPUs can be
taken down in one operation. The first CPU in the mask acts as the master; it
synchronises with the other CPUs being removed and performs the shared cleanup
(sibling maps, fixup_irqs()) once they have all left the online map.
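
For illustration, a generic bulk-removal path (the entry point itself lives in
the earlier patches of this series, so the helper below is only a sketch with a
made-up name) might drive the two arch hooks roughly like this:

/*
 * Sketch only, not part of this patch: remove all online CPUs of one
 * NUMA node.  The orchestration that makes every victim CPU execute
 * __cpu_disable(remove_mask) on itself (e.g. a stop_machine based
 * takedown in the generic patches) is assumed, and take_down_node()
 * is a hypothetical name.
 */
static int take_down_node(int node)
{
	cpumask_t remove_mask = node_to_cpumask(node);

	cpus_and(remove_mask, remove_mask, cpu_online_map);
	if (cpus_empty(remove_mask))
		return 0;

	/*
	 * Each CPU in remove_mask calls __cpu_disable(remove_mask) on
	 * itself; first_cpu(remove_mask) becomes the master and does the
	 * shared cleanup (sibling maps, fixup_irqs()).
	 */

	/* The controlling CPU then waits for them all to reach CPU_DEAD. */
	__cpu_die(remove_mask);
	return 0;
}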

Signed-off-by: Shaohua Li <shaohua.li@xxxxxxxxx>
---

linux-2.6.17-rc3-root/arch/i386/Kconfig | 11 ++
linux-2.6.17-rc3-root/arch/i386/kernel/smpboot.c | 118 ++++++++++++++++-------
2 files changed, 97 insertions(+), 32 deletions(-)

diff -puN arch/i386/Kconfig~i386-bulk-cpu-hotplug arch/i386/Kconfig
--- linux-2.6.17-rc3/arch/i386/Kconfig~i386-bulk-cpu-hotplug 2006-05-07 07:46:01.000000000 +0800
+++ linux-2.6.17-rc3-root/arch/i386/Kconfig 2006-05-07 07:46:01.000000000 +0800
@@ -763,6 +763,17 @@ config HOTPLUG_CPU

Say N.

+config BULK_CPU_REMOVE
+ bool "Support for bulk removal of CPUs (EXPERIMENTAL)"
+ depends on HOTPLUG_CPU && EXPERIMENTAL
+ help
+ Say Y if you need the ability to remove more than one CPU at a
+ time. The current one-CPU-at-a-time mechanism can be inefficient
+ when a whole NUMA node is removed: interrupts, timers and
+ processes may end up bound to a CPU that is itself about to be
+ removed right after the current one goes offline. Bulk removal
+ avoids this by taking the whole set of CPUs down together.
+
endmenu


diff -puN arch/i386/kernel/smpboot.c~i386-bulk-cpu-hotplug arch/i386/kernel/smpboot.c
--- linux-2.6.17-rc3/arch/i386/kernel/smpboot.c~i386-bulk-cpu-hotplug 2006-05-07 07:46:01.000000000 +0800
+++ linux-2.6.17-rc3-root/arch/i386/kernel/smpboot.c 2006-05-07 07:46:01.000000000 +0800
@@ -1317,35 +1317,43 @@ void __devinit smp_prepare_boot_cpu(void

#ifdef CONFIG_HOTPLUG_CPU
static void
-remove_siblinginfo(int cpu)
+remove_siblinginfo(cpumask_t remove_mask)
{
int sibling;
struct cpuinfo_x86 *c = cpu_data;
+ int cpu;

- for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
- cpu_clear(cpu, cpu_core_map[sibling]);
- /*
- * last thread sibling in this cpu core going down
- */
- if (cpus_weight(cpu_sibling_map[cpu]) == 1)
- c[sibling].booted_cores--;
- }
+ for_each_cpu_mask(cpu, remove_mask) {
+ for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+ cpu_clear(cpu, cpu_core_map[sibling]);
+ /*
+ * last thread sibling in this cpu core going down
+ */
+ if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+ c[sibling].booted_cores--;
+ }

- for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
- cpu_clear(cpu, cpu_sibling_map[sibling]);
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
- phys_proc_id[cpu] = BAD_APICID;
- cpu_core_id[cpu] = BAD_APICID;
- cpu_clear(cpu, cpu_sibling_setup_map);
+ for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
+ cpu_clear(cpu, cpu_sibling_map[sibling]);
+ cpus_clear(cpu_sibling_map[cpu]);
+ cpus_clear(cpu_core_map[cpu]);
+ phys_proc_id[cpu] = BAD_APICID;
+ cpu_core_id[cpu] = BAD_APICID;
+ cpu_clear(cpu, cpu_sibling_setup_map);
+ }
}

+static cpumask_t cpu_dead_mask = CPU_MASK_NONE;
+static cpumask_t cpu_dead_error_mask = CPU_MASK_NONE;
+static atomic_t disable_cpu_start = ATOMIC_INIT(0); /* 1:start, 2:error */
int __cpu_disable(cpumask_t remove_mask)
{
- cpumask_t map = cpu_online_map;
int cpu = smp_processor_id();
+ int master = 0;

- BUG_ON(cpus_weight(remove_mask) != 1);
+ /* are we the master cpu? */
+ if (first_cpu(remove_mask) == cpu)
+ master = 1;
/*
* Perhaps use cpufreq to drop frequency, but that could go
* into generic code.
@@ -1354,21 +1362,58 @@ int __cpu_disable(cpumask_t remove_mask)
* interrupts only being able to be serviced by the BSP.
* Especially so if we're not using an IOAPIC -zwane
*/
- if (cpu == 0)
- return -EBUSY;
+ if (master) {
+ if (cpu_isset(0, remove_mask)) {
+ /* report the error */
+ atomic_set(&disable_cpu_start, 2);
+ smp_wmb(); /* set error first */
+ cpu_set(cpu, cpu_dead_error_mask);
+ while (!cpus_equal(cpu_dead_error_mask, remove_mask))
+ cpu_relax();
+ atomic_set(&disable_cpu_start, 0);
+ cpus_clear(cpu_dead_error_mask);
+ return -EBUSY;
+ }
+ smp_mb();
+ atomic_set(&disable_cpu_start, 1);
+ } else {
+ while (atomic_read(&disable_cpu_start) == 0)
+ cpu_relax();
+ if (atomic_read(&disable_cpu_start) == 2) {
+ cpu_set(cpu, cpu_dead_error_mask);
+ return -EBUSY;
+ }
+ }

clear_local_APIC();
/* Allow any queued timer interrupts to get serviced */
local_irq_enable();
mdelay(1);
local_irq_disable();
-
- remove_siblinginfo(cpu);
-
- cpu_clear(cpu, map);
- fixup_irqs(map);
/* It's now safe to remove this processor from the online map */
cpu_clear(cpu, cpu_online_map);
+
+ /*
+ * Everything the master CPU cannot do remotely on our behalf must be
+ * done by now; the remaining cleanup needs no access to this CPU.
+ */
+ smp_mb();
+ cpu_set(cpu, cpu_dead_mask);
+ if (!master)
+ return 0;
+ /* master does cleanup. */
+ while (!cpus_equal(cpu_dead_mask, remove_mask))
+ cpu_relax();
+
+ remove_siblinginfo(remove_mask);
+
+ cpus_clear(cpu_dead_mask);
+ atomic_set(&disable_cpu_start, 0);
+ /*
+ * Note: this relies on fixup_irqs() not touching other CPUs' local
+ * registers; otherwise each CPU would have to call it itself.
+ */
+ fixup_irqs(cpu_online_map);
return 0;
}

@@ -1376,19 +1421,28 @@ void __cpu_die(cpumask_t remove_mask)
{
/* We don't do anything here: idle task is faking death itself. */
unsigned int i;
- int cpu = first_cpu(remove_mask);
+ int cpu;

for (i = 0; i < 10; i++) {
/* They ack this in play_dead by setting CPU_DEAD */
- if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
- printk ("CPU %d is now offline\n", cpu);
- if (1 == num_online_cpus())
- alternatives_smp_switch(0);
- return;
+ for_each_cpu_mask(cpu, remove_mask) {
+ if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+ printk ("CPU %d is now offline\n", cpu);
+ cpu_clear(cpu, remove_mask);
+ }
}
+ if (cpus_empty(remove_mask))
+ break;
+
msleep(100);
}
- printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+
+ if (!cpus_empty(remove_mask)) {
+ for_each_cpu_mask(cpu, remove_mask)
+ printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+ }
+ if (1 == num_online_cpus())
+ alternatives_smp_switch(0);
}
#else /* ... !CONFIG_HOTPLUG_CPU */
int __cpu_disable(cpumask_t remove_mask)
_