diff -urpN -X /home/fletch/.diff.exclude 01-ingo/arch/i386/Kconfig 02-pools/arch/i386/Kconfig
--- 01-ingo/arch/i386/Kconfig	Fri Jan 17 09:18:19 2003
+++ 02-pools/arch/i386/Kconfig	Sat Jan 18 11:59:54 2003
@@ -476,6 +476,11 @@ config NUMA
 	bool "Numa Memory Allocation Support"
 	depends on X86_NUMAQ
 
+config SCHED_NUMA
+	bool "NUMA aware scheduler"
+	depends on NUMA
+	default y
+
 config DISCONTIGMEM
 	bool
 	depends on NUMA
diff -urpN -X /home/fletch/.diff.exclude 01-ingo/arch/ia64/Kconfig 02-pools/arch/ia64/Kconfig
--- 01-ingo/arch/ia64/Kconfig	Thu Jan 9 19:15:56 2003
+++ 02-pools/arch/ia64/Kconfig	Sat Jan 18 12:00:08 2003
@@ -246,6 +246,11 @@ config DISCONTIGMEM
 	  or have huge holes in the physical address space for other reasons.
 	  See <file:Documentation/vm/numa> for more.
 
+config SCHED_NUMA
+	bool "NUMA aware scheduler"
+	depends on NUMA
+	default y
+
 config VIRTUAL_MEM_MAP
 	bool "Enable Virtual Mem Map"
 	depends on !NUMA
diff -urpN -X /home/fletch/.diff.exclude 01-ingo/include/linux/sched.h 02-pools/include/linux/sched.h
--- 01-ingo/include/linux/sched.h	Fri Jan 17 09:18:32 2003
+++ 02-pools/include/linux/sched.h	Sat Jan 18 12:21:09 2003
@@ -447,12 +447,12 @@ extern void set_cpus_allowed(task_t *p,
 # define set_cpus_allowed(p, new_mask) do { } while (0)
 #endif
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SCHED_NUMA
 extern void sched_balance_exec(void);
-extern void node_nr_running_init(void);
+extern void pool_nr_running_init(void);
 #else
 #define sched_balance_exec() {}
-#define node_nr_running_init() {}
+#define pool_nr_running_init() {}
 #endif
 
 extern void set_user_nice(task_t *p, long nice);
diff -urpN -X /home/fletch/.diff.exclude 01-ingo/include/linux/sched_topo_ht.h 02-pools/include/linux/sched_topo_ht.h
--- 01-ingo/include/linux/sched_topo_ht.h	Wed Dec 31 16:00:00 1969
+++ 02-pools/include/linux/sched_topo_ht.h	Sat Jan 18 12:20:00 2003
@@ -0,0 +1,17 @@
+#define CONFIG_SCHED_POOLS 1	/* should be a real config option */
+
+/*
+ * The following is a temporary hack, for which I make no apologies - mbligh
+ * Assumes CPUs are paired together as siblings (0,1) (2,3) (4,5) ... etc.
+ * We should probably do this in an arch topo file and use apicids.
+ */
+
+#define MAX_NUMPOOLS NR_CPUS
+#define numpools (num_online_cpus() / 2)
+
+#define pool_to_cpu_mask(pool)	( (1UL << ((pool)*2)) | (1UL << ((pool)*2+1)) )
+#define cpu_to_pool(cpu)	((cpu) / 2)
+#define cpu_to_pool_mask(cpu)	(pool_to_cpu_mask(cpu_to_pool(cpu)))
+
+#define IDLE_REBALANCE_RATIO 2
+#define BUSY_REBALANCE_RATIO 2
diff -urpN -X /home/fletch/.diff.exclude 01-ingo/include/linux/sched_topo_numa.h 02-pools/include/linux/sched_topo_numa.h
--- 01-ingo/include/linux/sched_topo_numa.h	Wed Dec 31 16:00:00 1969
+++ 02-pools/include/linux/sched_topo_numa.h	Sat Jan 18 12:20:05 2003
@@ -0,0 +1,11 @@
+#define CONFIG_SCHED_POOLS 1	/* should be a real config option */
+
+#define MAX_NUMPOOLS MAX_NUMNODES
+#define numpools numnodes
+
+#define pool_to_cpu_mask __node_to_cpu_mask
+#define cpu_to_pool __cpu_to_node
+#define cpu_to_pool_mask(cpu) (__node_to_cpu_mask(__cpu_to_node(cpu)))
+
+#define IDLE_REBALANCE_RATIO 10
+#define BUSY_REBALANCE_RATIO 5
diff -urpN -X /home/fletch/.diff.exclude 01-ingo/include/linux/sched_topology.h 02-pools/include/linux/sched_topology.h
--- 01-ingo/include/linux/sched_topology.h	Wed Dec 31 16:00:00 1969
+++ 02-pools/include/linux/sched_topology.h	Sat Jan 18 11:59:36 2003
@@ -0,0 +1,14 @@
+#ifndef _LINUX_SCHED_TOPOLOGY_H
+#define _LINUX_SCHED_TOPOLOGY_H
+
+#ifdef CONFIG_SCHED_TOPO_ARCH
+#include
+#elif CONFIG_SCHED_NUMA
+#include <linux/sched_topo_numa.h>
+#elif CONFIG_SCHED_TOPO_HT
+#include <linux/sched_topo_ht.h>
+#else
+#include
+#endif
+
+#endif /* _LINUX_SCHED_TOPOLOGY_H */
diff -urpN -X /home/fletch/.diff.exclude 01-ingo/init/main.c 02-pools/init/main.c
--- 01-ingo/init/main.c	Fri Jan 17 09:18:32 2003
+++ 02-pools/init/main.c	Sat Jan 18 11:48:10 2003
@@ -495,7 +495,7 @@ static void do_pre_smp_initcalls(void)
 	migration_init();
 #endif
 
-	node_nr_running_init();
+	pool_nr_running_init();
 	spawn_ksoftirqd();
 }
diff -urpN -X /home/fletch/.diff.exclude 01-ingo/kernel/sched.c 02-pools/kernel/sched.c
--- 01-ingo/kernel/sched.c	Sat Jan 18 10:58:57 2003
+++ 02-pools/kernel/sched.c	Sat Jan 18 11:49:00 2003
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include <linux/sched_topology.h>
 
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
@@ -67,7 +68,7 @@
 #define INTERACTIVE_DELTA	2
 #define MAX_SLEEP_AVG		(2*HZ)
 #define STARVATION_LIMIT	(2*HZ)
-#define NODE_THRESHOLD		125
+#define POOL_THRESHOLD		125
 
 /*
  * If a task is 'interactive' then we reinsert it in the active
@@ -154,9 +155,9 @@ struct runqueue {
 	task_t *curr, *idle;
 	prio_array_t *active, *expired, arrays[2];
 	int prev_cpu_load[NR_CPUS];
-#ifdef CONFIG_NUMA
-	atomic_t *node_nr_running;
-	int prev_node_load[MAX_NUMNODES];
+#ifdef CONFIG_SCHED_POOLS
+	atomic_t *pool_nr_running;
+	int prev_pool_load[MAX_NUMPOOLS];
 #endif
 	task_t *migration_thread;
 	struct list_head migration_queue;
@@ -181,47 +182,47 @@ static struct runqueue runqueues[NR_CPUS
 # define task_running(rq, p)		((rq)->curr == (p))
 #endif
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SCHED_POOLS
 
 /*
  * Keep track of running tasks.
  */
 
-static atomic_t node_nr_running[MAX_NUMNODES] ____cacheline_maxaligned_in_smp =
-	{[0 ...MAX_NUMNODES-1] = ATOMIC_INIT(0)};
+static atomic_t pool_nr_running[MAX_NUMPOOLS] ____cacheline_maxaligned_in_smp =
+	{[0 ...MAX_NUMPOOLS-1] = ATOMIC_INIT(0)};
 
 static inline void nr_running_init(struct runqueue *rq)
 {
-	rq->node_nr_running = &node_nr_running[0];
+	rq->pool_nr_running = &pool_nr_running[0];
 }
 
 static inline void nr_running_inc(runqueue_t *rq)
 {
-	atomic_inc(rq->node_nr_running);
+	atomic_inc(rq->pool_nr_running);
 	rq->nr_running++;
 }
 
 static inline void nr_running_dec(runqueue_t *rq)
 {
-	atomic_dec(rq->node_nr_running);
+	atomic_dec(rq->pool_nr_running);
 	rq->nr_running--;
 }
 
-__init void node_nr_running_init(void)
+__init void pool_nr_running_init(void)
 {
 	int i;
 
 	for (i = 0; i < NR_CPUS; i++)
-		cpu_rq(i)->node_nr_running = &node_nr_running[__cpu_to_node(i)];
+		cpu_rq(i)->pool_nr_running = &pool_nr_running[cpu_to_pool(i)];
 }
 
-#else /* !CONFIG_NUMA */
+#else /* !CONFIG_SCHED_POOLS */
 
 # define nr_running_init(rq)	do { } while (0)
 # define nr_running_inc(rq)	do { (rq)->nr_running++; } while (0)
 # define nr_running_dec(rq)	do { (rq)->nr_running--; } while (0)
 
-#endif /* CONFIG_NUMA */
+#endif /* CONFIG_SCHED_POOLS */
 
 /*
  * task_rq_lock - lock the runqueue a given task resides on and disable
@@ -670,7 +671,7 @@ static inline void double_rq_unlock(runq
 	spin_unlock(&rq2->lock);
 }
 
-#if CONFIG_NUMA
+#if CONFIG_SCHED_POOLS
 /*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
@@ -697,7 +698,7 @@ static void sched_migrate_task(task_t *p
  */
 static int sched_best_cpu(struct task_struct *p)
 {
-	int i, minload, load, best_cpu, node = 0;
+	int i, minload, load, best_cpu, pool = 0;
 	unsigned long cpumask;
 
 	best_cpu = task_cpu(p);
@@ -705,16 +706,16 @@ static int sched_best_cpu(struct task_st
 		return best_cpu;
 
 	minload = 10000000;
-	for (i = 0; i < numnodes; i++) {
-		load = atomic_read(&node_nr_running[i]);
+	for (i = 0; i < numpools; i++) {
+		load = atomic_read(&pool_nr_running[i]);
 		if (load < minload) {
 			minload = load;
-			node = i;
+			pool = i;
 		}
 	}
 
 	minload = 10000000;
-	cpumask = __node_to_cpu_mask(node);
+	cpumask = pool_to_cpu_mask(pool);
 	for (i = 0; i < NR_CPUS; ++i) {
 		if (!(cpumask & (1UL << i)))
 			continue;
@@ -730,7 +731,7 @@ void sched_balance_exec(void)
 {
 	int new_cpu;
 
-	if (numnodes > 1) {
+	if (numpools > 1) {
 		new_cpu = sched_best_cpu(current);
 		if (new_cpu != smp_processor_id())
 			sched_migrate_task(current, new_cpu);
@@ -738,33 +739,33 @@ void sched_balance_exec(void)
 }
 
 /*
- * Find the busiest node. All previous node loads contribute with a
+ * Find the busiest pool. All previous pool loads contribute with a
  * geometrically deccaying weight to the load measure:
- *   load_{t} = load_{t-1}/2 + nr_node_running_{t}
+ *   load_{t} = load_{t-1}/2 + nr_pool_running_{t}
  * This way sudden load peaks are flattened out a bit.
  */
-static int find_busiest_node(int this_node)
+static int find_busiest_pool(int this_pool)
 {
-	int i, node = -1, load, this_load, maxload;
+	int i, pool = -1, load, this_load, maxload;
 
-	this_load = maxload = (this_rq()->prev_node_load[this_node] >> 1)
-		+ atomic_read(&node_nr_running[this_node]);
-	this_rq()->prev_node_load[this_node] = this_load;
-	for (i = 0; i < numnodes; i++) {
-		if (i == this_node)
+	this_load = maxload = (this_rq()->prev_pool_load[this_pool] >> 1)
+		+ atomic_read(&pool_nr_running[this_pool]);
+	this_rq()->prev_pool_load[this_pool] = this_load;
+	for (i = 0; i < numpools; i++) {
+		if (i == this_pool)
 			continue;
-		load = (this_rq()->prev_node_load[i] >> 1)
-			+ atomic_read(&node_nr_running[i]);
-		this_rq()->prev_node_load[i] = load;
-		if (load > maxload && (100*load > NODE_THRESHOLD*this_load)) {
+		load = (this_rq()->prev_pool_load[i] >> 1)
+			+ atomic_read(&pool_nr_running[i]);
+		this_rq()->prev_pool_load[i] = load;
+		if (load > maxload && (100*load > POOL_THRESHOLD*this_load)) {
 			maxload = load;
-			node = i;
+			pool = i;
 		}
 	}
-	return node;
+	return pool;
 }
 
-#endif /* CONFIG_NUMA */
+#endif /* CONFIG_SCHED_POOLS */
 
 #if CONFIG_SMP
 
@@ -983,22 +984,20 @@ out:
  *
  * busy-rebalance every 200 msecs. idle-rebalance every 1 msec. (or on
  * systems with HZ=100, every 10 msecs.)
- *
- * On NUMA, do a node-rebalance every 400 msecs.
  */
 #define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)
 #define BUSY_REBALANCE_TICK (HZ/5 ?: 1)
-#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 2)
-#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2)
+#define IDLE_POOL_REBALANCE_TICK (IDLE_REBALANCE_TICK * IDLE_REBALANCE_RATIO)
+#define BUSY_POOL_REBALANCE_TICK (BUSY_REBALANCE_TICK * BUSY_REBALANCE_RATIO)
 
-#if CONFIG_NUMA
-static void balance_node(runqueue_t *this_rq, int idle, int this_cpu)
+#if CONFIG_SCHED_POOLS
+static void balance_pool(runqueue_t *this_rq, int idle, int this_cpu)
 {
-	int node = find_busiest_node(__cpu_to_node(this_cpu));
+	int pool = find_busiest_pool(cpu_to_pool(this_cpu));
 	unsigned long cpumask, this_cpumask = 1UL << this_cpu;
 
-	if (node >= 0) {
-		cpumask = __node_to_cpu_mask(node) | this_cpumask;
+	if (pool >= 0) {
+		cpumask = pool_to_cpu_mask(pool) | this_cpumask;
 		spin_lock(&this_rq->lock);
 		load_balance(this_rq, idle, cpumask);
 		spin_unlock(&this_rq->lock);
@@ -1008,38 +1007,38 @@ static void balance_node(runqueue_t *thi
 
 static void rebalance_tick(runqueue_t *this_rq, int idle)
 {
-#if CONFIG_NUMA
+#if CONFIG_SCHED_POOLS
 	int this_cpu = smp_processor_id();
 #endif
 	unsigned long j = jiffies;
 
 	/*
-	 * First do inter-node rebalancing, then intra-node rebalancing,
-	 * if both events happen in the same tick. The inter-node
+	 * First do inter-pool rebalancing, then intra-pool rebalancing,
+	 * if both events happen in the same tick. The inter-pool
 	 * rebalancing does not necessarily have to create a perfect
-	 * balance within the node, since we load-balance the most loaded
-	 * node with the current CPU. (ie. other CPUs in the local node
+	 * balance within the pool, since we load-balance the most loaded
+	 * pool with the current CPU. (ie. other CPUs in the local pool
 	 * are not balanced.)
 	 */
 	if (idle) {
-#if CONFIG_NUMA
-		if (!(j % IDLE_NODE_REBALANCE_TICK))
-			balance_node(this_rq, idle, this_cpu);
+#if CONFIG_SCHED_POOLS
+		if (!(j % IDLE_POOL_REBALANCE_TICK))
+			balance_pool(this_rq, idle, this_cpu);
 #endif
 		if (!(j % IDLE_REBALANCE_TICK)) {
 			spin_lock(&this_rq->lock);
-			load_balance(this_rq, 0, __cpu_to_node_mask(this_cpu));
+			load_balance(this_rq, 0, cpu_to_pool_mask(this_cpu));
 			spin_unlock(&this_rq->lock);
 		}
 		return;
 	}
-#if CONFIG_NUMA
-	if (!(j % BUSY_NODE_REBALANCE_TICK))
-		balance_node(this_rq, idle, this_cpu);
+#if CONFIG_SCHED_POOLS
+	if (!(j % BUSY_POOL_REBALANCE_TICK))
+		balance_pool(this_rq, idle, this_cpu);
 #endif
 	if (!(j % BUSY_REBALANCE_TICK)) {
 		spin_lock(&this_rq->lock);
-		load_balance(this_rq, idle, __cpu_to_node_mask(this_cpu));
+		load_balance(this_rq, idle, cpu_to_pool_mask(this_cpu));
 		spin_unlock(&this_rq->lock);
 	}
 }
@@ -1208,7 +1207,7 @@ need_resched:
 pick_next_task:
 	if (unlikely(!rq->nr_running)) {
 #if CONFIG_SMP
-		load_balance(rq, 1, __cpu_to_node_mask(smp_processor_id()));
+		load_balance(rq, 1, cpu_to_pool_mask(smp_processor_id()));
 		if (rq->nr_running)
 			goto pick_next_task;
 #endif
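
A few standalone sketches follow for review purposes; they are not part of the patch. First, the sibling-pair mapping that sched_topo_ht.h above hard-codes: pool 0 covers CPUs 0 and 1, pool 1 covers CPUs 2 and 3, and so on. The little userspace program below mirrors those mapping macros (using the bitwise-OR form of pool_to_cpu_mask) so the mapping can be eyeballed; NR_CPUS and the online-CPU count here are hard-coded stand-ins, not the kernel's values.

/*
 * Standalone illustration (not kernel code) of the sibling-pair mapping
 * assumed by sched_topo_ht.h: CPUs (0,1) form pool 0, (2,3) pool 1, etc.
 * NR_CPUS and num_online_cpus() are hard-coded stand-ins here.
 */
#include <stdio.h>

#define NR_CPUS			8	/* stand-in for the kernel constant */
#define num_online_cpus()	8	/* stand-in: pretend all 8 CPUs are online */

#define MAX_NUMPOOLS	NR_CPUS
#define numpools	(num_online_cpus() / 2)

#define pool_to_cpu_mask(pool)	((1UL << ((pool) * 2)) | (1UL << ((pool) * 2 + 1)))
#define cpu_to_pool(cpu)	((cpu) / 2)
#define cpu_to_pool_mask(cpu)	(pool_to_cpu_mask(cpu_to_pool(cpu)))

int main(void)
{
	int cpu;

	printf("numpools = %d\n", numpools);
	for (cpu = 0; cpu < num_online_cpus(); cpu++)
		printf("cpu %d -> pool %d, pool mask 0x%02lx\n",
		       cpu, cpu_to_pool(cpu), cpu_to_pool_mask(cpu));
	return 0;
}

With the stand-in values this prints pool 0 / mask 0x03 for CPUs 0-1, pool 1 / mask 0x0c for CPUs 2-3, and so on, which is the pairing the header's comment describes.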
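Second, the decaying load measure that find_busiest_pool() keeps per pool, load_t = load_{t-1}/2 + nr_running_t, together with the POOL_THRESHOLD test requiring a remote pool to exceed 125% of the local load before tasks are pulled. The sketch below feeds a made-up spike of runnable tasks through that formula to show the peak flattening out over a few ticks; the sample counts and the simplified comparison against a fixed local load are assumptions for illustration only.

/*
 * Standalone sketch (not kernel code) of the decaying load measure used by
 * find_busiest_pool():  load_t = load_{t-1}/2 + nr_running_t.
 * The nr_running samples below are made up to show a spike being flattened.
 */
#include <stdio.h>

#define POOL_THRESHOLD 125	/* remote pool must exceed 125% of local load */

int main(void)
{
	int nr_running[] = { 0, 8, 0, 0, 0, 0 };	/* hypothetical per-tick samples */
	int this_load = 2;				/* assumed local pool load */
	int prev_load = 0;
	int t;

	for (t = 0; t < 6; t++) {
		/* load_t = load_{t-1}/2 + nr_running_t */
		int load = (prev_load >> 1) + nr_running[t];

		printf("tick %d: remote load = %2d -> pull? %s\n", t, load,
		       (load > this_load &&
			100 * load > POOL_THRESHOLD * this_load) ? "yes" : "no");
		prev_load = load;
	}
	return 0;
}

The spike of 8 runnable tasks keeps the remote pool above the threshold for two ticks and then decays below it, which is the "flattening" the comment refers to.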
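Finally, the rebalance intervals. The patch replaces the hard-coded *2 node-rebalance multiplier with per-topology IDLE/BUSY_REBALANCE_RATIO values (2/2 for HT, 10/5 for NUMA). The arithmetic sketch below simply evaluates the patch's tick definitions at an assumed HZ=1000 to show the resulting intervals (it relies on the same GNU ?: extension the kernel uses).

/*
 * Quick arithmetic sketch (not kernel code) of the rebalance intervals the
 * per-topology ratios produce, assuming HZ=1000 and gcc's a ?: b extension.
 */
#include <stdio.h>

#define HZ 1000
#define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)
#define BUSY_REBALANCE_TICK (HZ/5 ?: 1)

static void show(const char *topo, int idle_ratio, int busy_ratio)
{
	printf("%s: idle pool rebalance every %d ticks, busy every %d ticks\n",
	       topo, IDLE_REBALANCE_TICK * idle_ratio,
	       BUSY_REBALANCE_TICK * busy_ratio);
}

int main(void)
{
	show("HT   (ratios 2/2) ", 2, 2);		/* from sched_topo_ht.h   */
	show("NUMA (ratios 10/5)", 10, 5);	/* from sched_topo_numa.h */
	return 0;
}

At HZ=1000 the HT ratios reproduce the old 400 msec busy-rebalance interval, while NUMA pools are rebalanced less aggressively (every 10 msec when idle, once a second when busy).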