[PATCH] percpu_counter: FBC_BATCH should be a variable

From: Eric Dumazet
Date: Mon Dec 08 2008 - 03:11:07 EST


Andrew Morton a écrit :
> On Sun, 07 Dec 2008 19:30:10 +0100 Eric Dumazet <dada1@xxxxxxxxxxxxx> wrote:
>
>>> Do
>>>
>>> $EDITOR $(grep -l hotcpu_notifier */*.c)
>>>
>>> and you'll see lots of code gets it right, and lots of code gets it wrong.
>> I see nothing interesting, I must be blind.
>>
>> lib/percpu_counter.c: In function 'percpu_counter_startup':
>> lib/percpu_counter.c:158: error: 'percpu_counter_hotcpu_callback' undeclared (first use in this function)
>> lib/percpu_counter.c:158: error: (Each undeclared identifier is reported only once
>> lib/percpu_counter.c:158: error: for each function it appears in.)
>> make[1]: *** [lib/percpu_counter.o] Error 1
>
> Perhaps you still had percpu_counter_hotcpu_callback inside #ifdef.
>
> That a look at kernel/workqueue.c, fs/buffer.c. No #ifdef CONFIG_HOTPLUG_CPU
> needed at all.

We still need some #ifdef or we also must also delete them around "struct list_head list"
in include/linux/percpu_counter.h and grow struct percpu_counter.

lib/percpu_counter.c: In function 'percpu_counter_hotcpu_callback':
lib/percpu_counter.c:137: error: 'struct percpu_counter' has no member named 'list'
lib/percpu_counter.c:137: warning: type defaults to 'int' in declaration of '__mptr'
lib/percpu_counter.c:137: warning: initialization from incompatible pointer type
...


Thank you Andrew


[PATCH] percpu_counter: FBC_BATCH should be a variable

For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS

Considering more and more distros are using high NR_CPUS values,
it makes sense to use a more sensible value for FBC_BATCH, and
get rid of NR_CPUS.

A sensible value is 2*num_online_cpus(), with a minimum value of 32
(This minimum value helps branch prediction in __percpu_counter_add())

We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.

We rename FBC_BATCH to percpu_counter_batch since its not a constant
anymore.

Signed-off-by: Eric Dumazet <dada1@xxxxxxxxxxxxx>
Acked-by: David S. Miller <davem@xxxxxxxxxxxxx>
Acked-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
fs/ext4/ext4.h | 6 +++---
fs/ext4/inode.c | 2 +-
include/linux/percpu_counter.h | 8 ++------
lib/percpu_counter.c | 18 ++++++++++++++----
4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0537c8..6c46c64 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1225,11 +1225,11 @@ do { \
} while (0)

#ifdef CONFIG_SMP
-/* Each CPU can accumulate FBC_BATCH blocks in their local
+/* Each CPU can accumulate percpu_counter_batch blocks in their local
* counters. So we need to make sure we have free blocks more
- * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
+ * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
*/
-#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
+#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
#else
#define EXT4_FREEBLOCKS_WATERMARK 0
#endif
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be21a5a..db661e7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2497,7 +2497,7 @@ static int ext4_nonda_switch(struct super_block *sb)
/*
* switch to non delalloc mode if we are running low
* on free block. The free block accounting via percpu
- * counters can get slightly wrong with FBC_BATCH getting
+ * counters can get slightly wrong with percpu_counter_batch getting
* accumulated on each CPU without updating global counters
* Delalloc need an accurate free block accounting. So switch
* to non delalloc when we are near to error range.
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccd..99de7a3 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -24,11 +24,7 @@ struct percpu_counter {
s32 *counters;
};

-#if NR_CPUS >= 16
-#define FBC_BATCH (NR_CPUS*2)
-#else
-#define FBC_BATCH (NR_CPUS*4)
-#endif
+extern int percpu_counter_batch;

int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
@@ -39,7 +35,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc);

static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
{
- __percpu_counter_add(fbc, amount, FBC_BATCH);
+ __percpu_counter_add(fbc, amount, percpu_counter_batch);
}

static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index a866389..e73eb5b 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -9,10 +9,8 @@
#include <linux/cpu.h>
#include <linux/module.h>

-#ifdef CONFIG_HOTPLUG_CPU
static LIST_HEAD(percpu_counters);
static DEFINE_MUTEX(percpu_counters_lock);
-#endif

void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
{
@@ -114,13 +112,24 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
}
EXPORT_SYMBOL(percpu_counter_destroy);

-#ifdef CONFIG_HOTPLUG_CPU
+int percpu_counter_batch __read_mostly = 32;
+EXPORT_SYMBOL(percpu_counter_batch);
+
+static void compute_batch_value(void)
+{
+ int nr = num_online_cpus();
+
+ percpu_counter_batch = max(32, nr*2);
+}
+
static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
unsigned long action, void *hcpu)
{
+#ifdef CONFIG_HOTPLUG_CPU
unsigned int cpu;
struct percpu_counter *fbc;

+ compute_batch_value();
if (action != CPU_DEAD)
return NOTIFY_OK;

@@ -137,13 +146,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
spin_unlock_irqrestore(&fbc->lock, flags);
}
mutex_unlock(&percpu_counters_lock);
+#endif
return NOTIFY_OK;
}

static int __init percpu_counter_startup(void)
{
+ compute_batch_value();
hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
return 0;
}
module_init(percpu_counter_startup);
-#endif

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/