[PATCH v3 1/7] fs: Use a percpu_counter to track nr_dentry

From: Eric Dumazet
Date: Thu Dec 11 2008 - 17:40:36 EST


Adding a percpu_counter nr_dentry avoids cache line ping pongs
between cpus to maintain this metric, and dcache_lock is
no more needed to protect dentry_stat.nr_dentry

We centralize nr_dentry updates at the right place :
- increments in d_alloc()
- decrements in d_free()

d_alloc() can avoid taking dcache_lock if parent is NULL

("socketallocbench -n8" result : 27.5s to 25s)

Signed-off-by: Eric Dumazet <dada1@xxxxxxxxxxxxx>
---
fs/dcache.c | 49 +++++++++++++++++++++++++------------------
include/linux/fs.h | 2 +
kernel/sysctl.c | 2 -
3 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index fa1ba03..f463a81 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -61,12 +61,31 @@ static struct kmem_cache *dentry_cache __read_mostly;
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
static struct hlist_head *dentry_hashtable __read_mostly;
+static struct percpu_counter nr_dentry;

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
.age_limit = 45,
};

+/*
+ * Handle nr_dentry sysctl
+ */
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+int proc_nr_dentry(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
+ return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+#else
+int proc_nr_dentry(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+#endif
+
static void __d_free(struct dentry *dentry)
{
WARN_ON(!list_empty(&dentry->d_alias));
@@ -82,8 +101,7 @@ static void d_callback(struct rcu_head *head)
}

/*
- * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
- * inside dcache_lock.
+ * no dcache_lock, please.
*/
static void d_free(struct dentry *dentry)
{
@@ -94,6 +112,7 @@ static void d_free(struct dentry *dentry)
__d_free(dentry);
else
call_rcu(&dentry->d_u.d_rcu, d_callback);
+ percpu_counter_dec(&nr_dentry);
}

/*
@@ -172,7 +191,6 @@ static struct dentry *d_kill(struct dentry *dentry)
struct dentry *parent;

list_del(&dentry->d_u.d_child);
- dentry_stat.nr_dentry--; /* For d_free, below */
/*drops the locks, at that point nobody can reach this dentry */
dentry_iput(dentry);
if (IS_ROOT(dentry))
@@ -619,7 +637,6 @@ void shrink_dcache_sb(struct super_block * sb)
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
struct dentry *parent;
- unsigned detached = 0;

BUG_ON(!IS_ROOT(dentry));

@@ -678,7 +695,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
}

list_del(&dentry->d_u.d_child);
- detached++;

inode = dentry->d_inode;
if (inode) {
@@ -696,7 +712,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
* otherwise we ascend to the parent and move to the
* next sibling if there is one */
if (!parent)
- goto out;
+ return;

dentry = parent;

@@ -705,11 +721,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
dentry = list_entry(dentry->d_subdirs.next,
struct dentry, d_u.d_child);
}
-out:
- /* several dentries were freed, need to correct nr_dentry */
- spin_lock(&dcache_lock);
- dentry_stat.nr_dentry -= detached;
- spin_unlock(&dcache_lock);
}

/*
@@ -943,8 +954,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
dentry->d_flags = DCACHE_UNHASHED;
spin_lock_init(&dentry->d_lock);
dentry->d_inode = NULL;
- dentry->d_parent = NULL;
- dentry->d_sb = NULL;
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
dentry->d_mounted = 0;
@@ -959,16 +968,15 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
if (parent) {
dentry->d_parent = dget(parent);
dentry->d_sb = parent->d_sb;
+ spin_lock(&dcache_lock);
+ list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+ spin_unlock(&dcache_lock);
} else {
+ dentry->d_parent = NULL;
+ dentry->d_sb = NULL;
INIT_LIST_HEAD(&dentry->d_u.d_child);
}
-
- spin_lock(&dcache_lock);
- if (parent)
- list_add(&dentry->d_u.d_child, &parent->d_subdirs);
- dentry_stat.nr_dentry++;
- spin_unlock(&dcache_lock);
-
+ percpu_counter_inc(&nr_dentry);
return dentry;
}

@@ -2282,6 +2290,7 @@ static void __init dcache_init(void)
{
int loop;

+ percpu_counter_init(&nr_dentry, 0);
/*
* A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4a853ef..114cb65 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2217,6 +2217,8 @@ static inline void free_secdata(void *secdata)
struct ctl_table;
int proc_nr_files(struct ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
+int proc_nr_dentry(struct ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos);

int get_filesystem_list(char * buf);

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3d56fe7..777bee7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1246,7 +1246,7 @@ static struct ctl_table fs_table[] = {
.data = &dentry_stat,
.maxlen = 6*sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_nr_dentry,
},
{
.ctl_name = FS_OVERFLOWUID,
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/