[RFC] dcache scalability patch (2.4.17)

From: Maneesh Soni (maneesh@in.ibm.com)
Date: Fri Jul 12 2002 - 09:09:35 EST


Here is the dcache scalability patch (cleaned up) as disscussed in
the previous post to lkml by Dipankar. The patch uses RCU for doing fast
dcache lookup. It also does lazy updates to lru list of dentries to
avoid doing write operations while doing lookup.

Following changes were done in this version

o Removed the d_next_hash hack. Dentries are unhashed using list_del
  instead of list_del_init, and that too using d_drop interface
o Used d_drop_locked and d_unhashed instead of directly manipulating hash list
  using list macros
o Changed DCACHE_UNLINKED to DCACHE_UNHASHED

Regards,
Maneesh

diff -urN linux-2.4.17-base/fs/autofs4/root.c linux-2.4.17-dc8/fs/autofs4/root.c
--- linux-2.4.17-base/fs/autofs4/root.c Tue Oct 24 10:27:38 2000
+++ linux-2.4.17-dc8/fs/autofs4/root.c Fri Jul 12 10:59:38 2002
@@ -403,7 +403,7 @@
                 spin_unlock(&dcache_lock);
                 return -ENOTEMPTY;
         }
- list_del_init(&dentry->d_hash);
+ d_drop_locked(dentry);
         spin_unlock(&dcache_lock);
 
         dput(ino->dentry);
diff -urN linux-2.4.17-base/fs/dcache.c linux-2.4.17-dc8/fs/dcache.c
--- linux-2.4.17-base/fs/dcache.c Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/dcache.c Fri Jul 12 16:18:39 2002
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 
 #include <asm/uaccess.h>
+#include <linux/rcupdate.h>
 
 #define DCACHE_PARANOIA 1
 /* #define DCACHE_DEBUG 1 */
@@ -55,14 +56,21 @@
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {0, 0, 45, 0,};
 
+static void d_callback(void *arg)
+{
+ struct dentry * dentry = (struct dentry *)arg;
+
+ if (dname_external(dentry))
+ kfree((void *) dentry->d_name.name);
+ kmem_cache_free(dentry_cache, dentry);
+}
+
 /* no dcache_lock, please */
 static inline void d_free(struct dentry *dentry)
 {
         if (dentry->d_op && dentry->d_op->d_release)
                 dentry->d_op->d_release(dentry);
- if (dname_external(dentry))
- kfree(dentry->d_name.name);
- kmem_cache_free(dentry_cache, dentry);
+ call_rcu(&dentry->d_rcu, d_callback, dentry);
         dentry_stat.nr_dentry--;
 }
 
@@ -124,9 +132,13 @@
         if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
                 return;
 
- /* dput on a free dentry? */
- if (!list_empty(&dentry->d_lru))
- BUG();
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ return;
+ }
+
         /*
          * AV: ->d_delete() is _NOT_ allowed to block now.
          */
@@ -135,18 +147,28 @@
                         goto unhash_it;
         }
         /* Unreachable? Get rid of it */
- if (list_empty(&dentry->d_hash))
+ if (d_unhashed(dentry))
                 goto kill_it;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+
+ if (list_empty(&dentry->d_lru)) {
+ dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+ spin_unlock(&dentry->d_lock);
         spin_unlock(&dcache_lock);
         return;
 
 unhash_it:
- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);
 
 kill_it: {
                 struct dentry *parent;
+ spin_unlock(&dentry->d_lock);
+ if (!list_empty(&dentry->d_lru)) {
+ list_del(&dentry->d_lru);
+ dentry_stat.nr_unused--;
+ }
                 list_del(&dentry->d_child);
                 /* drops the lock, at that point nobody can reach this dentry */
                 dentry_iput(dentry);
@@ -177,7 +199,7 @@
          * If it's already been dropped, return OK.
          */
         spin_lock(&dcache_lock);
- if (list_empty(&dentry->d_hash)) {
+ if (d_unhashed(dentry)) {
                 spin_unlock(&dcache_lock);
                 return 0;
         }
@@ -201,15 +223,18 @@
          * we might still populate it if it was a
          * working directory or similar).
          */
+ spin_lock(&dentry->d_lock);
         if (atomic_read(&dentry->d_count) > 1) {
                 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
+ spin_unlock(&dentry->d_lock);
                         spin_unlock(&dcache_lock);
                         return -EBUSY;
                 }
         }
-
- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
         spin_unlock(&dcache_lock);
+
         return 0;
 }
 
@@ -217,11 +242,14 @@
 
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
+ spin_lock(&dentry->d_lock);
         atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
         if (atomic_read(&dentry->d_count) == 1) {
                 dentry_stat.nr_unused--;
                 list_del_init(&dentry->d_lru);
         }
+ spin_unlock(&dentry->d_lock);
         return dentry;
 }
 
@@ -252,8 +280,8 @@
                 tmp = next;
                 next = tmp->next;
                 alias = list_entry(tmp, struct dentry, d_alias);
- if (!list_empty(&alias->d_hash)) {
- __dget_locked(alias);
+ if (!d_unhashed(alias)) {
+ dget(alias);
                         spin_unlock(&dcache_lock);
                         return alias;
                 }
@@ -263,7 +291,7 @@
 }
 
 /*
- * Try to kill dentries associated with this inode.
+ * Try to kill dentries associated with this inode.
  * WARNING: you must own a reference to inode.
  */
 void d_prune_aliases(struct inode *inode)
@@ -274,13 +302,16 @@
         tmp = head;
         while ((tmp = tmp->next) != head) {
                 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+ spin_lock(&dentry->d_lock);
                 if (!atomic_read(&dentry->d_count)) {
- __dget_locked(dentry);
+ __dget(dentry);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
                         spin_unlock(&dcache_lock);
- d_drop(dentry);
                         dput(dentry);
                         goto restart;
                 }
+ spin_unlock(&dentry->d_lock);
         }
         spin_unlock(&dcache_lock);
 }
@@ -295,7 +326,8 @@
 {
         struct dentry * parent;
 
- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
         list_del(&dentry->d_child);
         dentry_iput(dentry);
         parent = dentry->d_parent;
@@ -330,19 +362,20 @@
                 if (tmp == &dentry_unused)
                         break;
                 list_del_init(tmp);
+ dentry_stat.nr_unused--;
                 dentry = list_entry(tmp, struct dentry, d_lru);
 
+ spin_lock(&dentry->d_lock);
                 /* If the dentry was recently referenced, don't free it. */
                 if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
                         dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
+ if (!atomic_read(&dentry->d_count)) {
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+ spin_unlock(&dentry->d_lock);
                         continue;
                 }
- dentry_stat.nr_unused--;
-
- /* Unused dentry with a count? */
- if (atomic_read(&dentry->d_count))
- BUG();
 
                 prune_one_dentry(dentry);
                 if (!--count)
@@ -405,10 +438,13 @@
                 dentry = list_entry(tmp, struct dentry, d_lru);
                 if (dentry->d_sb != sb)
                         continue;
- if (atomic_read(&dentry->d_count))
- continue;
+ spin_lock(&dentry->d_lock);
                 dentry_stat.nr_unused--;
                 list_del_init(tmp);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ continue;
+ }
                 prune_one_dentry(dentry);
                 goto repeat;
         }
@@ -488,11 +524,13 @@
                 struct list_head *tmp = next;
                 struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
                 next = tmp->next;
+ list_del_init(&dentry->d_lru);
+ spin_lock(&dentry->d_lock);
                 if (!atomic_read(&dentry->d_count)) {
- list_del(&dentry->d_lru);
                         list_add(&dentry->d_lru, dentry_unused.prev);
                         found++;
                 }
+ spin_unlock(&dentry->d_lock);
                 /*
                  * Descend a level if the d_subdirs list is non-empty.
                  */
@@ -606,8 +644,9 @@
         str[name->len] = 0;
 
         atomic_set(&dentry->d_count, 1);
- dentry->d_vfs_flags = 0;
+ dentry->d_vfs_flags = DCACHE_UNHASHED;
         dentry->d_flags = 0;
+ dentry->d_lock = SPIN_LOCK_UNLOCKED;
         dentry->d_inode = NULL;
         dentry->d_parent = NULL;
         dentry->d_sb = NULL;
@@ -708,8 +747,9 @@
         const unsigned char *str = name->name;
         struct list_head *head = d_hash(parent,hash);
         struct list_head *tmp;
+ struct dentry * found = NULL;
 
- spin_lock(&dcache_lock);
+ /* rcu_read_lock(); for pre-emptible kernel */
         tmp = head->next;
         for (;;) {
                 struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -729,13 +769,16 @@
                         if (memcmp(dentry->d_name.name, str, len))
                                 continue;
                 }
- __dget_locked(dentry);
- dentry->d_vfs_flags |= DCACHE_REFERENCED;
- spin_unlock(&dcache_lock);
- return dentry;
+ spin_lock(&dentry->d_lock);
+ if (!(dentry->d_vfs_flags & DCACHE_UNHASHED)) {
+ found = __dget(dentry);
+ }
+ spin_unlock(&dentry->d_lock);
+ /* rcu_read_unlock(); for pre-emptible kernel */
+ return found;
         }
- spin_unlock(&dcache_lock);
- return NULL;
+ /* rcu_read_unlock(); for pre-emptible kernel */
+ return found;
 }
 
 /**
@@ -774,7 +817,7 @@
         lhp = base = d_hash(dparent, dentry->d_name.hash);
         while ((lhp = lhp->next) != base) {
                 if (dentry == list_entry(lhp, struct dentry, d_hash)) {
- __dget_locked(dentry);
+ dget(dentry);
                         spin_unlock(&dcache_lock);
                         return 1;
                 }
@@ -834,9 +877,12 @@
 void d_rehash(struct dentry * entry)
 {
         struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
- if (!list_empty(&entry->d_hash)) BUG();
         spin_lock(&dcache_lock);
+ spin_lock(&entry->d_lock);
+ if (!list_empty(&entry->d_hash) && !d_unhashed(entry)) BUG();
         list_add(&entry->d_hash, list);
+ entry->d_vfs_flags &= ~DCACHE_UNHASHED;
+ spin_unlock(&entry->d_lock);
         spin_unlock(&dcache_lock);
 }
 
@@ -909,7 +955,7 @@
         list_add(&dentry->d_hash, &target->d_hash);
 
         /* Unhash the target: dput() will then get rid of it */
- list_del_init(&target->d_hash);
+ d_drop_locked(target);
 
         list_del(&dentry->d_child);
         list_del(&target->d_child);
@@ -951,7 +997,7 @@
 
         *--end = '\0';
         buflen--;
- if (!IS_ROOT(dentry) && list_empty(&dentry->d_hash)) {
+ if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
                 buflen -= 10;
                 end -= 10;
                 memcpy(end, " (deleted)", 10);
@@ -1034,7 +1080,7 @@
         error = -ENOENT;
         /* Has the current directory has been unlinked? */
         spin_lock(&dcache_lock);
- if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
+ if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
                 unsigned long len;
                 char * cwd;
 
diff -urN linux-2.4.17-base/fs/intermezzo/journal.c linux-2.4.17-dc8/fs/intermezzo/journal.c
--- linux-2.4.17-base/fs/intermezzo/journal.c Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/intermezzo/journal.c Mon Jul 8 16:18:43 2002
@@ -186,7 +186,7 @@
 
         *--end = '\0';
         buflen--;
- if (dentry->d_parent != dentry && list_empty(&dentry->d_hash)) {
+ if (dentry->d_parent != dentry && d_unhashed(dentry)) {
                 buflen -= 10;
                 end -= 10;
                 memcpy(end, " (deleted)", 10);
diff -urN linux-2.4.17-base/fs/nfsd/nfsfh.c linux-2.4.17-dc8/fs/nfsd/nfsfh.c
--- linux-2.4.17-base/fs/nfsd/nfsfh.c Thu Oct 4 11:29:22 2001
+++ linux-2.4.17-dc8/fs/nfsd/nfsfh.c Mon Jul 8 16:18:43 2002
@@ -348,7 +348,7 @@
         spin_lock(&dcache_lock);
         list_for_each(lp, &child->d_inode->i_dentry) {
                 struct dentry *tmp = list_entry(lp,struct dentry, d_alias);
- if (!list_empty(&tmp->d_hash) &&
+ if (!d_unhashed(tmp) &&
                     tmp->d_parent == parent) {
                         child = dget_locked(tmp);
                         spin_unlock(&dcache_lock);
diff -urN linux-2.4.17-base/fs/readdir.c linux-2.4.17-dc8/fs/readdir.c
--- linux-2.4.17-base/fs/readdir.c Mon Aug 13 03:29:08 2001
+++ linux-2.4.17-dc8/fs/readdir.c Mon Jul 8 16:18:43 2002
@@ -79,7 +79,7 @@
                         while(1) {
                                 struct dentry *de = list_entry(list, struct dentry, d_child);
 
- if (!list_empty(&de->d_hash) && de->d_inode) {
+ if (!d_unhashed(de) && de->d_inode) {
                                         spin_unlock(&dcache_lock);
                                         if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0)
                                                 break;
diff -urN linux-2.4.17-base/include/linux/dcache.h linux-2.4.17-dc8/include/linux/dcache.h
--- linux-2.4.17-base/include/linux/dcache.h Fri Nov 23 01:16:18 2001
+++ linux-2.4.17-dc8/include/linux/dcache.h Fri Jul 12 10:59:09 2002
@@ -5,6 +5,8 @@
 
 #include <asm/atomic.h>
 #include <linux/mount.h>
+#include <linux/rcupdate.h>
+#include <asm/system.h>
 
 /*
  * linux/include/linux/dcache.h
@@ -65,11 +67,13 @@
 
 struct dentry {
         atomic_t d_count;
+ spinlock_t d_lock; /* lock for d_count & d_vfs_flags */
+ unsigned long d_vfs_flags; /* moved here to be on same cacheline */
         unsigned int d_flags;
         struct inode * d_inode; /* Where the name belongs to - NULL is negative */
         struct dentry * d_parent; /* parent directory */
         struct list_head d_hash; /* lookup hash list */
- struct list_head d_lru; /* d_count = 0 LRU list */
+ struct list_head d_lru; /* LRU list */
         struct list_head d_child; /* child of parent list */
         struct list_head d_subdirs; /* our children */
         struct list_head d_alias; /* inode alias list */
@@ -78,8 +82,8 @@
         unsigned long d_time; /* used by d_revalidate */
         struct dentry_operations *d_op;
         struct super_block * d_sb; /* The root of the dentry tree */
- unsigned long d_vfs_flags;
         void * d_fsdata; /* fs-specific data */
+ struct rcu_head d_rcu;
         unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
 };
 
@@ -123,10 +127,23 @@
                                          * s_nfsd_free_path semaphore will be down
                                          */
 #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
+#define DCACHE_UNHASHED 0x0010
 
 extern spinlock_t dcache_lock;
 
 /**
+ * d_unhashed - is dentry hashed
+ * @dentry: entry to check
+ *
+ * Returns true if the dentry passed is not currently hashed.
+ */
+
+static __inline__ int d_unhashed(struct dentry *dentry)
+{
+ return (dentry->d_vfs_flags & DCACHE_UNHASHED);
+}
+
+/**
  * d_drop - drop a dentry
  * @dentry: dentry to drop
  *
@@ -143,14 +160,32 @@
  * timeouts or autofs deletes).
  */
 
+static __inline__ void __d_drop(struct dentry * dentry)
+{
+ if (!d_unhashed(dentry)) {
+ dentry->d_vfs_flags |= DCACHE_UNHASHED;
+ wmb();
+ list_del(&dentry->d_hash);
+ wmb();
+ }
+}
+
 static __inline__ void d_drop(struct dentry * dentry)
 {
         spin_lock(&dcache_lock);
- list_del(&dentry->d_hash);
- INIT_LIST_HEAD(&dentry->d_hash);
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
         spin_unlock(&dcache_lock);
 }
 
+static __inline__ void d_drop_locked(struct dentry * dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
 static __inline__ int dname_external(struct dentry *d)
 {
         return d->d_name.name != d->d_iname;
@@ -243,27 +278,25 @@
 static __inline__ struct dentry * dget(struct dentry *dentry)
 {
         if (dentry) {
- if (!atomic_read(&dentry->d_count))
- BUG();
+ spin_lock(&dentry->d_lock);
                 atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ spin_unlock(&dentry->d_lock);
         }
         return dentry;
 }
 
-extern struct dentry * dget_locked(struct dentry *);
-
-/**
- * d_unhashed - is dentry hashed
- * @dentry: entry to check
- *
- * Returns true if the dentry passed is not currently hashed.
- */
-
-static __inline__ int d_unhashed(struct dentry *dentry)
+static __inline__ struct dentry * __dget(struct dentry *dentry)
 {
- return list_empty(&dentry->d_hash);
+ if (dentry) {
+ atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ }
+ return dentry;
 }
 
+extern struct dentry * dget_locked(struct dentry *);
+
 extern void dput(struct dentry *);
 
 static __inline__ int d_mountpoint(struct dentry *dentry)

-- 
Maneesh Soni
IBM Linux Technology Center, 
IBM India Software Lab, Bangalore.
Phone: +91-80-5044999 email: maneesh@in.ibm.com
http://lse.sourceforge.net/locking/rcupdate.html
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Mon Jul 15 2002 - 22:00:22 EST