[CFT][PATCH] Re: 2.5.11 breakage

From: Alexander Viro (viro@math.psu.edu)
Date: Mon Apr 29 2002 - 17:18:44 EST


        OK, here comes. Patch below is an attempt to do the fastwalk
stuff in right way and so far it seems to be working.

        Differences from Hanna's stuff:

* dentry leak is plugged
* locked/unlocked state of nameidata doesn't depend on history - it
        depends only on point in code.
* LOOKUP_LOCKED is gone.
* following mounts and .. doesn't drop dcache_lock
* light-weight permission check distinguishes between "don't know" and
        "permission denied", so we don't call full-blown permission() unless
        we have to.
* code that changes root/pwd holds dcache_lock _and_ write lock on
        current->fs->lock. I.e. if we hold dcache_lock we can safely
        access our ->fs->{root,pwd}{,mnt}
* __d_lookup() does not increment refcount; callers do dget_locked()
        if they need it (behaviour of d_lookup() didn't change, obviously).
* link_path_walk() logics had been (somewhat) cleaned up.

        Comments/questions/testing are welcome. Patch is trivially
backportable to 2.4, BTW. Patch is against vanilla 2.5.11.

diff -urN C11/fs/dcache.c C11-current/fs/dcache.c
--- C11/fs/dcache.c Mon Apr 29 08:03:36 2002
+++ C11-current/fs/dcache.c Mon Apr 29 10:24:35 2002
@@ -849,6 +849,8 @@
         struct dentry * dentry;
         spin_lock(&dcache_lock);
         dentry = __d_lookup(parent,name);
+ if (dentry)
+ __dget_locked(dentry);
         spin_unlock(&dcache_lock);
         return dentry;
 }
@@ -881,7 +883,6 @@
                         if (memcmp(dentry->d_name.name, str, len))
                                 continue;
                 }
- __dget_locked(dentry);
                 dentry->d_vfs_flags |= DCACHE_REFERENCED;
                 return dentry;
         }
diff -urN C11/fs/namei.c C11-current/fs/namei.c
--- C11/fs/namei.c Mon Apr 29 08:03:36 2002
+++ C11-current/fs/namei.c Mon Apr 29 16:20:27 2002
@@ -279,36 +279,24 @@
 }
 
 /*for fastwalking*/
-static inline void undo_locked(struct nameidata *nd)
+static inline void unlock_nd(struct nameidata *nd)
 {
- if(nd->flags & LOOKUP_LOCKED){
- dget_locked(nd->dentry);
- mntget(nd->mnt);
- spin_unlock(&dcache_lock);
- nd->flags &= ~LOOKUP_LOCKED;
- }
+ struct vfsmount *mnt = nd->old_mnt;
+ struct dentry *dentry = nd->old_dentry;
+ mntget(nd->mnt);
+ dget_locked(nd->dentry);
+ nd->old_mnt = NULL;
+ nd->old_dentry = NULL;
+ spin_unlock(&dcache_lock);
+ dput(dentry);
+ mntput(mnt);
 }
 
-/*
- * For fast path lookup while holding the dcache_lock.
- * SMP-safe
- */
-static struct dentry * cached_lookup_nd(struct nameidata * nd, struct qstr * name, int flags)
+static inline void lock_nd(struct nameidata *nd)
 {
- struct dentry * dentry = NULL;
- if(!(nd->flags & LOOKUP_LOCKED))
- return cached_lookup(nd->dentry, name, flags);
-
- dentry = __d_lookup(nd->dentry, name);
-
- if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
- undo_locked(nd);
- if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
- dput(dentry);
- dentry = NULL;
- }
- }
- return dentry;
+ spin_lock(&dcache_lock);
+ nd->old_mnt = nd->mnt;
+ nd->old_dentry = nd->dentry;
 }
 
 /*
@@ -326,7 +314,7 @@
         umode_t mode = inode->i_mode;
 
         if ((inode->i_op && inode->i_op->permission))
- return -EACCES;
+ return -EAGAIN;
 
         if (current->fsuid == inode->i_uid)
                 mode >>= 6;
@@ -418,10 +406,10 @@
         return -ELOOP;
 }
 
-static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
+int follow_up(struct vfsmount **mnt, struct dentry **dentry)
 {
         struct vfsmount *parent;
- struct dentry *dentry;
+ struct dentry *mountpoint;
         spin_lock(&dcache_lock);
         parent=(*mnt)->mnt_parent;
         if (parent == *mnt) {
@@ -429,18 +417,27 @@
                 return 0;
         }
         mntget(parent);
- dentry=dget((*mnt)->mnt_mountpoint);
+ mountpoint=dget((*mnt)->mnt_mountpoint);
         spin_unlock(&dcache_lock);
- dput(*base);
- *base = dentry;
+ dput(*dentry);
+ *dentry = mountpoint;
         mntput(*mnt);
         *mnt = parent;
         return 1;
 }
 
-int follow_up(struct vfsmount **mnt, struct dentry **dentry)
+static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
 {
- return __follow_up(mnt, dentry);
+ int res = 0;
+ while (d_mountpoint(*dentry)) {
+ struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
+ if (!mounted)
+ break;
+ *mnt = mounted;
+ *dentry = mounted->mnt_root;
+ res = 1;
+ }
+ return res;
 }
 
 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
@@ -466,41 +463,83 @@
         return __follow_down(mnt,dentry);
 }
  
-static inline void follow_dotdot(struct nameidata *nd)
+static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry)
 {
         while(1) {
                 struct vfsmount *parent;
- struct dentry *dentry;
- read_lock(&current->fs->lock);
- if (nd->dentry == current->fs->root &&
- nd->mnt == current->fs->rootmnt) {
- read_unlock(&current->fs->lock);
+ if (*dentry == current->fs->root &&
+ *mnt == current->fs->rootmnt)
                         break;
- }
- read_unlock(&current->fs->lock);
- spin_lock(&dcache_lock);
- if (nd->dentry != nd->mnt->mnt_root) {
- dentry = dget(nd->dentry->d_parent);
- spin_unlock(&dcache_lock);
- dput(nd->dentry);
- nd->dentry = dentry;
+ if (*dentry != (*mnt)->mnt_root) {
+ *dentry = (*dentry)->d_parent;
                         break;
                 }
- parent=nd->mnt->mnt_parent;
- if (parent == nd->mnt) {
- spin_unlock(&dcache_lock);
+ parent=(*mnt)->mnt_parent;
+ if (parent == *mnt)
                         break;
- }
- mntget(parent);
- dentry=dget(nd->mnt->mnt_mountpoint);
- spin_unlock(&dcache_lock);
- dput(nd->dentry);
- nd->dentry = dentry;
- mntput(nd->mnt);
- nd->mnt = parent;
+ *dentry=(*mnt)->mnt_mountpoint;
+ *mnt = parent;
         }
- while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry))
- ;
+ follow_mount(mnt, dentry);
+}
+
+struct path {
+ struct vfsmount *mnt;
+ struct dentry *dentry;
+};
+
+/*
+ * It's more convoluted than I'd like it to be, but... it's still fairly
+ * small and for now I'd prefer to have fast path as straight as possible.
+ * It _is_ time-critical.
+ */
+static int do_lookup(struct nameidata *nd, struct qstr *name,
+ struct path *path, struct path *cached_path,
+ int flags)
+{
+ struct vfsmount *mnt = nd->mnt;
+ struct dentry *dentry = __d_lookup(nd->dentry, name);
+
+ if (!dentry)
+ goto dcache_miss;
+ if (dentry->d_op && dentry->d_op->d_revalidate)
+ goto need_revalidate;
+done:
+ path->mnt = mnt;
+ path->dentry = dentry;
+ return 0;
+
+dcache_miss:
+ unlock_nd(nd);
+
+need_lookup:
+ dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE);
+ if (IS_ERR(dentry))
+ goto fail;
+ mntget(mnt);
+relock:
+ dput(cached_path->dentry);
+ mntput(cached_path->mnt);
+ cached_path->mnt = mnt;
+ cached_path->dentry = dentry;
+ lock_nd(nd);
+ goto done;
+
+need_revalidate:
+ mntget(mnt);
+ dget_locked(dentry);
+ unlock_nd(nd);
+ if (dentry->d_op->d_revalidate(dentry, flags))
+ goto relock;
+ if (d_invalidate(dentry))
+ goto relock;
+ dput(dentry);
+ mntput(mnt);
+ goto need_lookup;
+
+fail:
+ lock_nd(nd);
+ return PTR_ERR(dentry);
 }
 
 /*
@@ -513,11 +552,11 @@
  */
 int link_path_walk(const char * name, struct nameidata *nd)
 {
- struct dentry *dentry;
+ struct path next, pinned = {NULL, NULL};
         struct inode *inode;
         int err;
         unsigned int lookup_flags = nd->flags;
-
+
         while (*name=='/')
                 name++;
         if (!*name)
@@ -534,11 +573,11 @@
                 unsigned int c;
 
                 err = exec_permission_lite(inode);
- if(err){
- undo_locked(nd);
+ if (err == -EAGAIN) {
+ unlock_nd(nd);
                         err = permission(inode, MAY_EXEC);
+ lock_nd(nd);
                 }
- dentry = ERR_PTR(err);
                  if (err)
                         break;
 
@@ -572,8 +611,7 @@
                         case 2:
                                 if (this.name[1] != '.')
                                         break;
- undo_locked(nd);
- follow_dotdot(nd);
+ follow_dotdot(&nd->mnt, &nd->dentry);
                                 inode = nd->dentry->d_inode;
                                 /* fallthrough */
                         case 1:
@@ -589,35 +627,30 @@
                                 break;
                 }
                 /* This does the actual lookups.. */
- dentry = cached_lookup_nd(nd, &this, LOOKUP_CONTINUE);
- if (!dentry) {
- undo_locked(nd);
- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- break;
- }
+ err = do_lookup(nd, &this, &next, &pinned, LOOKUP_CONTINUE);
+ if (err)
+ break;
                 /* Check mountpoints.. */
- if(d_mountpoint(dentry)){
- undo_locked(nd);
- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
- ;
- }
+ follow_mount(&next.mnt, &next.dentry);
 
                 err = -ENOENT;
- inode = dentry->d_inode;
+ inode = next.dentry->d_inode;
                 if (!inode)
- goto out_dput;
+ break;
                 err = -ENOTDIR;
                 if (!inode->i_op)
- goto out_dput;
+ break;
 
                 if (inode->i_op->follow_link) {
- undo_locked(nd);
- err = do_follow_link(dentry, nd);
- dput(dentry);
+ mntget(next.mnt);
+ dget_locked(next.dentry);
+ unlock_nd(nd);
+ err = do_follow_link(next.dentry, nd);
+ dput(next.dentry);
+ mntput(next.mnt);
                         if (err)
                                 goto return_err;
+ lock_nd(nd);
                         err = -ENOENT;
                         inode = nd->dentry->d_inode;
                         if (!inode)
@@ -626,9 +659,8 @@
                         if (!inode->i_op)
                                 break;
                 } else {
- if (!(nd->flags & LOOKUP_LOCKED))
- dput(nd->dentry);
- nd->dentry = dentry;
+ nd->mnt = next.mnt;
+ nd->dentry = next.dentry;
                 }
                 err = -ENOTDIR;
                 if (!inode->i_op->lookup)
@@ -647,8 +679,7 @@
                         case 2:
                                 if (this.name[1] != '.')
                                         break;
- undo_locked(nd);
- follow_dotdot(nd);
+ follow_dotdot(&nd->mnt, &nd->dentry);
                                 inode = nd->dentry->d_inode;
                                 /* fallthrough */
                         case 1:
@@ -659,27 +690,26 @@
                         if (err < 0)
                                 break;
                 }
- dentry = cached_lookup_nd(nd, &this, 0);
- undo_locked(nd);
- if (!dentry) {
- dentry = real_lookup(nd->dentry, &this, 0);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- break;
- }
- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
- ;
- inode = dentry->d_inode;
+ err = do_lookup(nd, &this, &next, &pinned, 0);
+ if (err)
+ break;
+ follow_mount(&next.mnt, &next.dentry);
+ inode = next.dentry->d_inode;
                 if ((lookup_flags & LOOKUP_FOLLOW)
                     && inode && inode->i_op && inode->i_op->follow_link) {
- err = do_follow_link(dentry, nd);
- dput(dentry);
+ mntget(next.mnt);
+ dget_locked(next.dentry);
+ unlock_nd(nd);
+ err = do_follow_link(next.dentry, nd);
+ dput(next.dentry);
+ mntput(next.mnt);
                         if (err)
                                 goto return_err;
                         inode = nd->dentry->d_inode;
+ lock_nd(nd);
                 } else {
- dput(nd->dentry);
- nd->dentry = dentry;
+ nd->mnt = next.mnt;
+ nd->dentry = next.dentry;
                 }
                 err = -ENOENT;
                 if (!inode)
@@ -700,22 +730,23 @@
                 else if (this.len == 2 && this.name[1] == '.')
                         nd->last_type = LAST_DOTDOT;
 return_base:
- undo_locked(nd);
+ unlock_nd(nd);
+ dput(pinned.dentry);
+ mntput(pinned.mnt);
                 return 0;
-out_dput:
- undo_locked(nd);
- dput(dentry);
- break;
         }
- undo_locked(nd);
+ unlock_nd(nd);
         path_release(nd);
 return_err:
+ dput(pinned.dentry);
+ mntput(pinned.mnt);
         return err;
 }
 
 int path_walk(const char * name, struct nameidata *nd)
 {
         current->total_link_count = 0;
+ lock_nd(nd);
         return link_path_walk(name, nd);
 }
 
@@ -803,6 +834,8 @@
 int path_init(const char *name, unsigned int flags, struct nameidata *nd)
 {
         nd->last_type = LAST_ROOT; /* if there are only slashes... */
+ nd->old_mnt = NULL;
+ nd->old_dentry = NULL;
         nd->flags = flags;
         if (*name=='/')
                 return walk_init_root(name,nd);
@@ -817,7 +850,7 @@
 {
         nd->last_type = LAST_ROOT; /* if there are only slashes... */
         nd->flags = flags;
- if (*name=='/'){
+ if (*name=='/') {
                 read_lock(&current->fs->lock);
                 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
                         nd->mnt = mntget(current->fs->altrootmnt);
@@ -827,20 +860,20 @@
                                 return 0;
                         read_lock(&current->fs->lock);
                 }
- spin_lock(&dcache_lock); /*to avoid cacheline bouncing with d_count*/
+ read_unlock(&current->fs->lock);
+ spin_lock(&dcache_lock);
                 nd->mnt = current->fs->rootmnt;
                 nd->dentry = current->fs->root;
- read_unlock(&current->fs->lock);
         }
         else{
- read_lock(&current->fs->lock);
                 spin_lock(&dcache_lock);
                 nd->mnt = current->fs->pwdmnt;
                 nd->dentry = current->fs->pwd;
- read_unlock(&current->fs->lock);
         }
- nd->flags |= LOOKUP_LOCKED;
- return (path_walk(name, nd));
+ nd->old_mnt = NULL;
+ nd->old_dentry = NULL;
+ current->total_link_count = 0;
+ return link_path_walk(name, nd);
 }
 
 /*
@@ -2029,6 +2062,7 @@
                         /* weird __emul_prefix() stuff did it */
                         goto out;
         }
+ lock_nd(nd);
         res = link_path_walk(link, nd);
 out:
         if (current->link_count || res || nd->last_type!=LAST_NORM)
diff -urN C11/include/linux/fs.h C11-current/include/linux/fs.h
--- C11/include/linux/fs.h Mon Apr 29 08:03:37 2002
+++ C11-current/include/linux/fs.h Mon Apr 29 16:03:36 2002
@@ -654,6 +654,8 @@
         struct qstr last;
         unsigned int flags;
         int last_type;
+ struct dentry *old_dentry;
+ struct vfsmount *old_mnt;
 };
 
 #define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */
@@ -1395,7 +1397,6 @@
 #define LOOKUP_CONTINUE (4)
 #define LOOKUP_PARENT (16)
 #define LOOKUP_NOALT (32)
-#define LOOKUP_LOCKED (64)
 
 /*
  * Type of the last component on LOOKUP_PARENT
diff -urN C11/include/linux/fs_struct.h C11-current/include/linux/fs_struct.h
--- C11/include/linux/fs_struct.h Mon Feb 25 21:12:16 2002
+++ C11-current/include/linux/fs_struct.h Mon Apr 29 16:03:36 2002
@@ -35,10 +35,12 @@
         struct dentry *old_root;
         struct vfsmount *old_rootmnt;
         write_lock(&fs->lock);
+ spin_lock(&dcache_lock);
         old_root = fs->root;
         old_rootmnt = fs->rootmnt;
         fs->rootmnt = mntget(mnt);
         fs->root = dget(dentry);
+ spin_unlock(&dcache_lock);
         write_unlock(&fs->lock);
         if (old_root) {
                 dput(old_root);
@@ -58,10 +60,12 @@
         struct dentry *old_pwd;
         struct vfsmount *old_pwdmnt;
         write_lock(&fs->lock);
+ spin_lock(&dcache_lock);
         old_pwd = fs->pwd;
         old_pwdmnt = fs->pwdmnt;
         fs->pwdmnt = mntget(mnt);
         fs->pwd = dget(dentry);
+ spin_unlock(&dcache_lock);
         write_unlock(&fs->lock);
         if (old_pwd) {
                 dput(old_pwd);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Tue Apr 30 2002 - 22:00:18 EST