Re: [2.3.17] Poor NFS client handling of server death

Trond Myklebust (trond.myklebust@fys.uio.no)
10 Sep 1999 11:35:37 +0200


Jeff Garzik <jgarzik@pobox.com> writes:

> Server: x86, knfsd and tools from 2.2.5+RH / RedHat 6.0
> Client: x86, nfs fs from 2.3.17 stock kernel
>
> The following messages spew out whenever the NFS server is restarted:
> nfs: server bum not responding, still trying
> nfs: task 48484 can't get a request slot
> nfs: task 48489 can't get a request slot
> nfs: RPC call returned error 111
> RPC: task of released request still queued!
> RPC: (task is on xprt_pending)
> nfs: RPC call returned error 111
> RPC: task of released request still queued!
> RPC: (task is on xprt_pending)
> nfs: RPC call returned error 111
> RPC: task of released request still queued!
> RPC: (task is on xprt_pending)
> nfs: server bum OK
>
> What is going on here? Also, I cannot isolate the cause, but I see gobs
> of the following messages _sometimes_ when I blow away my egcs build
> directory:

The problem is that the NFS client doesn't know that the RPC is having
trouble sending out requests. It therefore continues to send out new
asynchronous write requests. As they start to time out, the above
messages start to snowball.

I've fixed this in NFSv3 and also the 'ac' series by adding a wait
queue on which the process that is attempting to write can sleep while
the server is down. It is then woken up once the existing backlog of
requests has been cleared.

I hope to find time to backport the improved RPC stuff to the standard
kernel soon. I think it is good enough for 2.3.x.

>
> nfs_lookup: obj/gcc ino=2195829 in use, count=2, nlink=2
> show_dentry: obj.o/gcc, d_count=258
> __nfs_fhget: inode 2195949 still busy, i_count=2
> __nfs_fhget: killing gcc/intl filehandle

This is the stale inode stuff that is killing off filehandles. I'm not
really happy with this sort of thing. It is almost always the wrong
thing to do.

A patch for this has been put out (against 2.2.12). I'm appending it
to this posting. Perhaps you can manage to fit it to the 2.3.x stuff?

Cheers,
Trond

diff -u --recursive --new-file linux-2.2.12.orig/fs/dcache.c linux-2.2.12/fs/dcache.c
--- linux-2.2.12.orig/fs/dcache.c Mon Apr 26 08:17:56 1999
+++ linux-2.2.12/fs/dcache.c Mon Aug 30 16:13:24 1999
@@ -168,6 +168,11 @@
int d_invalidate(struct dentry * dentry)
{
/*
+ * If it's already been dropped, return OK.
+ */
+ if (&list_empty(&dentry->d_hash))
+ return 0;
+ /*
* Check whether to do a partial shrink_dcache
* to get rid of unused child entries.
*/
diff -u --recursive --new-file linux-2.2.12.orig/fs/nfs/dir.c linux-2.2.12/fs/nfs/dir.c
--- linux-2.2.12.orig/fs/nfs/dir.c Mon Aug 9 21:04:57 1999
+++ linux-2.2.12/fs/nfs/dir.c Mon Aug 30 16:15:54 1999
@@ -395,13 +395,14 @@
* If mtime is close to present time, we revalidate
* more often.
*/
+#define NFS_REVALIDATE_NEGATIVE (1 * HZ)
static inline int nfs_neg_need_reval(struct dentry *dentry)
{
- unsigned long timeout = 30 * HZ;
+ unsigned long timeout = NFS_ATTRTIMEO(dentry->d_parent->d_inode);
long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime;

- if (diff < 5*60)
- timeout = 1 * HZ;
+ if (diff < 5*60 && timeout > NFS_REVALIDATE_NEGATIVE)
+ timeout = NFS_REVALIDATE_NEGATIVE;

return time_after(jiffies, dentry->d_time + timeout);
}
@@ -462,12 +463,8 @@
goto out_bad;

/* Filehandle matches? */
- if (memcmp(dentry->d_fsdata, &fhandle, sizeof(struct nfs_fh))) {
- if (!list_empty(&dentry->d_subdirs))
- shrink_dcache_parent(dentry);
- if (dentry->d_count < 2)
- goto out_bad;
- }
+ if (memcmp(dentry->d_fsdata, &fhandle, sizeof(struct nfs_fh)))
+ goto out_bad;

/* Ok, remeber that we successfully checked it.. */
nfs_renew_times(dentry);
@@ -476,6 +473,9 @@
out_valid:
return 1;
out_bad:
+ d_drop(dentry);
+ if (!list_empty(&dentry->d_subdirs))
+ shrink_dcache_parent(dentry);
if (dentry->d_parent->d_inode)
nfs_invalidate_dircache(dentry->d_parent->d_inode);
if (inode && S_ISDIR(inode->i_mode))
diff -u --recursive --new-file linux-2.2.12.orig/fs/nfs/inode.c linux-2.2.12/fs/nfs/inode.c
--- linux-2.2.12.orig/fs/nfs/inode.c Mon Aug 9 21:05:05 1999
+++ linux-2.2.12/fs/nfs/inode.c Mon Aug 30 16:14:37 1999
@@ -492,6 +492,48 @@
}

/*
+ * The following may seem pretty minimal, but the stateless nature
+ * of NFS means that we can't do too much more. Previous attempts to use
+ * fattr->nlink to determine how well the cached state matches the
+ * server suffer from races with stale dentries. You also risk killing
+ * off processes by just doing 'mv file newdir' on the server.
+ *
+ * FIXME: Of course, if 2 exported files have the same fileid (but
+ * different fsid which makes it legal) you're still buggered...
+ * Trond, August 1999.
+ */
+static int
+nfs_inode_is_stale(struct inode *inode, struct nfs_fattr *fattr)
+{
+ int unhashed;
+ int is_stale = 0;
+
+ if (inode->i_mode &&
+ (fattr->mode & S_IFMT) != (inode->i_mode & S_IFMT))
+ is_stale = 1;
+
+ if (is_bad_inode(inode))
+ is_stale = 1;
+
+ /*
+ * Free up unused cached dentries to see if it's wise to unhash
+ * the inode (which we can do if all the dentries have been unhashed).
+ */
+ unhashed = nfs_free_dentries(inode);
+
+ /* Assume we're holding 1 lock on the inode from 'iget'
+ *
+ * NB: sockets sometimes have volatile file handles
+ * don't invalidate their inodes even if all dentries are
+ * unhashed. */
+ if (unhashed && inode->i_count == unhashed + 1
+ && !S_ISSOCK(inode->i_mode))
+ is_stale = 1;
+
+ return is_stale;
+}
+
+/*
* This is our own version of iget that looks up inodes by file handle
* instead of inode number. We use this technique instead of using
* the vfs read_inode function because there is no way to pass the
@@ -550,53 +592,26 @@
static struct inode *
__nfs_fhget(struct super_block *sb, struct nfs_fattr *fattr)
{
- struct inode *inode;
- int max_count, stale_inode, unhashed = 0;
+ struct inode *inode = NULL;

-retry:
- inode = iget(sb, fattr->fileid);
- if (!inode)
+ if (!fattr)
goto out_no_inode;
- /* N.B. This should be impossible ... */
- if (inode->i_ino != fattr->fileid)
- goto out_bad_id;

- /*
- * Check for busy inodes, and attempt to get rid of any
- * unused local references. If successful, we release the
- * inode and try again.
- *
- * Note that the busy test uses the values in the fattr,
- * as the inode may have become a different object.
- * (We can probably handle modes changes here, too.)
- */
- stale_inode = inode->i_mode &&
- ((fattr->mode ^ inode->i_mode) & S_IFMT);
- stale_inode |= inode->i_count && inode->i_count == unhashed;
- max_count = S_ISDIR(fattr->mode) ? 1 : fattr->nlink;
- if (stale_inode || inode->i_count > max_count + unhashed) {
- dprintk("__nfs_fhget: inode %ld busy, i_count=%d, i_nlink=%d\n",
- inode->i_ino, inode->i_count, inode->i_nlink);
- unhashed = nfs_free_dentries(inode);
- if (stale_inode || inode->i_count > max_count + unhashed) {
- printk("__nfs_fhget: inode %ld still busy, i_count=%d\n",
- inode->i_ino, inode->i_count);
- if (!list_empty(&inode->i_dentry)) {
- struct dentry *dentry;
- dentry = list_entry(inode->i_dentry.next,
- struct dentry, d_alias);
- printk("__nfs_fhget: killing %s/%s filehandle\n",
- dentry->d_parent->d_name.name,
- dentry->d_name.name);
- memset(dentry->d_fsdata, 0,
- sizeof(struct nfs_fh));
- }
- remove_inode_hash(inode);
- nfs_invalidate_inode(inode);
- unhashed = 0;
- }
+ while (!inode) {
+ inode = iget(sb, fattr->fileid);
+ if (!inode)
+ goto out_no_inode;
+ /* N.B. This should be impossible ... */
+ if (inode->i_ino != fattr->fileid)
+ goto out_bad_id;
+
+ if (!nfs_inode_is_stale(inode,fattr))
+ break;
+
+ remove_inode_hash(inode);
+ nfs_invalidate_inode(inode);
iput(inode);
- goto retry;
+ inode = NULL;
}
nfs_fill_inode(inode, fattr);
dprintk("NFS: __nfs_fhget(%x/%ld ct=%d)\n",
@@ -751,6 +766,8 @@
fh = (u32 *) &fhandle;
dfprintk(PAGECACHE, " %08x%08x%08x%08x%08x%08x%08x%08x\n",
fh[0],fh[1],fh[2],fh[3],fh[4],fh[5],fh[6],fh[7]);
+ if (!IS_ROOT(dentry))
+ d_drop(dentry);
goto out;
}

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/