patch for 2.1.66 nfs client

Bill Hawes (whawes@star.net)
Wed, 26 Nov 1997 18:34:01 -0500


This is a multi-part message in MIME format.
--------------2494768FDF9AD384294D14BB
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

The attached patch for nfs client should help with the problems reported
by Peter and Craig.

The EIO failures from getattr appear to be an improperly handled
timeout. In net/sunrpc/clnt.c the call_reserveresult() code installs a
new action following a timeout, but then falls through into the test for
no request slot, which kills the task. This seems pointless, as the
timeout resulted from waiting for a request in the first place. The
proper action would seem to be to return and let the call_timeout
procedure decide whether to keep trying.

The problems Craig reported have a common theme in which an attempted
create (mkdir, etc.) fails, but then the object can't be deleted
locally, and the server claims it exists. My theory is that a failure in
the return path has made a successful operation on the server appear to
have failed, and we then retain a negative dentry locally that prevents
an attempt to fix it. (I.e. you can't delete it, because it doesn't
exist locally, etc.)

The patch addresses this by dropping the dentry following a failed
create operation, allowing subsequent lookups to discover the object on
the server. If the diagnostic messages occur very often, I have an idea
for a smarter way to patch things up.

Regards,
Bill
--------------2494768FDF9AD384294D14BB
Content-Type: text/plain; charset=us-ascii; name="nfs_client66-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="nfs_client66-patch"

--- linux-2.1.66/net/sunrpc/clnt.c.old Wed Nov 26 10:16:17 1997
+++ linux-2.1.66/net/sunrpc/clnt.c Wed Nov 26 19:08:26 1997
@@ -325,16 +325,27 @@
{
dprintk("RPC: %4d call_reserveresult (status %d)\n",
task->tk_pid, task->tk_status);
+ /*
+ * After a call to xprt_reserve(), we must have either
+ * a request slot or else an error status.
+ */
+ if ((task->tk_status >= 0 && !task->tk_rqstp) ||
+ (task->tk_status < 0 && task->tk_rqstp))
+ printk("call_reserveresult: status=%d, request=%p??\n",
+ task->tk_status, task->tk_rqstp);

if (task->tk_status >= 0) {
task->tk_action = call_allocate;
+ goto out;
} else if (task->tk_status == -EAGAIN) {
task->tk_timeout = task->tk_client->cl_timeout.to_resrvval;
task->tk_status = 0;
xprt_reserve(task);
- return;
+ goto out;
} else if (task->tk_status == -ETIMEDOUT) {
+ printk("RPC: task timed out\n");
task->tk_action = call_timeout;
+ goto out;
} else {
task->tk_action = NULL;
}
@@ -342,6 +353,8 @@
printk("RPC: task has no request, exit EIO\n");
rpc_exit(task, -EIO);
}
+out:
+ return;
}

/*
--- linux-2.1.66/fs/nfs/dir.c.old Wed Nov 26 10:15:50 1997
+++ linux-2.1.66/fs/nfs/dir.c Wed Nov 26 16:10:47 1997
@@ -478,15 +478,16 @@
int error;

dfprintk(VFS, "NFS: lookup(%x/%ld, %.*s)\n",
- dir->i_dev, dir->i_ino, len, dentry->d_name.name);
+ dir->i_dev, dir->i_ino, len, dentry->d_name.name);

if (!dir || !S_ISDIR(dir->i_mode)) {
printk("nfs_lookup: inode is NULL or not a directory\n");
return -ENOENT;
}

+ error = -ENAMETOOLONG;
if (len > NFS_MAXNAMLEN)
- return -ENAMETOOLONG;
+ goto out;

error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dir),
dentry->d_name.name, &fhandle, &fattr);
@@ -504,6 +505,7 @@
error = 0;
}
}
+out:
return error;
}

@@ -516,7 +518,6 @@
struct inode *inode;
int error = -EACCES;

- nfs_invalidate_dircache(dir);
inode = nfs_fhget(dir->i_sb, fhandle, fattr);
if (inode) {
d_instantiate(dentry, inode);
@@ -526,6 +527,12 @@
return error;
}

+/*
+ * Following a failed create operation, we drop the dentry rather
+ * than retain a negative dentry. This avoids a problem in the event
+ * that the operation succeeded on the server, but an error in the
+ * reply path made it appear to have failed.
+ */
static int nfs_create(struct inode *dir, struct dentry * dentry, int mode)
{
struct nfs_sattr sattr;
@@ -541,19 +548,36 @@
return -ENOENT;
}

+ error = -ENAMETOOLONG;
if (dentry->d_name.len > NFS_MAXNAMLEN)
- return -ENAMETOOLONG;
+ goto out;

sattr.mode = mode;
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+
+ /*
+ * Invalidate the dir cache before the operation to avoid a race.
+ */
+ nfs_invalidate_dircache(dir);
error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
dentry->d_name.name, &sattr, &fhandle, &fattr);
if (!error)
error = nfs_instantiate(dir, dentry, &fattr, &fhandle);
+ else {
+#ifdef NFS_PARANOIA
+printk("nfs_create: %s/%s failed, error=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, error);
+#endif
+ d_drop(dentry);
+ }
+out:
return error;
}

+/*
+ * See comments for nfs_proc_create regarding failed operations.
+ */
static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
{
struct nfs_sattr sattr;
@@ -576,15 +600,26 @@
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
if (S_ISCHR(mode) || S_ISBLK(mode))
sattr.size = rdev; /* get out your barf bag */
-
sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+
+ nfs_invalidate_dircache(dir);
error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
dentry->d_name.name, &sattr, &fhandle, &fattr);
if (!error)
error = nfs_instantiate(dir, dentry, &fattr, &fhandle);
+ else {
+#ifdef NFS_PARANOIA
+printk("nfs_mknod: %s/%s failed, error=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, error);
+#endif
+ d_drop(dentry);
+ }
return error;
}

+/*
+ * See comments for nfs_proc_create regarding failed operations.
+ */
static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
struct nfs_sattr sattr;
@@ -607,10 +642,18 @@
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;

+ nfs_invalidate_dircache(dir);
error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir),
dentry->d_name.name, &sattr, &fhandle, &fattr);
if (!error)
error = nfs_instantiate(dir, dentry, &fattr, &fhandle);
+ else {
+#ifdef NFS_PARANOIA
+printk("nfs_mkdir: %s/%s failed, error=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, error);
+#endif
+ d_drop(dentry);
+ }
return error;
}

@@ -897,7 +940,7 @@
int error;

dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n",
- dir->i_dev, dir->i_ino, dentry->d_name.name, symname);
+ dir->i_dev, dir->i_ino, dentry->d_name.name, symname);

if (!dir || !S_ISDIR(dir->i_mode)) {
printk("nfs_symlink: inode is NULL or not a directory\n");
@@ -911,25 +954,35 @@
if (strlen(symname) > NFS_MAXPATHLEN)
goto out;

- sattr.mode = S_IFLNK | S_IRWXUGO; /* SunOS 4.1.2 crashes without this! */
+#ifdef NFS_PARANOIA
+if (dentry->d_inode)
+printk("nfs_proc_symlink: %s/%s not negative!\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+ /*
+ * Fill in the sattr for the call.
+ * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
+ */
+ sattr.mode = S_IFLNK | S_IRWXUGO;
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;

+ /*
+ * Drop the dentry in advance to force a new lookup.
+ * Since nfs_proc_symlink doesn't return a fattr, we
+ * can't instantiate the new inode.
+ */
+ d_drop(dentry);
error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dir),
dentry->d_name.name, symname, &sattr);
if (!error) {
nfs_invalidate_dircache(dir);
nfs_renew_times(dentry->d_parent);
- /* this looks _funny_ doesn't it? But: nfs_proc_symlink()
- * only fills in sattr, not fattr. Thus nfs_fhget() cannot be
- * called, it would be pointless, without a valid fattr
- * argument. Other possibility: call nfs_proc_lookup()
- * HERE. But why? If somebody wants to reference this
- * symlink, the cached_lookup() will fail, and
- * nfs_proc_symlink() will be called anyway.
- */
- d_drop(dentry);
+ } else if (error == -EEXIST) {
+ printk("nfs_proc_symlink: %s/%s already exists??\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
}
+
out:
return error;
}

--------------2494768FDF9AD384294D14BB--