[RFC] preliminary patch for files struct

Bill Hawes (whawes@star.net)
Fri, 30 Jan 1998 12:12:40 -0500


This is a multi-part message in MIME format.
--------------FE0E6D0E9B325F5D17C22645
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

A while back there was a discussion here of the problem of clone tasks closing a
file descriptor while another clone was using it. (I'm referring to the case
where multiple tasks share a files structure.) The basic problem is simply that
with a shared files structure, any clone can close one of the fds while it's in
use.

As I haven't seen any other proposals to address this problem, I've appended a
preliminary patch implementing a reasonable solution, which is to simply use a
wrapper function to bump the f_count value while the file is being used. This
function replaces the common idiom

if (fd < NR_OPEN && (file = current->files->fd[fd])) ...

with a file = fd_get(fd) to do the checking.

When the operation is complete, a check needs to be made that the file
is still installed for the original fd slot, and then the use count can be
decremented. If the fd slot has been closed (and maybe reassigned), the file
pointer must be closed to clean up potential lock files. This operation is
provided by a fd_put(fd, file) routine.

Note that the operations in fs/read_write.c were already using fget() to protect
the file pointer, but there was a slight problem with using fput() to decrement
the count. If the file had already been closed (by a clone task), this would
have omitted to call to remove locks for the current process. I've replaced the
fget/fputs with fd_get/fd_puts.

Once all of the references to the files structure are safely wrapped, it will be
much easier to implement some of the other suggestions for files-structure
improvements -- dynamic sizing of the fd array, etc.

The attached patch seems to work OK (I'm running it now), but I haven't replaced
all of the direct references to the files structure yet. I'd appreciate any
suggestions or possible problems with this approach.

Regards,
Bill
--------------FE0E6D0E9B325F5D17C22645
Content-Type: text/plain; charset=us-ascii; name="files_82-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="files_82-patch"

--- linux-2.1.82/include/linux/fs.h.old Tue Jan 27 10:10:55 1998
+++ linux-2.1.82/include/linux/fs.h Fri Jan 30 12:00:22 1998
@@ -633,14 +633,24 @@
extern int register_filesystem(struct file_system_type *);
extern int unregister_filesystem(struct file_system_type *);

+/* fs/open.c */
+
+/* Access wrappers for file pointers */
+struct file * fd_get(int);
+void fd_put(int, struct file *);
+void fd_install(int, struct file *);
asmlinkage int sys_open(const char *, int, int);
asmlinkage int sys_close(unsigned int); /* yes, it's really unsigned */
-
-extern void kill_fasync(struct fasync_struct *fa, int sig);
+extern int do_truncate(struct dentry *, unsigned long);
+extern int get_unused_fd(void);
+extern void put_unused_fd(int);
+extern int __fput(struct file *);
+extern int close_fp(struct file *);

extern char * getname(const char * filename);
extern void putname(char * name);
-extern int do_truncate(struct dentry *, unsigned long);
+
+extern void kill_fasync(struct fasync_struct *fa, int sig);
extern int register_blkdev(unsigned int, const char *, struct file_operations *);
extern int unregister_blkdev(unsigned int major, const char * name);
extern int blkdev_open(struct inode * inode, struct file * filp);
@@ -747,32 +757,6 @@
#define namei(pathname) __namei(pathname, 1)
#define lnamei(pathname) __namei(pathname, 0)

-#include <asm/semaphore.h>
-
-/* Intended for short locks of the global data structures in inode.c.
- * Could be replaced with spinlocks completely, since there is
- * no blocking during manipulation of the static data; however the
- * lock in invalidate_inodes() may last relatively long.
- */
-extern struct semaphore vfs_sem;
-extern inline void vfs_lock(void)
-{
-#if 0
-#ifdef __SMP__
- down(&vfs_sem);
-#endif
-#endif
-}
-
-extern inline void vfs_unlock(void)
-{
-#if 0
-#ifdef __SMP__
- up(&vfs_sem);
-#endif
-#endif
-}
-
extern void iput(struct inode *);
extern struct inode * iget(struct super_block *, unsigned long);
extern void clear_inode(struct inode *);
@@ -780,10 +764,7 @@

extern void insert_inode_hash(struct inode *);
extern void remove_inode_hash(struct inode *);
-extern int get_unused_fd(void);
-extern void put_unused_fd(int);
extern struct file * get_empty_filp(void);
-extern int close_fp(struct file *);
extern struct buffer_head * get_hash_table(kdev_t, int, int);
extern struct buffer_head * getblk(kdev_t, int, int);
extern struct buffer_head * find_buffer(kdev_t dev, int block, int size);
--- linux-2.1.82/fs/open.c.old Sat Jan 24 10:13:16 1998
+++ linux-2.1.82/fs/open.c Fri Jan 30 12:05:56 1998
@@ -24,6 +24,38 @@
#include <asm/uaccess.h>
#include <asm/bitops.h>

+/*
+ * Get the file pointer for the specified file descriptor.
+ *
+ * For now this is just an fget(), but we may need to add
+ * some locking later for clone tasks.
+ */
+struct file * fd_get(int fd)
+{
+ return fget(fd);
+}
+
+/*
+ * Check whether the file is still associated with the descriptor,
+ * and then decrement the count. If the descriptor has been closed
+ * (or changed), we must release the file.
+ */
+void fd_put(int fd, struct file *file)
+{
+ if (current->files->fd[fd] == file)
+ file->f_count--;
+ else
+ close_fp(file);
+}
+
+/*
+ * Install a file pointer in the files structure.
+ */
+void fd_install(int fd, struct file *file)
+{
+ current->files->fd[fd] = file;
+}
+
asmlinkage int sys_statfs(const char * path, struct statfs * buf)
{
struct dentry * dentry;
@@ -47,24 +79,31 @@

asmlinkage int sys_fstatfs(unsigned int fd, struct statfs * buf)
{
+ struct file * file;
struct inode * inode;
struct dentry * dentry;
- struct file * file;
+ struct super_block * sb;
int error;

lock_kernel();
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- error = -EBADF;
- else if (!(dentry = file->f_dentry))
- error = -ENOENT;
- else if (!(inode = dentry->d_inode))
- error = -ENOENT;
- else if (!inode->i_sb)
- error = -ENODEV;
- else if (!inode->i_sb->s_op->statfs)
- error = -ENOSYS;
- else
- error = inode->i_sb->s_op->statfs(inode->i_sb, buf, sizeof(struct statfs));
+ error = -EBADF;
+ file = fd_get(fd);
+ if (!file)
+ goto out;
+ error = -ENOENT;
+ if (!(dentry = file->f_dentry))
+ goto out_putf;
+ if (!(inode = dentry->d_inode))
+ goto out_putf;
+ error = -ENODEV;
+ if (!(sb = inode->i_sb))
+ goto out_putf;
+ error = -ENOSYS;
+ if (sb->s_op->statfs)
+ error = sb->s_op->statfs(sb, buf, sizeof(struct statfs));
+out_putf:
+ fd_put(fd, file);
+out:
unlock_kernel();
return error;
}
@@ -147,23 +186,29 @@
int error;

lock_kernel();
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- error = -EBADF;
- else if (!(dentry = file->f_dentry))
- error = -ENOENT;
- else if (!(inode = dentry->d_inode))
- error = -ENOENT;
- else if (S_ISDIR(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
- error = -EACCES;
- else if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- error = -EPERM;
- else {
- error = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file,
- length<inode->i_size ? length : inode->i_size,
- abs(inode->i_size - length));
- if (!error)
- error = do_truncate(dentry, length);
- }
+ error = -EBADF;
+ file = fd_get(fd);
+ if (!file)
+ goto out;
+ error = -ENOENT;
+ if (!(dentry = file->f_dentry))
+ goto out_putf;
+ if (!(inode = dentry->d_inode))
+ goto out_putf;
+ error = -EACCES;
+ if (S_ISDIR(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
+ goto out_putf;
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto out_putf;
+ error = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file,
+ length<inode->i_size ? length : inode->i_size,
+ abs(inode->i_size - length));
+ if (!error)
+ error = do_truncate(dentry, length);
+out_putf:
+ fd_put(fd, file);
+out:
unlock_kernel();
return error;
}
@@ -347,30 +392,28 @@
lock_kernel();

error = -EBADF;
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
+ file = fd_get(fd);
+ if (!file)
goto out;

error = -ENOENT;
if (!(dentry = file->f_dentry))
- goto out;
+ goto out_putf;
if (!(inode = dentry->d_inode))
- goto out;
+ goto out_putf;

error = -ENOTDIR;
if (!S_ISDIR(inode->i_mode))
- goto out;
+ goto out_putf;

- error = permission(inode,MAY_EXEC);
- if (error)
- goto out;
-
- {
- struct dentry *tmp;
-
- tmp = current->fs->pwd;
+ error = permission(inode, MAY_EXEC);
+ if (!error) {
+ struct dentry *tmp = current->fs->pwd;
current->fs->pwd = dget(dentry);
dput(tmp);
}
+out_putf:
+ fd_put(fd, file);
out:
unlock_kernel();
return error;
@@ -421,28 +464,34 @@
struct inode * inode;
struct dentry * dentry;
struct file * file;
- struct iattr newattrs;
int err = -EBADF;
+ struct iattr newattrs;

lock_kernel();
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
+ file = fd_get(fd);
+ if (!file)
goto out;
+
err = -ENOENT;
if (!(dentry = file->f_dentry))
- goto out;
+ goto out_putf;
if (!(inode = dentry->d_inode))
- goto out;
+ goto out_putf;
+
err = -EROFS;
if (IS_RDONLY(inode))
- goto out;
+ goto out_putf;
err = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- goto out;
+ goto out_putf;
if (mode == (mode_t) -1)
mode = inode->i_mode;
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
err = notify_change(dentry, &newattrs);
+
+out_putf:
+ fd_put(fd, file);
out:
unlock_kernel();
return err;
@@ -487,8 +536,8 @@
static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
{
struct inode * inode;
- struct iattr newattrs;
int error;
+ struct iattr newattrs;

error = -ENOENT;
if (!(inode = dentry->d_inode)) {
@@ -581,13 +630,13 @@
int error = -EBADF;

lock_kernel();
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
+ file = fd_get(fd);
+ if (!file)
goto out;
error = -ENOENT;
- if (!(dentry = file->f_dentry))
- goto out;
-
- error = chown_common(dentry, user, group);
+ if ((dentry = file->f_dentry) != NULL)
+ error = chown_common(dentry, user, group);
+ fd_put(fd, file);

out:
unlock_kernel();
@@ -608,16 +657,17 @@
* for the internal routines (ie open_namei()/follow_link() etc). 00 is
* used by symlinks.
*/
-static int do_open(const char * filename,int flags,int mode, int fd)
+static int do_open(const char * filename, int flags, int mode, int fd)
{
struct inode * inode;
struct dentry * dentry;
struct file * f;
int flag,error;

+ error = -ENFILE;
f = get_empty_filp();
if (!f)
- return -ENFILE;
+ goto out;
f->f_flags = flag = flags;
f->f_mode = (flag+1) & O_ACCMODE;
if (f->f_mode)
@@ -648,7 +698,7 @@
}
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

- current->files->fd[fd] = f;
+ fd_install(fd, f);
return 0;

cleanup_all:
@@ -658,6 +708,7 @@
dput(dentry);
cleanup_file:
put_filp(f);
+out:
return error;
}

@@ -670,6 +721,10 @@
struct files_struct * files = current->files;

fd = find_first_zero_bit(&files->open_fds, NR_OPEN);
+ /*
+ * N.B. For clone tasks sharing a files structure, this test
+ * will limit the total number of files that can be opened.
+ */
if (fd < current->rlim[RLIMIT_NOFILE].rlim_cur) {
FD_SET(fd, &files->open_fds);
FD_CLR(fd, &files->close_on_exec);
@@ -683,31 +738,32 @@
FD_CLR(fd, &current->files->open_fds);
}

-asmlinkage int sys_open(const char * filename,int flags,int mode)
+asmlinkage int sys_open(const char * filename, int flags, int mode)
{
char * tmp;
int fd, error;

lock_kernel();
- error = get_unused_fd();
- if (error < 0)
+ fd = get_unused_fd();
+ if (fd < 0)
goto out;

- fd = error;
tmp = getname(filename);
error = PTR_ERR(tmp);
- if (!IS_ERR(tmp)) {
- error = do_open(tmp,flags,mode,fd);
- putname(tmp);
- if (!error) {
- error = fd;
- goto out;
- }
- }
- put_unused_fd(fd);
+ if (IS_ERR(tmp))
+ goto out_fail;
+ error = do_open(tmp, flags, mode, fd);
+ putname(tmp);
+ if (error)
+ goto out_fail;
out:
unlock_kernel();
- return error;
+ return fd;
+
+out_fail:
+ put_unused_fd(fd);
+ fd = error;
+ goto out;
}

#ifndef __alpha__
@@ -728,11 +784,14 @@

#endif

+/*
+ * Called when retiring the last use of a file pointer.
+ */
int __fput(struct file *filp)
{
- int error = 0;
struct dentry * dentry = filp->f_dentry;
struct inode * inode = dentry->d_inode;
+ int error = 0;

if (filp->f_op && filp->f_op->release)
error = filp->f_op->release(inode, filp);
@@ -749,16 +808,25 @@
struct inode *inode;

if (filp->f_count == 0) {
- printk("VFS: Close: file count is 0\n");
+ printk(KERN_ERR "VFS: Close: file count is 0\n");
return 0;
}
dentry = filp->f_dentry;
inode = dentry->d_inode;
+ /*
+ * N.B. There's a problem here if a clone task sharing the files
+ * structure has locks ... another clone might close the file.
+ */
if (inode)
locks_remove_locks(current, filp);
return fput(filp);
}

+/*
+ * Careful here! We test whether the file pointer is NULL before
+ * releasing the fd. This ensures that one clone task won't close
+ * the fd out from under another clone.
+ */
asmlinkage int sys_close(unsigned int fd)
{
int error;
--- linux-2.1.82/fs/read_write.c.old Sun Nov 30 11:24:44 1997
+++ linux-2.1.82/fs/read_write.c Fri Jan 30 12:47:57 1998
@@ -62,14 +62,18 @@

lock_kernel();
retval = -EBADF;
- if (fd >= NR_OPEN ||
- !(file = current->files->fd[fd]) ||
- !(dentry = file->f_dentry) ||
- !(inode = dentry->d_inode))
+ file = fd_get(fd);
+ if (!file)
goto bad;
+ /* N.B. Shouldn't this be ENOENT?? */
+ if (!(dentry = file->f_dentry) ||
+ !(inode = dentry->d_inode))
+ goto out_putf;
retval = -EINVAL;
if (origin <= 2)
retval = llseek(file, offset, origin);
+out_putf:
+ fd_put(fd, file);
bad:
unlock_kernel();
return retval;
@@ -88,24 +92,28 @@

lock_kernel();
retval = -EBADF;
- if (fd >= NR_OPEN ||
- !(file = current->files->fd[fd]) ||
- !(dentry = file->f_dentry) ||
- !(inode = dentry->d_inode))
+ file = fd_get(fd);
+ if (!file)
goto bad;
+ /* N.B. Shouldn't this be ENOENT?? */
+ if (!(dentry = file->f_dentry) ||
+ !(inode = dentry->d_inode))
+ goto out_putf;
retval = -EINVAL;
if (origin > 2)
- goto bad;
+ goto out_putf;

offset = llseek(file, ((loff_t) offset_high << 32) | offset_low,
origin);

retval = (int)offset & INT_MAX;
if (offset >= 0) {
- retval = copy_to_user(result, &offset, sizeof(offset));
- if (retval)
- retval = -EFAULT;
+ retval = -EFAULT;
+ if (!copy_to_user(result, &offset, sizeof(offset)))
+ retval = 0;
}
+out_putf:
+ fd_put(fd, file);
bad:
unlock_kernel();
return retval;
@@ -121,7 +129,7 @@
lock_kernel();

ret = -EBADF;
- file = fget(fd);
+ file = fd_get(fd);
if (!file)
goto bad_file;
ret = -EBADF;
@@ -136,7 +144,7 @@
goto out;
ret = read(file, buf, count, &file->f_pos);
out:
- fput(file);
+ fd_put(fd, file);
bad_file:
unlock_kernel();
return ret;
@@ -152,7 +160,7 @@
lock_kernel();

ret = -EBADF;
- file = fget(fd);
+ file = fd_get(fd);
if (!file)
goto bad_file;
if (!(file->f_mode & FMODE_WRITE))
@@ -170,7 +178,7 @@
ret = write(file, buf, count, &file->f_pos);
up(&inode->i_sem);
out:
- fput(file);
+ fd_put(fd, file);
bad_file:
unlock_kernel();
return ret;
@@ -201,9 +209,10 @@
if (count > UIO_MAXIOV)
goto out_nofree;
if (count > UIO_FASTIOV) {
- iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL);
ret = -ENOMEM;
- if (!iov) goto out_nofree;
+ iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL);
+ if (!iov)
+ goto out_nofree;
}
ret = -EFAULT;
if (copy_from_user(iov, vector, count*sizeof(*vector)))
@@ -277,14 +286,13 @@
lock_kernel();

ret = -EBADF;
- file = fget(fd);
+ file = fd_get(fd);
if (!file)
goto bad_file;
- if (!(file->f_mode & FMODE_READ))
- goto out;
- ret = do_readv_writev(VERIFY_WRITE, file, vector, count);
-out:
- fput(file);
+ if (file->f_mode & FMODE_READ)
+ ret = do_readv_writev(VERIFY_WRITE, file, vector, count);
+ fd_put(fd, file);
+
bad_file:
unlock_kernel();
return ret;
@@ -299,18 +307,16 @@
lock_kernel();

ret = -EBADF;
- file = fget(fd);
+ file = fd_get(fd);
if (!file)
goto bad_file;
- if (!(file->f_mode & FMODE_WRITE))
- goto out;
-
- down(&file->f_dentry->d_inode->i_sem);
- ret = do_readv_writev(VERIFY_READ, file, vector, count);
- up(&file->f_dentry->d_inode->i_sem);
+ if (file->f_mode & FMODE_WRITE) {
+ down(&file->f_dentry->d_inode->i_sem);
+ ret = do_readv_writev(VERIFY_READ, file, vector, count);
+ up(&file->f_dentry->d_inode->i_sem);
+ }
+ fd_put(fd, file);

-out:
- fput(file);
bad_file:
unlock_kernel();
return ret;
@@ -330,7 +336,7 @@
lock_kernel();

ret = -EBADF;
- file = fget(fd);
+ file = fd_get(fd);
if (!file)
goto bad_file;
if (!(file->f_mode & FMODE_READ))
@@ -346,7 +352,7 @@
goto out;
ret = read(file, buf, count, &pos);
out:
- fput(file);
+ fd_put(fd, file);
bad_file:
unlock_kernel();
return ret;
@@ -362,7 +368,7 @@
lock_kernel();

ret = -EBADF;
- file = fget(fd);
+ file = fd_get(fd);
if (!file)
goto bad_file;
if (!(file->f_mode & FMODE_WRITE))
@@ -382,7 +388,7 @@
up(&file->f_dentry->d_inode->i_sem);

out:
- fput(file);
+ fd_put(fd, file);
bad_file:
unlock_kernel();
return ret;
--- linux-2.1.82/fs/stat.c.old Tue Jan 6 11:39:08 1998
+++ linux-2.1.82/fs/stat.c Fri Jan 30 10:10:25 1998
@@ -220,12 +220,14 @@
int err = -EBADF;

lock_kernel();
- if (fd < NR_OPEN && (f = current->files->fd[fd]) != NULL) {
+ f = fd_get(fd);
+ if (f) {
struct dentry * dentry = f->f_dentry;

err = do_revalidate(dentry);
if (!err)
err = cp_old_stat(dentry->d_inode, statbuf);
+ fd_put(fd, f);
}
unlock_kernel();
return err;
@@ -239,12 +241,14 @@
int err = -EBADF;

lock_kernel();
- if (fd < NR_OPEN && (f = current->files->fd[fd]) != NULL) {
+ f = fd_get(fd);
+ if (f) {
struct dentry * dentry = f->f_dentry;

err = do_revalidate(dentry);
if (!err)
err = cp_new_stat(dentry->d_inode, statbuf);
+ fd_put(fd, f);
}
unlock_kernel();
return err;
--- linux-2.1.82/fs/ioctl.c.old Mon Jul 14 00:20:10 1997
+++ linux-2.1.82/fs/ioctl.c Fri Jan 30 11:52:00 1998
@@ -52,7 +52,8 @@
int on, error = -EBADF;

lock_kernel();
- if (fd >= NR_OPEN || !(filp = current->files->fd[fd]))
+ filp = fd_get(fd);
+ if (!filp)
goto out;
error = 0;
switch (cmd) {
@@ -90,13 +91,16 @@
break;

default:
- if (filp->f_dentry && filp->f_dentry->d_inode && S_ISREG(filp->f_dentry->d_inode->i_mode))
+ error = -ENOTTY;
+ if (!filp->f_dentry || !filp->f_dentry->d_inode)
+ error = -ENOENT;
+ else if (S_ISREG(filp->f_dentry->d_inode->i_mode))
error = file_ioctl(filp, cmd, arg);
else if (filp->f_op && filp->f_op->ioctl)
error = filp->f_op->ioctl(filp->f_dentry->d_inode, filp, cmd, arg);
- else
- error = -ENOTTY;
}
+ fd_put(fd, filp);
+
out:
unlock_kernel();
return error;

--------------FE0E6D0E9B325F5D17C22645--