Re: [PATCH] fs: fix i_writecount on shmem and friends

From: David Herrmann
Date: Thu Mar 13 2014 - 07:03:52 EST


Hi Al

On Thu, Mar 13, 2014 at 1:37 AM, Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote:
> On Wed, Mar 12, 2014 at 11:30:09PM +0100, David Herrmann wrote:
>> Please see:
>> shmem_zero_setup()
>> shmem_file_setup()
>> __shmem_file_setup()
>> alloc_file()
>>
>> shmem_zero_setup() is used by /dev/zero (drivers/char/mem.c) and
>> mmap(MAP_ANON). So yes, we do call mmap() on these files.
>
> We do, but how do you get anything even attempt deny_write_access() on
> those? And what would the semantics of that be, anyway?

I haven't found any such path either. Just wanted to point it out in
case you missed these.

> diff --git a/fs/file_table.c b/fs/file_table.c
> index 5b24008..ce1504f 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -178,47 +177,12 @@ struct file *alloc_file(struct path *path, fmode_t mode,
> file->f_mapping = path->dentry->d_inode->i_mapping;
> file->f_mode = mode;
> file->f_op = fop;
> -
> - /*
> - * These mounts don't really matter in practice
> - * for r/o bind mounts. They aren't userspace-
> - * visible. We do this for consistency, and so
> - * that we can do debugging checks at __fput()
> - */
> - if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
> - file_take_write(file);
> - WARN_ON(mnt_clone_write(path->mnt));
> - }
> if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
> i_readcount_inc(path->dentry->d_inode);
> return file;
> }
> EXPORT_SYMBOL(alloc_file);
>
> -/**
> - * drop_file_write_access - give up ability to write to a file
> - * @file: the file to which we will stop writing
> - *
> - * This is a central place which will give up the ability
> - * to write to @file, along with access to write through
> - * its vfsmount.
> - */
> -static void drop_file_write_access(struct file *file)
> -{
> - struct vfsmount *mnt = file->f_path.mnt;
> - struct dentry *dentry = file->f_path.dentry;
> - struct inode *inode = dentry->d_inode;
> -
> - put_write_access(inode);
> -
> - if (special_file(inode->i_mode))
> - return;
> - if (file_check_writeable(file) != 0)
> - return;
> - __mnt_drop_write(mnt);
> - file_release_write(file);
> -}
> -
> /* the real guts of fput() - releasing the last reference to file
> */
> static void __fput(struct file *file)
> @@ -253,8 +217,10 @@ static void __fput(struct file *file)
> put_pid(file->f_owner.pid);
> if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
> i_readcount_dec(inode);
> - if (file->f_mode & FMODE_WRITE)
> - drop_file_write_access(file);
> + if (file->f_mode & FMODE_WRITER) {
> + put_write_access(inode);
> + __mnt_drop_write(mnt);
> + }
> file->f_path.dentry = NULL;
> file->f_path.mnt = NULL;
> file->f_inode = NULL;
> diff --git a/fs/open.c b/fs/open.c
> index b9ed8b2..ea765e4 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -632,35 +632,6 @@ out:
> return error;
> }
>
> -/*
> - * You have to be very careful that these write
> - * counts get cleaned up in error cases and
> - * upon __fput(). This should probably never
> - * be called outside of __dentry_open().
> - */
> -static inline int __get_file_write_access(struct inode *inode,
> - struct vfsmount *mnt)
> -{
> - int error;
> - error = get_write_access(inode);
> - if (error)
> - return error;
> - /*
> - * Do not take mount writer counts on
> - * special files since no writes to
> - * the mount itself will occur.
> - */
> - if (!special_file(inode->i_mode)) {
> - /*
> - * Balanced in __fput()
> - */
> - error = __mnt_want_write(mnt);
> - if (error)
> - put_write_access(inode);
> - }
> - return error;
> -}
> -
> int open_check_o_direct(struct file *f)
> {
> /* NB: we're sure to have correct a_ops only after f_op->open */
> @@ -690,12 +661,15 @@ static int do_dentry_open(struct file *f,
>
> path_get(&f->f_path);
> inode = f->f_inode = f->f_path.dentry->d_inode;
> - if (f->f_mode & FMODE_WRITE) {
> - error = __get_file_write_access(inode, f->f_path.mnt);
> - if (error)
> + if ((f->f_mode & FMODE_WRITE) && !special_file(inode->i_mode)) {
> + error = get_write_access(inode);
> + if (unlikely(error))
> + goto cleanup_file;
> + error = __mnt_want_write(f->f_path.mnt);
> + if (unlikely(error)) {
> + put_write_access(inode);
> goto cleanup_file;
> - if (!special_file(inode->i_mode))
> - file_take_write(f);

+ f->f_mode |= FMODE_WRITER;

Apart from that, the patch looks fine to me. In fact, if anyone wanted
the FMODE_WRITER behavior for alloc_file(), they can do that
themselves after it returns.
>From a first glance you could splitt of the removal of
DEBUG_WRITECOUNT into a separate patch. Makes reviewing easier.

Thanks
David
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/