Re: [patch] remove BKL from inode_setattr

From: Andrew Morton (akpm@digeo.com)
Date: Mon Oct 14 2002 - 01:34:47 EST


Hugh Dickins wrote:
>
> On Sun, 13 Oct 2002, Andrew Morton wrote:
> >
> > Since April 05 of this year we've been holding the BKL across the
> > vmtruncate call out of inode_setattr(). By accident it seems.
>
> But until then, the lock_kernel was one level up in notify_change().

OK.

> > This does not affect unlink(). It affects ftruncate() and open(O_TRUNC).
>
> And the patch you give affects chown, chgrp, chmod, utime also:
> removing a synchronization point if nothing more. Could that matter?

I don't think so. If

        lock_kernel();
        foo = bar;
        unlock_kernel();

is racy then so is

        foo = bar;
 
> > Given that the drop_inode() path does not take the BKL, I would
> > suggest that it is safe to assume that the various filesystem's
> > truncate code is safe without this additional VFS-level lock_kernel(),
> > and that it can be simply removed.
>
> Isn't doing it when the references have gone rather easier/safer
> than when they remain? I'm not sure your argument holds.

well we hold i_sem. So races against operations against the same
file are pretty much blotted out. Only reads I guess. It's races
against operations on different files which are the concern. And
we're already exposed to them.

The number of filsystems which do not take the bkl in truncate/setattr
is in fact quite small. Here's the patch which removes all doubt:

 fs/affs/file.c | 13 ++++++++-----
 fs/attr.c | 2 --
 fs/cifs/inode.c | 7 ++++++-
 fs/jfs/file.c | 3 +++
 fs/reiserfs/file.c | 2 ++
 fs/smbfs/proc.c | 18 +++++++++++++++---
 fs/sysv/itree.c | 6 +++++-
 fs/xfs/linux/xfs_iops.c | 11 +++++++++--
 8 files changed, 48 insertions(+), 14 deletions(-)

--- 2.5.42/fs/attr.c~truncate-bkl Sun Oct 13 20:04:06 2002
+++ 2.5.42-akpm/fs/attr.c Sun Oct 13 21:04:22 2002
@@ -67,7 +67,6 @@ int inode_setattr(struct inode * inode,
         unsigned int ia_valid = attr->ia_valid;
         int error = 0;
         
- lock_kernel();
         if (ia_valid & ATTR_SIZE) {
                 error = vmtruncate(inode, attr->ia_size);
                 if (error)
@@ -91,7 +90,6 @@ int inode_setattr(struct inode * inode,
         }
         mark_inode_dirty(inode);
 out:
- unlock_kernel();
         return error;
 }
 
--- 2.5.42/fs/affs/file.c~truncate-bkl Sun Oct 13 21:02:43 2002
+++ 2.5.42-akpm/fs/affs/file.c Sun Oct 13 21:03:46 2002
@@ -832,6 +832,7 @@ affs_truncate(struct inode *inode)
         pr_debug("AFFS: truncate(inode=%d, oldsize=%u, newsize=%u)\n",
                  (u32)inode->i_ino, (u32)AFFS_I(inode)->mmu_private, (u32)inode->i_size);
 
+ lock_kernel();
         last_blk = 0;
         ext = 0;
         if (inode->i_size) {
@@ -847,7 +848,7 @@ affs_truncate(struct inode *inode)
 
                 page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT);
                 if (!page)
- return;
+ goto out;
                 size = (size & (PAGE_CACHE_SIZE - 1)) + 1;
                 res = mapping->a_ops->prepare_write(NULL, page, size, size);
                 if (!res)
@@ -855,16 +856,16 @@ affs_truncate(struct inode *inode)
                 unlock_page(page);
                 page_cache_release(page);
                 mark_inode_dirty(inode);
- return;
+ goto out;
         } else if (inode->i_size == AFFS_I(inode)->mmu_private)
- return;
+ goto out;
 
         // lock cache
         ext_bh = affs_get_extblock(inode, ext);
         if (IS_ERR(ext_bh)) {
                 affs_warning(sb, "truncate", "unexpected read error for ext block %u (%d)",
                              ext, PTR_ERR(ext_bh));
- return;
+ goto out;
         }
         if (AFFS_I(inode)->i_lc) {
                 /* clear linear cache */
@@ -910,7 +911,7 @@ affs_truncate(struct inode *inode)
                         if (IS_ERR(ext_bh)) {
                                 affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)",
                                              ext, PTR_ERR(ext_bh));
- return;
+ goto out;
                         }
                         tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next);
                         AFFS_DATA_HEAD(bh)->next = 0;
@@ -936,4 +937,6 @@ affs_truncate(struct inode *inode)
                 affs_brelse(ext_bh);
         }
         affs_free_prealloc(inode);
+out:
+ unlock_kernel();
 }
--- 2.5.42/fs/cifs/inode.c~truncate-bkl Sun Oct 13 21:05:31 2002
+++ 2.5.42-akpm/fs/cifs/inode.c Sun Oct 13 21:05:55 2002
@@ -20,6 +20,7 @@
  */
 #include <linux/fs.h>
 #include <linux/stat.h>
+#include <linux/smp_lock.h>
 #include <asm/div64.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
@@ -517,6 +518,8 @@ cifs_truncate_file(struct inode *inode)
         struct dentry *dirent;
         char *full_path = NULL;
 
+ lock_kernel();
+
         xid = GetXid();
 
         cifs_sb = CIFS_SB(inode->i_sb);
@@ -527,7 +530,7 @@ cifs_truncate_file(struct inode *inode)
                        ("Can not get pathname from empty dentry in inode 0x%p ",
                         inode));
                 FreeXid(xid);
- return;
+ goto out;
         }
         dirent = list_entry(inode->i_dentry.next, struct dentry, d_alias);
         if (dirent) {
@@ -556,6 +559,8 @@ cifs_truncate_file(struct inode *inode)
         if (full_path)
                 kfree(full_path);
         FreeXid(xid);
+out:
+ unlock_kernel();
         return;
 }
 
--- 2.5.42/fs/jfs/file.c~truncate-bkl Sun Oct 13 21:11:06 2002
+++ 2.5.42-akpm/fs/jfs/file.c Sun Oct 13 21:11:11 2002
@@ -18,6 +18,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/smp_lock.h>
 #include "jfs_incore.h"
 #include "jfs_dmap.h"
 #include "jfs_txnmgr.h"
@@ -90,9 +91,11 @@ static void jfs_truncate(struct inode *i
 {
         jFYI(1, ("jfs_truncate: size = 0x%lx\n", (ulong) ip->i_size));
 
+ lock_kernel();
         IWRITE_LOCK(ip);
         jfs_truncate_nolock(ip, ip->i_size);
         IWRITE_UNLOCK(ip);
+ unlock_kernel();
 }
 
 static int jfs_open(struct inode *inode, struct file *file)
--- 2.5.42/fs/reiserfs/file.c~truncate-bkl Sun Oct 13 21:15:53 2002
+++ 2.5.42-akpm/fs/reiserfs/file.c Sun Oct 13 21:16:17 2002
@@ -66,7 +66,9 @@ static int reiserfs_file_release (struct
 }
 
 static void reiserfs_vfs_truncate_file(struct inode *inode) {
+ lock_kernel() ;
     reiserfs_truncate_file(inode, 1) ;
+ unlock_kernel() ;
 }
 
 /* Sync a reiserfs file. */
--- 2.5.42/fs/smbfs/proc.c~truncate-bkl Sun Oct 13 21:18:13 2002
+++ 2.5.42-akpm/fs/smbfs/proc.c Sun Oct 13 21:19:41 2002
@@ -1740,12 +1740,17 @@ out:
 static int
 smb_proc_trunc32(struct inode *inode, loff_t length)
 {
+ int ret;
+
         /*
          * Writing 0bytes is old-SMB magic for truncating files.
          * MAX_NON_LFS should prevent this from being called with a too
          * large offset.
          */
- return smb_proc_write(inode, length, 0, NULL);
+ lock_kernel();
+ ret = smb_proc_write(inode, length, 0, NULL);
+ unlock_kernel();
+ return ret;
 }
 
 static int
@@ -1757,6 +1762,7 @@ smb_proc_trunc64(struct inode *inode, lo
         char *data;
         struct smb_request *req;
 
+ lock_kernel();
         result = -ENOMEM;
         if (! (req = smb_alloc_request(server, 14)))
                 goto out;
@@ -1787,14 +1793,19 @@ smb_proc_trunc64(struct inode *inode, lo
 out_free:
         smb_rput(req);
 out:
+ unlock_kernel();
         return result;
 }
 
 static int
 smb_proc_trunc95(struct inode *inode, loff_t length)
 {
- struct smb_sb_info *server = server_from_inode(inode);
- int result = smb_proc_trunc32(inode, length);
+ struct smb_sb_info *server;
+ int result;
+
+ lock_kernel();
+ server = server_from_inode(inode);
+ result = smb_proc_trunc32(inode, length);
  
         /*
          * win9x doesn't appear to update the size immediately.
@@ -1804,6 +1815,7 @@ smb_proc_trunc95(struct inode *inode, lo
          * FIXME: is this still necessary?
          */
         smb_proc_flush(server, SMB_I(inode)->fileid);
+ unlock_kernel();
         return result;
 }
 
--- 2.5.42/fs/sysv/itree.c~truncate-bkl Sun Oct 13 21:20:07 2002
+++ 2.5.42-akpm/fs/sysv/itree.c Sun Oct 13 21:20:43 2002
@@ -373,6 +373,8 @@ void sysv_truncate (struct inode * inode
             S_ISLNK(inode->i_mode)))
                 return;
 
+ lock_kernel();
+
         blocksize = inode->i_sb->s_blocksize;
         iblock = (inode->i_size + blocksize-1)
>> inode->i_sb->s_blocksize_bits;
@@ -381,7 +383,7 @@ void sysv_truncate (struct inode * inode
 
         n = block_to_path(inode, iblock, offsets);
         if (n == 0)
- return;
+ goto out;
 
         if (n == 1) {
                 free_data(inode, i_data+offsets[0], i_data + DIRECT);
@@ -421,6 +423,8 @@ do_indirects:
                 sysv_sync_inode (inode);
         else
                 mark_inode_dirty(inode);
+out:
+ unlock_kernel();
 }
 
 static unsigned sysv_nblocks(struct super_block *s, loff_t size)
--- 2.5.42/fs/xfs/linux/xfs_iops.c~truncate-bkl Sun Oct 13 21:22:15 2002
+++ 2.5.42-akpm/fs/xfs/linux/xfs_iops.c Sun Oct 13 21:23:26 2002
@@ -32,6 +32,7 @@
 
 #include <xfs.h>
 #include <linux/xattr.h>
+#include <linux/smp_lock.h>
 
 
 /*
@@ -482,6 +483,8 @@ linvfs_setattr(
         int error;
         int flags = 0;
 
+ lock_kernel();
+
         memset(&vattr, 0, sizeof(vattr_t));
         if (ia_valid & ATTR_UID) {
                 vattr.va_mask |= AT_UID;
@@ -521,8 +524,10 @@ linvfs_setattr(
                 flags = ATTR_UTIME;
 
         VOP_SETATTR(vp, &vattr, flags, NULL, error);
- if (error)
- return(-error); /* Positive error up from XFS */
+ if (error) {
+ error = -error; /* Positive error up from XFS */
+ goto out;
+ }
         if (ia_valid & ATTR_SIZE) {
                 error = vmtruncate(inode, attr->ia_size);
         }
@@ -531,6 +536,8 @@ linvfs_setattr(
                 vn_revalidate(vp, 0);
                 mark_inode_dirty_sync(inode);
         }
+out:
+ unlock_kernel();
         return error;
 }
 

.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Tue Oct 15 2002 - 22:00:48 EST