[patch 53/71] xfs: fix freeing of inodes not yet added to the inode cache

From: Greg KH
Date: Fri Sep 04 2009 - 20:26:38 EST


2.6.30-stable review patch. If anyone has any objections, please let us know.

------------------
From: Christoph Hellwig <hch@xxxxxxxxxxxxx>

upstream commit b36ec0428a06fcbdb67d61e9e664154e5dd9a8c7

When freeing an inode that lost race getting added to the inode cache we
must not call into ->destroy_inode, because that would delete the inode
that won the race from the inode cache radix tree.

This patch uses splits a new xfs_inode_free helper out of xfs_ireclaim
and uses that plus __destroy_inode to make sure we really only free
the memory allocted for the inode that lost the race, and not mess with
the inode cache state.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
Reviewed-by: Eric Sandeen <sandeen@xxxxxxxxxxx>
Reported-by: Alex Samad <alex@xxxxxxxxxxxx>
Reported-by: Andrew Randrianasulu <randrik@xxxxxxx>
Reported-by: Stephane <sharnois@xxxxxxxxx>
Reported-by: Tommy <tommy@xxxxxxxxxxxxxxxx>
Reported-by: Miah Gregory <mace@xxxxxxxxxxxxxxx>
Reported-by: Gabriel Barazer <gabriel@xxxxxxxx>
Reported-by: Leandro Lucarella <llucax@xxxxxxxxx>
Reported-by: Daniel Burr <dburr@xxxxxxxxxxx>
Reported-by: Nickolay <newmail@xxxxxxxxx>
Reported-by: Michael Guntsche <mike@xxxxxxxxxxxx>
Reported-by: Dan Carley <dan.carley+linuxkern-bugs@xxxxxxxxx>
Reported-by: Michael Ole Olsen <gnu@xxxxxxx>
Reported-by: Michael Weissenbacher <mw@xxxxxxxxxxxx>
Reported-by: Martin Spott <Martin.Spott@xxxxxxxxx>
Reported-by: Christian Kujau <lists@xxxxxxxxxxxxxxx>
Tested-by: Michael Guntsche <mike@xxxxxxxxxxxx>
Tested-by: Dan Carley <dan.carley+linuxkern-bugs@xxxxxxxxx>
Tested-by: Christian Kujau <lists@xxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxx>

---
fs/xfs/xfs_iget.c | 125 ++++++++++++++++++++++++++++-------------------------
fs/xfs/xfs_inode.h | 17 -------
2 files changed, 68 insertions(+), 74 deletions(-)

--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -115,6 +115,71 @@ xfs_inode_alloc(
return ip;
}

+STATIC void
+xfs_inode_free(
+ struct xfs_inode *ip)
+{
+ switch (ip->i_d.di_mode & S_IFMT) {
+ case S_IFREG:
+ case S_IFDIR:
+ case S_IFLNK:
+ xfs_idestroy_fork(ip, XFS_DATA_FORK);
+ break;
+ }
+
+ if (ip->i_afp)
+ xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+
+#ifdef XFS_INODE_TRACE
+ ktrace_free(ip->i_trace);
+#endif
+#ifdef XFS_BMAP_TRACE
+ ktrace_free(ip->i_xtrace);
+#endif
+#ifdef XFS_BTREE_TRACE
+ ktrace_free(ip->i_btrace);
+#endif
+#ifdef XFS_RW_TRACE
+ ktrace_free(ip->i_rwtrace);
+#endif
+#ifdef XFS_ILOCK_TRACE
+ ktrace_free(ip->i_lock_trace);
+#endif
+#ifdef XFS_DIR2_TRACE
+ ktrace_free(ip->i_dir_trace);
+#endif
+
+ if (ip->i_itemp) {
+ /*
+ * Only if we are shutting down the fs will we see an
+ * inode still in the AIL. If it is there, we should remove
+ * it to prevent a use-after-free from occurring.
+ */
+ xfs_log_item_t *lip = &ip->i_itemp->ili_item;
+ struct xfs_ail *ailp = lip->li_ailp;
+
+ ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
+ XFS_FORCED_SHUTDOWN(ip->i_mount));
+ if (lip->li_flags & XFS_LI_IN_AIL) {
+ spin_lock(&ailp->xa_lock);
+ if (lip->li_flags & XFS_LI_IN_AIL)
+ xfs_trans_ail_delete(ailp, lip);
+ else
+ spin_unlock(&ailp->xa_lock);
+ }
+ xfs_inode_item_destroy(ip);
+ ip->i_itemp = NULL;
+ }
+
+ /* asserts to verify all state is correct here */
+ ASSERT(atomic_read(&ip->i_iocount) == 0);
+ ASSERT(atomic_read(&ip->i_pincount) == 0);
+ ASSERT(!spin_is_locked(&ip->i_flags_lock));
+ ASSERT(completion_done(&ip->i_flush));
+
+ kmem_zone_free(xfs_inode_zone, ip);
+}
+
/*
* Check the validity of the inode we just found it the cache
*/
@@ -291,7 +356,8 @@ out_preload_end:
if (lock_flags)
xfs_iunlock(ip, lock_flags);
out_destroy:
- xfs_destroy_inode(ip);
+ __destroy_inode(VFS_I(ip));
+ xfs_inode_free(ip);
return error;
}

@@ -499,62 +565,7 @@ xfs_ireclaim(
XFS_QM_DQDETACH(ip->i_mount, ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

- switch (ip->i_d.di_mode & S_IFMT) {
- case S_IFREG:
- case S_IFDIR:
- case S_IFLNK:
- xfs_idestroy_fork(ip, XFS_DATA_FORK);
- break;
- }
-
- if (ip->i_afp)
- xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
-#ifdef XFS_INODE_TRACE
- ktrace_free(ip->i_trace);
-#endif
-#ifdef XFS_BMAP_TRACE
- ktrace_free(ip->i_xtrace);
-#endif
-#ifdef XFS_BTREE_TRACE
- ktrace_free(ip->i_btrace);
-#endif
-#ifdef XFS_RW_TRACE
- ktrace_free(ip->i_rwtrace);
-#endif
-#ifdef XFS_ILOCK_TRACE
- ktrace_free(ip->i_lock_trace);
-#endif
-#ifdef XFS_DIR2_TRACE
- ktrace_free(ip->i_dir_trace);
-#endif
- if (ip->i_itemp) {
- /*
- * Only if we are shutting down the fs will we see an
- * inode still in the AIL. If it is there, we should remove
- * it to prevent a use-after-free from occurring.
- */
- xfs_log_item_t *lip = &ip->i_itemp->ili_item;
- struct xfs_ail *ailp = lip->li_ailp;
-
- ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
- XFS_FORCED_SHUTDOWN(ip->i_mount));
- if (lip->li_flags & XFS_LI_IN_AIL) {
- spin_lock(&ailp->xa_lock);
- if (lip->li_flags & XFS_LI_IN_AIL)
- xfs_trans_ail_delete(ailp, lip);
- else
- spin_unlock(&ailp->xa_lock);
- }
- xfs_inode_item_destroy(ip);
- ip->i_itemp = NULL;
- }
- /* asserts to verify all state is correct here */
- ASSERT(atomic_read(&ip->i_iocount) == 0);
- ASSERT(atomic_read(&ip->i_pincount) == 0);
- ASSERT(!spin_is_locked(&ip->i_flags_lock));
- ASSERT(completion_done(&ip->i_flush));
- kmem_zone_free(xfs_inode_zone, ip);
+ xfs_inode_free(ip);
}

/*
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -309,23 +309,6 @@ static inline struct inode *VFS_I(struct
}

/*
- * Get rid of a partially initialized inode.
- *
- * We have to go through destroy_inode to make sure allocations
- * from init_inode_always like the security data are undone.
- *
- * We mark the inode bad so that it takes the short cut in
- * the reclaim path instead of going through the flush path
- * which doesn't make sense for an inode that has never seen the
- * light of day.
- */
-static inline void xfs_destroy_inode(struct xfs_inode *ip)
-{
- make_bad_inode(VFS_I(ip));
- return destroy_inode(VFS_I(ip));
-}
-
-/*
* i_flags helper functions
*/
static inline void


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/