Re: [PATCH] lockfs patch for 2.6

From: Christoph Hellwig
Date: Fri Apr 02 2004 - 15:04:59 EST


On Fri, Apr 02, 2004 at 02:00:18PM -0600, Kevin Corry wrote:
> > Christoph's vfs patch looks good, I've stripped out the XFS bits (FS
> > parts should probably be in different patches) and made one small
> > change. freeze/thaw now check to make sure bdev != NULL.
>
> Does this mean there are patches required for XFS to work properly with
> this new VFS-lock patch? I'm getting hangs when suspending a DM device
> that contains an XFS filesystem with active I/O. Ext3, Reiser, and JFS
> seem to behave as expected.

Yes. Here's the patch I sent out earlier (xfs bits + common code):

--- 1.155/fs/block_dev.c Fri Mar 12 10:33:01 2004
+++ edited/fs/block_dev.c Sat Mar 13 15:28:49 2004
@@ -251,6 +251,7 @@
{
memset(bdev, 0, sizeof(*bdev));
sema_init(&bdev->bd_sem, 1);
+ sema_init(&bdev->bd_mount_sem, 1);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
inode_init_once(&ei->vfs_inode);
===== fs/buffer.c 1.224 vs edited =====
--- 1.224/fs/buffer.c Sun Mar 7 08:16:11 2004
+++ edited/fs/buffer.c Sun Mar 14 15:37:00 2004
@@ -260,6 +260,69 @@
}

/*
+ * triggered by the device mapper code to lock a filesystem and force
+ * it into a consistent state.
+ *
+ * This takes the block device bd_mount_sem to make sure no new mounts
+ * happen on bdev until unlockfs is called. If a super is found on this
+ * block device, we hold a read lock on the s->s_umount sem to make sure
+ * nobody unmounts until the snapshot creation is done
+ */
+struct super_block *freeze_bdev(struct block_device *bdev)
+{
+ struct super_block *sb;
+
+ down(&bdev->bd_mount_sem);
+ sb = get_super(bdev);
+ if (sb && !(sb->s_flags & MS_RDONLY)) {
+ sb->s_frozen = SB_FREEZE_WRITE;
+ wmb();
+
+ sync_inodes_sb(sb, 0);
+ DQUOT_SYNC(sb);
+
+ sb->s_frozen = SB_FREEZE_TRANS;
+ wmb();
+
+ lock_super(sb);
+ if (sb->s_dirt && sb->s_op->write_super)
+ sb->s_op->write_super(sb);
+ unlock_super(sb);
+
+ if (sb->s_op->sync_fs)
+ sb->s_op->sync_fs(sb, 1);
+
+ sync_blockdev(sb->s_bdev);
+ sync_inodes_sb(sb, 1);
+ sync_blockdev(sb->s_bdev);
+
+ if (sb->s_op->write_super_lockfs)
+ sb->s_op->write_super_lockfs(sb);
+ }
+
+ sync_blockdev(bdev);
+ return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */
+}
+EXPORT_SYMBOL(freeze_bdev);
+
+void thaw_bdev(struct block_device *bdev, struct super_block *sb)
+{
+ if (sb) {
+ BUG_ON(sb->s_bdev != bdev);
+
+ if (sb->s_op->unlockfs)
+ sb->s_op->unlockfs(sb);
+ sb->s_frozen = SB_UNFROZEN;
+ wmb();
+ wake_up(&sb->s_wait_unfrozen);
+ drop_super(sb);
+ }
+
+ up(&bdev->bd_mount_sem);
+}
+EXPORT_SYMBOL(thaw_bdev);
+
+/*
* sync everything. Start out by waking pdflush, because that writes back
* all queues in parallel.
*/
===== fs/super.c 1.115 vs edited =====
--- 1.115/fs/super.c Fri Mar 12 10:30:24 2004
+++ edited/fs/super.c Sun Mar 14 14:24:56 2004
@@ -77,6 +77,7 @@
sema_init(&s->s_dquot.dqio_sem, 1);
sema_init(&s->s_dquot.dqonoff_sem, 1);
init_rwsem(&s->s_dquot.dqptr_sem);
+ init_waitqueue_head(&s->s_wait_unfrozen);
s->s_maxbytes = MAX_NON_LFS;
s->dq_op = sb_dquot_ops;
s->s_qcop = sb_quotactl_ops;
@@ -621,7 +622,14 @@
if (IS_ERR(bdev))
return (struct super_block *)bdev;

+ /*
+ * once the super is inserted into the list by sget, s_umount
+ * will protect the lockfs code from trying to start a snapshot
+ * while we are mounting
+ */
+ down(&bdev->bd_mount_sem);
s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
+ up(&bdev->bd_mount_sem);
if (IS_ERR(s))
goto out;

===== fs/xfs/xfs_fsops.c 1.11 vs edited =====
--- 1.11/fs/xfs/xfs_fsops.c Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/xfs_fsops.c Sun Mar 14 14:03:01 2004
@@ -582,63 +582,25 @@
}

int
-xfs_fs_freeze(
- xfs_mount_t *mp)
-{
- vfs_t *vfsp;
- /*REFERENCED*/
- int error;
-
- vfsp = XFS_MTOVFS(mp);
-
- /* Stop new writers */
- xfs_start_freeze(mp, XFS_FREEZE_WRITE);
-
- /* Flush the refcache */
- xfs_refcache_purge_mp(mp);
-
- /* Flush delalloc and delwri data */
- VFS_SYNC(vfsp, SYNC_DELWRI|SYNC_WAIT, NULL, error);
-
- /* Pause transaction subsystem */
- xfs_start_freeze(mp, XFS_FREEZE_TRANS);
-
- /* Flush any remaining inodes into buffers */
- VFS_SYNC(vfsp, SYNC_ATTR|SYNC_WAIT, NULL, error);
-
- /* Push all buffers out to disk */
- xfs_binval(mp->m_ddev_targp);
- if (mp->m_rtdev_targp) {
- xfs_binval(mp->m_rtdev_targp);
- }
-
- /* Push the superblock and write an unmount record */
- xfs_log_unmount_write(mp);
- xfs_unmountfs_writesb(mp);
-
- return 0;
-}
-
-int
-xfs_fs_thaw(
- xfs_mount_t *mp)
-{
- xfs_finish_freeze(mp);
- return 0;
-}
-
-int
xfs_fs_goingdown(
xfs_mount_t *mp,
__uint32_t inflags)
{
- switch (inflags)
- {
- case XFS_FSOP_GOING_FLAGS_DEFAULT:
- xfs_fs_freeze(mp);
- xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
- xfs_fs_thaw(mp);
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ switch (inflags) {
+ case XFS_FSOP_GOING_FLAGS_DEFAULT: {
+ struct vfs *vfsp = XFS_MTOVFS(mp);
+ struct super_block *sb = freeze_bdev(vfsp->vfs_super->s_bdev);
+
+ if (sb) {
+ xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
+ thaw_bdev(sb->s_bdev, sb);
+ }
+
break;
+ }
case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
break;
===== fs/xfs/xfs_fsops.h 1.3 vs edited =====
--- 1.3/fs/xfs/xfs_fsops.h Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/xfs_fsops.h Sun Mar 14 14:03:33 2004
@@ -60,14 +60,6 @@
xfs_fsop_resblks_t *outval);

int
-xfs_fs_freeze(
- xfs_mount_t *mp);
-
-int
-xfs_fs_thaw(
- xfs_mount_t *mp);
-
-int
xfs_fs_goingdown(
xfs_mount_t *mp,
__uint32_t inflags);
===== fs/xfs/xfs_log.c 1.34 vs edited =====
--- 1.34/fs/xfs/xfs_log.c Sat Mar 6 04:16:30 2004
+++ edited/fs/xfs/xfs_log.c Sun Mar 14 13:59:50 2004
@@ -820,7 +820,7 @@
xlog_t *log = mp->m_log;
vfs_t *vfsp = XFS_MTOVFS(mp);

- if (mp->m_frozen || XFS_FORCED_SHUTDOWN(mp) ||
+ if (vfsp->vfs_super->s_frozen || XFS_FORCED_SHUTDOWN(mp) ||
(vfsp->vfs_flag & VFS_RDONLY))
return 0;

===== fs/xfs/xfs_mount.c 1.41 vs edited =====
--- 1.41/fs/xfs/xfs_mount.c Fri Feb 27 07:54:47 2004
+++ edited/fs/xfs/xfs_mount.c Sun Mar 14 14:01:45 2004
@@ -139,9 +139,6 @@
*/
xfs_trans_ail_init(mp);

- /* Init freeze sync structures */
- spinlock_init(&mp->m_freeze_lock, "xfs_freeze");
- init_sv(&mp->m_wait_unfreeze, SV_DEFAULT, "xfs_freeze", 0);
atomic_set(&mp->m_active_trans, 0);

return mp;
@@ -191,8 +188,6 @@
VFS_REMOVEBHV(vfsp, &mp->m_bhv);
}

- spinlock_destroy(&mp->m_freeze_lock);
- sv_destroy(&mp->m_wait_unfreeze);
kmem_free(mp, sizeof(xfs_mount_t));
}

@@ -1584,60 +1579,4 @@
}
xfs_mod_sb(tp, fields);
xfs_trans_commit(tp, 0, NULL);
-}
-
-/* Functions to lock access out of the filesystem for forced
- * shutdown or snapshot.
- */
-
-void
-xfs_start_freeze(
- xfs_mount_t *mp,
- int level)
-{
- unsigned long s = mutex_spinlock(&mp->m_freeze_lock);
-
- mp->m_frozen = level;
- mutex_spinunlock(&mp->m_freeze_lock, s);
-
- if (level == XFS_FREEZE_TRANS) {
- while (atomic_read(&mp->m_active_trans) > 0)
- delay(100);
- }
-}
-
-void
-xfs_finish_freeze(
- xfs_mount_t *mp)
-{
- unsigned long s = mutex_spinlock(&mp->m_freeze_lock);
-
- if (mp->m_frozen) {
- mp->m_frozen = 0;
- sv_broadcast(&mp->m_wait_unfreeze);
- }
-
- mutex_spinunlock(&mp->m_freeze_lock, s);
-}
-
-void
-xfs_check_frozen(
- xfs_mount_t *mp,
- bhv_desc_t *bdp,
- int level)
-{
- unsigned long s;
-
- if (mp->m_frozen) {
- s = mutex_spinlock(&mp->m_freeze_lock);
-
- if (mp->m_frozen < level) {
- mutex_spinunlock(&mp->m_freeze_lock, s);
- } else {
- sv_wait(&mp->m_wait_unfreeze, 0, &mp->m_freeze_lock, s);
- }
- }
-
- if (level == XFS_FREEZE_TRANS)
- atomic_inc(&mp->m_active_trans);
}
===== fs/xfs/xfs_mount.h 1.25 vs edited =====
--- 1.25/fs/xfs/xfs_mount.h Wed Mar 3 05:52:57 2004
+++ edited/fs/xfs/xfs_mount.h Sun Mar 14 14:02:01 2004
@@ -379,10 +379,6 @@
struct xfs_dmops m_dm_ops; /* vector of DMI ops */
struct xfs_qmops m_qm_ops; /* vector of XQM ops */
struct xfs_ioops m_io_ops; /* vector of I/O ops */
- lock_t m_freeze_lock; /* Lock for m_frozen */
- uint m_frozen; /* FS frozen for shutdown or
- * snapshot */
- sv_t m_wait_unfreeze;/* waiting to unfreeze */
atomic_t m_active_trans; /* number trans frozen */
} xfs_mount_t;

@@ -557,16 +553,6 @@
extern void xfs_initialize_perag(xfs_mount_t *, int);
extern void xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t,
__int64_t);
-
-/*
- * Flags for freeze operations.
- */
-#define XFS_FREEZE_WRITE 1
-#define XFS_FREEZE_TRANS 2
-
-extern void xfs_start_freeze(xfs_mount_t *, int);
-extern void xfs_finish_freeze(xfs_mount_t *);
-extern void xfs_check_frozen(xfs_mount_t *, bhv_desc_t *, int);

extern struct vfsops xfs_vfsops;
extern struct vnodeops xfs_vnodeops;
===== fs/xfs/xfs_trans.c 1.14 vs edited =====
--- 1.14/fs/xfs/xfs_trans.c Fri Jan 9 00:18:09 2004
+++ edited/fs/xfs/xfs_trans.c Sun Mar 14 14:01:41 2004
@@ -131,7 +131,9 @@
xfs_mount_t *mp,
uint type)
{
- xfs_check_frozen(mp, NULL, XFS_FREEZE_TRANS);
+ vfs_check_frozen(XFS_MTOVFS(mp)->vfs_super, SB_FREEZE_TRANS);
+ atomic_inc(&mp->m_active_trans);
+
return (_xfs_trans_alloc(mp, type));

}
===== fs/xfs/xfs_vfsops.c 1.57 vs edited =====
--- 1.57/fs/xfs/xfs_vfsops.c Wed Mar 3 05:52:58 2004
+++ edited/fs/xfs/xfs_vfsops.c Sun Mar 14 14:42:21 2004
@@ -1544,6 +1544,11 @@
}
}

+ if (XFS_MTOVFS(mp)->vfs_super->s_frozen == SB_FREEZE_TRANS) {
+ while (atomic_read(&mp->m_active_trans) > 0)
+ delay(100);
+ }
+
return XFS_ERROR(last_error);
}

@@ -1853,6 +1858,17 @@
return 0;
}

+STATIC void
+xfs_freeze(
+ bhv_desc_t *bdp)
+{
+ xfs_mount_t *mp = XFS_BHVTOM(bdp);
+
+ /* Push the superblock and write an unmount record */
+ xfs_log_unmount_write(mp);
+ xfs_unmountfs_writesb(mp);
+}
+

vfsops_t xfs_vfsops = {
BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS),
@@ -1869,4 +1885,5 @@
.vfs_quotactl = (vfs_quotactl_t)fs_nosys,
.vfs_init_vnode = xfs_initialize_vnode,
.vfs_force_shutdown = xfs_do_force_shutdown,
+ .vfs_freeze = xfs_freeze,
};
===== fs/xfs/linux/xfs_ioctl.c 1.21 vs edited =====
--- 1.21/fs/xfs/linux/xfs_ioctl.c Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/linux/xfs_ioctl.c Sat Mar 13 18:23:23 2004
@@ -825,13 +825,14 @@
case XFS_IOC_FREEZE:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- xfs_fs_freeze(mp);
+
+ freeze_bdev(inode->i_sb->s_bdev);
return 0;

case XFS_IOC_THAW:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- xfs_fs_thaw(mp);
+ thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
return 0;

case XFS_IOC_GOINGDOWN: {
===== fs/xfs/linux/xfs_lrw.c 1.40 vs edited =====
--- 1.40/fs/xfs/linux/xfs_lrw.c Wed Mar 3 05:52:57 2004
+++ edited/fs/xfs/linux/xfs_lrw.c Sun Mar 14 14:47:00 2004
@@ -682,8 +682,6 @@
io = &xip->i_iocore;
mp = io->io_mount;

- xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE);
-
if (XFS_FORCED_SHUTDOWN(mp)) {
return -EIO;
}
===== fs/xfs/linux/xfs_super.c 1.70 vs edited =====
--- 1.70/fs/xfs/linux/xfs_super.c Sat Mar 6 04:46:51 2004
+++ edited/fs/xfs/linux/xfs_super.c Sun Mar 14 14:43:46 2004
@@ -589,28 +589,7 @@
linvfs_freeze_fs(
struct super_block *sb)
{
- vfs_t *vfsp = LINVFS_GET_VFS(sb);
- vnode_t *vp;
- int error;
-
- if (sb->s_flags & MS_RDONLY)
- return;
- VFS_ROOT(vfsp, &vp, error);
- VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_FREEZE, 0, error);
- VN_RELE(vp);
-}
-
-STATIC void
-linvfs_unfreeze_fs(
- struct super_block *sb)
-{
- vfs_t *vfsp = LINVFS_GET_VFS(sb);
- vnode_t *vp;
- int error;
-
- VFS_ROOT(vfsp, &vp, error);
- VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_THAW, 0, error);
- VN_RELE(vp);
+ VFS_FREEZE(LINVFS_GET_VFS(sb));
}

STATIC struct dentry *
@@ -850,7 +829,6 @@
.write_super = linvfs_write_super,
.sync_fs = linvfs_sync_super,
.write_super_lockfs = linvfs_freeze_fs,
- .unlockfs = linvfs_unfreeze_fs,
.statfs = linvfs_statfs,
.remount_fs = linvfs_remount,
.show_options = linvfs_show_options,
===== fs/xfs/linux/xfs_vfs.c 1.11 vs edited =====
--- 1.11/fs/xfs/linux/xfs_vfs.c Wed Mar 3 05:52:57 2004
+++ edited/fs/xfs/linux/xfs_vfs.c Sun Mar 14 14:43:15 2004
@@ -230,6 +230,18 @@
((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line));
}

+void
+vfs_freeze(
+ struct bhv_desc *bdp)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_freeze)
+ next = BHV_NEXT(next);
+ ((*bhvtovfsops(next)->vfs_freeze)(next));
+}
+
vfs_t *
vfs_allocate( void )
{
===== fs/xfs/linux/xfs_vfs.h 1.18 vs edited =====
--- 1.18/fs/xfs/linux/xfs_vfs.h Fri Jan 9 06:59:58 2004
+++ edited/fs/xfs/linux/xfs_vfs.h Sun Mar 14 14:43:07 2004
@@ -112,6 +112,7 @@
typedef void (*vfs_init_vnode_t)(bhv_desc_t *,
struct vnode *, bhv_desc_t *, int);
typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
+typedef void (*vfs_freeze_t)(bhv_desc_t *);

typedef struct vfsops {
bhv_position_t vf_position; /* behavior chain position */
@@ -128,6 +129,7 @@
vfs_quotactl_t vfs_quotactl; /* disk quota */
vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */
vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */
+ vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */
} vfsops_t;

/*
@@ -147,6 +149,7 @@
#define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p))
#define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) )
#define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) )
+#define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) )

/*
* PVFS's. Operates on behavior descriptor pointers.
@@ -164,6 +167,7 @@
#define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p))
#define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) )
#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) )
+#define PVFS_FREEZE(b) ( vfs_freeze(b) )

extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
@@ -178,6 +182,7 @@
extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
+extern void vfs_freeze(bhv_desc_t *);

typedef struct bhv_vfsops {
struct vfsops bhv_common;
===== include/linux/buffer_head.h 1.46 vs edited =====
--- 1.46/include/linux/buffer_head.h Tue Jan 20 00:38:11 2004
+++ edited/include/linux/buffer_head.h Sat Mar 13 15:35:32 2004
@@ -164,6 +164,8 @@
wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
void wake_up_buffer(struct buffer_head *bh);
int fsync_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *);
+void thaw_bdev(struct block_device *, struct super_block *);
int fsync_super(struct super_block *);
int fsync_no_super(struct block_device *);
struct buffer_head *__find_get_block(struct block_device *, sector_t, int);
===== include/linux/fs.h 1.290 vs edited =====
--- 1.290/include/linux/fs.h Fri Mar 12 10:32:59 2004
+++ edited/include/linux/fs.h Sun Mar 14 14:08:14 2004
@@ -344,6 +344,7 @@
struct inode * bd_inode; /* will die */
int bd_openers;
struct semaphore bd_sem; /* open/close mutex */
+ struct semaphore bd_mount_sem; /* mount mutex */
struct list_head bd_inodes;
void * bd_holder;
int bd_holders;
@@ -712,6 +713,9 @@
struct list_head s_instances;
struct quota_info s_dquot; /* Diskquota specific options */

+ int s_frozen;
+ wait_queue_head_t s_wait_unfrozen;
+
char s_id[32]; /* Informational name */

struct kobject kobj; /* anchor for sysfs */
@@ -723,6 +727,18 @@
*/
struct semaphore s_vfs_rename_sem; /* Kludge */
};
+
+/*
+ * Snapshotting support.
+ */
+enum {
+ SB_UNFROZEN = 0,
+ SB_FREEZE_WRITE = 1,
+ SB_FREEZE_TRANS = 2,
+};
+
+#define vfs_check_frozen(sb, level) \
+ wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))

/*
* Superblock locking.
===== mm/filemap.c 1.225 vs edited =====
--- 1.225/mm/filemap.c Mon Mar 8 15:21:17 2004
+++ edited/mm/filemap.c Sun Mar 14 14:16:40 2004
@@ -1746,6 +1746,8 @@
unsigned long seg;
char __user *buf;

+ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
ocount = 0;
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *iv = &iov[seg];
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/