[PATCH] zram: fix race between reset and mount/mkswap

From: Ganesh Mahendran
Date: Sat Jan 31 2015 - 03:27:24 EST


Currently there is a racy between reset and mount/mkswap, so that it could
make oops when umount a corropted filesystem.

This issue can be reproduced by adding delay between bdput() and zram_reset_device
reset_store(...) {
bdput(bdev);

msleep(2000); // test code

zram_reset_device(zram, true);
}

Steps:

$ echo 1 > /sys/block/zram0/reset &
$ mount /dev/zram0 /mnt/
$ umount /mnt
BUG: failure at fs/buffer.c:3006/_submit_bh()!
Kernel panic - not syncing: BUG!
CPU: 0 PID: 726 Comm: umount Not tainted 3.19.0-rc6+ #32
Hardware name: linux,dummy-virt (DT)
Call trace:
[<ffffffc00008a020>] dump_backtrace+0x0/0x124
[<ffffffc00008a154>] show_stack+0x10/0x1c
[<ffffffc000559514>] dump_stack+0x80/0xc4
[<ffffffc0005587d8>] panic+0xe0/0x220
[<ffffffc0001c7fd8>] _submit_bh+0x18c/0x1e0
[<ffffffc0001c9c8c>] __sync_dirty_buffer+0x6c/0xfc
[<ffffffc0001c9d28>] sync_dirty_buffer+0xc/0x18
[<ffffffc0002205bc>] ext2_sync_super+0xa8/0xbc
[<ffffffc000220628>] ext2_sync_fs+0x58/0x70
[<ffffffc0001c3ab0>] sync_filesystem+0x80/0xb0
[<ffffffc00019a294>] generic_shutdown_super+0x2c/0xd8
[<ffffffc00019a64c>] kill_block_super+0x1c/0x70
[<ffffffc00019a95c>] deactivate_locked_super+0x54/0x84
[<ffffffc00019ae5c>] deactivate_super+0x8c/0x9c
[<ffffffc0001b5fd0>] cleanup_mnt+0x38/0x84
[<ffffffc0001b6074>] __cleanup_mnt+0xc/0x18
[<ffffffc0000cc5bc>] task_work_run+0x94/0xec
[<ffffffc000089d54>] do_notify_resume+0x54/0x68
---[ end Kernel panic - not syncing: BUG!

The problem is caused by:

CPU0 CPU1
t1: bdput
t2: mount /dev/zram0 /mnt
t3: zram_reset_device

At time 3: the mounted filesystem will be corrputed by CPU0, oops will happen
when admin umounts /mnt or reset linux system.

This patch uses bdev->bd_mutex to prevent concurrent visit of /dev/zram0.

Signed-off-by: Ganesh Mahendran <opensource.ganesh@xxxxxxxxx>
Cc: Nitin Gupta <ngupta@xxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
---
drivers/block/zram/zram_drv.c | 79 ++++++++++++++++++++++-------------------
1 file changed, 42 insertions(+), 37 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index aa5a4c5..2b6b0dc 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -717,17 +717,35 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
}

-static void zram_reset_device(struct zram *zram, bool reset_capacity)
+static int zram_reset_device(struct zram *zram, bool reset_capacity)
{
- down_write(&zram->init_lock);
+ int ret;
+ struct block_device *bdev;

- zram->limit_pages = 0;
+ bdev = bdget_disk(zram->disk, 0);
+ if (!bdev)
+ return -ENOMEM;
+
+ mutex_lock(&bdev->bd_mutex);
+
+ /* Do not reset an active device! */
+ if (bdev->bd_holders) {
+ ret = -EBUSY;
+ goto err;
+ }
+
+ /* Make sure all pending I/O is finished */
+ fsync_bdev(bdev);
+
+ down_write(&zram->init_lock);

if (!init_done(zram)) {
- up_write(&zram->init_lock);
- return;
+ ret = -EIO;
+ goto err_init_done;
}

+ zram->limit_pages = 0;
+
zcomp_destroy(zram->comp);
zram->max_comp_streams = 1;
zram_meta_free(zram->meta, zram->disksize);
@@ -740,6 +758,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
set_capacity(zram->disk, 0);

up_write(&zram->init_lock);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);

/*
* Revalidate disk out of the init_lock to avoid lockdep splat.
@@ -748,6 +768,16 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
*/
if (reset_capacity)
revalidate_disk(zram->disk);
+
+ return 0;
+
+err_init_done:
+ up_write(&zram->init_lock);
+err:
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+
+ return ret;
}

static ssize_t disksize_store(struct device *dev,
@@ -811,40 +841,19 @@ static ssize_t reset_store(struct device *dev,
{
int ret;
unsigned short do_reset;
- struct zram *zram;
- struct block_device *bdev;
-
- zram = dev_to_zram(dev);
- bdev = bdget_disk(zram->disk, 0);
-
- if (!bdev)
- return -ENOMEM;
-
- /* Do not reset an active device! */
- if (bdev->bd_holders) {
- ret = -EBUSY;
- goto out;
- }

ret = kstrtou16(buf, 10, &do_reset);
if (ret)
- goto out;
+ return ret;

- if (!do_reset) {
- ret = -EINVAL;
- goto out;
- }
+ if (!do_reset)
+ return -EINVAL;

- /* Make sure all pending I/O is finished */
- fsync_bdev(bdev);
- bdput(bdev);
+ ret = zram_reset_device(dev_to_zram(dev), true);
+ if (ret)
+ return ret;

- zram_reset_device(zram, true);
return len;
-
-out:
- bdput(bdev);
- return ret;
}

static void __zram_make_request(struct zram *zram, struct bio *bio)
@@ -1183,12 +1192,8 @@ static void __exit zram_exit(void)
for (i = 0; i < num_devices; i++) {
zram = &zram_devices[i];

- destroy_device(zram);
- /*
- * Shouldn't access zram->disk after destroy_device
- * because destroy_device already released zram->disk.
- */
zram_reset_device(zram, false);
+ destroy_device(zram);
}

unregister_blkdev(zram_major, "zram");
--
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/