[RFC 6/7] zram: write incompressible pages to backing device

From: Minchan Kim
Date: Mon Jun 12 2017 - 01:04:15 EST


This patch enables write IO to transfer data to the backing device.
For that, it implements the write_to_bdev function, which creates a
new bio and chains it to the parent bio so that the parent bio
completes asynchronously.
For rw_page, which doesn't have a parent bio, it submits its own bio
and handles IO completion in zram_page_end_io.

Also, this patch defines a new flag, ZRAM_WB, to mark pages that have
been written to the backing device, for use by later read IO.

Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
---
drivers/block/zram/zram_drv.c | 108 ++++++++++++++++++++++++++++++++++++++----
drivers/block/zram/zram_drv.h | 1 +
2 files changed, 99 insertions(+), 10 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index f5924ef..9b0db9b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -468,9 +468,75 @@ static void put_entry_bdev(struct zram *zram, unsigned long entry)
WARN_ON_ONCE(!was_set);
}

+/*
+ * Completion handler for a bio that write_to_bdev() submitted on its own,
+ * i.e. when there was no parent bio to chain to.
+ *
+ * Ends IO on the first page of the bio, passing along whether the bio was
+ * a write and its bi_error status, then drops the bio reference.
+ *
+ * Only referenced from within this file, hence static.
+ */
+static void zram_page_end_io(struct bio *bio)
+{
+	struct page *page = bio->bi_io_vec[0].bv_page;
+
+	page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
+	bio_put(bio);
+}
+
+/*
+ * Write the page described by @bvec out to the backing device.
+ *
+ * Allocates a single-vector bio and a backing-device entry, points the
+ * bio at that entry's sector and submits it.  With a @parent bio the new
+ * bio inherits the parent's bi_opf and is chained to it; without one it
+ * is issued as a REQ_SYNC write completed by zram_page_end_io().
+ * @index is currently unused.
+ *
+ * Returns 0 with the entry stored in *@pentry once the bio has been
+ * submitted, -ENOMEM if no bio could be allocated, -ENOSPC if
+ * get_entry_bdev() handed back no entry, or -EIO if the page could not
+ * be added to the bio.
+ */
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+			u32 index, struct bio *parent,
+			unsigned long *pentry)
+{
+	unsigned long slot;
+	struct bio *wb_bio = bio_alloc(GFP_ATOMIC, 1);
+
+	if (!wb_bio)
+		return -ENOMEM;
+
+	slot = get_entry_bdev(zram);
+	if (!slot) {
+		bio_put(wb_bio);
+		return -ENOSPC;
+	}
+
+	/* Entry number scaled by the count of 512-byte units in one page. */
+	wb_bio->bi_iter.bi_sector = slot * (PAGE_SIZE >> 9);
+	wb_bio->bi_bdev = zram->bdev;
+	if (bio_add_page(wb_bio, bvec->bv_page, bvec->bv_len,
+			 bvec->bv_offset) == 0) {
+		bio_put(wb_bio);
+		put_entry_bdev(zram, slot);
+		return -EIO;
+	}
+
+	if (parent) {
+		/* Completion is reported through the parent bio. */
+		wb_bio->bi_opf = parent->bi_opf;
+		bio_chain(wb_bio, parent);
+	} else {
+		/* No parent: complete the page directly in our own handler. */
+		wb_bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
+		wb_bio->bi_end_io = zram_page_end_io;
+	}
+
+	submit_bio(wb_bio);
+	*pentry = slot;
+
+	return 0;
+}
+
+/*
+ * Undo the written-back state of slot @index: clear its ZRAM_WB flag,
+ * reset its element to 0 and hand the entry that element held to
+ * put_entry_bdev().
+ */
+static void zram_wb_clear(struct zram *zram, u32 index)
+{
+	unsigned long bdev_entry;
+
+	zram_clear_flag(zram, index, ZRAM_WB);
+
+	/* Fetch the entry before the element is overwritten below. */
+	bdev_entry = zram_get_element(zram, index);
+	zram_set_element(zram, index, 0);
+
+	put_entry_bdev(zram, bdev_entry);
+}
+
#else
static bool zram_wb_enabled(struct zram *zram) { return false; }
static void reset_bdev(struct zram *zram) {};
+/*
+ * Stub for the #else branch (presumably writeback support compiled
+ * out -- confirm against the matching #if): nothing can be written,
+ * so always fail with -EIO.
+ */
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+			u32 index, struct bio *parent,
+			unsigned long *pentry)
+{
+	return -EIO;
+}
+/* No-op variant of zram_wb_clear() for the #else branch. */
+static void zram_wb_clear(struct zram *zram, u32 index)
+{
+}
#endif


@@ -789,7 +855,15 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
*/
static void zram_free_page(struct zram *zram, size_t index)
{
- struct zram_entry *entry = zram_get_entry(zram, index);
+ struct zram_entry *uninitialized_var(entry);
+
+ if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
+ zram_wb_clear(zram, index);
+ atomic64_dec(&zram->stats.pages_stored);
+ return;
+ }
+
+ entry = zram_get_entry(zram, index);

/*
* No memory is allocated for same element filled pages.
@@ -895,7 +969,8 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
return ret;
}

-static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
+static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *bio)
{
int ret = 0;
struct zram_entry *uninitialized_var(entry);
@@ -907,6 +982,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
enum zram_pageflags flags = 0;
unsigned long uninitialized_var(element);
unsigned long alloced_pages;
+ bool allow_wb = true;

mem = kmap_atomic(page);
if (page_same_filled(mem, &element)) {
@@ -940,8 +1016,20 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
return ret;
}

- if (unlikely(comp_len > max_zpage_size))
+ if (unlikely(comp_len > max_zpage_size)) {
+ if (zram_wb_enabled(zram) && allow_wb) {
+ zcomp_stream_put(zram->comp);
+ ret = write_to_bdev(zram, bvec, index, bio, &element);
+ if (!ret) {
+ flags = ZRAM_WB;
+ ret = 1;
+ goto out;
+ }
+ allow_wb = false;
+ goto compress_again;
+ }
comp_len = PAGE_SIZE;
+ }

/*
* entry allocation has 2 paths:
@@ -1005,7 +1093,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
zram_free_page(zram, index);
if (flags)
zram_set_flag(zram, index, flags);
- if (flags != ZRAM_SAME) {
+ if (flags != ZRAM_SAME && flags != ZRAM_WB) {
zram_set_obj_size(zram, index, comp_len);
zram_set_entry(zram, index, entry);
} else {
@@ -1018,7 +1106,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page = NULL;
@@ -1051,7 +1139,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
vec.bv_offset = 0;
}

- ret = __zram_bvec_write(zram, &vec, index);
+ ret = __zram_bvec_write(zram, &vec, index, bio);
out:
if (is_partial_io(bvec))
__free_page(page);
@@ -1102,7 +1190,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
* Returns 1 if IO request was successfully submitted.
*/
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, bool is_write)
+ int offset, bool is_write, struct bio *bio)
{
unsigned long start_time = jiffies;
int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
@@ -1117,7 +1205,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
flush_dcache_page(bvec->bv_page);
} else {
atomic64_inc(&zram->stats.num_writes);
- ret = zram_bvec_write(zram, bvec, index, offset);
+ ret = zram_bvec_write(zram, bvec, index, offset, bio);
}

generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
@@ -1161,7 +1249,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
unwritten);
if (zram_bvec_rw(zram, &bv, index, offset,
- op_is_write(bio_op(bio))) < 0)
+ op_is_write(bio_op(bio)), bio) < 0)
goto out;

bv.bv_offset += bv.bv_len;
@@ -1235,7 +1323,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;

- ret = zram_bvec_rw(zram, &bv, index, offset, is_write);
+ ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 8ae3b3f..98fb07c 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -66,6 +66,7 @@ enum zram_pageflags {
ZRAM_SAME = ZRAM_FLAG_SHIFT,
ZRAM_DUP,
ZRAM_ACCESS, /* page is now accessed */
+ ZRAM_WB, /* page is stored on backing_device */

__NR_ZRAM_PAGEFLAGS,
};
--
2.7.4