[RFC] zram: support page-based parallel write

From: Minchan Kim
Date: Tue Sep 06 2016 - 03:24:29 EST


zram supports stream-based parallel compression. IOW, it can compress
in parallel on a multi-core system only if there are *several* streams
in the system, because each stream is compressed on its own CPU.

However, if there is only *a single* stream in the system, it cannot be
compressed in parallel even though the system has multiple CPUs.
This patch enables parallel compression using multiple CPUs even when
there is only a single stream in the system.
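
To illustrate the idea outside the kernel, here is a minimal userspace
sketch (pthreads; the names page_req, fake_compress and worker_fn are
made up for illustration and are not part of this patch) of splitting
one write stream into page-sized requests that several worker threads
compress concurrently:

/*
 * Toy model of page-based parallel write: a single producer splits its
 * buffer into page-sized requests and NR_WORKERS threads drain them.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SZ		4096
#define NR_PAGES	256
#define NR_WORKERS	4

struct page_req {
	const unsigned char *data;	/* one page of the single stream */
	size_t out_len;			/* "compressed" size (fake) */
};

static struct page_req reqs[NR_PAGES];
static int next_req;			/* next request to hand out */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* stand-in for a real compressor: just counts non-zero bytes */
static size_t fake_compress(const unsigned char *page)
{
	size_t i, n = 0;

	for (i = 0; i < PAGE_SZ; i++)
		n += !!page[i];
	return n;
}

static void *worker_fn(void *arg)
{
	int idx;

	for (;;) {
		pthread_mutex_lock(&lock);
		idx = next_req < NR_PAGES ? next_req++ : -1;
		pthread_mutex_unlock(&lock);
		if (idx < 0)
			break;
		reqs[idx].out_len = fake_compress(reqs[idx].data);
	}
	return NULL;
}

int main(void)
{
	static unsigned char stream[NR_PAGES * PAGE_SZ];
	pthread_t tid[NR_WORKERS];
	size_t total = 0;
	int i;

	memset(stream, 0xaa, sizeof(stream));

	/* one stream, split per page: this is what lets >1 CPU help */
	for (i = 0; i < NR_PAGES; i++)
		reqs[i].data = stream + (size_t)i * PAGE_SZ;

	for (i = 0; i < NR_WORKERS; i++)
		pthread_create(&tid[i], NULL, worker_fn, NULL);
	for (i = 0; i < NR_WORKERS; i++)
		pthread_join(tid[i], NULL);

	for (i = 0; i < NR_PAGES; i++)
		total += reqs[i].out_len;
	printf("compressed %d pages, total=%zu bytes\n", NR_PAGES, total);
	return 0;
}

The patch below implements the same pattern in the driver with kthreads
and a shared request list instead of pthreads.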

When I benchmarked it, random read, which is important for the
zram-swap case, got worse with the async path, so the patch supports
async-write only at the moment. I hope we can support async-read later.

It is useful for the single-stream scenario.

I tested on a 4-CPU ARM machine.

FIO job=1 benchmark result

Before:

seq-write: (groupid=0, jobs=1): err= 0: pid=2971: Tue Sep 6 08:14:06 2016
write: io=163840KB, bw=28239KB/s, iops=441, runt= 5802msec
rand-write: (groupid=1, jobs=1): err= 0: pid=2977: Tue Sep 6 08:14:06 2016
write: io=163840KB, bw=22300KB/s, iops=5575, runt= 7347msec
seq-read: (groupid=2, jobs=1): err= 0: pid=2983: Tue Sep 6 08:14:06 2016
read : io=163840KB, bw=66928KB/s, iops=1045, runt= 2448msec
rand-read: (groupid=3, jobs=1): err= 0: pid=2984: Tue Sep 6 08:14:06 2016
read : io=163840KB, bw=40980KB/s, iops=10245, runt= 3998msec
mixed-seq: (groupid=4, jobs=1): err= 0: pid=2985: Tue Sep 6 08:14:06 2016
read : io=82240KB, bw=18308KB/s, iops=286, runt= 4492msec
write: io=81600KB, bw=18166KB/s, iops=283, runt= 4492msec
mixed-rand: (groupid=5, jobs=1): err= 0: pid=2989: Tue Sep 6 08:14:06 2016
read : io=84120KB, bw=14771KB/s, iops=3692, runt= 5695msec
write: io=79720KB, bw=13998KB/s, iops=3499, runt= 5695msec

After:

seq-write: (groupid=0, jobs=1):
write: io=163840KB, bw=60547KB/s, iops=946, runt= 2706msec
rand-write: (groupid=1, jobs=1): err= 0: pid=2940: Tue Sep 6 08:13:04 2016
write: io=163840KB, bw=39337KB/s, iops=9834, runt= 4165msec
seq-read: (groupid=2, jobs=1): err= 0: pid=2946: Tue Sep 6 08:13:04 2016
read : io=163840KB, bw=66225KB/s, iops=1034, runt= 2474msec
rand-read: (groupid=3, jobs=1): err= 0: pid=2947: Tue Sep 6 08:13:04 2016
read : io=163840KB, bw=40970KB/s, iops=10242, runt= 3999msec
mixed-seq: (groupid=4, jobs=1): err= 0: pid=2948: Tue Sep 6 08:13:04 2016
read : io=82240KB, bw=31963KB/s, iops=499, runt= 2573msec
write: io=81600KB, bw=31714KB/s, iops=495, runt= 2573msec
mixed-rand: (groupid=5, jobs=1): err= 0: pid=2952: Tue Sep 6 08:13:04 2016
read : io=84120KB, bw=20192KB/s, iops=5048, runt= 4166msec
write: io=79720KB, bw=19136KB/s, iops=4783, runt= 4166msec

So, write and mixed-rw are about 2x faster (e.g., seq-write
28239KB/s -> 60547KB/s, rand-write 22300KB/s -> 39337KB/s).

I also ran fio with 4 jobs to check for a regression in the
full-stream workload; there is no regression, and writes are again
about 2x faster.

FIO job=4 benchmark result
Before:
seq-write: (groupid=0, jobs=4): err= 0: pid=3060: Tue Sep 6 08:22:13 2016
write: io=655360KB, bw=114834KB/s, iops=1794, runt= 5707msec
rand-write: (groupid=1, jobs=4): err= 0: pid=3071: Tue Sep 6 08:22:13 2016
write: io=655360KB, bw=95520KB/s, iops=23879, runt= 6861msec
seq-read: (groupid=2, jobs=4): err= 0: pid=3083: Tue Sep 6 08:22:13 2016
read : io=655360KB, bw=533247KB/s, iops=8331, runt= 1229msec
rand-read: (groupid=3, jobs=4): err= 0: pid=3087: Tue Sep 6 08:22:13 2016
read : io=655360KB, bw=295874KB/s, iops=73968, runt= 2215msec
mixed-seq: (groupid=4, jobs=4): err= 0: pid=3091: Tue Sep 6 08:22:13 2016
read : io=326272KB, bw=85861KB/s, iops=1341, runt= 3800msec
write: io=329088KB, bw=86602KB/s, iops=1353, runt= 3800msec
mixed-rand: (groupid=5, jobs=4): err= 0: pid=3101: Tue Sep 6 08:22:13 2016
read : io=326296KB, bw=49521KB/s, iops=12380, runt= 6589msec
write: io=329064KB, bw=49941KB/s, iops=12485, runt= 6589msec

After:
seq-write: (groupid=0, jobs=4): err= 0: pid=3129: Tue Sep 6 08:23:02 2016
write: io=655360KB, bw=246098KB/s, iops=3845, runt= 2663msec
rand-write: (groupid=1, jobs=4): err= 0: pid=3141: Tue Sep 6 08:23:02 2016
write: io=655360KB, bw=179158KB/s, iops=44789, runt= 3658msec
seq-read: (groupid=2, jobs=4): err= 0: pid=3154: Tue Sep 6 08:23:02 2016
read : io=655360KB, bw=560616KB/s, iops=8759, runt= 1169msec
rand-read: (groupid=3, jobs=4): err= 0: pid=3158: Tue Sep 6 08:23:02 2016
read : io=655360KB, bw=290368KB/s, iops=72591, runt= 2257msec
mixed-seq: (groupid=4, jobs=4): err= 0: pid=3162: Tue Sep 6 08:23:02 2016
read : io=326272KB, bw=196905KB/s, iops=3076, runt= 1657msec
write: io=329088KB, bw=198605KB/s, iops=3103, runt= 1657msec
mixed-rand: (groupid=5, jobs=4): err= 0: pid=3172: Tue Sep 6 08:23:02 2016
read : io=326296KB, bw=89152KB/s, iops=22287, runt= 3660msec
write: io=329064KB, bw=89908KB/s, iops=22477, runt= 3660msec

Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
---

It's an RFC, so I intentionally didn't add any documentation about
use_aio, because writing a document now would be wasted effort if we
change our minds after discussion (e.g., we might make aio the default
IO model so users never need to know about use_aio).
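
For reference, here is a hypothetical usage sketch showing how the knob
would be toggled from userspace (the /sys/block/zram0 path follows the
usual zram sysfs layout; this is an example, not documentation). Note
that use_aio has to be written before disksize, since use_aio_store()
refuses changes once the device is initialized:

/* Build with: cc use_aio_example.c */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_attr(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t ret;

	if (fd < 0) {
		perror(path);
		return -1;
	}
	ret = write(fd, val, strlen(val));
	close(fd);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	/* enable async writes, then initialize the device */
	write_attr("/sys/block/zram0/use_aio", "1");
	write_attr("/sys/block/zram0/disksize", "1073741824");	/* 1G */
	return 0;
}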

Another thing I should do is adjust the number of zram worker threads
when the number of online CPUs changes. It would be trivial.

drivers/block/zram/zram_drv.c | 558 +++++++++++++++++++++++++++++++++++++-----
drivers/block/zram/zram_drv.h | 1 +
2 files changed, 504 insertions(+), 55 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 04365b17ee67..feb6a4195c2f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -27,6 +27,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
+#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
@@ -366,6 +367,46 @@ static ssize_t comp_algorithm_store(struct device *dev,
return len;
}

+static ssize_t use_aio_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ bool val;
+ struct zram *zram = dev_to_zram(dev);
+
+ down_read(&zram->init_lock);
+ val = zram->use_aio;
+ up_read(&zram->init_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
+static ssize_t use_aio_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ int ret;
+ u16 do_async;
+ struct zram *zram = dev_to_zram(dev);
+
+ ret = kstrtou16(buf, 10, &do_async);
+ if (ret)
+ return ret;
+
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ up_write(&zram->init_lock);
+ pr_info("Can't change for initialized device\n");
+ return -EBUSY;
+ }
+
+ if (do_async)
+ zram->use_aio = true;
+ else
+ zram->use_aio = false;
+ up_write(&zram->init_lock);
+
+ return len;
+}
+
static ssize_t compact_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
@@ -872,7 +913,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
return ret;
}

-static void __zram_make_request(struct zram *zram, struct bio *bio)
+static void __zram_make_sync_request(struct zram *zram, struct bio *bio)
{
int offset;
u32 index;
@@ -883,12 +924,6 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
offset = (bio->bi_iter.bi_sector &
(SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

- if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
- zram_bio_discard(zram, index, offset, bio);
- bio_endio(bio);
- return;
- }
-
bio_for_each_segment(bvec, bio, iter) {
int max_transfer_size = PAGE_SIZE - offset;

@@ -921,95 +956,502 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
}

bio_endio(bio);
+ zram_meta_put(zram);
return;

out:
bio_io_error(bio);
+ zram_meta_put(zram);
}

-/*
- * Handler function for all zram I/O requests.
- */
-static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
+static int zram_rw_sync_page(struct block_device *bdev, struct zram *zram,
+ struct bio_vec *bv, u32 index,
+ int offset, bool is_write)
{
- struct zram *zram = queue->queuedata;
+ int err;

- if (unlikely(!zram_meta_get(zram)))
- goto error;
+ err = zram_bvec_rw(zram, bv, index, offset, is_write);
+ /*
+ * If I/O fails, just return error(ie, non-zero) without
+ * calling page_endio.
+ * It causes resubmit the I/O with bio request by upper functions
+ * of rw_page(e.g., swap_readpage, __swap_writepage) and
+ * bio->bi_end_io does things to handle the error
+ * (e.g., SetPageError, set_page_dirty and extra works).
+ */
+ if (err == 0)
+ page_endio(bv->bv_page, is_write, 0);

- blk_queue_split(queue, &bio, queue->bio_split);
+ zram_meta_put(zram);
+ return err;
+}

- if (!valid_io_request(zram, bio->bi_iter.bi_sector,
- bio->bi_iter.bi_size)) {
- atomic64_inc(&zram->stats.invalid_io);
- goto put_zram;
+const int NR_BATCH_PAGES = 64;
+
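+/*
+ * Each woken worker drains up to NR_BATCH_PAGES queued page requests
+ * at a time; see get_page_requests() and worker_wake_up().
+ */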
+struct zram_worker {
+ struct task_struct *task;
+ struct list_head list;
+};
+
+struct zram_workers {
+ spinlock_t req_lock;
+ struct list_head req_list;
+ unsigned int nr_req;
+ struct list_head worker_list;
+ wait_queue_head_t req_wait;
+ int nr_running;
+} workers;
+
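+/*
+ * Per-bio bookkeeping for the async path: nr_pages counts the page
+ * requests still in flight so the last completion can end the bio.
+ */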
+struct bio_request {
+ struct bio *bio;
+ atomic_t nr_pages;
+};
+
+struct page_request {
+ struct zram *zram;
+ struct bio_request *bio_req;
+ struct bio_vec bvec;
+ u32 index;
+ int offset;
+ bool write;
+ struct list_head list;
+};
+
+static void worker_wake_up(void)
+{
+ if (workers.nr_running * NR_BATCH_PAGES < workers.nr_req) {
+ int nr_wakeup = (workers.nr_req + NR_BATCH_PAGES) /
+ NR_BATCH_PAGES - workers.nr_running;
+
+ WARN_ON(!nr_wakeup);
+ wake_up_nr(&workers.req_wait, nr_wakeup);
}
+}

- __zram_make_request(zram, bio);
- zram_meta_put(zram);
- return BLK_QC_T_NONE;
-put_zram:
- zram_meta_put(zram);
-error:
- bio_io_error(bio);
- return BLK_QC_T_NONE;
+static void zram_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+ spin_lock_irq(&workers.req_lock);
+ if (workers.nr_req)
+ worker_wake_up();
+ spin_unlock_irq(&workers.req_lock);
+ kfree(cb);
}

-static void zram_slot_free_notify(struct block_device *bdev,
- unsigned long index)
+static int zram_check_plugged(void)
+{
+ return !!blk_check_plugged(zram_unplug, NULL,
+ sizeof(struct blk_plug_cb));
+}
+
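+/*
+ * rw_page() path: queue a single page request and wake a worker unless
+ * a block plug is active for this task.
+ */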
+int queue_page_request(struct zram *zram, struct bio_vec *bvec, u32 index,
+ int offset, bool write)
+{
+ struct page_request *page_req = kmalloc(sizeof(*page_req), GFP_NOIO);
+
+ if (!page_req)
+ return -ENOMEM;
+
+ page_req->bio_req = NULL;
+ page_req->zram = zram;
+ page_req->bvec = *bvec;
+ page_req->index = index;
+ page_req->offset = offset;
+ page_req->write = write;
+
+ spin_lock(&workers.req_lock);
+ list_add(&page_req->list, &workers.req_list);
+ workers.nr_req += 1;
+ if (!zram_check_plugged())
+ worker_wake_up();
+ spin_unlock(&workers.req_lock);
+
+
+ return 0;
+}
+
+int queue_page_request_list(struct zram *zram, struct bio_request *bio_req,
+ struct bio_vec *bvec, u32 index, int offset,
+ bool write, struct list_head *page_list)
+{
+ struct page_request *page_req = kmalloc(sizeof(*page_req), GFP_NOIO);
+
+ if (!page_req) {
+ while (!list_empty(page_list)) {
+ page_req = list_first_entry(page_list,
+ struct page_request, list);
+ list_del(&page_req->list);
+ kfree(page_req);
+ }
+
+ return -ENOMEM;
+ }
+
+ page_req->bio_req = bio_req;
+ atomic_inc(&bio_req->nr_pages);
+ page_req->zram = zram;
+ page_req->bvec = *bvec;
+ page_req->index = index;
+ page_req->offset = offset;
+ page_req->write = write;
+
+ list_add_tail(&page_req->list, page_list);
+
+ return 0;
+}
+
+/* Caller should hold on req_lock */
+static void get_page_requests(struct list_head *page_list)
+{
+ struct page_request *page_req;
+ struct bio_request *bio_req;
+ int nr_batch = NR_BATCH_PAGES;
+
+ while (nr_batch--) {
+ if (list_empty(&workers.req_list))
+ break;
+
+ page_req = list_first_entry(&workers.req_list,
+ struct page_request, list);
+ list_move(&page_req->list, page_list);
+ bio_req = page_req->bio_req;
+ workers.nr_req--;
+ }
+}
+
+static int page_request_rw(struct page_request *page_req)
+{
+ struct zram *zram = page_req->zram;
+
+ return zram_bvec_rw(zram, &page_req->bvec, page_req->index,
+ page_req->offset, page_req->write);
+}
+
+static void run_worker(struct bio *bio, struct list_head *page_list,
+ unsigned int nr_pages)
{
+ WARN_ON(list_empty(page_list));
+
+ spin_lock(&workers.req_lock);
+ list_splice_tail(page_list, &workers.req_list);
+ workers.nr_req += nr_pages;
+ if (bio->bi_opf & REQ_SYNC || !zram_check_plugged())
+ worker_wake_up();
+ spin_unlock(&workers.req_lock);
+}
+
+static int __zram_make_async_request(struct zram *zram, struct bio *bio)
+{
+ int offset;
+ u32 index;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
+ LIST_HEAD(page_list);
+ struct bio_request *bio_req;
+ unsigned int nr_pages = 0;
+ bool write = op_is_write(bio_op(bio));
+
+ index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ offset = (bio->bi_iter.bi_sector &
+ (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
+
+ bio_req = kmalloc(sizeof(*bio_req), GFP_NOIO);
+ if (!bio_req)
+ return 1;
+
+ /*
+ * Keep bi_vcnt to complete bio handling when all of pages
+ * in the bio are handled.
+ */
+ bio_req->bio = bio;
+ atomic_set(&bio_req->nr_pages, 0);
+
+ bio_for_each_segment(bvec, bio, iter) {
+ int max_transfer_size = PAGE_SIZE - offset;
+
+ if (bvec.bv_len > max_transfer_size) {
+ /*
+ * zram_bvec_rw() can only make operation on a single
+ * zram page. Split the bio vector.
+ */
+ struct bio_vec bv;
+
+ bv.bv_page = bvec.bv_page;
+ bv.bv_len = max_transfer_size;
+ bv.bv_offset = bvec.bv_offset;
+
+ if (queue_page_request_list(zram, bio_req, &bv,
+ index, offset, write, &page_list))
+ goto out;
+ nr_pages++;
+
+ bv.bv_len = bvec.bv_len - max_transfer_size;
+ bv.bv_offset += max_transfer_size;
+ if (queue_page_request_list(zram, bio_req, &bv,
+ index + 1, 0, write, &page_list))
+ goto out;
+ nr_pages++;
+ } else {
+ if (queue_page_request_list(zram, bio_req, &bvec,
+ index, offset, write, &page_list))
+ goto out;
+ nr_pages++;
+ }
+
+ update_position(&index, &offset, &bvec);
+ }
+
+ run_worker(bio, &page_list, nr_pages);
+ return 0;
+
+out:
+ kfree(bio_req);
+
+ WARN_ON(!list_empty(&page_list));
+ return 1;
+}
+
+
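+/*
+ * Called by worker threads: do the actual compress/decompress for each
+ * request, then complete the page (rw_page path) or the parent bio.
+ */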
+void page_requests_rw(struct list_head *page_list)
+{
+ struct page_request *page_req;
+ bool write;
+ struct page *page;
struct zram *zram;
- struct zram_meta *meta;

- zram = bdev->bd_disk->private_data;
- meta = zram->meta;
+ while (!list_empty(page_list)) {
+ bool free_bio = false;
+ struct bio_request *bio_req;
+ int err;
+
+ page_req = list_last_entry(page_list, struct page_request,
+ list);
+ write = page_req->write;
+ page = page_req->bvec.bv_page;
+ zram = page_req->zram;
+ bio_req = page_req->bio_req;
+ if (bio_req && atomic_dec_and_test(&bio_req->nr_pages))
+ free_bio = true;
+ list_del(&page_req->list);
+
+ err = page_request_rw(page_req);
+ kfree(page_req);
+ /* page-based request */
+ if (!bio_req) {
+ page_endio(page, write, err);
+ zram_meta_put(zram);
+ /* bio-based request */
+ } else if (free_bio) {
+ if (likely(!err))
+ bio_endio(bio_req->bio);
+ else
+ bio_io_error(bio_req->bio);
+ kfree(bio_req);
+ zram_meta_put(zram);
+ }

- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
- zram_free_page(zram, index);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
- atomic64_inc(&zram->stats.notify_free);
+ }
+}
+
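+/*
+ * Worker thread body: one thread per online CPU is created at init.
+ * Each sleeps until requests are queued, then services them in
+ * NR_BATCH_PAGES batches.
+ */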
+static int zram_thread(void *data)
+{
+ DEFINE_WAIT(wait);
+ LIST_HEAD(page_list);
+
+ spin_lock(&workers.req_lock);
+ workers.nr_running++;
+
+ while (1) {
+ if (kthread_should_stop()) {
+ workers.nr_running--;
+ spin_unlock(&workers.req_lock);
+ break;
+ }
+
+ if (list_empty(&workers.req_list)) {
+ prepare_to_wait_exclusive(&workers.req_wait, &wait,
+ TASK_INTERRUPTIBLE);
+ workers.nr_running--;
+ spin_unlock(&workers.req_lock);
+ schedule();
+ spin_lock(&workers.req_lock);
+ workers.nr_running++;
+ finish_wait(&workers.req_wait, &wait);
+ continue;
+ }
+
+ get_page_requests(&page_list);
+ if (list_empty(&page_list))
+ continue;
+
+ spin_unlock(&workers.req_lock);
+ page_requests_rw(&page_list);
+ WARN_ON(!list_empty(&page_list));
+ cond_resched();
+ spin_lock(&workers.req_lock);
+ }
+
+ return 0;
+}
+
+static void destroy_workers(void)
+{
+ struct zram_worker *worker;
+
+ while (!list_empty(&workers.worker_list)) {
+ worker = list_first_entry(&workers.worker_list,
+ struct zram_worker,
+ list);
+ kthread_stop(worker->task);
+ list_del(&worker->list);
+ kfree(worker);
+ }
+
+ WARN_ON(workers.nr_running);
+}
+
+static int create_workers(void)
+{
+ int i;
+ int nr_cpu = num_online_cpus();
+ struct zram_worker *worker;
+
+ INIT_LIST_HEAD(&workers.worker_list);
+ INIT_LIST_HEAD(&workers.req_list);
+ spin_lock_init(&workers.req_lock);
+ init_waitqueue_head(&workers.req_wait);
+
+ for (i = 0; i < nr_cpu; i++) {
+ worker = kmalloc(sizeof(*worker), GFP_KERNEL);
+ if (!worker)
+ goto error;
+
+ worker->task = kthread_run(zram_thread, NULL, "zramd-%d", i);
+ if (IS_ERR(worker->task)) {
+ kfree(worker);
+ goto error;
+ }
+
+ list_add(&worker->list, &workers.worker_list);
+ }
+
+ return 0;
+
+error:
+ destroy_workers();
+ return 1;
+}
+
+static int zram_rw_async_page(struct zram *zram,
+ struct bio_vec *bv, u32 index, int offset,
+ bool is_write)
+{
+
+ return queue_page_request(zram, bv, index, offset, is_write);
}

static int zram_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
- int offset, err = -EIO;
- u32 index;
+ int err = -EIO;
struct zram *zram;
+ int offset;
+ u32 index;
struct bio_vec bv;

zram = bdev->bd_disk->private_data;
if (unlikely(!zram_meta_get(zram)))
- goto out;
+ return err;

if (!valid_io_request(zram, sector, PAGE_SIZE)) {
atomic64_inc(&zram->stats.invalid_io);
- err = -EINVAL;
- goto put_zram;
+ zram_meta_put(zram);
+ return -EINVAL;
}

+
index = sector >> SECTORS_PER_PAGE_SHIFT;
- offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
+ offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

bv.bv_page = page;
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;

- err = zram_bvec_rw(zram, &bv, index, offset, is_write);
-put_zram:
- zram_meta_put(zram);
-out:
+ if (!zram->use_aio || !is_write) {
+ err = zram_rw_sync_page(bdev, zram, &bv, index, offset,
+ is_write);
+ } else {
+ err = zram_rw_async_page(zram, &bv, index, offset, is_write);
+ if (err)
+ err = zram_rw_sync_page(bdev, zram, &bv, index,
+ offset, is_write);
+ }
+
+ return err;
+}
+
+
+static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
+{
+ struct zram *zram = queue->queuedata;
+
/*
- * If I/O fails, just return error(ie, non-zero) without
- * calling page_endio.
- * It causes resubmit the I/O with bio request by upper functions
- * of rw_page(e.g., swap_readpage, __swap_writepage) and
- * bio->bi_end_io does things to handle the error
- * (e.g., SetPageError, set_page_dirty and extra works).
+ * request handler should take care of reference count and
+ * bio_endio.
*/
- if (err == 0)
- page_endio(page, is_write, 0);
- return err;
+ if (unlikely(!zram_meta_get(zram)))
+ goto error;
+
+ blk_queue_split(queue, &bio, queue->bio_split);
+
+ if (!valid_io_request(zram, bio->bi_iter.bi_sector,
+ bio->bi_iter.bi_size)) {
+ atomic64_inc(&zram->stats.invalid_io);
+ goto fail;
+ }
+
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+ int offset;
+ u32 index;
+
+ index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ offset = (bio->bi_iter.bi_sector &
+ (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
+
+ zram_bio_discard(zram, index, offset, bio);
+ bio_endio(bio);
+ zram_meta_put(zram);
+ goto out;
+ }
+
+ if (!zram->use_aio || !op_is_write(bio_op(bio))) {
+ __zram_make_sync_request(zram, bio);
+ } else {
+ if (__zram_make_async_request(zram, bio))
+ __zram_make_sync_request(zram, bio);
+ }
+
+ return BLK_QC_T_NONE;
+
+fail:
+ zram_meta_put(zram);
+error:
+ bio_io_error(bio);
+out:
+ return BLK_QC_T_NONE;
+}
+
+static void zram_slot_free_notify(struct block_device *bdev,
+ unsigned long index)
+{
+ struct zram *zram;
+ struct zram_meta *meta;
+
+ zram = bdev->bd_disk->private_data;
+ meta = zram->meta;
+
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_free_page(zram, index);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ atomic64_inc(&zram->stats.notify_free);
}

static void zram_reset_device(struct zram *zram)
@@ -1190,6 +1632,7 @@ static DEVICE_ATTR_RW(mem_limit);
static DEVICE_ATTR_RW(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
+static DEVICE_ATTR_RW(use_aio);

static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
@@ -1210,6 +1653,7 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_mem_used_max.attr,
&dev_attr_max_comp_streams.attr,
&dev_attr_comp_algorithm.attr,
+ &dev_attr_use_aio.attr,
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
&dev_attr_debug_stat.attr,
@@ -1464,6 +1908,9 @@ static int __init zram_init(void)
num_devices--;
}

+ if (create_workers())
+ goto out_error;
+
return 0;

out_error:
@@ -1474,6 +1921,7 @@ static int __init zram_init(void)
static void __exit zram_exit(void)
{
destroy_devices();
+ destroy_workers();
}

module_init(zram_init);
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 74fcf10da374..4819a33ab1cf 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -119,5 +119,6 @@ struct zram {
* zram is claimed so open request will be failed
*/
bool claim; /* Protected by bdev->bd_mutex */
+ bool use_aio; /* asynchronous IO mode */
};
#endif
--
2.7.4