[RFC v1] add new io-scheduler to use cgroup on high-speed device

From: Robin Dong
Date: Tue Jun 04 2013 - 22:23:38 EST


From: Robin Dong <sanbai@xxxxxxxxxx>

We want to use blkio.cgroup on high-speed devices (like fusionio) for our mysql clusters.
After testing different io-schedulers, we found that cfq is too slow and deadline does not support cgroups.
So we developed a new io-scheduler: tpps (Tiny Parallel Proportion Scheduler). It dispatches requests
based only on each group's individual weight relative to the total weight (proportion), so it is simple and efficient.

Test case: fusionio card, 4 cgroups, iodepth-512

groupname weight
test1 1000
test2 800
test3 600
test4 400

With tpps, the result is:

groupname iops avg-rt(ms) max-rt(ms)
test1 30220 16 54
test2 28261 18 56
test3 26333 19 69
test4 20152 25 87

With cfq, the result is:

groupname iops avg-rt(ms) max-rt(ms)
test1 16478 30 242
test2 13015 39 347
test3 9300 54 371
test4 5806 87 393

Signed-off-by: Robin Dong <sanbai@xxxxxxxxxx>
Signed-off-by: Zhu Yanhai <gaoyang.zyh@xxxxxxxxxx>
Cc: Tejun Heo <tj@xxxxxxxxxx>
Cc: Vivek Goyal <vgoyal@xxxxxxxxxx>
Cc: Jens Axboe <axboe@xxxxxxxxx>
Cc: Tao Ma <taoma.tm@xxxxxxxxx>
---
block/Kconfig.iosched | 13 +
block/Makefile | 1 +
block/tpps-iosched.c | 1272 ++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/blkdev.h | 2 +-
4 files changed, 1287 insertions(+), 1 deletions(-)
create mode 100644 block/tpps-iosched.c

diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 421bef9..e5e28c2 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -21,6 +21,16 @@ config IOSCHED_DEADLINE
a new point in the service tree and doing a batch of IO from there
in case of expiry.

+config IOSCHED_TPPS
+	tristate "TPPS I/O scheduler"
+	# tpps-iosched.c uses the blk-cgroup policy API unconditionally,
+	# so it cannot be built without BLK_CGROUP.
+	depends on BLK_CGROUP
+	# If BLK_CGROUP is a module, TPPS has to be built as module.
+	default y
+	---help---
+	  The TPPS I/O scheduler tries to distribute iops proportionally
+	  among all cgroups in the system. It should also provide a low
+	  latency working environment, suitable for flash-based devices.
+	  Note: If BLK_CGROUP=m, then TPPS can be built only as module.
+
config IOSCHED_CFQ
tristate "CFQ I/O scheduler"
default y
@@ -49,6 +59,9 @@ choice
config DEFAULT_DEADLINE
bool "Deadline" if IOSCHED_DEADLINE=y

+ # NOTE(review): the DEFAULT_IOSCHED string option presumably also needs a
+ # 'default "tpps" if DEFAULT_TPPS' entry; that part of Kconfig.iosched is
+ # not visible in this hunk - confirm.
+ config DEFAULT_TPPS
+ bool "Tiny Parallel Proportion" if IOSCHED_TPPS=y
+
config DEFAULT_CFQ
bool "CFQ" if IOSCHED_CFQ=y

diff --git a/block/Makefile b/block/Makefile
index 39b76ba..6e30ef4 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
+obj-$(CONFIG_IOSCHED_TPPS) += tpps-iosched.o

obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/block/tpps-iosched.c b/block/tpps-iosched.c
new file mode 100644
index 0000000..981fde2
--- /dev/null
+++ b/block/tpps-iosched.c
@@ -0,0 +1,1272 @@
+/*
+ * TPPS, or Tiny Parallel Proportion disk Scheduler.
+ *
+ * Based on ideas from Zhu Yanhai <gaoyang.zyh@xxxxxxxxxx>
+ *
+ * Copyright (C) 2013 Robin Dong <sanbai@xxxxxxxxxx>
+ */
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/jiffies.h>
+#include <linux/rbtree.h>
+#include <linux/ioprio.h>
+#include <linux/blktrace_api.h>
+#include "blk-cgroup.h"
+#include "blk.h"
+
+static struct kmem_cache *tpps_pool;
+
+/*
+ * Per io-context request queue. It is reached through tpps_io_cq->tppq and
+ * is linked into the tpps_group whose weight its requests are charged to.
+ */
+struct tpps_queue {
+ /*
+ * reference count: tpps_get_queue() takes one when a queue is looked up
+ * for an io context and tpps_set_request() takes one per request;
+ * tpps_put_queue() frees the queue when it drops to zero
+ */
+ int ref;
+ /* parent tpps_data */
+ struct tpps_data *tppd;
+ /* tpps_group member (node on tppg->queue_list) */
+ struct list_head tppg_node;
+ /*
+ * list of pending requests; requests are appended at the tail and
+ * dispatched from the head, i.e. FIFO order
+ */
+ struct list_head sort_list;
+ /* group this queue belongs to; cleared by tpps_del_queue() */
+ struct tpps_group *tppg;
+ /* pid of the task that allocated the queue (used in trace messages) */
+ pid_t pid;
+ /* set to 1 by tpps_add_queue() once the queue is on tppg->queue_list */
+ int online;
+ /* number of requests currently on sort_list */
+ int rq_queued;
+};
+
+/* Per-group statistics exported through the tpps.* cgroup files. */
+struct tppg_stats {
+ /* total bytes transferred */
+ struct blkg_rwstat service_bytes;
+ /* total IOs serviced, post merge */
+ struct blkg_rwstat serviced;
+ /* number of ios merged */
+ struct blkg_rwstat merged;
+ /* total time spent on device in ns, may not be accurate w/ queueing */
+ struct blkg_rwstat service_time;
+ /* total time spent waiting in scheduler queue in ns */
+ struct blkg_rwstat wait_time;
+ /* number of IOs queued up */
+ struct blkg_rwstat queued;
+ /* total sectors transferred (bytes >> 9, added at dispatch time) */
+ struct blkg_stat sectors;
+ /*
+ * total disk time used by this group
+ * NOTE(review): nothing in the visible part of this file adds to this
+ * counter - confirm whether it is ever updated.
+ */
+ struct blkg_stat time;
+};
+
+/*
+ * Per-(cgroup, device) scheduling group. It embeds the blk-cgroup policy
+ * data, so pd_to_tppg()/tppg_to_blkg() convert between the two views.
+ */
+struct tpps_group {
+ struct blkg_policy_data pd;
+ /* tpps_data member (node on tppd->group_list) */
+ struct list_head tppd_node;
+ /*
+ * queue_list position where the next dispatch round starts; NULL means
+ * "start from the head of queue_list"
+ */
+ struct list_head *cur_dispatcher;
+
+ unsigned int weight;
+ unsigned int new_weight;
+ unsigned int dev_weight;
+ unsigned int leaf_weight;
+ unsigned int new_leaf_weight;
+ unsigned int dev_leaf_weight;
+
+ /*
+ * when set, tpps_update_group_weight() copies new_weight into weight
+ * NOTE(review): no visible code sets this flag - confirm.
+ */
+ bool needs_update;
+
+ /*
+ * lists of queues with requests.
+ */
+ struct list_head queue_list;
+ /* number of queues on queue_list */
+ int nr_tppq;
+ /* requests of this group still waiting in the scheduler */
+ int rq_queued;
+ /* requests of this group currently handed to the driver */
+ int rq_in_driver;
+
+ struct tppg_stats stats; /* stats for this tppg */
+ struct tppg_stats dead_stats; /* stats pushed from dead children */
+};
+
+/*
+ * Scheduler-private part of an io_cq: ties one io context on one request
+ * queue to its tpps_queue.
+ */
+struct tpps_io_cq {
+ struct io_cq icq; /* must be the first member */
+ struct tpps_queue *tppq; /* queue for this context, NULL until first use */
+ uint64_t blkcg_id; /* the current blkcg ID */
+};
+
+/* Per request-queue (per device) scheduler state. */
+struct tpps_data {
+ struct request_queue *queue;
+ struct tpps_group *root_group;
+
+ /* List of tpps groups being managed on this device*/
+ struct list_head group_list;
+
+ /* number of queues that have been put online by tpps_add_queue() */
+ unsigned int busy_queues;
+ /*
+ * incremented when a request is inserted and decremented when it is
+ * moved to the dispatch list, i.e. requests still held by the scheduler
+ */
+ int dispatched;
+ /* requests currently handed to the driver */
+ int rq_in_driver;
+
+ /* deferred queue run, see tpps_kick_queue() */
+ struct work_struct unplug_work;
+
+ /* Number of groups which are on blkcg->blkg_list */
+ unsigned int nr_blkcg_linked_grps;
+
+ /* sum of the weights of all groups that currently have online queues */
+ unsigned total_weight;
+};
+
+/* Return the blkcg_gq that owns the policy data embedded in @tppg. */
+static inline struct blkcg_gq *tppg_to_blkg(struct tpps_group *tppg)
+{
+	struct blkg_policy_data *pd = &tppg->pd;
+
+	return pd_to_blkg(pd);
+}
+
+/*
+ * Emit a blktrace message for a queue, prefixed with "tpps<pid>" and the
+ * cgroup path of the queue's group. (tppq)->tppg is dereferenced, so the
+ * queue must still be linked to a group when this is used.
+ */
+#define tpps_log_tppq(tppd, tppq, fmt, args...) do { \
+ char __pbuf[128]; \
+ \
+ blkg_path(tppg_to_blkg((tppq)->tppg), __pbuf, sizeof(__pbuf)); \
+ blk_add_trace_msg((tppd)->queue, "tpps%d %s " fmt, (tppq)->pid, \
+ __pbuf, ##args); \
+} while (0)
+
+/* Emit a blktrace message for a group, prefixed with its cgroup path. */
+#define tpps_log_tppg(tppd, tppg, fmt, args...) do { \
+ char __pbuf[128]; \
+ \
+ blkg_path(tppg_to_blkg(tppg), __pbuf, sizeof(__pbuf)); \
+ blk_add_trace_msg((tppd)->queue, "%s " fmt, __pbuf, ##args); \
+} while (0)
+/* Emit a device-wide blktrace message prefixed with "tpps ". */
+#define tpps_log(tppd, fmt, args...) \
+ blk_add_trace_msg((tppd)->queue, "tpps " fmt, ##args)
+
+/* Convert a generic io_cq into the tpps_io_cq that embeds it. */
+static inline struct tpps_io_cq *icq_to_tic(struct io_cq *icq)
+{
+	struct tpps_io_cq *tic;
+
+	/* icq is the first member of tpps_io_cq, so a NULL icq stays NULL */
+	tic = container_of(icq, struct tpps_io_cq, icq);
+	return tic;
+}
+
+/*
+ * Per-request scheduler data: elv.icq carries the tpps_io_cq, elv.priv[0]
+ * the tpps_queue and elv.priv[1] the tpps_group (both stored by
+ * tpps_set_request()).
+ *
+ * The cast macros are fully parenthesized so that an expression such as
+ * RQ_TPPQ(rq)->tppg applies the cast before the member access.
+ */
+#define RQ_TIC(rq)		icq_to_tic((rq)->elv.icq)
+#define RQ_TPPQ(rq)		((struct tpps_queue *) ((rq)->elv.priv[0]))
+#define RQ_TPPG(rq)		((struct tpps_group *) ((rq)->elv.priv[1]))
+
+#define TPPS_WEIGHT_DEFAULT (500)
+#define MIN_DISPATCH_RQ (8)
+
+static struct blkcg_policy blkcg_policy_tpps;
+
+/* Convert policy data to its enclosing tpps_group; NULL maps to NULL. */
+static inline struct tpps_group *pd_to_tppg(struct blkg_policy_data *pd)
+{
+	if (!pd)
+		return NULL;
+
+	return container_of(pd, struct tpps_group, pd);
+}
+
+/* Look up the tpps policy data of @blkg and return it as a tpps_group. */
+static inline struct tpps_group *blkg_to_tppg(struct blkcg_gq *blkg)
+{
+	struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_tpps);
+
+	return pd_to_tppg(pd);
+}
+
+/*
+ * Find the tpps_io_cq linking @ioc to @tppd's request queue.
+ * Returns NULL when @ioc is NULL.
+ */
+static inline struct tpps_io_cq *
+tpps_tic_lookup(struct tpps_data *tppd, struct io_context *ioc)
+{
+	if (!ioc)
+		return NULL;
+
+	return icq_to_tic(ioc_lookup_icq(ioc, tppd->queue));
+}
+
+/* Return the queue currently attached to @tic (may be NULL). */
+static inline struct tpps_queue *tic_to_tppq(struct tpps_io_cq *tic)
+{
+	struct tpps_queue *tppq = tic->tppq;
+
+	return tppq;
+}
+
+/* Attach @tppq to @tic; no reference counting is done here. */
+static inline void tic_set_tppq(struct tpps_io_cq *tic, struct tpps_queue *tppq)
+{
+	struct tpps_queue **slot = &tic->tppq;
+
+	*slot = tppq;
+}
+
+/* Return the scheduler data of the request queue @tic belongs to. */
+static inline struct tpps_data *tic_to_tppd(struct tpps_io_cq *tic)
+{
+	struct request_queue *q = tic->icq.q;
+
+	return q->elevator->elevator_data;
+}
+
+/* Take a reference on the blkg backing @tppg. */
+static inline void tppg_get(struct tpps_group *tppg)
+{
+	struct blkcg_gq *blkg = tppg_to_blkg(tppg);
+
+	blkg_get(blkg);
+}
+
+/* Drop a reference on the blkg backing @tppg. */
+static inline void tppg_put(struct tpps_group *tppg)
+{
+	struct blkcg_gq *blkg = tppg_to_blkg(tppg);
+
+	blkg_put(blkg);
+}
+
+/*
+ * Account one more queued request of direction @rw to @tppg.
+ * @curr_tppg is accepted but not used.
+ */
+static inline void tppg_stats_update_io_add(struct tpps_group *tppg,
+					    struct tpps_group *curr_tppg, int rw)
+{
+	struct blkg_rwstat *queued = &tppg->stats.queued;
+
+	blkg_rwstat_add(queued, rw, 1);
+}
+
+/* Account one less queued request of direction @rw to @tppg. */
+static inline void tppg_stats_update_io_remove(struct tpps_group *tppg, int rw)
+{
+	struct blkg_rwstat *queued = &tppg->stats.queued;
+
+	blkg_rwstat_add(queued, rw, -1);
+}
+
+/* Count one merged request of direction @rw for @tppg. */
+static inline void tppg_stats_update_io_merged(struct tpps_group *tppg, int rw)
+{
+	struct blkg_rwstat *merged = &tppg->stats.merged;
+
+	blkg_rwstat_add(merged, rw, 1);
+}
+
+/*
+ * Account a dispatched request of @bytes bytes and direction @rw:
+ * sectors (bytes >> 9), the serviced count and the serviced byte total.
+ */
+static inline void tppg_stats_update_dispatch(struct tpps_group *tppg,
+					      uint64_t bytes, int rw)
+{
+	struct tppg_stats *st = &tppg->stats;
+
+	blkg_stat_add(&st->sectors, bytes >> 9);
+	blkg_rwstat_add(&st->serviced, rw, 1);
+	blkg_rwstat_add(&st->service_bytes, rw, bytes);
+}
+
+/*
+ * Account service and wait time of a completed request.
+ *
+ * @start_time:    timestamp taken when the request was started
+ * @io_start_time: timestamp taken when the request was issued
+ * Both are compared against sched_clock(); an interval is only added
+ * when its end lies after its beginning.
+ */
+static inline void tppg_stats_update_completion(struct tpps_group *tppg,
+			uint64_t start_time, uint64_t io_start_time, int rw)
+{
+	struct tppg_stats *st = &tppg->stats;
+	unsigned long long now = sched_clock();
+
+	/* issue -> completion */
+	if (time_after64(now, io_start_time)) {
+		blkg_rwstat_add(&st->service_time, rw, now - io_start_time);
+	}
+
+	/* start -> issue */
+	if (time_after64(io_start_time, start_time)) {
+		blkg_rwstat_add(&st->wait_time, rw,
+				io_start_time - start_time);
+	}
+}
+
+/*
+ * Take @tppq offline: unlink it from its group and undo the accounting
+ * done by tpps_add_queue().
+ *
+ * A queue that was never put online (no request was ever inserted through
+ * it) is not on any list and was never counted in nr_tppq/busy_queues, so
+ * there is nothing to undo for it. Decrementing the counters in that case
+ * would corrupt them or trip the BUG_ON()s below.
+ *
+ * When the last queue of a group goes away the group's weight is removed
+ * from the device total and the group is unlinked from tppd->group_list,
+ * so that a later tpps_add_queue() can link it again.
+ */
+static void tpps_del_queue(struct tpps_queue *tppq)
+{
+	struct tpps_data *tppd = tppq->tppd;
+	struct tpps_group *tppg = tppq->tppg;
+
+	/* never went online: nothing was accounted for this queue */
+	if (list_empty(&tppq->tppg_node))
+		return;
+
+	list_del_init(&tppq->tppg_node);
+	tpps_log_tppq(tppd, tppq, "del queue\n");
+	/* the saved round-robin position may point at the removed node */
+	tppg->cur_dispatcher = NULL;
+	tppq->tppg = NULL;
+	tppq->online = 0;
+
+	BUG_ON(tppg->nr_tppq < 1);
+	tppg->nr_tppq--;
+	if (!tppg->nr_tppq) {
+		tppd->total_weight -= tppg->pd.blkg->blkcg->cfq_weight;
+		list_del_init(&tppg->tppd_node);
+	}
+
+	BUG_ON(!tppd->busy_queues);
+	tppd->busy_queues--;
+}
+
+/*
+ * Drop one reference on @tppq and free it when that was the last one.
+ *
+ * The io context holds one reference on its queue and every request set up
+ * on the queue holds another. The queue itself holds a reference on its
+ * group, which is released here after the queue has been freed.
+ * queue lock must be held here.
+ */
+static void tpps_put_queue(struct tpps_queue *tppq)
+{
+	struct tpps_data *tppd = tppq->tppd;
+	struct tpps_group *tppg;
+
+	BUG_ON(tppq->ref <= 0);
+
+	if (--tppq->ref)
+		return;
+
+	tpps_log_tppq(tppd, tppq, "put_queue");
+	BUG_ON(!list_empty(&tppq->sort_list));
+
+	/* remember the group: tpps_del_queue() may clear tppq->tppg */
+	tppg = tppq->tppg;
+	tpps_del_queue(tppq);
+
+	kmem_cache_free(tpps_pool, tppq);
+	tppg_put(tppg);
+}
+
+/*
+ * Initialise a freshly allocated queue for @tppd on behalf of task @pid.
+ * Only the list heads, ref, tppd and pid are written here; the remaining
+ * fields keep whatever the allocation left in them (the callers in this
+ * file allocate with __GFP_ZERO).
+ */
+static void tpps_init_tppq(struct tpps_data *tppd, struct tpps_queue *tppq,
+			   pid_t pid)
+{
+	tppq->ref = 0;
+	tppq->tppd = tppd;
+	tppq->pid = pid;
+
+	INIT_LIST_HEAD(&tppq->tppg_node);
+	INIT_LIST_HEAD(&tppq->sort_list);
+}
+
+/* Bind @tppq to @tppg and take the queue's reference on the group. */
+static void tpps_link_tppq_tppg(struct tpps_queue *tppq,
+				struct tpps_group *tppg)
+{
+	tppq->tppg = tppg;
+	tppg_get(tppg);		/* dropped in tpps_put_queue() */
+}
+
+/*
+ * Return the tpps_group of @blkcg on @tppd's device, creating the blkg if
+ * needed. Returns NULL when blkg_lookup_create() reports an error.
+ */
+static struct tpps_group *tpps_lookup_create_tppg(struct tpps_data *tppd,
+						  struct blkcg *blkcg)
+{
+	struct blkcg_gq *blkg;
+
+	/* avoid lookup for the common case where there's no blkcg */
+	if (blkcg == &blkcg_root)
+		return tppd->root_group;
+
+	blkg = blkg_lookup_create(blkcg, tppd->queue);
+	if (IS_ERR(blkg))
+		return NULL;
+
+	return blkg_to_tppg(blkg);
+}
+
+/*
+ * Return the queue attached to @tic, allocating and initialising a new one
+ * when there is none yet. Returns NULL if the allocation fails.
+ *
+ * Called with the queue lock held. When @gfp_mask allows sleeping the lock
+ * (and the RCU read side) is dropped around the allocation and the lookup
+ * is retried afterwards, because the state may have changed meanwhile.
+ *
+ * Two failure paths are handled explicitly:
+ *  - if the blocking allocation fails we return right away; the RCU read
+ *    lock was already released before the allocation, so falling through
+ *    to the common exit would unlock it a second time;
+ *  - if the group lookup fails the queue is linked to the root group
+ *    instead of dereferencing a NULL group.
+ */
+static struct tpps_queue *
+tpps_find_alloc_queue(struct tpps_data *tppd, struct tpps_io_cq *tic, struct bio *bio,
+		      gfp_t gfp_mask)
+{
+	struct tpps_queue *tppq, *new_tppq = NULL;
+	struct tpps_group *tppg;
+	struct blkcg *blkcg;
+
+retry:
+	rcu_read_lock();
+
+	blkcg = bio_blkcg(bio);
+	tppg = tpps_lookup_create_tppg(tppd, blkcg);
+	if (!tppg)
+		tppg = tppd->root_group;
+	tppq = tic_to_tppq(tic);
+
+	if (!tppq) {
+		if (new_tppq) {
+			/* second pass: use the queue allocated below */
+			tppq = new_tppq;
+			new_tppq = NULL;
+		} else if (gfp_mask & __GFP_WAIT) {
+			rcu_read_unlock();
+			spin_unlock_irq(tppd->queue->queue_lock);
+			new_tppq = kmem_cache_alloc_node(tpps_pool,
+					gfp_mask | __GFP_ZERO,
+					tppd->queue->node);
+			spin_lock_irq(tppd->queue->queue_lock);
+			if (new_tppq)
+				goto retry;
+			/* RCU read lock is not held any more at this point */
+			return NULL;
+		} else {
+			tppq = kmem_cache_alloc_node(tpps_pool,
+					gfp_mask | __GFP_ZERO,
+					tppd->queue->node);
+		}
+
+		if (tppq) {
+			tpps_init_tppq(tppd, tppq, current->pid);
+			tpps_link_tppq_tppg(tppq, tppg);
+			tpps_log_tppq(tppd, tppq, "alloced");
+		}
+	}
+
+	/* a queue showed up while the lock was dropped: discard ours */
+	if (new_tppq)
+		kmem_cache_free(tpps_pool, new_tppq);
+
+	rcu_read_unlock();
+	return tppq;
+}
+
+/*
+ * Find or allocate the queue for @tic and take a reference on it for the
+ * caller. Returns NULL (without touching any refcount) when no queue could
+ * be allocated.
+ */
+static struct tpps_queue *
+tpps_get_queue(struct tpps_data *tppd, struct tpps_io_cq *tic, struct bio *bio,
+	       gfp_t gfp_mask)
+{
+	struct tpps_queue *tppq;
+
+	tppq = tpps_find_alloc_queue(tppd, tic, bio, gfp_mask);
+	if (tppq)
+		tppq->ref++;
+	return tppq;
+}
+
+/*
+ * scheduler run of queue, if there are requests pending and no one in the
+ * driver that will restart queueing
+ */
+static inline void tpps_schedule_dispatch(struct tpps_data *tppd)
+{
+	if (tppd->busy_queues) {
+		tpps_log(tppd, "schedule dispatch");
+		kblockd_schedule_work(tppd->queue, &tppd->unplug_work);
+	}
+}
+
+/*
+ * Detach @tic from its queue when the blkcg of @bio differs from the one
+ * recorded in @tic, so that the next request gets a queue in the new group.
+ */
+static void check_blkcg_changed(struct tpps_io_cq *tic, struct bio *bio)
+{
+	struct tpps_data *tppd = tic_to_tppd(tic);
+	struct tpps_queue *tppq;
+	uint64_t id;
+
+	rcu_read_lock();
+	id = bio_blkcg(bio)->id;
+	rcu_read_unlock();
+
+	/*
+	 * Check whether blkcg has changed. The condition may trigger
+	 * spuriously on a newly created tic but there's no harm.
+	 */
+	if (unlikely(!tppd) || likely(tic->blkcg_id == id))
+		return;
+
+	tppq = tic_to_tppq(tic);
+	if (tppq) {
+		/*
+		 * Drop the tic's reference to the old queue. A new queue
+		 * will be assigned in the new group upon arrival of a
+		 * fresh request.
+		 */
+		tpps_log_tppq(tppd, tppq, "changed cgroup");
+		tic_set_tppq(tic, NULL);
+		tpps_put_queue(tppq);
+	}
+
+	tic->blkcg_id = id;
+}
+
+/*
+ * elevator_set_req_fn: attach scheduler data to a new request.
+ *
+ * Looks up (or creates) the queue of the submitting io context, takes a
+ * queue and a group reference on behalf of @rq and stores both pointers in
+ * rq->elv.priv[]. The references are dropped in tpps_put_request().
+ *
+ * Returns 0 on success and non-zero if no queue could be allocated; in
+ * that case rq->elv.priv[] is left untouched and no reference is held.
+ */
+static int
+tpps_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
+		 gfp_t gfp_mask)
+{
+	struct tpps_data *tppd = q->elevator->elevator_data;
+	struct tpps_io_cq *tic = icq_to_tic(rq->elv.icq);
+	struct tpps_queue *tppq;
+
+	might_sleep_if(gfp_mask & __GFP_WAIT);
+
+	spin_lock_irq(q->queue_lock);
+
+	check_blkcg_changed(tic, bio);
+
+	tppq = tic_to_tppq(tic);
+	if (!tppq) {
+		tppq = tpps_get_queue(tppd, tic, bio, gfp_mask);
+		if (!tppq) {
+			/* allocation failed: report it instead of oopsing */
+			spin_unlock_irq(q->queue_lock);
+			return 1;
+		}
+		tic_set_tppq(tic, tppq);
+	}
+
+	/* per-request references on the queue and on its group */
+	tppq->ref++;
+	tppg_get(tppq->tppg);
+	rq->elv.priv[0] = tppq;
+	rq->elv.priv[1] = tppq->tppg;
+	spin_unlock_irq(q->queue_lock);
+	return 0;
+}
+
+/*
+ * elevator_put_req_fn: release the queue and group references that
+ * tpps_set_request() took for @rq. Requests without a queue are ignored.
+ * queue lock held here
+ */
+static void tpps_put_request(struct request *rq)
+{
+	struct tpps_queue *tppq = RQ_TPPQ(rq);
+
+	if (!tppq)
+		return;
+
+	WARN_ON(tppq->tppg != RQ_TPPG(rq));
+
+	/* Put down rq reference on tppg */
+	tppg_put(RQ_TPPG(rq));
+	rq->elv.priv[0] = NULL;
+	rq->elv.priv[1] = NULL;
+
+	tpps_put_queue(tppq);
+}
+
+/*
+ * Apply a pending weight change: when needs_update is set, new_weight
+ * becomes the group's weight and the flag is cleared.
+ */
+static void
+tpps_update_group_weight(struct tpps_group *tppg)
+{
+	if (!tppg->needs_update)
+		return;
+
+	tppg->weight = tppg->new_weight;
+	tppg->needs_update = false;
+}
+
+/*
+ * Put @tppq online the first time a request is inserted through it: link
+ * it into its group and, if it is the group's first queue, add the group's
+ * weight to the device total and make sure the group is on
+ * tppd->group_list.
+ *
+ * The group is only linked when its node is not on a list yet; adding a
+ * node that is already linked would corrupt group_list.
+ *
+ * Diagnostics go to blktrace instead of printk(): this runs for every
+ * newly active queue in the I/O submission path.
+ */
+static void tpps_add_queue(struct tpps_data *tppd, struct tpps_queue *tppq)
+{
+	struct tpps_group *tppg;
+
+	if (tppq->online)
+		return;
+
+	tppq->online = 1;
+	tppg = tppq->tppg;
+	tpps_log_tppq(tppd, tppq, "add queue");
+	tppg->nr_tppq++;
+	tppd->busy_queues++;
+	list_add(&tppq->tppg_node, &tppg->queue_list);
+	tpps_update_group_weight(tppg);
+
+	if (tppg->nr_tppq <= 1) {
+		tppd->total_weight += tppg->pd.blkg->blkcg->cfq_weight;
+		if (list_empty(&tppg->tppd_node))
+			list_add(&tppg->tppd_node, &tppd->group_list);
+		tpps_log_tppg(tppd, tppg, "twt:%u, wt:%u %u %d",
+			      tppd->total_weight, tppg->weight,
+			      tppg->pd.blkg->blkcg->cfq_weight,
+			      tppg->nr_tppq);
+	}
+}
+
+/*
+ * elevator_add_req_fn: append @rq to the FIFO of its queue, bump the
+ * queued counters and make sure the queue is online.
+ */
+static void tpps_insert_request(struct request_queue *q, struct request *rq)
+{
+	struct tpps_queue *tppq = RQ_TPPQ(rq);
+	struct tpps_data *tppd = q->elevator->elevator_data;
+
+	tpps_log_tppq(tppd, tppq, "insert_request");
+
+	list_add_tail(&rq->queuelist, &tppq->sort_list);
+
+	tppq->rq_queued += 1;
+	tppq->tppg->rq_queued += 1;
+	tppd->dispatched += 1;
+
+	tpps_add_queue(tppd, tppq);
+	tppg_stats_update_io_add(RQ_TPPG(rq), tppq->tppg, rq->cmd_flags);
+}
+
+/*
+ * Unlink @rq from its queue's FIFO and undo the queue, group and
+ * statistics accounting done at insertion.
+ */
+static void tpps_remove_request(struct request *rq)
+{
+	struct tpps_queue *tppq = RQ_TPPQ(rq);
+
+	list_del_init(&rq->queuelist);
+
+	tppq->rq_queued -= 1;
+	tppq->tppg->rq_queued -= 1;
+
+	tppg_stats_update_io_remove(RQ_TPPG(rq), rq->cmd_flags);
+}
+
+/*
+ * Move the oldest request of @tppq from the scheduler's internal FIFO to
+ * the request queue dispatch list.
+ *
+ * Returns 1 if a request was moved, 0 if the queue had none.
+ */
+static int tpps_dispatch_insert(struct request_queue *q,
+				struct tpps_queue *tppq)
+{
+	struct request *rq;
+
+	if (list_empty(&tppq->sort_list))
+		return 0;
+
+	rq = rq_entry_fifo(tppq->sort_list.next);
+	tpps_remove_request(rq);
+	elv_dispatch_sort(q, rq);
+	tppg_stats_update_dispatch(tppq->tppg, blk_rq_bytes(rq), rq->cmd_flags);
+
+	return 1;
+}
+
+/*
+ * Dispatch up to @count requests from @tppq.
+ *
+ * @count: maximum number of requests to move; a negative value means
+ *         "no limit" (callers pass -1 for forced dispatch). Comparing the
+ *         running count against -1 directly would end the loop after the
+ *         first request, so the negative case is tested explicitly.
+ *
+ * Returns the number of requests actually dispatched.
+ */
+static int tpps_dispatch_requests_nr(struct tpps_data *tppd,
+				     struct tpps_queue *tppq, int count)
+{
+	int cnt = 0, ret;
+
+	if (!tppq->rq_queued)
+		return cnt;
+
+	do {
+		ret = tpps_dispatch_insert(tppd->queue, tppq);
+		if (ret) {
+			cnt++;
+			tppd->dispatched--;
+		}
+	} while (ret && (count < 0 || cnt < count));
+
+	return cnt;
+}
+
+/*
+ * elevator_dispatch_fn: hand requests to the dispatch list, sharing the
+ * free device slots between groups in proportion to their weights.
+ *
+ * @force: non-zero to dispatch regardless of the quota checks.
+ *
+ * Returns 1 if at least one request was dispatched, 0 otherwise.
+ */
+static int tpps_dispatch_requests(struct request_queue *q, int force)
+{
+ struct tpps_data *tppd = q->elevator->elevator_data;
+ struct tpps_group *tppg, *group_n;
+ struct tpps_queue *tppq;
+ struct list_head *next;
+ int count = 0, total = 0, ret;
+ int quota, grp_quota;
+
+ /* no group with online queues */
+ if (!tppd->total_weight)
+ return 0;
+
+ /* device-wide budget: queue depth minus what the driver already holds */
+ quota = q->nr_requests - tppd->rq_in_driver;
+ if (quota < MIN_DISPATCH_RQ && !force)
+ return 0;
+
+ list_for_each_entry_safe(tppg, group_n, &tppd->group_list, tppd_node) {
+ if (!tppg->nr_tppq)
+ continue;
+ /*
+ * group budget: the group's weighted share of quota minus the
+ * requests it already has in the driver
+ * NOTE(review): quota is int and cfq_weight is unsigned, so the
+ * product is computed unsigned; a negative quota (possible with
+ * force) would presumably wrap - confirm.
+ */
+ grp_quota = (quota * tppg->pd.blkg->blkcg->cfq_weight
+ / tppd->total_weight) - tppg->rq_in_driver;
+ tpps_log_tppg(tppd, tppg,
+ "nr:%d, wt:%u total_wt:%u in_driver:%d %d quota:%d grp_quota:%d",
+ tppg->nr_tppq, tppg->pd.blkg->blkcg->cfq_weight,
+ tppd->total_weight, tppg->rq_in_driver, tppg->rq_queued,
+ quota, grp_quota);
+ if (grp_quota <= 0 && !force)
+ continue;
+ BUG_ON(tppg->queue_list.next == &tppg->queue_list);
+ /* resume where the previous round stopped, else start at the head */
+ if (!tppg->cur_dispatcher)
+ tppg->cur_dispatcher = tppg->queue_list.next;
+ next = tppg->cur_dispatcher;
+ count = 0;
+ /*
+ * Walk the group's queues once around, starting at cur_dispatcher.
+ * Without force each queue visited gives up at most one request.
+ */
+ do {
+ tppq = list_entry(next, struct tpps_queue, tppg_node);
+ tpps_log_tppq(tppd, tppq, "tppq: %d\n", tppq->rq_queued);
+ if (force)
+ ret = tpps_dispatch_requests_nr(tppd, tppq, -1);
+ else
+ ret = tpps_dispatch_requests_nr(tppd, tppq, 1);
+ count += ret;
+ total += ret;
+ /* advance, skipping over the list head when wrapping */
+ next = next->next;
+ if (next == &tppg->queue_list)
+ next = tppg->queue_list.next;
+ /* budget used up: remember where to continue next time */
+ if (count >= grp_quota && !force) {
+ tppg->cur_dispatcher = next;
+ break;
+ }
+ BUG_ON(tppg->cur_dispatcher == &tppg->queue_list);
+ } while (next != tppg->cur_dispatcher);
+ }
+ return total > 0;
+}
+
+/*
+ * Work handler for tppd->unplug_work: run the request queue under the
+ * queue lock.
+ */
+static void tpps_kick_queue(struct work_struct *work)
+{
+	struct tpps_data *tppd;
+	struct request_queue *q;
+
+	tppd = container_of(work, struct tpps_data, unplug_work);
+	q = tppd->queue;
+
+	spin_lock_irq(q->queue_lock);
+	__blk_run_queue(q);
+	spin_unlock_irq(q->queue_lock);
+}
+
+/*
+ * Initialise the list heads of @tppg and clear its saved round-robin
+ * position.
+ */
+static void tpps_init_tppg_base(struct tpps_group *tppg)
+{
+	tppg->cur_dispatcher = NULL;
+	INIT_LIST_HEAD(&tppg->queue_list);
+	INIT_LIST_HEAD(&tppg->tppd_node);
+}
+
+/*
+ * elevator_init_fn: allocate the per-device scheduler data, activate the
+ * tpps blkcg policy on @q and set up the root group.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error
+ * returned by blkcg_activate_policy().
+ */
+static int tpps_init_queue(struct request_queue *q)
+{
+	struct tpps_group *root;
+	struct tpps_data *tppd;
+	int ret;
+
+	tppd = kmalloc_node(sizeof(*tppd), GFP_KERNEL | __GFP_ZERO, q->node);
+	if (!tppd)
+		return -ENOMEM;
+
+	tppd->queue = q;
+	q->elevator->elevator_data = tppd;
+
+	INIT_LIST_HEAD(&tppd->group_list);
+
+	ret = blkcg_activate_policy(q, &blkcg_policy_tpps);
+	if (ret) {
+		kfree(tppd);
+		return ret;
+	}
+
+	/* Init root group */
+	root = blkg_to_tppg(q->root_blkg);
+	tppd->root_group = root;
+	tpps_init_tppg_base(root);
+
+	/* Give preference to root group over other groups */
+	root->weight = 2 * TPPS_WEIGHT_DEFAULT;
+	root->leaf_weight = 2 * TPPS_WEIGHT_DEFAULT;
+
+	INIT_WORK(&tppd->unplug_work, tpps_kick_queue);
+
+	return 0;
+}
+
+/*
+ * elevator_exit_fn: tear down the per-device scheduler data.
+ *
+ * The root group is the policy data of q->root_blkg (see
+ * tpps_init_queue()), i.e. it is owned by blk-cgroup and released as part
+ * of blkcg_deactivate_policy(). It must not be kfree()d here as well,
+ * that would free the same memory twice.
+ */
+static void tpps_exit_queue(struct elevator_queue *e)
+{
+	struct tpps_data *tppd = e->elevator_data;
+	struct request_queue *q = tppd->queue;
+
+	/* make sure the deferred queue run is not pending or running */
+	cancel_work_sync(&tppd->unplug_work);
+
+	blkcg_deactivate_policy(q, &blkcg_policy_tpps);
+	tppd->root_group = NULL;
+	kfree(tppd);
+}
+
+/*
+ * elevator_activate_req_fn: count @rq as in the driver, both device-wide
+ * and for its group.
+ */
+static void tpps_activate_request(struct request_queue *q, struct request *rq)
+{
+	struct tpps_data *tppd = q->elevator->elevator_data;
+	struct tpps_queue *tppq = RQ_TPPQ(rq);
+
+	tppd->rq_in_driver += 1;
+	tppq->tppg->rq_in_driver += 1;
+
+	tpps_log_tppq(tppd, RQ_TPPQ(rq), "activate rq, drv=%d",
+		      tppd->rq_in_driver);
+}
+
+/*
+ * elevator_deactivate_req_fn: undo tpps_activate_request() for @rq.
+ * Warns if the device-wide in-driver count is already zero.
+ */
+static void tpps_deactivate_request(struct request_queue *q, struct request *rq)
+{
+	struct tpps_data *tppd = q->elevator->elevator_data;
+	struct tpps_queue *tppq = RQ_TPPQ(rq);
+
+	WARN_ON(!tppd->rq_in_driver);
+
+	tppd->rq_in_driver -= 1;
+	tppq->tppg->rq_in_driver -= 1;
+
+	tpps_log_tppq(tppd, RQ_TPPQ(rq), "deactivate rq, drv=%d",
+		      tppd->rq_in_driver);
+}
+
+/*
+ * elevator_completed_req_fn: account the completion of @rq and, when the
+ * driver has nothing left in flight, schedule another dispatch run.
+ *
+ * The queue pointer is validated before it is dereferenced; a request
+ * without a queue is reported once via WARN_ON() and otherwise ignored.
+ */
+static void tpps_completed_request(struct request_queue *q, struct request *rq)
+{
+	struct tpps_queue *tppq = RQ_TPPQ(rq);
+	struct tpps_data *tppd;
+
+	if (WARN_ON(!tppq))
+		return;
+	WARN_ON(tppq->tppg != RQ_TPPG(rq));
+
+	tppd = tppq->tppd;
+
+	tpps_log_tppq(tppd, tppq, "complete rqnoidle %d",
+		      !!(rq->cmd_flags & REQ_NOIDLE));
+	WARN_ON(!tppd->rq_in_driver);
+	tppd->rq_in_driver--;
+	tppq->tppg->rq_in_driver--;
+	tppg_stats_update_completion(tppq->tppg,
+		rq_start_time_ns(rq), rq_io_start_time_ns(rq), rq->cmd_flags);
+
+	if (!tppd->rq_in_driver)
+		tpps_schedule_dispatch(tppd);
+}
+
+/*
+ * elevator_merged_fn: after a front merge, take @rq off its queue's list
+ * and append it again at the tail, keeping the queued statistics balanced.
+ * Other merge types need no action.
+ */
+static void
+tpps_merged_request(struct request_queue *q, struct request *rq, int type)
+{
+	struct tpps_queue *tppq;
+
+	if (type != ELEVATOR_FRONT_MERGE)
+		return;
+
+	tppq = RQ_TPPQ(rq);
+
+	/* remove ... */
+	list_del_init(&rq->queuelist);
+	tppq->rq_queued--;
+	tppg_stats_update_io_remove(RQ_TPPG(rq), rq->cmd_flags);
+
+	/* ... and re-add at the tail */
+	list_add_tail(&rq->queuelist, &tppq->sort_list);
+	tppq->rq_queued++;
+	tppg_stats_update_io_add(RQ_TPPG(rq), tppq->tppg, rq->cmd_flags);
+}
+
+/*
+ * elevator_merge_req_fn: @next has been merged into @rq, so drop @next
+ * from the scheduler and count the merge for @rq's group.
+ */
+static void
+tpps_merged_requests(struct request_queue *q, struct request *rq,
+		     struct request *next)
+{
+	struct tpps_group *tppg = RQ_TPPG(rq);
+
+	tpps_remove_request(next);
+	tppg_stats_update_io_merged(tppg, rq->cmd_flags);
+}
+
+/* elevator_init_icq_fn: nothing to set up for a new io_cq. */
+static void tpps_init_icq(struct io_cq *icq)
+{
+}
+
+/*
+ * elevator_exit_icq_fn: drop the io context's reference on its queue, if
+ * it has one, and clear the pointer.
+ */
+static void tpps_exit_icq(struct io_cq *icq)
+{
+	struct tpps_io_cq *tic = icq_to_tic(icq);
+
+	if (!tic->tppq)
+		return;
+
+	tpps_put_queue(tic->tppq);
+	tic->tppq = NULL;
+}
+
+/*
+ * Elevator registration data: hooks the functions above into the block
+ * layer under the scheduler name "tpps". icq_size/icq_align describe
+ * struct tpps_io_cq, the scheduler-private io_cq type.
+ */
+static struct elevator_type iosched_tpps = {
+ .ops = {
+ .elevator_merged_fn = tpps_merged_request,
+ .elevator_merge_req_fn = tpps_merged_requests,
+ .elevator_dispatch_fn = tpps_dispatch_requests,
+ .elevator_add_req_fn = tpps_insert_request,
+ .elevator_activate_req_fn = tpps_activate_request,
+ .elevator_deactivate_req_fn = tpps_deactivate_request,
+ .elevator_completed_req_fn = tpps_completed_request,
+ .elevator_init_icq_fn = tpps_init_icq,
+ .elevator_exit_icq_fn = tpps_exit_icq,
+ .elevator_set_req_fn = tpps_set_request,
+ .elevator_put_req_fn = tpps_put_request,
+ .elevator_init_fn = tpps_init_queue,
+ .elevator_exit_fn = tpps_exit_queue,
+ },
+ .icq_size = sizeof(struct tpps_io_cq),
+ .icq_align = __alignof__(struct tpps_io_cq),
+ .elevator_name = "tpps",
+ .elevator_owner = THIS_MODULE,
+};
+
+/*
+ * prfill callback: print the per-device weight of the group behind @pd.
+ * Groups without a per-device weight (dev_weight == 0) print nothing.
+ */
+static u64 tppg_prfill_weight_device(struct seq_file *sf,
+				     struct blkg_policy_data *pd, int off)
+{
+	struct tpps_group *tppg = pd_to_tppg(pd);
+
+	if (tppg->dev_weight)
+		return __blkg_prfill_u64(sf, pd, tppg->dev_weight);
+
+	return 0;
+}
+
+/* cgroup read handler: list the per-device weights of @cgrp's blkcg. */
+static int tppg_print_weight_device(struct cgroup *cgrp, struct cftype *cft,
+				    struct seq_file *sf)
+{
+	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+
+	blkcg_print_blkgs(sf, blkcg, tppg_prfill_weight_device,
+			  &blkcg_policy_tpps, 0, false);
+	return 0;
+}
+
+/*
+ * prfill callback: print the per-device leaf weight of the group behind
+ * @pd. Groups with dev_leaf_weight == 0 print nothing.
+ */
+static u64 tppg_prfill_leaf_weight_device(struct seq_file *sf,
+					  struct blkg_policy_data *pd, int off)
+{
+	struct tpps_group *tppg = pd_to_tppg(pd);
+
+	if (tppg->dev_leaf_weight)
+		return __blkg_prfill_u64(sf, pd, tppg->dev_leaf_weight);
+
+	return 0;
+}
+
+/* cgroup read handler: list the per-device leaf weights of @cgrp's blkcg. */
+static int tppg_print_leaf_weight_device(struct cgroup *cgrp,
+					 struct cftype *cft,
+					 struct seq_file *sf)
+{
+	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+
+	blkcg_print_blkgs(sf, blkcg, tppg_prfill_leaf_weight_device,
+			  &blkcg_policy_tpps, 0, false);
+	return 0;
+}
+
+/* cgroup read handler: print the blkcg-wide weight (blkcg->cfq_weight). */
+static int tppg_print_weight(struct cgroup *cgrp, struct cftype *cft,
+			     struct seq_file *sf)
+{
+	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+
+	seq_printf(sf, "%u\n", blkcg->cfq_weight);
+	return 0;
+}
+
+/*
+ * cgroup read handler: print the blkcg-wide leaf weight
+ * (blkcg->cfq_leaf_weight).
+ */
+static int tppg_print_leaf_weight(struct cgroup *cgrp, struct cftype *cft,
+				  struct seq_file *sf)
+{
+	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+
+	seq_printf(sf, "%u\n", blkcg->cfq_leaf_weight);
+	return 0;
+}
+
+/*
+ * Parse a per-device weight from @buf and store it in the matching group.
+ *
+ * A value of 0 clears the per-device setting, in which case the pending
+ * weight falls back to the blkcg-wide value. Any other value must lie in
+ * [CFQ_WEIGHT_MIN, CFQ_WEIGHT_MAX].
+ *
+ * @is_leaf_weight selects the leaf weight fields instead of the plain
+ * weight fields.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range value, or the error
+ * from blkg_conf_prep().
+ */
+static int __tppg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
+				    const char *buf, bool is_leaf_weight)
+{
+	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkg_conf_ctx ctx;
+	struct tpps_group *tppg;
+	int ret;
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_tpps, buf, &ctx);
+	if (ret)
+		return ret;
+
+	tppg = blkg_to_tppg(ctx.blkg);
+
+	if (ctx.v && (ctx.v < CFQ_WEIGHT_MIN || ctx.v > CFQ_WEIGHT_MAX)) {
+		ret = -EINVAL;
+	} else if (is_leaf_weight) {
+		tppg->dev_leaf_weight = ctx.v;
+		tppg->new_leaf_weight = ctx.v ? ctx.v : blkcg->cfq_leaf_weight;
+		ret = 0;
+	} else {
+		tppg->dev_weight = ctx.v;
+		tppg->new_weight = ctx.v ? ctx.v : blkcg->cfq_weight;
+		ret = 0;
+	}
+
+	blkg_conf_finish(&ctx);
+	return ret;
+}
+
+/* cgroup write handler for the per-device weight. */
+static int tppg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
+				  const char *buf)
+{
+	const bool leaf = false;
+
+	return __tppg_set_weight_device(cgrp, cft, buf, leaf);
+}
+
+/* cgroup write handler for the per-device leaf weight. */
+static int tppg_set_leaf_weight_device(struct cgroup *cgrp, struct cftype *cft,
+				       const char *buf)
+{
+	const bool leaf = true;
+
+	return __tppg_set_weight_device(cgrp, cft, buf, leaf);
+}
+
+/*
+ * Set the blkcg-wide weight (or leaf weight when @is_leaf_weight) to @val
+ * and propagate it as the pending weight to every group of the blkcg that
+ * has no per-device override.
+ *
+ * Returns 0 on success or -EINVAL when @val is outside
+ * [CFQ_WEIGHT_MIN, CFQ_WEIGHT_MAX].
+ */
+static int __tpps_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val,
+			     bool is_leaf_weight)
+{
+	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+	struct blkcg_gq *blkg;
+
+	if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX)
+		return -EINVAL;
+
+	spin_lock_irq(&blkcg->lock);
+
+	if (is_leaf_weight)
+		blkcg->cfq_leaf_weight = val;
+	else
+		blkcg->cfq_weight = val;
+
+	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+		struct tpps_group *tppg = blkg_to_tppg(blkg);
+
+		/* blkg without tpps policy data */
+		if (!tppg)
+			continue;
+
+		if (is_leaf_weight) {
+			if (!tppg->dev_leaf_weight)
+				tppg->new_leaf_weight = blkcg->cfq_leaf_weight;
+		} else {
+			if (!tppg->dev_weight)
+				tppg->new_weight = blkcg->cfq_weight;
+		}
+	}
+
+	spin_unlock_irq(&blkcg->lock);
+	return 0;
+}
+
+/* cgroup write handler for the blkcg-wide weight. */
+static int tpps_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+{
+	const bool leaf = false;
+
+	return __tpps_set_weight(cgrp, cft, val, leaf);
+}
+
+/* cgroup write handler for the blkcg-wide leaf weight. */
+static int tpps_set_leaf_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+{
+	const bool leaf = true;
+
+	return __tpps_set_weight(cgrp, cft, val, leaf);
+}
+
+/* offset delta from tppg->stats to tppg->dead_stats */
+static const int dead_stats_off_delta = offsetof(struct tpps_group, dead_stats) -
+ offsetof(struct tpps_group, stats);
+
+/*
+ * Helper for the recursive prfill callbacks: returns the recursive sum of
+ * the rwstat at @off in the live stats merged with the recursive sum of
+ * the same counter in the dead stats.
+ */
+static struct blkg_rwstat tppg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
+						       int off)
+{
+	struct blkg_rwstat live, dead;
+
+	live = blkg_rwstat_recursive_sum(pd, off);
+	dead = blkg_rwstat_recursive_sum(pd, off + dead_stats_off_delta);
+
+	blkg_rwstat_merge(&live, &dead);
+	return live;
+}
+
+/*
+ * Helper for the recursive prfill callbacks: returns the recursive sum of
+ * the stat at @off in the live stats plus the recursive sum of the same
+ * counter in the dead stats.
+ */
+static u64 tppg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
+{
+	u64 live, dead;
+
+	live = blkg_stat_recursive_sum(pd, off);
+	dead = blkg_stat_recursive_sum(pd, off + dead_stats_off_delta);
+
+	return live + dead;
+}
+
+/*
+ * cgroup read handler for plain stats; cft->private is the offset of the
+ * counter inside struct tpps_group.
+ */
+static int tppg_print_stat(struct cgroup *cgrp, struct cftype *cft,
+			   struct seq_file *sf)
+{
+	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), blkg_prfill_stat,
+			  &blkcg_policy_tpps, cft->private, false);
+	return 0;
+}
+
+/*
+ * cgroup read handler for read/write split stats; cft->private is the
+ * offset of the counter inside struct tpps_group.
+ */
+static int tppg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
+			     struct seq_file *sf)
+{
+	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), blkg_prfill_rwstat,
+			  &blkcg_policy_tpps, cft->private, true);
+	return 0;
+}
+
+/* prfill callback: print the live + dead recursive sum of the stat at @off. */
+static u64 tppg_prfill_stat_recursive(struct seq_file *sf,
+				      struct blkg_policy_data *pd, int off)
+{
+	u64 total;
+
+	total = tppg_stat_pd_recursive_sum(pd, off);
+	return __blkg_prfill_u64(sf, pd, total);
+}
+
+/*
+ * prfill callback: print the live + dead recursive sum of the rwstat at
+ * @off.
+ */
+static u64 tppg_prfill_rwstat_recursive(struct seq_file *sf,
+					struct blkg_policy_data *pd, int off)
+{
+	struct blkg_rwstat total;
+
+	total = tppg_rwstat_pd_recursive_sum(pd, off);
+	return __blkg_prfill_rwstat(sf, pd, &total);
+}
+
+/* cgroup read handler for the *_recursive plain stats. */
+static int tppg_print_stat_recursive(struct cgroup *cgrp, struct cftype *cft,
+				     struct seq_file *sf)
+{
+	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp),
+			  tppg_prfill_stat_recursive,
+			  &blkcg_policy_tpps, cft->private, false);
+	return 0;
+}
+
+/* cgroup read handler for the *_recursive read/write split stats. */
+static int tppg_print_rwstat_recursive(struct cgroup *cgrp, struct cftype *cft,
+				       struct seq_file *sf)
+{
+	blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp),
+			  tppg_prfill_rwstat_recursive,
+			  &blkcg_policy_tpps, cft->private, true);
+	return 0;
+}
+
+/*
+ * cgroup control files provided by the tpps policy. For the statistics
+ * entries, .private holds the offset of the counter inside struct
+ * tpps_group, which the print handlers pass on to the prfill callbacks.
+ */
+static struct cftype tpps_blkcg_files[] = {
+ /* on root, weight is mapped to leaf_weight */
+ {
+ .name = "tpps.weight_device",
+ .flags = CFTYPE_ONLY_ON_ROOT,
+ .read_seq_string = tppg_print_leaf_weight_device,
+ .write_string = tppg_set_leaf_weight_device,
+ .max_write_len = 256,
+ },
+ {
+ .name = "tpps.weight",
+ .flags = CFTYPE_ONLY_ON_ROOT,
+ .read_seq_string = tppg_print_leaf_weight,
+ .write_u64 = tpps_set_leaf_weight,
+ },
+
+ /* no such mapping necessary for !roots */
+ {
+ .name = "tpps.weight_device",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_seq_string = tppg_print_weight_device,
+ .write_string = tppg_set_weight_device,
+ .max_write_len = 256,
+ },
+ {
+ .name = "tpps.weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_seq_string = tppg_print_weight,
+ .write_u64 = tpps_set_weight,
+ },
+
+ /* leaf weights, present on every cgroup */
+ {
+ .name = "tpps.leaf_weight_device",
+ .read_seq_string = tppg_print_leaf_weight_device,
+ .write_string = tppg_set_leaf_weight_device,
+ .max_write_len = 256,
+ },
+ {
+ .name = "tpps.leaf_weight",
+ .read_seq_string = tppg_print_leaf_weight,
+ .write_u64 = tpps_set_leaf_weight,
+ },
+
+ /* statistics, covers only the tasks in the tppg */
+ {
+ .name = "tpps.time",
+ .private = offsetof(struct tpps_group, stats.time),
+ .read_seq_string = tppg_print_stat,
+ },
+ {
+ .name = "tpps.sectors",
+ .private = offsetof(struct tpps_group, stats.sectors),
+ .read_seq_string = tppg_print_stat,
+ },
+ {
+ .name = "tpps.io_service_bytes",
+ .private = offsetof(struct tpps_group, stats.service_bytes),
+ .read_seq_string = tppg_print_rwstat,
+ },
+ {
+ .name = "tpps.io_serviced",
+ .private = offsetof(struct tpps_group, stats.serviced),
+ .read_seq_string = tppg_print_rwstat,
+ },
+ {
+ .name = "tpps.io_service_time",
+ .private = offsetof(struct tpps_group, stats.service_time),
+ .read_seq_string = tppg_print_rwstat,
+ },
+ {
+ .name = "tpps.io_wait_time",
+ .private = offsetof(struct tpps_group, stats.wait_time),
+ .read_seq_string = tppg_print_rwstat,
+ },
+ {
+ .name = "tpps.io_merged",
+ .private = offsetof(struct tpps_group, stats.merged),
+ .read_seq_string = tppg_print_rwstat,
+ },
+ {
+ .name = "tpps.io_queued",
+ .private = offsetof(struct tpps_group, stats.queued),
+ .read_seq_string = tppg_print_rwstat,
+ },
+
+ /* the same statistics which cover the tppg and its descendants */
+ {
+ .name = "tpps.time_recursive",
+ .private = offsetof(struct tpps_group, stats.time),
+ .read_seq_string = tppg_print_stat_recursive,
+ },
+ {
+ .name = "tpps.sectors_recursive",
+ .private = offsetof(struct tpps_group, stats.sectors),
+ .read_seq_string = tppg_print_stat_recursive,
+ },
+ {
+ .name = "tpps.io_service_bytes_recursive",
+ .private = offsetof(struct tpps_group, stats.service_bytes),
+ .read_seq_string = tppg_print_rwstat_recursive,
+ },
+ {
+ .name = "tpps.io_serviced_recursive",
+ .private = offsetof(struct tpps_group, stats.serviced),
+ .read_seq_string = tppg_print_rwstat_recursive,
+ },
+ {
+ .name = "tpps.io_service_time_recursive",
+ .private = offsetof(struct tpps_group, stats.service_time),
+ .read_seq_string = tppg_print_rwstat_recursive,
+ },
+ {
+ .name = "tpps.io_wait_time_recursive",
+ .private = offsetof(struct tpps_group, stats.wait_time),
+ .read_seq_string = tppg_print_rwstat_recursive,
+ },
+ {
+ .name = "tpps.io_merged_recursive",
+ .private = offsetof(struct tpps_group, stats.merged),
+ .read_seq_string = tppg_print_rwstat_recursive,
+ },
+ {
+ .name = "tpps.io_queued_recursive",
+ .private = offsetof(struct tpps_group, stats.queued),
+ .read_seq_string = tppg_print_rwstat_recursive,
+ },
+ { } /* terminate */
+};
+
+/*
+ * pd_init_fn callback of the tpps blkcg policy.
+ *
+ * Runs the common tpps_group initialisation on the group embedded in
+ * @blkg and then copies both weights from the blkcg that owns @blkg.
+ * The values are read from the blkcg's cfq_weight / cfq_leaf_weight
+ * members.
+ */
+static void tpps_pd_init(struct blkcg_gq *blkg)
+{
+	struct tpps_group *group = blkg_to_tppg(blkg);
+	struct blkcg *owner;
+
+	tpps_init_tppg_base(group);
+
+	owner = blkg->blkcg;
+	group->weight = owner->cfq_weight;
+	group->leaf_weight = owner->cfq_leaf_weight;
+}
+
+/*
+ * Return the tpps_group that belongs to the parent of @tppg's blkg, or
+ * NULL when that blkg has no parent.
+ */
+static inline struct tpps_group *tppg_parent(struct tpps_group *tppg)
+{
+	struct blkcg_gq *parent_blkg = tppg_to_blkg(tppg)->parent;
+
+	if (!parent_blkg)
+		return NULL;
+
+	return blkg_to_tppg(parent_blkg);
+}
+
+/*
+ * Zero the accumulated counters in @stats.
+ *
+ * The queued counter is intentionally left alone: queued stats
+ * shouldn't be cleared.
+ *
+ * NOTE(review): stats->sectors is not reset here either - confirm that
+ * this is intended.
+ */
+static void tppg_stats_reset(struct tppg_stats *stats)
+{
+	/* plain counters */
+	blkg_stat_reset(&stats->time);
+
+	/* read/write split counters */
+	blkg_rwstat_reset(&stats->wait_time);
+	blkg_rwstat_reset(&stats->service_time);
+	blkg_rwstat_reset(&stats->merged);
+	blkg_rwstat_reset(&stats->serviced);
+	blkg_rwstat_reset(&stats->service_bytes);
+
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	/* counters that only exist with blk-cgroup debugging enabled */
+	blkg_stat_reset(&stats->empty_time);
+	blkg_stat_reset(&stats->idle_time);
+	blkg_stat_reset(&stats->group_wait_time);
+	blkg_stat_reset(&stats->dequeue);
+	blkg_stat_reset(&stats->avg_queue_size_samples);
+	blkg_stat_reset(&stats->avg_queue_size_sum);
+	blkg_stat_reset(&stats->unaccounted_time);
+#endif
+}
+
+/*
+ * @to += @from
+ *
+ * Add every counter of @from that tppg_stats_reset() clears into the
+ * matching counter of @to.  The queued counter is not merged, just as
+ * it is not reset.
+ */
+static void tppg_stats_merge(struct tppg_stats *to, struct tppg_stats *from)
+{
+	blkg_rwstat_merge(&to->service_bytes, &from->service_bytes);
+	blkg_rwstat_merge(&to->serviced, &from->serviced);
+	blkg_rwstat_merge(&to->merged, &from->merged);
+	blkg_rwstat_merge(&to->service_time, &from->service_time);
+	blkg_rwstat_merge(&to->wait_time, &from->wait_time);
+	/* destination must be @to; merging @from into itself loses the time */
+	blkg_stat_merge(&to->time, &from->time);
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	blkg_stat_merge(&to->unaccounted_time, &from->unaccounted_time);
+	blkg_stat_merge(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
+	blkg_stat_merge(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
+	blkg_stat_merge(&to->dequeue, &from->dequeue);
+	blkg_stat_merge(&to->group_wait_time, &from->group_wait_time);
+	blkg_stat_merge(&to->idle_time, &from->idle_time);
+	blkg_stat_merge(&to->empty_time, &from->empty_time);
+#endif
+}
+
+/*
+ * Fold the live and dead statistics of @tppg into the dead_stats of its
+ * parent group and clear them in @tppg afterwards.  Nothing is done for
+ * a group without a parent.
+ *
+ * Must be called with the queue_lock of @tppg's request queue held.
+ */
+static void tppg_stats_xfer_dead(struct tpps_group *tppg)
+{
+	struct tpps_group *parent;
+
+	lockdep_assert_held(tppg_to_blkg(tppg)->q->queue_lock);
+
+	parent = tppg_parent(tppg);
+	if (likely(parent)) {
+		/* hand both sets of counters to the parent ... */
+		tppg_stats_merge(&parent->dead_stats, &tppg->stats);
+		tppg_stats_merge(&parent->dead_stats, &tppg->dead_stats);
+
+		/* ... then clear them in this group */
+		tppg_stats_reset(&tppg->stats);
+		tppg_stats_reset(&tppg->dead_stats);
+	}
+}
+
+/*
+ * pd_offline_fn callback of the tpps blkcg policy.
+ *
+ * Hands the group's statistics over to its parent and unlinks the group
+ * from the list it is on via tppd_node.
+ */
+static void tpps_pd_offline(struct blkcg_gq *blkg)
+{
+	struct tpps_group *group = blkg_to_tppg(blkg);
+	struct list_head *node = &group->tppd_node;
+
+	/*
+	 * Once @blkg is offline, blkg_[rw]stat_recursive_sum() no longer
+	 * looks at it, so move the counters to the parent to keep them.
+	 * Stats of IOs completing after this point are lost.
+	 */
+	tppg_stats_xfer_dead(group);
+
+	if (!list_empty(node))
+		list_del_init(node);
+
+	/*
+	 * NOTE(review): a BUG_ON(!list_empty(&tppg->queue_list)) check was
+	 * left disabled here - confirm whether queues may legitimately
+	 * still be attached to the group at this point.
+	 */
+}
+
+/*
+ * pd_reset_stats_fn callback of the tpps blkcg policy: clear both the
+ * live and the dead statistics of the group attached to @blkg.
+ */
+static void tpps_pd_reset_stats(struct blkcg_gq *blkg)
+{
+	struct tpps_group *group = blkg_to_tppg(blkg);
+	struct tppg_stats *live = &group->stats;
+	struct tppg_stats *dead = &group->dead_stats;
+
+	tppg_stats_reset(live);
+	tppg_stats_reset(dead);
+}
+
+/*
+ * blkcg policy descriptor for tpps: per-blkg data is a struct tpps_group,
+ * the cgroup files come from tpps_blkcg_files, and the callbacks below
+ * handle init, offline and stats reset of that per-blkg data.
+ */
+static struct blkcg_policy blkcg_policy_tpps = {
+	.pd_size		= sizeof(struct tpps_group),
+	.cftypes		= tpps_blkcg_files,
+
+	.pd_init_fn		= tpps_pd_init,
+	.pd_offline_fn		= tpps_pd_offline,
+	.pd_reset_stats_fn	= tpps_pd_reset_stats,
+};
+
+/*
+ * Module init: register the blkcg policy, create the tpps_queue slab
+ * cache and register the elevator, in that order.  On failure everything
+ * set up so far is undone.
+ *
+ * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
+ * error returned by blkcg_policy_register() / elv_register().
+ */
+static int __init tpps_init(void)
+{
+	int err = blkcg_policy_register(&blkcg_policy_tpps);
+
+	if (err)
+		return err;
+
+	tpps_pool = KMEM_CACHE(tpps_queue, 0);
+	if (!tpps_pool) {
+		err = -ENOMEM;
+		goto unregister_policy;
+	}
+
+	err = elv_register(&iosched_tpps);
+	if (!err)
+		return 0;
+
+	/* elevator registration failed: drop the cache, then the policy */
+	kmem_cache_destroy(tpps_pool);
+unregister_policy:
+	blkcg_policy_unregister(&blkcg_policy_tpps);
+	return err;
+}
+
+/*
+ * Module exit: undo tpps_init() in the reverse order of registration,
+ * matching the error path of tpps_init().  The elevator is unregistered
+ * first so that the scheduler is no longer registered by the time the
+ * slab cache and the blkcg policy it relies on are torn down.
+ */
+static void __exit tpps_exit(void)
+{
+	elv_unregister(&iosched_tpps);
+	kmem_cache_destroy(tpps_pool);
+	blkcg_policy_unregister(&blkcg_policy_tpps);
+}
+
+/* Module entry and exit points. */
+module_init(tpps_init);
+module_exit(tpps_exit);
+
+/* Module metadata. */
+MODULE_AUTHOR("Robin Dong");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Tiny Parallel Proportion io Scheduler");
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2fdb4a4..489257a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -42,7 +42,7 @@ struct blkcg_gq;
* Maximum number of blkcg policies allowed to be registered concurrently.
* Defined here to simplify include dependency.
*/
-#define BLKCG_MAX_POLS 2
+#define BLKCG_MAX_POLS 3

struct request;
typedef void (rq_end_io_fn)(struct request *, int);
--
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/