[PATCH][cfq-cgroups] Introduce ioprio class for top layer.

From: Satoshi UCHIDA
Date: Fri Nov 07 2008 - 05:21:19 EST


This patch introduces an ioprio class for the cfq data control layer.
By applying this patch, the controller can also handle the RT/IDLE properties
among groups.

Signed-off-by: Satoshi UCHIDA <s-uchida@xxxxxxxxxxxxx>

---
block/cfq-cgroup.c | 344 +++++++++++++++++++++++++------------------
include/linux/cfq-iosched.h | 1 +
2 files changed, 203 insertions(+), 142 deletions(-)

diff --git a/block/cfq-cgroup.c b/block/cfq-cgroup.c
index bb8cb6f..993a3b6 100644
--- a/block/cfq-cgroup.c
+++ b/block/cfq-cgroup.c
@@ -20,11 +20,24 @@

static const int cfq_cgroup_slice = HZ / 10;

+/*
+ * rb_key offset past the service tree's last entry for IDLE-class cfq_data
+ */
+#define CFQ_CGROUP_IDLE_DELAY (HZ / 5)
+
+#define cfq_data_class_idle(cfqd) \
+ ((cfqd)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define cfq_data_class_rt(cfqd) \
+ ((cfqd)->ioprio_class == IOPRIO_CLASS_RT)
+
+
+
static struct cfq_ops cfq_cgroup_op;

struct cfq_cgroup {
struct cgroup_subsys_state css;
unsigned int ioprio;
+ unsigned short ioprio_class;

struct rb_root sibling_tree;
unsigned int siblings;
@@ -161,6 +174,7 @@ static void *cfq_cgroup_init_cfq_data(struct cfq_cgroup *cfqc,
cfqc = cgroup_to_cfq_cgroup(get_root_subsys(&cfq_subsys));
cfq_cgroup_sibling_tree_add(cfqc, cfqd);
cfqd->ioprio = cfqc->ioprio;
+ cfqd->ioprio_class = cfqc->ioprio_class;
} else {
struct cfq_data *__cfqd;
__cfqd = __cfq_cgroup_init_queue(cfqd->cfqdd->queue,
@@ -168,7 +182,7 @@ static void *cfq_cgroup_init_cfq_data(struct cfq_cgroup *cfqc,
if (!__cfqd)
return NULL;
cfq_cgroup_sibling_tree_add(cfqc, __cfqd);
- __cfqd->ioprio = cfqc->ioprio;
+ __cfqd->ioprio_class = cfqc->ioprio_class;
}

/* check and create cfq_data for children */
@@ -250,6 +264,7 @@ cfq_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
return ERR_PTR(-ENOMEM);

cfqc->ioprio = 3;
+ cfqc->ioprio_class = IOPRIO_CLASS_BE; /* new groups start best-effort */

cfqc->sibling_tree = RB_ROOT;
cfqc->siblings = 0;
@@ -378,7 +393,15 @@ static void cfq_cgroup_service_tree_add(struct cfq_data *cfqd, int add_front)
unsigned long rb_key;
int left;

- if (!add_front) {
+ if (cfq_data_class_idle(cfqd)) {
+ rb_key = CFQ_CGROUP_IDLE_DELAY;
+ parent = rb_last(&cfqdd->service_tree.rb);
+ if (parent && parent != &cfqd->rb_node) {
+ __cfqd = rb_entry(parent, struct cfq_data, rb_node);
+ rb_key += __cfqd->rb_key;
+ } else
+ rb_key += jiffies;
+ } else if (!add_front) {
rb_key = cfq_cgroup_slice_offset(cfqd) + jiffies;
rb_key += cfqd->slice_resid;
cfqd->slice_resid = 0;
@@ -400,7 +423,23 @@ static void cfq_cgroup_service_tree_add(struct cfq_data *cfqd, int add_front)
parent = *p;
__cfqd = rb_entry(parent, struct cfq_data, rb_node);

- if (rb_key < __cfqd->rb_key)
+
+ /*
+ * sort RT cfq_data first, we always want to give
+ * preference to them. IDLE cfq_data goes to the back.
+ * after that, sort on the next service time.
+ */
+ if (cfq_data_class_rt(cfqd) > cfq_data_class_rt(__cfqd))
+ n = &(*p)->rb_left;
+ else if (cfq_data_class_rt(cfqd) < cfq_data_class_rt(__cfqd))
+ n = &(*p)->rb_right;
+ else if (cfq_data_class_idle(cfqd) <
+ cfq_data_class_idle(__cfqd))
+ n = &(*p)->rb_left;
+ else if (cfq_data_class_idle(cfqd) >
+ cfq_data_class_idle(__cfqd))
+ n = &(*p)->rb_right;
+ else if (rb_key < __cfqd->rb_key)
n = &(*p)->rb_left;
else
n = &(*p)->rb_right;
@@ -542,6 +579,14 @@ int cfq_cgroup_dispatch_requests(struct request_queue *q, int force)
if (cfqd)
dispatched = cfq_queue_dispatch_requests(cfqd, force);

+ /*
+ * idle cfq_data always expires after 1 dispatch round; cfqd may be NULL.
+ */
+ if (cfqd && cfqdd->busy_data > 1 && cfq_data_class_idle(cfqd)) {
+ cfqd->slice_end = jiffies + 1;
+ cfq_cgroup_slice_expired(cfqdd, 0);
+ }
+
return dispatched;
}

@@ -699,149 +744,164 @@ param_separate(const char *master, char *valbuf, char *pathbuf, int size)
*pc2 = '\0';
}

-static ssize_t cfq_cgroup_read(struct cgroup *cont, struct cftype *cft,
- struct file *file, char __user *userbuf,
- size_t nbytes, loff_t *ppos)
-{
- struct cfq_cgroup *cfqc;
- char *page;
- ssize_t ret;
- struct rb_node *p;
-
- page = (char *)__get_free_page(GFP_TEMPORARY);
- if (!page)
- return -ENOMEM;
-
- cgroup_lock();
- if (cgroup_is_removed(cont)) {
- cgroup_unlock();
- ret = -ENODEV;
- goto out;
- }
-
- cfqc = cgroup_to_cfq_cgroup(cont);
-
- cgroup_unlock();
-
- /* print priority */
- ret = snprintf(page, PAGE_SIZE, "default priority: %d\n", cfqc->ioprio);
-
- p = rb_first(&cfqc->sibling_tree);
- while (p) {
- struct cfq_data *__cfqd;
-
- __cfqd = rb_entry(p, struct cfq_data, group_node);
-
- ret += snprintf(page + ret, PAGE_SIZE - ret, " %s %d\n",
- __cfqd->cfqdd->queue->kobj.parent->name,
- __cfqd->ioprio);
-
- p = rb_next(p);
- }

- ret = simple_read_from_buffer(userbuf, nbytes, ppos, page, ret);
-
-out:
- free_page((unsigned long)page);
- return ret;
+#define READ_FUNCTION(__FUNC, __VAR, __DEF_MSG) \
+static ssize_t __FUNC(struct cgroup *cont, struct cftype *cft, \
+ struct file *file, char __user *userbuf, \
+ size_t nbytes, loff_t *ppos) \
+{ \
+ struct cfq_cgroup *cfqc; \
+ char *page; \
+ ssize_t ret; \
+ struct rb_node *p; \
+ \
+ page = (char *)__get_free_page(GFP_TEMPORARY); \
+ if (!page) \
+ return -ENOMEM; \
+ \
+ cgroup_lock(); \
+ if (cgroup_is_removed(cont)) { \
+ cgroup_unlock(); \
+ ret = -ENODEV; \
+ goto out; \
+ } \
+ \
+ cfqc = cgroup_to_cfq_cgroup(cont); \
+ \
+ cgroup_unlock(); \
+ \
+ /* cgroup default first, then one " <name> <value>" line per sibling */ \
+ ret = scnprintf(page, PAGE_SIZE, "default " __DEF_MSG ": %d\n", \
+ cfqc->__VAR); \
+ \
+ p = rb_first(&cfqc->sibling_tree); \
+ while (p) { \
+ struct cfq_data *__cfqd; \
+ \
+ __cfqd = rb_entry(p, struct cfq_data, group_node); \
+ /* scnprintf returns bytes written, so ret stays below PAGE_SIZE */ \
+ ret += scnprintf(page + ret, PAGE_SIZE - ret, " %s %d\n",\
+ __cfqd->cfqdd->queue->kobj.parent->name, \
+ __cfqd->__VAR); \
+ \
+ p = rb_next(p); \
+ } \
+ \
+ ret = simple_read_from_buffer(userbuf, nbytes, ppos, page, ret);\
+ \
+out: \
+ free_page((unsigned long)page); \
+ return ret; \
}
-
-static ssize_t cfq_cgroup_write(struct cgroup *cont, struct cftype *cft,
- struct file *file, const char __user *userbuf,
- size_t nbytes, loff_t *ppos)
-{
- struct cfq_cgroup *cfqc;
- ssize_t ret;
- long new_prio;
- int err, sn;
- char *buffer = NULL;
- char *valbuf = NULL, *pathbuf = NULL;
- struct rb_node *p;
-
- cgroup_lock();
- if (cgroup_is_removed(cont)) {
- cgroup_unlock();
- ret = -ENODEV;
- goto out;
- }
-
- cfqc = cgroup_to_cfq_cgroup(cont);
- cgroup_unlock();
-
- /* set priority */
- buffer = kmalloc(nbytes + 1, GFP_KERNEL);
- if (buffer == NULL)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- ret = -EFAULT;
- goto free_buf;
- }
- buffer[nbytes] = 0;
-
- valbuf = kmalloc(nbytes + 1, GFP_KERNEL);
- if (!valbuf) {
- ret = -ENOMEM;
- goto free_buf;
- }
-
- pathbuf = kmalloc(nbytes + 1, GFP_KERNEL);
- if (!pathbuf) {
- ret = -ENOMEM;
- goto free_val;
- }
-
- param_separate(buffer, valbuf, pathbuf, nbytes);
-
- err = strict_strtoul(valbuf, 10, &new_prio);
- if ((err) || ((new_prio < 0) || (new_prio > CFQ_CGROUP_MAX_IOPRIO))) {
- ret = -EINVAL;
- goto free_path;
- }
-
- sn = strlen(pathbuf);
-
- p = rb_first(&cfqc->sibling_tree);
- while (p) {
- struct cfq_data *__cfqd;
- const char *namep;
-
- __cfqd = rb_entry(p, struct cfq_data, group_node);
- namep = __cfqd->cfqdd->queue->kobj.parent->name;
-
- if (sn == 0) {
- __cfqd->ioprio = new_prio;
- } else if ((sn == strlen(namep)) &&
- (strncmp(pathbuf, namep, sn) == 0)) {
- __cfqd->ioprio = new_prio;
- break;
- }
-
- p = rb_next(p);
- }
-
- if ((sn == 0) ||
- ((sn == 7) && (strncmp(pathbuf, "default", 7) == 0)))
- cfqc->ioprio = new_prio;
-
- ret = nbytes;
-
-free_path:
- kfree(pathbuf);
-free_val:
- kfree(valbuf);
-free_buf:
- kfree(buffer);
-out:
- return ret;
+READ_FUNCTION(cfq_cgroup_ioprio_read, ioprio, "priority");
+READ_FUNCTION(cfq_cgroup_ioprio_class_read, ioprio_class, "priority class");
+#undef READ_FUNCTION
+
+#define WRITE_FUNCTION(__FUNC, __VAR, MIN, MAX) \
+static ssize_t __FUNC(struct cgroup *cont, struct cftype *cft, \
+ struct file *file, const char __user *userbuf, \
+ size_t nbytes, loff_t *ppos) \
+{ \
+ struct cfq_cgroup *cfqc; \
+ ssize_t ret; \
+ long new_val; \
+ int err, sn; \
+ char *buffer = NULL; \
+ char *valbuf = NULL, *pathbuf = NULL; \
+ struct rb_node *p; \
+ \
+ cgroup_lock(); \
+ if (cgroup_is_removed(cont)) { \
+ cgroup_unlock(); \
+ ret = -ENODEV; \
+ goto out; \
+ } \
+ \
+ cfqc = cgroup_to_cfq_cgroup(cont); \
+ cgroup_unlock(); \
+ \
+ /* copy the user input, then split it into value and path strings */ \
+ buffer = kmalloc(nbytes + 1, GFP_KERNEL); \
+ if (buffer == NULL) \
+ return -ENOMEM; \
+ \
+ if (copy_from_user(buffer, userbuf, nbytes)) { \
+ ret = -EFAULT; \
+ goto free_buf; \
+ } \
+ buffer[nbytes] = 0; \
+ \
+ valbuf = kmalloc(nbytes + 1, GFP_KERNEL); \
+ if (!valbuf) { \
+ ret = -ENOMEM; \
+ goto free_buf; \
+ } \
+ \
+ pathbuf = kmalloc(nbytes + 1, GFP_KERNEL); \
+ if (!pathbuf) { \
+ ret = -ENOMEM; \
+ goto free_val; \
+ } \
+ \
+ param_separate(buffer, valbuf, pathbuf, nbytes); \
+ /* signed parse: new_val is a long and is range-checked below */ \
+ err = strict_strtol(valbuf, 10, &new_val); \
+ if ((err) || ((new_val < (MIN)) || (new_val > (MAX)))) { \
+ ret = -EINVAL; \
+ goto free_path; \
+ } \
+ \
+ sn = strlen(pathbuf); \
+ /* empty path: update every sibling; otherwise only the named one */ \
+ p = rb_first(&cfqc->sibling_tree); \
+ while (p) { \
+ struct cfq_data *__cfqd; \
+ const char *namep; \
+ \
+ __cfqd = rb_entry(p, struct cfq_data, group_node); \
+ namep = __cfqd->cfqdd->queue->kobj.parent->name; \
+ \
+ if (sn == 0) { \
+ __cfqd->__VAR = new_val; \
+ } else if ((sn == strlen(namep)) && \
+ (strncmp(pathbuf, namep, sn) == 0)) { \
+ __cfqd->__VAR = new_val; \
+ break; \
+ } \
+ \
+ p = rb_next(p); \
+ } \
+ /* an empty path or "default" also updates the cgroup-wide value */ \
+ if ((sn == 0) || \
+ ((sn == 7) && (strncmp(pathbuf, "default", 7) == 0))) \
+ cfqc->__VAR = new_val; \
+ \
+ ret = nbytes; \
+ \
+free_path: \
+ kfree(pathbuf); \
+free_val: \
+ kfree(valbuf); \
+free_buf: \
+ kfree(buffer); \
+out: \
+ return ret; \
}
+WRITE_FUNCTION(cfq_cgroup_ioprio_write, ioprio, 0, CFQ_CGROUP_MAX_IOPRIO);
+WRITE_FUNCTION(cfq_cgroup_ioprio_class_write, ioprio_class, 0,
+ IOPRIO_CLASS_IDLE);
+#undef WRITE_FUNCTION
+
+#define CFQ_CGROUP_CTYPE_ATTR(_name) \
+ { /* cftype entry wired to cfq_cgroup_<_name>_read/_write */ \
+ .name = __stringify(_name), \
+ .read = cfq_cgroup_##_name##_read, \
+ .write = cfq_cgroup_##_name##_write, \
+ }

static struct cftype files[] = {
- {
- .name = "ioprio",
- .read = cfq_cgroup_read,
- .write = cfq_cgroup_write,
- },
+ CFQ_CGROUP_CTYPE_ATTR(ioprio),
+ CFQ_CGROUP_CTYPE_ATTR(ioprio_class),
};

static int cfq_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --git a/include/linux/cfq-iosched.h b/include/linux/cfq-iosched.h
index 920bcb5..ca04ebd 100644
--- a/include/linux/cfq-iosched.h
+++ b/include/linux/cfq-iosched.h
@@ -102,6 +102,7 @@ struct cfq_data {

#ifdef CONFIG_IOSCHED_CFQ_CGROUP
unsigned int ioprio;
+ unsigned short ioprio_class;

/* sibling_tree member for cfq_meta_data */
struct rb_node sib_node;
--
1.5.6.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/