[PATCH 2/4] sched: Account per task_group nr_iowait

From: Kirill Tkhai
Date: Mon Nov 06 2017 - 09:40:45 EST


This patch tracks the number of tasks in iowait state separately for
each task_group. This may be useful for containers, to check the
nr_iowait state of a single container.

Signed-off-by: Kirill Tkhai <ktkhai@xxxxxxxxxxxxx>
---
kernel/sched/core.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 5 +++++
2 files changed, 50 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 712ee54edaa1..86d1ad5f49bd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -796,12 +796,32 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)

static void task_iowait_start(struct rq *rq, struct task_struct *p)
{
+#ifdef CONFIG_CGROUP_SCHED
+ struct task_group *tg = task_group(p);
+
+ /* Task's sched_task_group is changed under both of the below locks */
+ BUG_ON(!raw_spin_is_locked(&p->pi_lock) && !raw_spin_is_locked(&rq->lock));
+ while (task_group_is_autogroup(tg))
+ tg = tg->parent;
+ atomic_inc(&tg->stat[rq->cpu].nr_iowait);
+#endif
+
atomic_inc(&rq->nr_iowait);
delayacct_blkio_start();
}

static void task_iowait_end(struct rq *rq, struct task_struct *p)
{
+#ifdef CONFIG_CGROUP_SCHED
+ struct task_group *tg = task_group(p);
+
+ /* Task's sched_task_group is changed under both of the below locks */
+ BUG_ON(!raw_spin_is_locked(&p->pi_lock) && !raw_spin_is_locked(&rq->lock));
+ while (task_group_is_autogroup(tg))
+ tg = tg->parent;
+ atomic_dec(&tg->stat[rq->cpu].nr_iowait);
+#endif
+
delayacct_blkio_end();
atomic_dec(&rq->nr_iowait);
}
@@ -5805,6 +5825,9 @@ void __init sched_init(void)
sched_clock_init();
wait_bit_init();

+#ifdef CONFIG_CGROUP_SCHED
+ alloc_size += nr_cpu_ids * sizeof(struct tg_stat);
+#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
alloc_size += 2 * nr_cpu_ids * sizeof(void **);
#endif
@@ -5814,6 +5837,10 @@ void __init sched_init(void)
if (alloc_size) {
ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);

+#ifdef CONFIG_CGROUP_SCHED
+ root_task_group.stat = (struct tg_stat *)ptr;
+ ptr += nr_cpu_ids * sizeof(struct tg_stat);
+#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
root_task_group.se = (struct sched_entity **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
@@ -6133,6 +6160,8 @@ static DEFINE_SPINLOCK(task_group_lock);

static void sched_free_group(struct task_group *tg)
{
+ if (tg->stat)
+ kfree(tg->stat);
free_fair_sched_group(tg);
free_rt_sched_group(tg);
autogroup_free(tg);
@@ -6154,6 +6183,10 @@ struct task_group *sched_create_group(struct task_group *parent)
if (!alloc_rt_sched_group(tg, parent))
goto err;

+ tg->stat = kzalloc(sizeof(struct tg_stat) * nr_cpu_ids, 0);
+ if (!tg->stat)
+ goto err;
+
return tg;

err:
@@ -6207,8 +6240,16 @@ void sched_offline_group(struct task_group *tg)

static void sched_change_group(struct task_struct *tsk, int type)
{
+ int cpu = 0, queued = task_on_rq_queued(tsk);
struct task_group *tg;

+ if (!queued && tsk->in_iowait && type == TASK_MOVE_GROUP) {
+ cpu = task_cpu(tsk);
+ tg = task_group(tsk);
+ while (task_group_is_autogroup(tg))
+ tg = tg->parent;
+ atomic_dec(&tg->stat[cpu].nr_iowait);
+ }
/*
* All callers are synchronized by task_rq_lock(); we do not use RCU
* which is pointless here. Thus, we pass "true" to task_css_check()
@@ -6216,6 +6257,10 @@ static void sched_change_group(struct task_struct *tsk, int type)
*/
tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
struct task_group, css);
+
+ if (!queued && tsk->in_iowait && type == TASK_MOVE_GROUP)
+ atomic_inc(&tg->stat[cpu].nr_iowait);
+
tg = autogroup_task_group(tsk, tg);
tsk->sched_task_group = tg;

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 58787e3631c7..5e7cb18e1340 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -290,6 +290,10 @@ struct cfs_bandwidth {
#endif
};

+struct tg_stat { /* one element of task_group::stat[], which is indexed by CPU id */
+ atomic_t nr_iowait; /* incremented in task_iowait_start(), decremented in task_iowait_end() */
+};
+
/* task group related information */
struct task_group {
struct cgroup_subsys_state css;
@@ -330,6 +334,7 @@ struct task_group {
#endif

struct cfs_bandwidth cfs_bandwidth;
+ struct tg_stat *stat;
};

#ifdef CONFIG_FAIR_GROUP_SCHED