[PATCH v4 1/3] sched/fair: make it possible to account fair load avg consistently

From: byungchul.park
Date: Fri Oct 23 2015 - 12:17:29 EST


From: Byungchul Park <byungchul.park@xxxxxxx>

Current code can account fair class load average for the time the task
was absent from the fair class thanks to ATTACH_AGE_LOAD. However, it
doesn't work in the cases that either migration or group change happened
in the other sched classes.

This patch introduces more general way to care fair load average
accounting, and it works consistently in any case e.g. migration or
cgroup change in other sched classes.

Signed-off-by: Byungchul Park <byungchul.park@xxxxxxx>
---
kernel/sched/core.c | 1 +
kernel/sched/fair.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 4 ++++
3 files changed, 50 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a91df61..0368054 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2075,6 +2075,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.vruntime = 0;
+ p->se.cfs_rq = NULL;
INIT_LIST_HEAD(&p->se.group_node);

#ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 077076f..e9c5668 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2808,6 +2808,51 @@ dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
}

+#ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * Called within set_task_rq() right before setting a task's cpu. The
+ * caller only guarantees p->pi_lock is held; no other assumptions,
+ * including the state of rq->lock, should be made.
+ */
+void set_task_rq_fair(struct sched_entity *se,
+ struct cfs_rq *prev,
+ struct cfs_rq *next)
+{
+ if (!sched_feat(ATTACH_AGE_LOAD)) return;
+
+ /*
+ * We are supposed to update the task to "current" time, then its up to date
+ * and ready to go to new CPU/cfs_rq. But we have difficulty in getting
+ * what current time is, so simply throw away the out-of-date time. This
+ * will result in the wakee task is less decayed, but giving the wakee more
+ * load sounds not bad.
+ */
+ if (se->avg.last_update_time && prev) {
+ u64 p_last_update_time;
+ u64 n_last_update_time;
+
+#ifndef CONFIG_64BIT
+ u64 p_last_update_time_copy;
+ u64 n_last_update_time_copy;
+
+ do {
+ p_last_update_time_copy = prev->load_last_update_time_copy;
+ n_last_update_time_copy = next->load_last_update_time_copy;
+ smp_rmb();
+ p_last_update_time = prev->avg.last_update_time;
+ n_last_update_time = next->avg.last_update_time;
+ } while (p_last_update_time != p_last_update_time_copy ||
+ n_last_update_time != n_last_update_time_copy);
+#else
+ p_last_update_time = prev->avg.last_update_time;
+ n_last_update_time = next->avg.last_update_time;
+#endif
+ __update_load_avg(p_last_update_time, cpu_of(rq_of(prev)), &se->avg, 0, 0, NULL);
+ se->avg.last_update_time = n_last_update_time;
+ }
+}
+#endif
+
/*
* Task first catches up with cfs_rq, and then subtract
* itself from the cfs_rq (task must be off the queue now).
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index af6f252..363cf9c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -335,6 +335,9 @@ extern void sched_move_task(struct task_struct *tsk);

#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
+extern void set_task_rq_fair(struct sched_entity *se,
+ struct cfs_rq *prev,
+ struct cfs_rq *next);
#endif

#else /* CONFIG_CGROUP_SCHED */
@@ -933,6 +936,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
+ set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
p->se.cfs_rq = tg->cfs_rq[cpu];
p->se.parent = tg->se[cpu];
#endif
--
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/