Re: [PATCH v5 07/10] sched/irq: add irq utilization tracking

From: Dietmar Eggemann
Date: Thu May 31 2018 - 12:55:10 EST


On 05/30/2018 08:45 PM, Vincent Guittot wrote:
> Hi Dietmar,
>
> On 30 May 2018 at 17:55, Dietmar Eggemann <dietmar.eggemann@xxxxxxx> wrote:
>> On 05/25/2018 03:12 PM, Vincent Guittot wrote:

[...]

>>> + */
>>> + ret = ___update_load_sum(rq->clock - running, rq->cpu,
>>> &rq->avg_irq,
>>> + 0,
>>> + 0,
>>> + 0);
>>> + ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq,
>>> + 1,
>>> + 1,
>>> + 1);

Can you not change the function parameter list to the usual
(u64 now, struct rq *rq, int running)?

Something like this (only compile and boot tested):

-- >8 --

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9894bc7af37e..26ffd585cab8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -177,8 +177,22 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
rq->clock_task += delta;

#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
- if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
- update_irq_load_avg(rq, irq_delta + steal);
+ if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) {
+ /*
+ * We know the time that has been used by interrupt since last
+ * update but we don't when. Let be pessimistic and assume that
+ * interrupt has happened just before the update. This is not
+ * so far from reality because interrupt will most probably
+ * wake up task and trig an update of rq clock during which the
+ * metric si updated.
+ * We start to decay with normal context time and then we add
+ * the interrupt context time.
+ * We can safely remove running from rq->clock because
+ * rq->clock += delta with delta >= running
+ */
+ update_irq_load_avg(rq_clock(rq) - (irq_delta + steal), rq, 0);
+ update_irq_load_avg(rq_clock(rq), rq, 1);
+ }
#endif
}

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1bb3379c4b71..a245f853c271 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7363,7 +7363,7 @@ static void update_blocked_averages(int cpu)
}
update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
- update_irq_load_avg(rq, 0);
+ update_irq_load_avg(rq_clock(rq), rq, 0);
/* Don't need periodic decay once load/util_avg are null */
if (others_rqs_have_blocked(rq))
done = false;
@@ -7434,7 +7434,7 @@ static inline void update_blocked_averages(int cpu)
update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
- update_irq_load_avg(rq, 0);
+ update_irq_load_avg(rq_clock(rq), rq, 0);
#ifdef CONFIG_NO_HZ_COMMON
rq->last_blocked_load_update_tick = jiffies;
if (!cfs_rq_has_blocked(cfs_rq) && !others_rqs_have_blocked(rq))
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index d2e4f2186b13..ae01bb18e28c 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -365,31 +365,15 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
*
*/

-int update_irq_load_avg(struct rq *rq, u64 running)
+int update_irq_load_avg(u64 now, struct rq *rq, int running)
{
- int ret = 0;
- /*
- * We know the time that has been used by interrupt since last update
- * but we don't when. Let be pessimistic and assume that interrupt has
- * happened just before the update. This is not so far from reality
- * because interrupt will most probably wake up task and trig an update
- * of rq clock during which the metric si updated.
- * We start to decay with normal context time and then we add the
- * interrupt context time.
- * We can safely remove running from rq->clock because
- * rq->clock += delta with delta >= running
- */
- ret = ___update_load_sum(rq->clock - running, rq->cpu, &rq->avg_irq,
- 0,
- 0,
- 0);
- ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq,
- 1,
- 1,
- 1);
-
- if (ret)
+ if (___update_load_sum(now, rq->cpu, &rq->avg_irq,
+ running,
+ running,
+ running)) {
___update_load_avg(&rq->avg_irq, 1, 1);
+ return 1;
+ }

- return ret;
+ return 0;
}
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index 0ce9a5a5877a..ebc57301a9a8 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -5,7 +5,7 @@ int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_e
int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq);
int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);
int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);
-int update_irq_load_avg(struct rq *rq, u64 running);
+int update_irq_load_avg(u64 now, struct rq *rq, int running);

/*
* When a task is dequeued, its estimated utilization should not be update if