Re: [PATCH] sched/fair: Fix ftq noise bench regression

From: Dietmar Eggemann
Date: Tue Mar 21 2017 - 13:47:40 EST


Hi Vincent,

On 17/03/17 13:47, Vincent Guittot wrote:

[...]

> Reported-by: ying.huang@xxxxxxxxxxxxxxx
> Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
> Fixes: 4e5160766fcc ("sched/fair: Propagate asynchrous detach")

I thought I can see a difference by running:

perf stat --null --repeat 10 -- perf bench sched messaging -g 50 -l
5000

on an Ubuntu 16.10 server system.

Number of entries in the rq->leaf_cfs_rq_list per cpu: ~40

Target: Intel i5-3320M (4 logical cpus)

tip/sched/core: 42.119140365 seconds time elapsed ( +- 0.33% )

+ patch : 42.089557108 seconds time elapsed ( +- 0.37% )

Maybe I need a CPU with more 'logical cpus' or a different test case.
Anyway, couldn't spot any regression.

> ---
> kernel/sched/fair.c | 39 ++++++++++++++++++++++++++++++++++++---
> 1 file changed, 36 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2805bd7..007df59 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3173,6 +3173,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
> return 1;
> }
>
> +/*
> + * Check if we need to update the load and the utilization of a blocked
> + * group_entity
> + */
> +static inline bool skip_blocked_update(struct sched_entity *se)
> +{
> + struct cfs_rq *gcfs_rq = group_cfs_rq(se);
> +
> + /*
> + * If sched_entity still have not null load or utilization, we have to
> + * decay it.
> + */
> + if (se->avg.load_avg || se->avg.util_avg)
> + return false;
> +
> + /*
> + * If there is a pending propagation, we have to update the load and
> + * the utilizaion of the sched_entity

nit pick: s/utilizaion/utilization

> + */
> + if (gcfs_rq->propagate_avg)
> + return false;
> +
> + /*
> + * Other wise, the load and the utilization of the sched_entity is
> + * already null and there is no pending propagation so it will be a
> + * waste of time to try to decay it.
> + */
> + return true;
> +}
> +
> #else /* CONFIG_FAIR_GROUP_SCHED */
>
> static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
> @@ -6961,6 +6991,8 @@ static void update_blocked_averages(int cpu)
> * list_add_leaf_cfs_rq() for details.
> */
> for_each_leaf_cfs_rq(rq, cfs_rq) {
> + struct sched_entity *se;
> +
> /* throttled entities do not contribute to load */
> if (throttled_hierarchy(cfs_rq))
> continue;
> @@ -6968,9 +7000,10 @@ static void update_blocked_averages(int cpu)
> if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
> update_tg_load_avg(cfs_rq, 0);
>
> - /* Propagate pending load changes to the parent */
> - if (cfs_rq->tg->se[cpu])
> - update_load_avg(cfs_rq->tg->se[cpu], 0);
> + /* Propagate pending load changes to the parent if any */
> + se = cfs_rq->tg->se[cpu];
> + if (se && !skip_blocked_update(se))
> + update_load_avg(se, 0);
> }
> rq_unlock_irqrestore(rq, &rf);
> }
>

Why not turn skip_blocked_update(se) into blocked_update_needed(cpu, cfs_rq)?
Saves a couple of patch lines:

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 007df5953d1a..8001eeb4ec18 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3177,30 +3177,34 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
* Check if we need to update the load and the utilization of a blocked
* group_entity
*/
-static inline bool skip_blocked_update(struct sched_entity *se)
+static inline bool blocked_update_needed(int cpu, struct cfs_rq *cfs_rq)
{
- struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+ struct sched_entity *se = cfs_rq->tg->se[cpu];
+
+ /* cfs_rq of a root task_group has no sched_entity counterpart */
+ if (!se)
+ return false;

/*
* If sched_entity still have not null load or utilization, we have to
* decay it.
*/
if (se->avg.load_avg || se->avg.util_avg)
- return false;
+ return true;

/*
* If there is a pending propagation, we have to update the load and
* the utilizaion of the sched_entity
*/
- if (gcfs_rq->propagate_avg)
- return false;
+ if (cfs_rq->propagate_avg)
+ return true;

/*
* Other wise, the load and the utilization of the sched_entity is
* already null and there is no pending propagation so it will be a
* waste of time to try to decay it.
*/
- return true;
+ return false;
}

#else /* CONFIG_FAIR_GROUP_SCHED */
@@ -6991,8 +6995,6 @@ static void update_blocked_averages(int cpu)
* list_add_leaf_cfs_rq() for details.
*/
for_each_leaf_cfs_rq(rq, cfs_rq) {
- struct sched_entity *se;
-
/* throttled entities do not contribute to load */
if (throttled_hierarchy(cfs_rq))
continue;
@@ -7001,9 +7003,8 @@ static void update_blocked_averages(int cpu)
update_tg_load_avg(cfs_rq, 0);

/* Propagate pending load changes to the parent if any */
- se = cfs_rq->tg->se[cpu];
- if (se && !skip_blocked_update(se))
- update_load_avg(se, 0);
+ if (blocked_update_needed(cpu, cfs_rq))
+ update_load_avg(cfs_rq->tg->se[cpu], 0);
}
rq_unlock_irqrestore(rq, &rf);
}