Re: [RFC PATCH 2/5] sched: Add NOHZ_STATS_KICK

From: Vincent Guittot
Date: Thu Dec 21 2017 - 11:57:02 EST


On 21 December 2017 at 17:23, Vincent Guittot
<vincent.guittot@xxxxxxxxxx> wrote:
> Hi Peter,
>
> I think that part of the proposal is missing.
>
> One goal of the patchset was to kick an update of the stats of idle
> cpu when a task wake up on a cpu but the statistic has not been
> updated for a while.
>
> That's why there where a call to nohz_kick_needed in the proposal to
> kick ilb but only for updating blocked load and not a full idle load

sorry a call to nohz_kick_needed (which becomes nohz_balancer_kick in
yours patchset) in select_task_fair_rq_fair

> balance
>
> I can't find this call any more in your patchset

In fact, we can't only rely on the tick and newly_idle load balance to
ensure a period update of the blocked load because they can never
happen. So we need to find another place to kick for a periodic update
which is when a task wake up


>
> On 21 December 2017 at 11:21, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>>
>> Suggested-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
>> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
>> ---
>> kernel/sched/core.c | 4 +--
>> kernel/sched/fair.c | 52 ++++++++++++++++++++++++++++++++++-----------------
>> kernel/sched/sched.h | 4 +++
>> 3 files changed, 41 insertions(+), 19 deletions(-)
>>
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -608,7 +608,7 @@ static inline bool got_nohz_idle_kick(vo
>> {
>> int cpu = smp_processor_id();
>>
>> - if (!(atomic_read(nohz_flags(cpu)) & NOHZ_BALANCE_KICK))
>> + if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK))
>> return false;
>>
>> if (idle_cpu(cpu) && !need_resched())
>> @@ -618,7 +618,7 @@ static inline bool got_nohz_idle_kick(vo
>> * We can't run Idle Load Balance on this CPU for this time so we
>> * cancel it and clear NOHZ_BALANCE_KICK
>> */
>> - atomic_andnot(NOHZ_BALANCE_KICK, nohz_flags(cpu));
>> + atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
>> return false;
>> }
>>
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -9001,8 +9001,8 @@ static void nohz_balancer_kick(void)
>> if (ilb_cpu >= nr_cpu_ids)
>> return;
>>
>> - flags = atomic_fetch_or(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu));
>> - if (flags & NOHZ_BALANCE_KICK)
>> + flags = atomic_fetch_or(NOHZ_KICK_MASK, nohz_flags(ilb_cpu));
>> + if (flags & NOHZ_KICK_MASK)
>> return;
>> /*
>> * Use smp_send_reschedule() instead of resched_cpu().
>> @@ -9125,8 +9125,6 @@ static void rebalance_domains(struct rq
>> int need_serialize, need_decay = 0;
>> u64 max_cost = 0;
>>
>> - update_blocked_averages(cpu);
>> -
>> rcu_read_lock();
>> for_each_domain(cpu, sd) {
>> /*
>> @@ -9221,20 +9219,27 @@ static void rebalance_domains(struct rq
>> * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
>> * rebalancing for all the cpus for whom scheduler ticks are stopped.
>> */
>> -static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
>> +static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
>> {
>> - int this_cpu = this_rq->cpu;
>> - struct rq *rq;
>> - int balance_cpu;
>> /* Earliest time when we have to do rebalance again */
>> unsigned long next_balance = jiffies + 60*HZ;
>> int update_next_balance = 0;
>> + int this_cpu = this_rq->cpu;
>> + unsigned int flags;
>> + int balance_cpu;
>> + struct rq *rq;
>>
>> - if (!(atomic_read(nohz_flags(this_cpu)) & NOHZ_BALANCE_KICK))
>> - return;
>> + if (!(atomic_read(nohz_flags(this_cpu)) & NOHZ_KICK_MASK))
>> + return false;
>>
>> - if (idle != CPU_IDLE)
>> - goto end;
>> + if (idle != CPU_IDLE) {
>> + atomic_andnot(NOHZ_KICK_MASK, nohz_flags(this_cpu));
>> + return false;
>> + }
>> +
>> + flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(this_cpu));
>> +
>> + SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
>>
>> for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
>> if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
>> @@ -9262,7 +9267,9 @@ static void nohz_idle_balance(struct rq
>> cpu_load_update_idle(rq);
>> rq_unlock_irq(rq, &rf);
>>
>> - rebalance_domains(rq, CPU_IDLE);
>> + update_blocked_averages(rq->cpu);
>> + if (flags & NOHZ_BALANCE_KICK)
>> + rebalance_domains(rq, CPU_IDLE);
>> }
>>
>> if (time_after(next_balance, rq->next_balance)) {
>> @@ -9271,6 +9278,10 @@ static void nohz_idle_balance(struct rq
>> }
>> }
>>
>> + update_blocked_averages(this_cpu);
>> + if (flags & NOHZ_BALANCE_KICK)
>> + rebalance_domains(this_rq, CPU_IDLE);
>> +
>> /*
>> * next_balance will be updated only when there is a need.
>> * When the CPU is attached to null domain for ex, it will not be
>> @@ -9278,8 +9289,8 @@ static void nohz_idle_balance(struct rq
>> */
>> if (likely(update_next_balance))
>> nohz.next_balance = next_balance;
>> -end:
>> - atomic_andnot(NOHZ_BALANCE_KICK, nohz_flags(this_cpu));
>> +
>> + return true;
>> }
>>
>> /*
>> @@ -9366,7 +9377,10 @@ static inline bool nohz_kick_needed(stru
>> return kick;
>> }
>> #else
>> -static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
>> +static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
>> +{
>> + return false;
>> +}
>> #endif
>>
>> /*
>> @@ -9387,7 +9401,11 @@ static __latent_entropy void run_rebalan
>> * load balance only within the local sched_domain hierarchy
>> * and abort nohz_idle_balance altogether if we pull some load.
>> */
>> - nohz_idle_balance(this_rq, idle);
>> + if (nohz_idle_balance(this_rq, idle))
>> + return;
>> +
>> + /* normal load balance */
>> + update_blocked_averages(this_rq->cpu);
>> rebalance_domains(this_rq, idle);
>> }
>>
>> --- a/kernel/sched/sched.h
>> +++ b/kernel/sched/sched.h
>> @@ -2005,9 +2005,13 @@ extern void cfs_bandwidth_usage_dec(void
>> #ifdef CONFIG_NO_HZ_COMMON
>> #define NOHZ_TICK_STOPPED_BIT 0
>> #define NOHZ_BALANCE_KICK_BIT 1
>> +#define NOHZ_STATS_KICK_BIT 2
>>
>> #define NOHZ_TICK_STOPPED BIT(NOHZ_TICK_STOPPED_BIT)
>> #define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT)
>> +#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT)
>> +
>> +#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
>>
>> #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
>>
>>
>>