Re: [PATCH] timers/nohz: Update nohz load even if tick already stopped

From: Peter Zijlstra
Date: Wed Oct 30 2019 - 09:31:37 EST


On Wed, Oct 30, 2019 at 03:48:26AM -0500, Scott Wood wrote:
> On Tue, 2019-10-29 at 11:05 +0100, Peter Zijlstra wrote:

> > @@ -3686,6 +3688,7 @@ static void sched_tick_remote(struct work_struct
> > *work)
> > curr->sched_class->task_tick(rq, curr, 0);
> >
> > out_unlock:
> > + calc_load_nohz_remote(cpu);
> > rq_unlock_irq(rq, &rf);
>
> This gets skipped when the cpu is idle, so it still misses the update.

Oh argh! That's a bit radical of the remote tick. The normal tick runs
just fine on idle CPUs, so let's mirror that.

How's this then?

---
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 1abe91ff6e4a..6d67e9a5af6b 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -15,9 +15,11 @@ static inline void nohz_balance_enter_idle(int cpu) { }

#ifdef CONFIG_NO_HZ_COMMON
void calc_load_nohz_start(void);
+void calc_load_nohz_remote(struct rq *rq);
void calc_load_nohz_stop(void);
#else
static inline void calc_load_nohz_start(void) { }
+static inline void calc_load_nohz_remote(struct rq *rq) { }
static inline void calc_load_nohz_stop(void) { }
#endif /* CONFIG_NO_HZ_COMMON */

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index eb42b71faab9..d02d1b8f40af 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3660,21 +3660,17 @@ static void sched_tick_remote(struct work_struct *work)
u64 delta;
int os;

- /*
- * Handle the tick only if it appears the remote CPU is running in full
- * dynticks mode. The check is racy by nature, but missing a tick or
- * having one too much is no big deal because the scheduler tick updates
- * statistics and checks timeslices in a time-independent way, regardless
- * of when exactly it is running.
- */
- if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
+ if (!tick_nohz_tick_stopped_cpu(cpu))
goto out_requeue;

rq_lock_irq(rq, &rf);
- curr = rq->curr;
- if (is_idle_task(curr) || cpu_is_offline(cpu))
+ /*
+ * We must not call calc_load_nohz_remote() when not in NOHZ mode.
+ */
+ if (cpu_is_offline(cpu) || !tick_nohz_tick_stopped(cpu))
goto out_unlock;

+ curr = rq->curr;
update_rq_clock(rq);
delta = rq_clock_task(rq) - curr->se.exec_start;

@@ -3685,10 +3681,11 @@ static void sched_tick_remote(struct work_struct *work)
WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
curr->sched_class->task_tick(rq, curr, 0);

+ calc_load_nohz_remote(rq);
out_unlock:
rq_unlock_irq(rq, &rf);
-
out_requeue:
+
/*
* Run the remote tick once per second (1Hz). This arbitrary
* frequency is large enough to avoid overload but short enough
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 28a516575c18..de22da666ac7 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -231,16 +231,11 @@ static inline int calc_load_read_idx(void)
return calc_load_idx & 1;
}

-void calc_load_nohz_start(void)
+static void calc_load_nohz_fold(struct rq *rq)
{
- struct rq *this_rq = this_rq();
long delta;

- /*
- * We're going into NO_HZ mode, if there's any pending delta, fold it
- * into the pending NO_HZ delta.
- */
- delta = calc_load_fold_active(this_rq, 0);
+ delta = calc_load_fold_active(rq, 0);
if (delta) {
int idx = calc_load_write_idx();

@@ -248,6 +243,24 @@ void calc_load_nohz_start(void)
}
}

+void calc_load_nohz_start(void)
+{
+ /*
+ * We're going into NO_HZ mode, if there's any pending delta, fold it
+ * into the pending NO_HZ delta.
+ */
+ calc_load_nohz_fold(this_rq());
+}
+
+/*
+ * Keep track of the load for NOHZ_FULL, must be called between
+ * calc_load_nohz_{start,stop}().
+ */
+void calc_load_nohz_remote(struct rq *rq)
+{
+ calc_load_nohz_fold(rq);
+}
+
void calc_load_nohz_stop(void)
{
struct rq *this_rq = this_rq();
@@ -268,7 +281,7 @@ void calc_load_nohz_stop(void)
this_rq->calc_load_update += LOAD_FREQ;
}

-static long calc_load_nohz_fold(void)
+static long calc_load_nohz_read(void)
{
int idx = calc_load_read_idx();
long delta = 0;
@@ -323,7 +336,7 @@ static void calc_global_nohz(void)
}
#else /* !CONFIG_NO_HZ_COMMON */

-static inline long calc_load_nohz_fold(void) { return 0; }
+static inline long calc_load_nohz_read(void) { return 0; }
static inline void calc_global_nohz(void) { }

#endif /* CONFIG_NO_HZ_COMMON */
@@ -346,7 +359,7 @@ void calc_global_load(unsigned long ticks)
/*
* Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
*/
- delta = calc_load_nohz_fold();
+ delta = calc_load_nohz_read();
if (delta)
atomic_long_add(delta, &calc_load_tasks);