[PATCH 7/8] nohz: Evaluate tick dependency once on context switch

From: Frederic Weisbecker
Date: Thu Jun 11 2015 - 13:36:52 EST


The tick dependency is evaluated on every irq. This is a batch of checks
which determine whether it is safe to stop the tick or not. These checks
are often split in many details: posix cpu timers, scheduler, sched clock,
perf events. Each of these is made of smaller details: posix cpu
timers involve checking process wide timers then thread wide timers. Perf
involves checking freq events then more per cpu details.

Checking these details every time we update the full dynticks state
brings avoidable overhead.

So let's evaluate these dependencies once on context switch. Then the
further dependency checks will be performed through a single state check.

This is a first step that can be later optimized by dividing task level
dependency, CPU level dependency and global dependency, and updating
each at the right time.

Suggested-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: John Stultz <john.stultz@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Preeti U Murthy <preeti@xxxxxxxxxxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
---
kernel/time/tick-sched.c | 63 +++++++++++++++++++++++++++++++-----------------
kernel/time/tick-sched.h | 6 +++++
2 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 8acaab5..5fea798 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -157,49 +157,61 @@ cpumask_var_t tick_nohz_full_mask;
cpumask_var_t housekeeping_mask;
bool tick_nohz_full_running;

-static bool can_stop_full_tick(void)
+static bool can_stop_full_tick(struct tick_sched *ts)
{
WARN_ON_ONCE(!irqs_disabled());

- if (!sched_can_stop_tick()) {
- trace_tick_stop(0, "more than 1 task in runqueue\n");
+ if (ts->tick_needed) {
+ if (ts->tick_needed & TICK_NEEDED_POSIX_CPU_TIMER)
+ trace_tick_stop(0, "posix timers running\n");
+ if (ts->tick_needed & TICK_NEEDED_PERF_EVENT)
+ trace_tick_stop(0, "perf events running\n");
+ if (ts->tick_needed & TICK_NEEDED_SCHED)
+ trace_tick_stop(0, "more than 1 task in runqueue\n");
+ if (ts->tick_needed & TICK_NEEDED_CLOCK_UNSTABLE)
+ trace_tick_stop(0, "unstable sched clock\n");
return false;
}

- if (!posix_cpu_timers_can_stop_tick(current)) {
- trace_tick_stop(0, "posix timers running\n");
- return false;
- }
+ return true;
+}

- if (!perf_event_can_stop_tick()) {
- trace_tick_stop(0, "perf events running\n");
- return false;
- }
+static void tick_nohz_full_update_dependencies(void)
+{
+ struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+ if (!posix_cpu_timers_can_stop_tick(current))
+ ts->tick_needed |= TICK_NEEDED_POSIX_CPU_TIMER;
+
+ if (!perf_event_can_stop_tick())
+ ts->tick_needed |= TICK_NEEDED_PERF_EVENT;
+
+ if (!sched_can_stop_tick())
+ ts->tick_needed |= TICK_NEEDED_SCHED;

- /* sched_clock_tick() needs us? */
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
+ * sched_clock_tick() needs us?
+ *
* TODO: kick full dynticks CPUs when
* sched_clock_stable is set.
*/
if (!sched_clock_stable()) {
- trace_tick_stop(0, "unstable sched clock\n");
+ ts->tick_needed |= TICK_NEEDED_CLOCK_UNSTABLE;
/*
* Don't allow the user to think they can get
* full NO_HZ with this machine.
*/
WARN_ONCE(tick_nohz_full_running,
"NO_HZ FULL will not work with unstable sched clock");
- return false;
}
#endif
-
- return true;
}

static void nohz_full_kick_work_func(struct irq_work *work)
{
- /* Empty, the tick restart happens on tick_nohz_irq_exit() */
+ /* tick restart happens on tick_nohz_irq_exit() */
+ tick_nohz_full_update_dependencies();
}

static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
@@ -234,7 +246,8 @@ void tick_nohz_full_kick_cpu(int cpu)

static void nohz_full_kick_ipi(void *info)
{
- /* Empty, the tick restart happens on tick_nohz_irq_exit() */
+ /* tick restart happens on tick_nohz_irq_exit() */
+ tick_nohz_full_update_dependencies();
}

/*
@@ -258,18 +271,24 @@ void tick_nohz_full_kick_all(void)
* It might need the tick due to per task/process properties:
* perf events, posix cpu timers, ...
*/
-void __tick_nohz_task_switch(struct task_struct *tsk)
+void __tick_nohz_task_switch(struct task_struct *next)
{
unsigned long flags;
+ struct tick_sched *ts;

local_irq_save(flags);

+ ts = this_cpu_ptr(&tick_cpu_sched);
+ /* Reset tick dependency evaluation */
+ ts->tick_needed = 0;
+
if (!tick_nohz_full_cpu(smp_processor_id()))
goto out;

- if (tick_nohz_tick_stopped() && !can_stop_full_tick())
+ tick_nohz_full_update_dependencies();
+
+ if (ts->tick_stopped && !can_stop_full_tick(ts))
tick_nohz_full_kick();
-
out:
local_irq_restore(flags);
}
@@ -715,7 +734,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;

- if (can_stop_full_tick())
+ if (can_stop_full_tick(ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get());
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 42fdf49..03c283e 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -19,6 +19,11 @@ enum tick_nohz_mode {
NOHZ_MODE_HIGHRES,
};

+#define TICK_NEEDED_POSIX_CPU_TIMER 0x1
+#define TICK_NEEDED_PERF_EVENT 0x2
+#define TICK_NEEDED_SCHED 0x4
+#define TICK_NEEDED_CLOCK_UNSTABLE 0x8
+
/**
* struct tick_sched - sched tick emulation and no idle tick control/stats
* @sched_timer: hrtimer to schedule the periodic tick in high
@@ -60,6 +65,7 @@ struct tick_sched {
u64 next_timer;
ktime_t idle_expires;
int do_timer_last;
+ int tick_needed;
};

extern struct tick_sched *tick_get_tick_sched(int cpu);
--
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/