[PATCH 23/70] cpufreq: interactive: pin timers to associated CPU

From: BÃlint Czobor
Date: Tue Oct 27 2015 - 13:57:34 EST


From: Todd Poynor <toddpoynor@xxxxxxxxxx>

Helps avoid waking up other CPUs to react to activity on the local CPU.

Change-Id: Ife272aaa7916894a437705d44521b1a1693fbe8e
Signed-off-by: Todd Poynor <toddpoynor@xxxxxxxxxx>
Signed-off-by: BÃlint Czobor <czoborbalint@xxxxxxxxx>
---
drivers/cpufreq/cpufreq_interactive.c | 106 +++++++++------------------------
1 file changed, 27 insertions(+), 79 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
index 16bd23b..e53eae2 100644
--- a/drivers/cpufreq/cpufreq_interactive.c
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -42,8 +42,6 @@ struct cpufreq_interactive_cpuinfo {
int timer_idlecancel;
u64 time_in_idle;
u64 idle_exit_time;
- u64 timer_run_time;
- int idling;
u64 target_set_time;
u64 target_set_time_in_idle;
struct cpufreq_policy *policy;
@@ -109,6 +107,7 @@ struct cpufreq_governor cpufreq_gov_interactive = {

static void cpufreq_interactive_timer(unsigned long data)
{
+ u64 now;
unsigned int delta_idle;
unsigned int delta_time;
int cpu_load;
@@ -127,26 +126,11 @@ static void cpufreq_interactive_timer(unsigned long data)
if (!pcpu->governor_enabled)
goto exit;

- /*
- * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
- * this lets idle exit know the current idle time sample has
- * been processed, and idle exit can generate a new sample and
- * re-arm the timer. This prevents a concurrent idle
- * exit on that CPU from writing a new set of info at the same time
- * the timer function runs (the timer function can't use that info
- * until more time passes).
- */
time_in_idle = pcpu->time_in_idle;
idle_exit_time = pcpu->idle_exit_time;
- now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
- smp_wmb();
-
- /* If we raced with cancelling a timer, skip. */
- if (!idle_exit_time)
- goto exit;
-
+ now_idle = get_cpu_idle_time_us(data, &now);
delta_idle = (unsigned int)(now_idle - time_in_idle);
- delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time);
+ delta_time = (unsigned int)(now - idle_exit_time);

/*
* If timer ran less than 1ms after short-term sample started, retry.
@@ -160,8 +144,7 @@ static void cpufreq_interactive_timer(unsigned long data)
cpu_load = 100 * (delta_time - delta_idle) / delta_time;

delta_idle = (unsigned int)(now_idle - pcpu->target_set_time_in_idle);
- delta_time = (unsigned int)(pcpu->timer_run_time -
- pcpu->target_set_time);
+ delta_time = (unsigned int)(now - pcpu->target_set_time);

if ((delta_time == 0) || (delta_idle > delta_time))
load_since_change = 0;
@@ -189,7 +172,7 @@ static void cpufreq_interactive_timer(unsigned long data)

if (pcpu->target_freq == hispeed_freq &&
new_freq > hispeed_freq &&
- pcpu->timer_run_time - pcpu->hispeed_validate_time
+ now - pcpu->hispeed_validate_time
< above_hispeed_delay_val) {
trace_cpufreq_interactive_notyet(data, cpu_load,
pcpu->target_freq,
@@ -202,7 +185,7 @@ static void cpufreq_interactive_timer(unsigned long data)
}

if (new_freq <= hispeed_freq)
- pcpu->hispeed_validate_time = pcpu->timer_run_time;
+ pcpu->hispeed_validate_time = now;

if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
new_freq, CPUFREQ_RELATION_H,
@@ -219,8 +202,7 @@ static void cpufreq_interactive_timer(unsigned long data)
* floor frequency for the minimum sample time since last validated.
*/
if (new_freq < pcpu->floor_freq) {
- if (pcpu->timer_run_time - pcpu->floor_validate_time
- < min_sample_time) {
+ if (now - pcpu->floor_validate_time < min_sample_time) {
trace_cpufreq_interactive_notyet(data, cpu_load,
pcpu->target_freq, new_freq);
goto rearm;
@@ -228,7 +210,7 @@ static void cpufreq_interactive_timer(unsigned long data)
}

pcpu->floor_freq = new_freq;
- pcpu->floor_validate_time = pcpu->timer_run_time;
+ pcpu->floor_validate_time = now;

if (pcpu->target_freq == new_freq) {
trace_cpufreq_interactive_already(data, cpu_load,
@@ -239,7 +221,7 @@ static void cpufreq_interactive_timer(unsigned long data)
trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq,
new_freq);
pcpu->target_set_time_in_idle = now_idle;
- pcpu->target_set_time = pcpu->timer_run_time;
+ pcpu->target_set_time = now;

pcpu->target_freq = new_freq;
spin_lock_irqsave(&speedchange_cpumask_lock, flags);
@@ -258,23 +240,16 @@ rearm_if_notmax:
rearm:
if (!timer_pending(&pcpu->cpu_timer)) {
/*
- * If already at min: if that CPU is idle, don't set timer.
- * Else cancel the timer if that CPU goes idle. We don't
- * need to re-evaluate speed until the next idle exit.
+ * If already at min, cancel the timer if that CPU goes idle.
+ * We don't need to re-evaluate speed until the next idle exit.
*/
- if (pcpu->target_freq == pcpu->policy->min) {
- smp_rmb();
-
- if (pcpu->idling)
- goto exit;
-
+ if (pcpu->target_freq == pcpu->policy->min)
pcpu->timer_idlecancel = 1;
- }

pcpu->time_in_idle = get_cpu_idle_time_us(
data, &pcpu->idle_exit_time);
- mod_timer(&pcpu->cpu_timer,
- jiffies + usecs_to_jiffies(timer_rate));
+ mod_timer_pinned(&pcpu->cpu_timer,
+ jiffies + usecs_to_jiffies(timer_rate));
}

exit:
@@ -290,8 +265,6 @@ static void cpufreq_interactive_idle_start(void)
if (!pcpu->governor_enabled)
return;

- pcpu->idling = 1;
- smp_wmb();
pending = timer_pending(&pcpu->cpu_timer);

if (pcpu->target_freq != pcpu->policy->min) {
@@ -308,8 +281,9 @@ static void cpufreq_interactive_idle_start(void)
pcpu->time_in_idle = get_cpu_idle_time_us(
smp_processor_id(), &pcpu->idle_exit_time);
pcpu->timer_idlecancel = 0;
- mod_timer(&pcpu->cpu_timer,
- jiffies + usecs_to_jiffies(timer_rate));
+ mod_timer_pinned(
+ &pcpu->cpu_timer,
+ jiffies + usecs_to_jiffies(timer_rate));
}
#endif
} else {
@@ -321,12 +295,6 @@ static void cpufreq_interactive_idle_start(void)
*/
if (pending && pcpu->timer_idlecancel) {
del_timer(&pcpu->cpu_timer);
- /*
- * Ensure last timer run time is after current idle
- * sample start time, so next idle exit will always
- * start a new idle sampling period.
- */
- pcpu->idle_exit_time = 0;
pcpu->timer_idlecancel = 0;
}
}
@@ -341,29 +309,15 @@ static void cpufreq_interactive_idle_end(void)
if (!pcpu->governor_enabled)
return;

- pcpu->idling = 0;
- smp_wmb();
-
- /*
- * Arm the timer for 1-2 ticks later if not already, and if the timer
- * function has already processed the previous load sampling
- * interval. (If the timer is not pending but has not processed
- * the previous interval, it is probably racing with us on another
- * CPU. Let it compute load based on the previous sample and then
- * re-arm the timer for another interval when it's done, rather
- * than updating the interval start time to be "now", which doesn't
- * give the timer function enough time to make a decision on this
- * run.)
- */
- if (timer_pending(&pcpu->cpu_timer) == 0 &&
- pcpu->timer_run_time >= pcpu->idle_exit_time &&
- pcpu->governor_enabled) {
+ /* Arm the timer for 1-2 ticks later if not already. */
+ if (!timer_pending(&pcpu->cpu_timer)) {
pcpu->time_in_idle =
get_cpu_idle_time_us(smp_processor_id(),
&pcpu->idle_exit_time);
pcpu->timer_idlecancel = 0;
- mod_timer(&pcpu->cpu_timer,
- jiffies + usecs_to_jiffies(timer_rate));
+ mod_timer_pinned(
+ &pcpu->cpu_timer,
+ jiffies + usecs_to_jiffies(timer_rate));
}

}
@@ -673,6 +627,8 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy,

freq_table =
cpufreq_frequency_get_table(policy->cpu);
+ if (!hispeed_freq)
+ hispeed_freq = policy->max;

for_each_cpu(j, policy->cpus) {
pcpu = &per_cpu(cpuinfo, j);
@@ -689,11 +645,11 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
pcpu->target_set_time;
pcpu->governor_enabled = 1;
smp_wmb();
+ pcpu->cpu_timer.expires =
+ jiffies + usecs_to_jiffies(timer_rate);
+ add_timer_on(&pcpu->cpu_timer, j);
}

- if (!hispeed_freq)
- hispeed_freq = policy->max;
-
/*
* Do not register the idle hook and create sysfs
* entries if we have already done so.
@@ -715,14 +671,6 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
pcpu->governor_enabled = 0;
smp_wmb();
del_timer_sync(&pcpu->cpu_timer);
-
- /*
- * Reset idle exit time since we may cancel the timer
- * before it can run after the last idle exit time,
- * to avoid tripping the check in idle exit for a timer
- * that is trying to run.
- */
- pcpu->idle_exit_time = 0;
}

if (atomic_dec_return(&active_count) > 0)
--
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/