[PATCH v3 47/51] cpuidle: Ensure ct_cpuidle_enter() is always called from noinstr/__cpuidle

From: Peter Zijlstra
Date: Thu Jan 12 2023 - 15:02:33 EST


Tracing (kprobes included) and other compiler instrumentation relies
on a normal kernel runtime. Therefore all functions that disable RCU
should be noinstr, as should all functions that are called while RCU
is disabled.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
drivers/cpuidle/cpuidle.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)

--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -137,11 +137,13 @@ int cpuidle_find_deepest_state(struct cp
}

#ifdef CONFIG_SUSPEND
-static void enter_s2idle_proper(struct cpuidle_driver *drv,
- struct cpuidle_device *dev, int index)
+static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, int index)
{
- ktime_t time_start, time_end;
struct cpuidle_state *target_state = &drv->states[index];
+ ktime_t time_start, time_end;
+
+ instrumentation_begin();

time_start = ns_to_ktime(local_clock());

@@ -152,13 +154,18 @@ static void enter_s2idle_proper(struct c
* suspended is generally unsafe.
*/
stop_critical_timings();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
ct_cpuidle_enter();
+ /* Annotate away the indirect call */
+ instrumentation_begin();
+ }
target_state->enter_s2idle(dev, drv, index);
if (WARN_ON_ONCE(!irqs_disabled()))
raw_local_irq_disable();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ instrumentation_end();
ct_cpuidle_exit();
+ }
tick_unfreeze();
start_critical_timings();

@@ -166,6 +173,7 @@ static void enter_s2idle_proper(struct c

dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start);
dev->states_usage[index].s2idle_usage++;
+ instrumentation_end();
}

/**
@@ -200,8 +208,9 @@ int cpuidle_enter_s2idle(struct cpuidle_
* @drv: cpuidle driver for this cpu
* @index: index into the states table in @drv of the state to enter
*/
-int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
- int index)
+noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
int entered_state;

@@ -209,6 +218,8 @@ int cpuidle_enter_state(struct cpuidle_d
bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
ktime_t time_start, time_end;

+ instrumentation_begin();
+
/*
* Tell the time framework to switch to a broadcast timer because our
* local timer will be shut down. If a local timer is used from another
@@ -235,15 +246,21 @@ int cpuidle_enter_state(struct cpuidle_d
time_start = ns_to_ktime(local_clock());

stop_critical_timings();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
ct_cpuidle_enter();
+ /* Annotate away the indirect call */
+ instrumentation_begin();
+ }

entered_state = target_state->enter(dev, drv, index);
+
if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
raw_local_irq_disable();

- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ instrumentation_end();
ct_cpuidle_exit();
+ }
start_critical_timings();

sched_clock_idle_wakeup_event();
@@ -306,6 +323,8 @@ int cpuidle_enter_state(struct cpuidle_d
dev->states_usage[index].rejected++;
}

+ instrumentation_end();
+
return entered_state;
}