[RFC][PATCH 1/3] cpuidle: Inject tick boundary state

From: Peter Zijlstra
Date: Fri Jul 28 2023 - 11:01:25 EST


In order to facilitate governors that track history in idle-state
buckets (TEO) making a useful decision about NOHZ, make sure we have a
bucket that counts tick-and-longer.

In order to be inclusive of the tick itself -- after all, if we do not
disable NOHZ we'll sleep for a full tick -- the actual boundary should
be just short of a full tick.

IOW, when registering the idle-states, add one that is always
disabled, just to have a bucket.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
drivers/cpuidle/cpuidle.h | 2 +
drivers/cpuidle/driver.c | 48 +++++++++++++++++++++++++++++++++++++++++++++-
include/linux/cpuidle.h | 2 -
3 files changed, 50 insertions(+), 2 deletions(-)

--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -72,4 +72,6 @@ static inline void cpuidle_coupled_unreg
}
#endif

+#define SHORT_TICK_NSEC (TICK_NSEC - TICK_NSEC/32) /* one tick minus 1/32 of a tick */
+
#endif /* __DRIVER_CPUIDLE_H */
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -147,13 +147,37 @@ static void cpuidle_setup_broadcast_time
tick_broadcast_disable();
}

+/* Enter hook installed on the "TICK" state; it always fails with -ENODEV. */
+static int tick_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv,
+		      int index)
+{
+	return -ENODEV;
+}
+
+/* Make @s the "TICK" placeholder state: unusable, residency SHORT_TICK_NSEC. */
+static void __cpuidle_state_init_tick(struct cpuidle_state *s)
+{
+	strcpy(s->name, "TICK");
+	strcpy(s->desc, "(no-op)");
+
+	s->target_residency_ns = SHORT_TICK_NSEC;
+	s->target_residency = div_u64(SHORT_TICK_NSEC, NSEC_PER_USEC);
+	s->exit_latency_ns = 0;
+	s->exit_latency = 0;
+
+	/* Assign, don't OR: @s may still hold a copy of the state it displaced. */
+	s->flags = CPUIDLE_FLAG_UNUSABLE;
+	s->enter = tick_enter;
+	s->enter_s2idle = tick_enter;
+}
+
/**
* __cpuidle_driver_init - initialize the driver's internal data
* @drv: a valid pointer to a struct cpuidle_driver
*/
static void __cpuidle_driver_init(struct cpuidle_driver *drv)
{
- int i;
+ int tick = 0, i;

/*
* Use all possible CPUs as the default, because if the kernel boots
@@ -163,6 +187,9 @@ static void __cpuidle_driver_init(struct
if (!drv->cpumask)
drv->cpumask = (struct cpumask *)cpu_possible_mask;

+ if (WARN_ON_ONCE(drv->state_count >= CPUIDLE_STATE_MAX-2))
+ tick = 1;
+
for (i = 0; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];

@@ -192,6 +219,25 @@ static void __cpuidle_driver_init(struct
s->exit_latency_ns = 0;
else
s->exit_latency = div_u64(s->exit_latency_ns, NSEC_PER_USEC);
+
+ if (!tick && s->target_residency_ns >= SHORT_TICK_NSEC) {
+ tick = 1;
+
+ if (s->target_residency_ns == SHORT_TICK_NSEC)
+ continue;
+
+ memmove(&drv->states[i+1], &drv->states[i],
+ sizeof(struct cpuidle_state) * (CPUIDLE_STATE_MAX - i - 1));
+ __cpuidle_state_init_tick(s);
+ drv->state_count++;
+ i++;
+ }
+ }
+
+ if (!tick) {
+ struct cpuidle_state *s = &drv->states[i];
+ __cpuidle_state_init_tick(s);
+ drv->state_count++;
}
}

--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -16,7 +16,7 @@
#include <linux/hrtimer.h>
#include <linux/context_tracking.h>

-#define CPUIDLE_STATE_MAX 10
+#define CPUIDLE_STATE_MAX 16
#define CPUIDLE_NAME_LEN 16
#define CPUIDLE_DESC_LEN 32