Re: Stopping the tick on a fully loaded system

From: Peter Zijlstra
Date: Wed Jul 26 2023 - 15:55:05 EST


On Wed, Jul 26, 2023 at 05:53:46PM +0200, Rafael J. Wysocki wrote:

> > > That means we don't track nearly enough data to reliably tell anything
> > > about disabling the tick or not. We should have at least one bucket
> > > beyond TICK_NSEC for this.
> >
> > Quite likely.
>
> So the reasoning here was that those additional bins would not be
> necessary for idle state selection, but the problem of whether or not
> to stop the tick is kind of separate from the idle state selection
> problem if the target residency values for all of the idle states are
> relatively short. And so it should be addressed separately which
> currently it is not. Admittedly, this is a mistake.

Right, the C-state buckets are enough to pick an idle state, but not to
decide whether or not to stop the tick.

The hack below boots on my ivb-ep with the extra (disabled) states shown
here. Now let me go hack up teo to make use of them.

name residency (us)

POLL 0
C1 1
C1E 80
C3 156
C6 300
TICK 1000
POST-TICK 2000


---
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index d9cda7f6ccb9..5f435fb8b89f 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -147,13 +147,37 @@ static void cpuidle_setup_broadcast_timer(void *arg)
tick_broadcast_disable();
}

+static int tick_enter(struct cpuidle_device *dev, /* entry stub installed as ->enter and ->enter_s2idle of the "TICK" state */
+ struct cpuidle_driver *drv,
+ int index)
+{
+ return -ENODEV; /* unconditional failure; dev, drv and index are not looked at */
+}
+
+static void __cpuidle_state_init_tick(struct cpuidle_state *s) /* fill in *s as the "TICK" marker state; fields not assigned below are left as they were */
+{
+ strcpy(s->name, "TICK");
+ strcpy(s->desc, "(no-op)");

+ s->target_residency_ns = TICK_NSEC; /* residency threshold is TICK_NSEC nanoseconds */
+ s->target_residency = div_u64(TICK_NSEC, NSEC_PER_USEC); /* the same threshold expressed in microseconds */

+ s->exit_latency_ns = 0;
+ s->exit_latency = 0;

+ s->flags |= CPUIDLE_FLAG_UNUSABLE; /* NOTE(review): |= keeps any flag bits already set in *s -- confirm that is intended */

+ s->enter = tick_enter; /* both entry hooks point at the stub that returns -ENODEV */
+ s->enter_s2idle = tick_enter;
+}
+
/**
* __cpuidle_driver_init - initialize the driver's internal data
* @drv: a valid pointer to a struct cpuidle_driver
*/
static void __cpuidle_driver_init(struct cpuidle_driver *drv)
{
- int i;
+ int tick = 0, post_tick = 0, i;

/*
* Use all possible CPUs as the default, because if the kernel boots
@@ -192,6 +216,39 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
s->exit_latency_ns = 0;
else
s->exit_latency = div_u64(s->exit_latency_ns, NSEC_PER_USEC);
+
+ if (!tick && s->target_residency_ns >= TICK_NSEC) {
+ tick = 1;
+
+ if (s->target_residency_ns == TICK_NSEC)
+ continue;
+
+ post_tick = 1;
+
+ memmove(&drv->states[i+1], &drv->states[i],
+ sizeof(struct cpuidle_state) * (CPUIDLE_STATE_MAX - i - 1));
+ drv->state_count++;
+
+ __cpuidle_state_init_tick(s);
+ i++;
+ }
+ }
+
+ if (!tick) {
+ struct cpuidle_state *s = &drv->states[i];
+ __cpuidle_state_init_tick(s);
+ drv->state_count++;
+ i++;
+ }
+
+ if (!post_tick) {
+ struct cpuidle_state *s = &drv->states[i];
+ __cpuidle_state_init_tick(s);
+ strcpy(s->name, "POST-TICK");
+ s->target_residency_ns *= 2;
+ s->target_residency *= 2;
+ drv->state_count++;
+ i++;
}
}

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 3183aeb7f5b4..a642ee9e916c 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -16,7 +16,7 @@
#include <linux/hrtimer.h>
#include <linux/context_tracking.h>

-#define CPUIDLE_STATE_MAX 10
+#define CPUIDLE_STATE_MAX 16 /* raised from 10; presumably to leave room for the inserted TICK/POST-TICK entries -- confirm */
#define CPUIDLE_NAME_LEN 16
#define CPUIDLE_DESC_LEN 32