Re: [RESEND PATCH] cpuidle: Handle tick_broadcast_enter() failure gracefully

From: Preeti U Murthy
Date: Thu May 07 2015 - 13:52:47 EST


On 05/07/2015 11:09 PM, Preeti U Murthy wrote:
> When a CPU has to enter an idle state where tick stops, it makes a call
> to tick_broadcast_enter(). The call will fail if this CPU is the
> broadcast CPU. Today, under such a circumstance, the arch cpuidle code
> handles this CPU. This is not convincing because not only are we not
> aware what the arch cpuidle code does, but we also do not account for
> the idle state residency time and usage of such a CPU.
>
> This scenario can be handled better by simply asking the cpuidle
> governor to choose an idle state where in ticks do not stop. To
> accommodate this change move the setting of runqueue idle state from the
> core to the cpuidle driver, else the rq->idle_state will be set wrong.
>
> Signed-off-by: Preeti U Murthy <preeti@xxxxxxxxxxxxxxxxxx>
> ---
> Rebased on the latest linux-pm/bleeding-edge

Kindly ignore this. I have sent the rebase as V2.
[PATCH V2] cpuidle: Handle tick_broadcast_enter() failure gracefully

The below patch is not updated.

Regards
Preeti U Murthy
>
> drivers/cpuidle/cpuidle.c | 21 +++++++++++++++++----
> drivers/cpuidle/governors/ladder.c | 13 ++++++++++---
> drivers/cpuidle/governors/menu.c | 6 +++++-
> include/linux/cpuidle.h | 6 +++---
> include/linux/sched.h | 16 ++++++++++++++++
> kernel/sched/core.c | 17 +++++++++++++++++
> kernel/sched/fair.c | 2 +-
> kernel/sched/idle.c | 8 +-------
> kernel/sched/sched.h | 24 ------------------------
> 9 files changed, 70 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index 61c417b..8f5657e 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -21,6 +21,7 @@
> #include <linux/module.h>
> #include <linux/suspend.h>
> #include <linux/tick.h>
> +#include <linux/sched.h>
> #include <trace/events/power.h>
>
> #include "cpuidle.h"
> @@ -167,8 +168,15 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
> * local timer will be shut down. If a local timer is used from another
> * CPU as a broadcast timer, this call may fail if it is not available.
> */
> - if (broadcast && tick_broadcast_enter())
> - return -EBUSY;
> + if (broadcast && tick_broadcast_enter()) {
> + index = cpuidle_select(drv, dev, !broadcast);
> + if (index < 0)
> + return -EBUSY;
> + target_state = &drv->states[index];
> + }
> +
> + /* Take note of the planned idle state. */
> + idle_set_state(smp_processor_id(), target_state);
>
> trace_cpu_idle_rcuidle(index, dev->cpu);
> time_start = ktime_get();
> @@ -178,6 +186,9 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
> time_end = ktime_get();
> trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
>
> + /* The cpu is no longer idle or about to enter idle. */
> + idle_set_state(smp_processor_id(), NULL);
> +
> if (broadcast) {
> if (WARN_ON_ONCE(!irqs_disabled()))
> local_irq_disable();
> @@ -213,12 +224,14 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
> *
> * @drv: the cpuidle driver
> * @dev: the cpuidle device
> + * @timer_stop_valid: allow selection of idle state where tick stops
> *
> * Returns the index of the idle state.
> */
> -int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> +int cpuidle_select(struct cpuidle_driver *drv,
> + struct cpuidle_device *dev, int timer_stop_valid)
> {
> - return cpuidle_curr_governor->select(drv, dev);
> + return cpuidle_curr_governor->select(drv, dev, timer_stop_valid);
> }
>
> /**
> diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
> index 401c010..c437322 100644
> --- a/drivers/cpuidle/governors/ladder.c
> +++ b/drivers/cpuidle/governors/ladder.c
> @@ -62,9 +62,10 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
> * ladder_select_state - selects the next state to enter
> * @drv: cpuidle driver
> * @dev: the CPU
> + * @timer_stop_valid: allow selection of idle state where tick stops
> */
> static int ladder_select_state(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev)
> + struct cpuidle_device *dev, int timer_stop_valid)
> {
> struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
> struct ladder_device_state *last_state;
> @@ -86,6 +87,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
> !drv->states[last_idx + 1].disabled &&
> !dev->states_usage[last_idx + 1].disable &&
> last_residency > last_state->threshold.promotion_time &&
> + !(!timer_stop_valid && (drv->states[last_idx + 1].flags & CPUIDLE_FLAG_TIMER_STOP)) &&
> drv->states[last_idx + 1].exit_latency <= latency_req) {
> last_state->stats.promotion_count++;
> last_state->stats.demotion_count = 0;
> @@ -99,11 +101,14 @@ static int ladder_select_state(struct cpuidle_driver *drv,
> if (last_idx > CPUIDLE_DRIVER_STATE_START &&
> (drv->states[last_idx].disabled ||
> dev->states_usage[last_idx].disable ||
> + (!timer_stop_valid && (drv->states[last_idx].flags & CPUIDLE_FLAG_TIMER_STOP)) ||
> drv->states[last_idx].exit_latency > latency_req)) {
> int i;
>
> for (i = last_idx - 1; i > CPUIDLE_DRIVER_STATE_START; i--) {
> - if (drv->states[i].exit_latency <= latency_req)
> + if (drv->states[i].exit_latency <= latency_req &&
> + !(!timer_stop_valid &&
> + (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP)))
> break;
> }
> ladder_do_selection(ldev, last_idx, i);
> @@ -114,7 +119,9 @@ static int ladder_select_state(struct cpuidle_driver *drv,
> last_residency < last_state->threshold.demotion_time) {
> last_state->stats.demotion_count++;
> last_state->stats.promotion_count = 0;
> - if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
> + if (last_state->stats.demotion_count >= last_state->threshold.demotion_count &&
> + !(!timer_stop_valid &&
> + (drv->states[last_idx - 1].flags & CPUIDLE_FLAG_TIMER_STOP))) {
> ladder_do_selection(ldev, last_idx, last_idx - 1);
> return last_idx - 1;
> }
> diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
> index b8a5fa1..900404f 100644
> --- a/drivers/cpuidle/governors/menu.c
> +++ b/drivers/cpuidle/governors/menu.c
> @@ -280,8 +280,10 @@ again:
> * menu_select - selects the next idle state to enter
> * @drv: cpuidle driver containing state data
> * @dev: the CPU
> + * @timer_stop_valid: allow selection of idle state where tick stops
> */
> -static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> +static int menu_select(struct cpuidle_driver *drv,
> + struct cpuidle_device *dev, int timer_stop_valid)
> {
> struct menu_device *data = this_cpu_ptr(&menu_devices);
> int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
> @@ -349,6 +351,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> continue;
> if (s->exit_latency > latency_req)
> continue;
> + if (!timer_stop_valid && (s->flags & CPUIDLE_FLAG_TIMER_STOP))
> + continue;
>
> data->last_state_idx = i;
> }
> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
> index 9c5e892..d7c1734 100644
> --- a/include/linux/cpuidle.h
> +++ b/include/linux/cpuidle.h
> @@ -129,7 +129,7 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv,
> struct cpuidle_device *dev);
>
> extern int cpuidle_select(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev);
> + struct cpuidle_device *dev, int timer_stop_valid);
> extern int cpuidle_enter(struct cpuidle_driver *drv,
> struct cpuidle_device *dev, int index);
> extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
> @@ -163,7 +163,7 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
> struct cpuidle_device *dev)
> {return true; }
> static inline int cpuidle_select(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev)
> + struct cpuidle_device *dev, int timer_stop_valid)
> {return -ENODEV; }
> static inline int cpuidle_enter(struct cpuidle_driver *drv,
> struct cpuidle_device *dev, int index)
> @@ -223,7 +223,7 @@ struct cpuidle_governor {
> struct cpuidle_device *dev);
>
> int (*select) (struct cpuidle_driver *drv,
> - struct cpuidle_device *dev);
> + struct cpuidle_device *dev, int timer_stop_valid);
> void (*reflect) (struct cpuidle_device *dev, int index);
>
> struct module *owner;
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 8222ae4..ff4e2df 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -45,6 +45,7 @@ struct sched_param {
> #include <linux/rcupdate.h>
> #include <linux/rculist.h>
> #include <linux/rtmutex.h>
> +#include <linux/cpuidle.h>
>
> #include <linux/time.h>
> #include <linux/param.h>
> @@ -901,6 +902,21 @@ enum cpu_idle_type {
> CPU_MAX_IDLE_TYPES
> };
>
> +#ifdef CONFIG_CPU_IDLE
> +extern void idle_set_state(int cpu, struct cpuidle_state *idle_state);
> +extern struct cpuidle_state *idle_get_state(int cpu);
> +#else
> +static inline void idle_set_state(int cpu,
> + struct cpuidle_state *idle_state)
> +{
> +}
> +
> +static inline struct cpuidle_state *idle_get_state(int cpu)
> +{
> + return NULL;
> +}
> +#endif
> +
> /*
> * Increase resolution of cpu_capacity calculations
> */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index f9123a8..427e190 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3231,6 +3231,23 @@ struct task_struct *idle_task(int cpu)
> return cpu_rq(cpu)->idle;
> }
>
> +#ifdef CONFIG_CPU_IDLE
> +void idle_set_state(int cpu, struct cpuidle_state *idle_state)
> +{
> + struct rq *rq = cpu_rq(cpu);
> +
> + rq->idle_state = idle_state;
> +}
> +
> +struct cpuidle_state *idle_get_state(int cpu)
> +{
> + struct rq *rq = cpu_rq(cpu);
> +
> + WARN_ON(!rcu_read_lock_held());
> + return rq->idle_state;
> +}
> +#endif /* CONFIG_CPU_IDLE */
> +
> /**
> * find_process_by_pid - find a process with a matching PID value.
> * @pid: the pid in question.
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index ffeaa41..211ef9a 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4709,7 +4709,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
> for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
> if (idle_cpu(i)) {
> struct rq *rq = cpu_rq(i);
> - struct cpuidle_state *idle = idle_get_state(rq);
> + struct cpuidle_state *idle = idle_get_state(i);
> if (idle && idle->exit_latency < min_exit_latency) {
> /*
> * We give priority to a CPU whose idle state
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index fefcb1f..5bd766c 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -131,7 +131,7 @@ static void cpuidle_idle_call(void)
> /*
> * Ask the cpuidle framework to choose a convenient idle state.
> */
> - next_state = cpuidle_select(drv, dev);
> + next_state = cpuidle_select(drv, dev, 1);
> }
> /* Fall back to the default arch idle method on errors. */
> if (next_state < 0)
> @@ -149,9 +149,6 @@ static void cpuidle_idle_call(void)
> goto exit_idle;
> }
>
> - /* Take note of the planned idle state. */
> - idle_set_state(this_rq(), &drv->states[next_state]);
> -
> /*
> * Enter the idle state previously returned by the governor decision.
> * This function will block until an interrupt occurs and will take
> @@ -159,9 +156,6 @@ static void cpuidle_idle_call(void)
> */
> entered_state = cpuidle_enter(drv, dev, next_state);
>
> - /* The cpu is no longer idle or about to enter idle. */
> - idle_set_state(this_rq(), NULL);
> -
> if (entered_state == -EBUSY)
> goto use_default;
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index e0e1299..2c56caa 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1253,30 +1253,6 @@ static inline void idle_exit_fair(struct rq *rq) { }
>
> #endif
>
> -#ifdef CONFIG_CPU_IDLE
> -static inline void idle_set_state(struct rq *rq,
> - struct cpuidle_state *idle_state)
> -{
> - rq->idle_state = idle_state;
> -}
> -
> -static inline struct cpuidle_state *idle_get_state(struct rq *rq)
> -{
> - WARN_ON(!rcu_read_lock_held());
> - return rq->idle_state;
> -}
> -#else
> -static inline void idle_set_state(struct rq *rq,
> - struct cpuidle_state *idle_state)
> -{
> -}
> -
> -static inline struct cpuidle_state *idle_get_state(struct rq *rq)
> -{
> - return NULL;
> -}
> -#endif
> -
> extern void sysrq_sched_debug_show(void);
> extern void sched_init_granularity(void);
> extern void update_max_interval(void);
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/