[PATCH 04/11] hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback

From: Peter Zijlstra
Date: Sun Jan 06 2008 - 11:26:14 EST


Currently all highres=off timers are run from softirq context, but
HRTIMER_CB_IRQSAFE_NO_SOFTIRQ timers expect to run from irq context.

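Fix this up by splitting the expiry path, similar to the highres=on case:
hrtimer_run_queues() is now called from run_local_timers() and runs the
expired timers directly, while HRTIMER_CB_SOFTIRQ timers are moved to the
cb_pending list and run from the timer softirq by the new
hrtimer_run_pending().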

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
include/linux/hrtimer.h | 5 -
kernel/hrtimer.c | 232 +++++++++++++++++++++++++-----------------------
kernel/timer.c | 3
3 files changed, 125 insertions(+), 115 deletions(-)

Index: linux-2.6/kernel/hrtimer.c
===================================================================
--- linux-2.6.orig/kernel/hrtimer.c
+++ linux-2.6/kernel/hrtimer.c
@@ -622,6 +622,11 @@ static inline int hrtimer_cb_pending(str
static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { }
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
+static inline int hrtimer_reprogram(struct hrtimer *timer,
+ struct hrtimer_clock_base *base)
+{
+ return 0;
+}

#endif /* CONFIG_HIGH_RES_TIMERS */

@@ -1030,6 +1035,85 @@ int hrtimer_get_res(const clockid_t whic
}
EXPORT_SYMBOL_GPL(hrtimer_get_res);

+static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
+{
+ spin_lock_irq(&cpu_base->lock);
+
+ while (!list_empty(&cpu_base->cb_pending)) {
+ enum hrtimer_restart (*fn)(struct hrtimer *);
+ struct hrtimer *timer;
+ int restart;
+
+ timer = list_entry(cpu_base->cb_pending.next,
+ struct hrtimer, cb_entry);
+
+ timer_stats_account_hrtimer(timer);
+
+ fn = timer->function;
+ __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
+ spin_unlock_irq(&cpu_base->lock);
+
+ restart = fn(timer);
+
+ spin_lock_irq(&cpu_base->lock);
+
+ timer->state &= ~HRTIMER_STATE_CALLBACK;
+ if (restart == HRTIMER_RESTART) {
+ BUG_ON(hrtimer_active(timer));
+ /*
+ * Enqueue the timer, allow reprogramming of the event
+ * device
+ */
+ enqueue_hrtimer(timer, timer->base, 1);
+ } else if (hrtimer_active(timer)) {
+ /*
+ * If the timer was rearmed on another CPU, reprogram
+ * the event device.
+ */
+ if (timer->base->first == &timer->node)
+ hrtimer_reprogram(timer, timer->base);
+ }
+ }
+ spin_unlock_irq(&cpu_base->lock);
+}
+
+static void __run_hrtimer(struct hrtimer *timer)
+{
+ struct hrtimer_clock_base *base = timer->base;
+ struct hrtimer_cpu_base *cpu_base = base->cpu_base;
+ enum hrtimer_restart (*fn)(struct hrtimer *);
+ int restart;
+
+ __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+ timer_stats_account_hrtimer(timer);
+
+ fn = timer->function;
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+ /*
+ * Used for scheduler timers, avoid lock inversion with
+ * rq->lock and tasklist_lock.
+ *
+ * These timers are required to deal with enqueue expiry
+ * themselves and are not allowed to migrate.
+ */
+ spin_unlock(&cpu_base->lock);
+ restart = fn(timer);
+ spin_lock(&cpu_base->lock);
+ } else
+ restart = fn(timer);
+
+ /*
+ * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
+ * reprogramming of the event hardware. This happens at the end of this
+ * function anyway.
+ */
+ if (restart != HRTIMER_NORESTART) {
+ BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
+ enqueue_hrtimer(timer, base, 0);
+ }
+ timer->state &= ~HRTIMER_STATE_CALLBACK;
+}
+
#ifdef CONFIG_HIGH_RES_TIMERS

/*
@@ -1063,9 +1147,7 @@ void hrtimer_interrupt(struct clock_even
basenow = ktime_add(now, base->offset);

while ((node = base->first)) {
- enum hrtimer_restart (*fn)(struct hrtimer *);
struct hrtimer *timer;
- int restart;

timer = rb_entry(node, struct hrtimer, node);

@@ -1089,37 +1171,7 @@ void hrtimer_interrupt(struct clock_even
continue;
}

- __remove_hrtimer(timer, base,
- HRTIMER_STATE_CALLBACK, 0);
- timer_stats_account_hrtimer(timer);
-
- fn = timer->function;
- if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
- /*
- * Used for scheduler timers, avoid lock
- * inversion with rq->lock and tasklist_lock.
- *
- * These timers are required to deal with
- * enqueue expiry themselves and are not
- * allowed to migrate.
- */
- spin_unlock(&cpu_base->lock);
- restart = fn(timer);
- spin_lock(&cpu_base->lock);
- } else
- restart = fn(timer);
-
- /*
- * Note: We clear the CALLBACK bit after
- * enqueue_hrtimer to avoid reprogramming of
- * the event hardware. This happens at the end
- * of this function anyway.
- */
- if (restart != HRTIMER_NORESTART) {
- BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
- enqueue_hrtimer(timer, base, 0);
- }
- timer->state &= ~HRTIMER_STATE_CALLBACK;
+ __run_hrtimer(timer);
}
spin_unlock(&cpu_base->lock);
base++;
@@ -1140,52 +1192,41 @@ void hrtimer_interrupt(struct clock_even

static void run_hrtimer_softirq(struct softirq_action *h)
{
- struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
- spin_lock_irq(&cpu_base->lock);
-
- while (!list_empty(&cpu_base->cb_pending)) {
- enum hrtimer_restart (*fn)(struct hrtimer *);
- struct hrtimer *timer;
- int restart;
-
- timer = list_entry(cpu_base->cb_pending.next,
- struct hrtimer, cb_entry);
+ run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
+}

- timer_stats_account_hrtimer(timer);
+#endif /* CONFIG_HIGH_RES_TIMERS */

- fn = timer->function;
- __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
- spin_unlock_irq(&cpu_base->lock);
+/*
+ * Called from timer softirq every jiffy, expire hrtimers:
+ *
+ * For HRT it's the fallback code to run the softirq in the timer
+ * softirq context in case the hrtimer initialization failed or has
+ * not been done yet.
+ */
+void hrtimer_run_pending(void)
+{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);

- restart = fn(timer);
+ if (hrtimer_hres_active())
+ return;

- spin_lock_irq(&cpu_base->lock);
+ /*
+ * This _is_ ugly: We have to check in the softirq context,
+ * whether we can switch to highres and / or nohz mode. The
+ * clocksource switch happens in the timer interrupt with
+ * xtime_lock held. Notification from there only sets the
+ * check bit in the tick_oneshot code, otherwise we might
+ * deadlock vs. xtime_lock.
+ */
+ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
+ hrtimer_switch_to_hres();

- timer->state &= ~HRTIMER_STATE_CALLBACK;
- if (restart == HRTIMER_RESTART) {
- BUG_ON(hrtimer_active(timer));
- /*
- * Enqueue the timer, allow reprogramming of the event
- * device
- */
- enqueue_hrtimer(timer, timer->base, 1);
- } else if (hrtimer_active(timer)) {
- /*
- * If the timer was rearmed on another CPU, reprogram
- * the event device.
- */
- if (timer->base->first == &timer->node)
- hrtimer_reprogram(timer, timer->base);
- }
- }
- spin_unlock_irq(&cpu_base->lock);
+ run_hrtimer_pending(cpu_base);
}

-#endif /* CONFIG_HIGH_RES_TIMERS */
-
/*
- * Expire the per base hrtimer-queue:
+ * Called from hardirq context every jiffy
*/
static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
int index)
@@ -1199,46 +1240,27 @@ static inline void run_hrtimer_queue(str
if (base->get_softirq_time)
base->softirq_time = base->get_softirq_time();

- spin_lock_irq(&cpu_base->lock);
+ spin_lock(&cpu_base->lock);

while ((node = base->first)) {
struct hrtimer *timer;
- enum hrtimer_restart (*fn)(struct hrtimer *);
- int restart;

timer = rb_entry(node, struct hrtimer, node);
if (base->softirq_time.tv64 <= timer->expires.tv64)
break;

-#ifdef CONFIG_HIGH_RES_TIMERS
- WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ);
-#endif
- timer_stats_account_hrtimer(timer);
-
- fn = timer->function;
- __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
- spin_unlock_irq(&cpu_base->lock);
-
- restart = fn(timer);
-
- spin_lock_irq(&cpu_base->lock);
-
- timer->state &= ~HRTIMER_STATE_CALLBACK;
- if (restart != HRTIMER_NORESTART) {
- BUG_ON(hrtimer_active(timer));
- enqueue_hrtimer(timer, base, 0);
+ if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
+ __remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0);
+ list_add_tail(&timer->cb_entry,
+ &base->cpu_base->cb_pending);
+ continue;
}
+
+ __run_hrtimer(timer);
}
- spin_unlock_irq(&cpu_base->lock);
+ spin_unlock(&cpu_base->lock);
}

-/*
- * Called from timer softirq every jiffy, expire hrtimers:
- *
- * For HRT its the fall back code to run the softirq in the timer
- * softirq context in case the hrtimer initialization failed or has
- * not been done yet.
- */
void hrtimer_run_queues(void)
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
@@ -1247,18 +1269,6 @@ void hrtimer_run_queues(void)
if (hrtimer_hres_active())
return;

- /*
- * This _is_ ugly: We have to check in the softirq context,
- * whether we can switch to highres and / or nohz mode. The
- * clocksource switch happens in the timer interrupt with
- * xtime_lock held. Notification from there only sets the
- * check bit in the tick_oneshot code, otherwise we might
- * deadlock vs. xtime_lock.
- */
- if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
- if (hrtimer_switch_to_hres())
- return;
-
hrtimer_get_softirq_time(cpu_base);

for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
Index: linux-2.6/kernel/timer.c
===================================================================
--- linux-2.6.orig/kernel/timer.c
+++ linux-2.6/kernel/timer.c
@@ -896,7 +896,7 @@ static void run_timer_softirq(struct sof
{
tvec_base_t *base = __get_cpu_var(tvec_bases);

- hrtimer_run_queues();
+ hrtimer_run_pending();

if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
@@ -907,6 +907,7 @@ static void run_timer_softirq(struct sof
*/
void run_local_timers(void)
{
+ hrtimer_run_queues();
raise_softirq(TIMER_SOFTIRQ);
softlockup_tick();
}
Index: linux-2.6/include/linux/hrtimer.h
===================================================================
--- linux-2.6.orig/include/linux/hrtimer.h
+++ linux-2.6/include/linux/hrtimer.h
@@ -115,10 +115,8 @@ struct hrtimer {
enum hrtimer_restart (*function)(struct hrtimer *);
struct hrtimer_clock_base *base;
unsigned long state;
-#ifdef CONFIG_HIGH_RES_TIMERS
enum hrtimer_cb_mode cb_mode;
struct list_head cb_entry;
-#endif
#ifdef CONFIG_TIMER_STATS
void *start_site;
char start_comm[16];
@@ -194,10 +192,10 @@ struct hrtimer_cpu_base {
spinlock_t lock;
struct lock_class_key lock_key;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
+ struct list_head cb_pending;
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next;
int hres_active;
- struct list_head cb_pending;
unsigned long nr_events;
#endif
};
@@ -319,6 +317,7 @@ extern void hrtimer_init_sleeper(struct

/* Soft interrupt function to run the hrtimer queues: */
extern void hrtimer_run_queues(void);
+extern void hrtimer_run_pending(void);

/* Bootup initialization: */
extern void __init hrtimers_init(void);

--

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/