[PATCH] Speed up __run_timers() when jiffie gap is large

From: Wenbo Wang
Date: Thu Apr 09 2015 - 04:01:33 EST


From: Wenbo Wang <wenbo.wang@xxxxxxxxxxxx>

Signed-off-by: Wenbo Wang <wenbo.wang@xxxxxxxxxxxx>
Reviewed-by: Chong Yuan <chong.yuan@xxxxxxxxxxxx>

Currently __run_timers() advances timer_jiffies one tick at a time in its
catch-up loop. This is very slow when the gap between jiffies and
timer_jiffies is large.

The following test is done on idle cpu 15 (isolated by the isolcpus= kernel
option). There is a clocksource_watchdog timer that expires every 20s.

run_timer_softirq() (which invokes __run_timers) takes 120+us to catch up with jiffies.

[root@localhost tracing]# cat trace_pipe
450.328663 | 15) ! 124.138 us | run_timer_softirq();
470.318675 | 15) ! 128.002 us | run_timer_softirq();
490.308875 | 15) | run_timer_softirq() {
490.309048 | 15) ! 164.647 us | }
510.299059 | 15) | run_timer_softirq() {
510.299230 | 15) ! 162.307 us | }

After applying this patch, the same softirq takes less than 20us to complete
the same work.

[root@localhost tracing]# cat trace_pipe
430.058585 | 15) + 11.228 us | run_timer_softirq();
450.048683 | 15) 7.936 us | run_timer_softirq();
470.038760 | 15) + 19.443 us | run_timer_softirq();
490.028856 | 15) + 11.361 us | run_timer_softirq();
510.018953 | 15) + 10.085 us | run_timer_softirq();
530.009038 | 15) + 10.075 us | run_timer_softirq();
550.000242 | 15) + 10.958 us | run_timer_softirq();
---
kernel/time/timer.c | 32 +++++++++++++++++++++++++++-----
1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2d3f5c5..15da6a4 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -88,6 +88,7 @@ struct tvec_base {
struct tvec tv3;
struct tvec tv4;
struct tvec tv5;
+ int tv1_cnt; /* number of newly inserted timers in tv1 */
} ____cacheline_aligned;

struct tvec_base boot_tvec_bases;
@@ -363,6 +364,7 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
if (idx < TVR_SIZE) {
int i = expires & TVR_MASK;
vec = base->tv1.vec + i;
+ base->tv1_cnt++;
} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
int i = (expires >> TVR_BITS) & TVN_MASK;
vec = base->tv2.vec + i;
@@ -378,6 +380,7 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
* or you set a timer to go off in the past
*/
vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
+ base->tv1_cnt++;
} else {
int i;
/* If the timeout is larger than MAX_TVAL (on 64-bit
@@ -1174,6 +1177,7 @@ static inline void __run_timers(struct tvec_base *base)
spin_unlock_irq(&base->lock);
return;
}
+
while (time_after_eq(jiffies, base->timer_jiffies)) {
struct list_head work_list;
struct list_head *head = &work_list;
@@ -1182,11 +1186,29 @@ static inline void __run_timers(struct tvec_base *base)
/*
* Cascade timers:
*/
- if (!index &&
- (!cascade(base, &base->tv2, INDEX(0))) &&
- (!cascade(base, &base->tv3, INDEX(1))) &&
- !cascade(base, &base->tv4, INDEX(2)))
- cascade(base, &base->tv5, INDEX(3));
+ if (unlikely(!index)) {
+ if (!cascade(base, &base->tv2, INDEX(0))) {
+ if ((!cascade(base, &base->tv3, INDEX(1))) &&
+ !cascade(base, &base->tv4, INDEX(2)))
+ cascade(base, &base->tv5, INDEX(3));
+ }
+
+ /*
+ * We are just crossing the boundary of tv1 (usually 256 jiffies).
+ * Since there was no new timers inserted to tv1 and all the old
+ * timers have been processed, it is guaranteed that tv1 has no
+ * pending timers, so jiffie can jump to the next boundary at
+ * most.
+ */
+ if (base->tv1_cnt == 0) {
+ base->timer_jiffies += min_t(unsigned long,
+ TVR_SIZE - index, jiffies - base->timer_jiffies + 1);
+ continue;
+ }
+
+ base->tv1_cnt = 0;
+ }
+
++base->timer_jiffies;
list_replace_init(base->tv1.vec + index, head);
while (!list_empty(head)) {
--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/