Re: [PATCH 2/2] tick: Make sure tick timer is active when bypassing reprogramming

From: Frederic Weisbecker
Date: Tue Jun 06 2017 - 10:52:43 EST


On Sat, Jun 03, 2017 at 01:00:53PM +0000, Levin, Alexander (Sasha Levin) wrote:
> On Sat, Jun 03, 2017 at 02:42:43PM +0200, Frederic Weisbecker wrote:
> > On Sat, Jun 03, 2017 at 08:06:41AM +0000, Levin, Alexander (Sasha Levin) wrote:
> > > On Fri, Apr 21, 2017 at 04:00:55PM +0200, Frederic Weisbecker wrote:
> > > > diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> > > > index 502b320..be7ca4d 100644
> > > > --- a/kernel/time/tick-sched.c
> > > > +++ b/kernel/time/tick-sched.c
> > > > @@ -783,8 +783,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
> > > > tick = expires;
> > > >
> > > > /* Skip reprogram of event if its not changed */
> > > > - if (ts->tick_stopped && (expires == ts->next_tick))
> > > > - goto out;
> > > > + if (ts->tick_stopped && (expires == ts->next_tick)) {
> > > > + /* Sanity check: make sure clockevent is actually programmed */
> > > > + if (likely(dev->next_event <= ts->next_tick))
> > > > + goto out;
> > > > +
> > > > + WARN_ON_ONCE(1);
> > > > + }
> > >
> > > I seem to be hitting that in a KVM vm, even without load (sometimes
> > > right after boot):
> >
> > Ah, can you tell me which tree you were using? Is it tip/master?
>
> Its next-20170601: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20170601&id=3ab334ebe84e0dfd1cc3ea2fe77f5ce4406f7370
>
> > Can you give me its HEAD and your config file?
>
> Attached config.

Thanks Sasha!

I couldn't reproduce it, that config boots fine on my kvm.
Would you have the time to dump some traces for me?

I'd need you to add this boot option: trace_event=hrtimer_cancel,hrtimer_start,hrtimer_expire_entry
And boot your kernel with the below patch. This will dump the timer traces to your console.
I would be very interested in the resulting console dump file.

Thanks!

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2de9c55..ad1de28 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -156,6 +156,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
* to the same deadline.
*/
ts->next_tick = 0;
+ trace_printk("ts->next_tick reset (tick)\n");
}
#endif
update_process_times(user_mode(regs));
@@ -672,6 +673,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
* cached clock deadline.
*/
ts->next_tick = 0;
+ trace_printk("ts->next_tick reset (tick restart)\n");
}

static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
@@ -788,6 +790,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
if (likely(dev->next_event <= ts->next_tick))
goto out;

+ trace_printk("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
+ basemono, ts->next_tick, dev->next_event,
+ hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
+ tracing_stop();
+ ftrace_dump(DUMP_ORIG);
WARN_ON_ONCE(1);
printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
basemono, ts->next_tick, dev->next_event,
@@ -812,6 +819,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
}

ts->next_tick = tick;
+ trace_printk("ts->next_tick = %llu\n", ts->next_tick);

/*
* If the expiration time == KTIME_MAX, then we simply stop
@@ -894,6 +902,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
* deadline if it comes back online later.
*/
ts->next_tick = 0;
+ trace_printk("ts->next_tick reset (offline)\n");
return false;
}

@@ -1202,8 +1211,10 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
*/
if (regs)
tick_sched_handle(ts, regs);
- else
+ else {
+ trace_printk("ts->next_tick reset (tick)\n");
ts->next_tick = 0;
+ }

/* No need to reprogram if we are in idle or full dynticks mode */
if (unlikely(ts->tick_stopped))