Re: [BUG] 2.6.37-rc3 massive interactivity regression on ARM

From: Peter Zijlstra
Date: Fri Dec 10 2010 - 08:27:36 EST


On Fri, 2010-12-10 at 14:17 +0100, Peter Zijlstra wrote:
>
> OK, so I ended up doing the same as you did. Still staring at that; 32bit
> will go very funny in the head once every so often (a remote CPU can read
> a half-updated 64-bit irq time). One possible solution would be to ignore
> the occasional abs(irq_delta) > 2 * delta.
>
> That would however result in an accounting discrepancy such that:
> clock_task + irq_time != clock
>
> Thoughts?

The brute-force solution is a seqcount (only needed on 32-bit; 64-bit
keeps the plain reads), something like so:

---
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -1786,21 +1786,63 @@ static void deactivate_task(struct rq *r
#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
- * There are no locks covering percpu hardirq/softirq time.
- * They are only modified in account_system_vtime, on corresponding CPU
- * with interrupts disabled. So, writes are safe.
+ * There are no locks covering percpu hardirq/softirq time. They are only
+ * modified in account_system_vtime, on corresponding CPU with interrupts
+ * disabled. So, writes are safe.
+ *
* They are read and saved off onto struct rq in update_rq_clock().
- * This may result in other CPU reading this CPU's irq time and can
- * race with irq/account_system_vtime on this CPU. We would either get old
- * or new value (or semi updated value on 32 bit) with a side effect of
- * accounting a slice of irq time to wrong task when irq is in progress
- * while we read rq->clock. That is a worthy compromise in place of having
- * locks on each irq in account_system_time.
+ *
+ * This may result in other CPU reading this CPU's irq time and can race with
+ * irq/account_system_vtime on this CPU. We would either get old or new value
+ * with a side effect of accounting a slice of irq time to wrong task when irq
+ * is in progress while we read rq->clock. That is a worthy compromise in place
+ * of having locks on each irq in account_system_time.
*/
static DEFINE_PER_CPU(u64, cpu_hardirq_time);
static DEFINE_PER_CPU(u64, cpu_softirq_time);
-
static DEFINE_PER_CPU(u64, irq_start_time);
+
+#ifndef CONFIG_64BIT
+static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
+
+static inline void irq_time_write_begin(int cpu)
+{
+ write_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+}
+
+static inline void irq_time_write_end(int cpu)
+{
+ write_seqcount_end(&per_cpu(irq_time_seq, cpu));
+}
+
+static inline u64 irq_time_read(int cpu)
+{
+ u64 irq_time;
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+ irq_time = per_cpu(cpu_softirq_time, cpu) +
+ per_cpu(cpu_hardirq_time, cpu);
+ } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+
+ return irq_time;
+}
+#else /* CONFIG_64BIT */
+static inline void irq_time_write_begin(int cpu)
+{
+}
+
+static inline void irq_time_write_end(int cpu)
+{
+}
+
+static inline u64 irq_time_read(int cpu)
+{
+ return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+}
+#endif /* CONFIG_64BIT */
+
static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
@@ -1820,6 +1862,7 @@ static void __account_system_vtime(int c
delta = now - per_cpu(irq_start_time, cpu);
per_cpu(irq_start_time, cpu) = now;

+ irq_time_write_begin(cpu);
if (hardirq_count())
per_cpu(cpu_hardirq_time, cpu) += delta;
/*
@@ -1830,6 +1873,7 @@ static void __account_system_vtime(int c
*/
else if (in_serving_softirq() && !(current->flags & PF_KSOFTIRQD))
per_cpu(cpu_softirq_time, cpu) += delta;
+ irq_time_write_end(cpu);
}

/*
@@ -1859,14 +1903,11 @@ EXPORT_SYMBOL_GPL(account_system_vtime);

static u64 irq_time_cpu(struct rq *rq)
{
- int cpu = cpu_of(rq);
/*
* See the comment in update_rq_clock_task(), ideally we'd update
* the *irq_time values using rq->clock here.
- *
- * As it stands, reading this from a remote cpu is buggy on 32bit.
*/
- return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+ return irq_time_read(cpu_of(rq));
}

static void update_rq_clock_task(struct rq *rq, s64 delta)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/