Re: [RFC PATCH 1/2] softirq: Account time and iteration stats per vector

From: Eric Dumazet
Date: Fri Jan 12 2018 - 01:23:09 EST


On Thu, Jan 11, 2018 at 9:35 PM, Frederic Weisbecker
<frederic@xxxxxxxxxx> wrote:
> As we plan to be able to defer some specific softurq vector processing
> to workqueues when those vectors need more time than IRQs can offer,
> let's first count the time spent and the number of occurences per vector.
>
> For now we still defer to ksoftirqd when the per vector limits are reached
>
> Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Signed-off-by: Frederic Weisbecker <frederic@xxxxxxxxxx>
> Cc: Dmitry Safonov <dima@xxxxxxxxxx>
> Cc: Eric Dumazet <edumazet@xxxxxxxxxx>
> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: David Miller <davem@xxxxxxxxxxxxx>
> Cc: Hannes Frederic Sowa <hannes@xxxxxxxxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Levin Alexander <alexander.levin@xxxxxxxxxxx>
> Cc: Paolo Abeni <pabeni@xxxxxxxxxx>
> Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
> Cc: Radu Rendec <rrendec@xxxxxxxxxx>
> Cc: Rik van Riel <riel@xxxxxxxxxx>
> Cc: Stanislaw Gruszka <sgruszka@xxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> ---
> kernel/softirq.c | 37 +++++++++++++++++++++++++++++--------
> 1 file changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index 2f5e87f..fa267f7 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -26,6 +26,7 @@
> #include <linux/smpboot.h>
> #include <linux/tick.h>
> #include <linux/irq.h>
> +#include <linux/sched/clock.h>
>
> #define CREATE_TRACE_POINTS
> #include <trace/events/irq.h>
> @@ -62,6 +63,17 @@ const char * const softirq_to_name[NR_SOFTIRQS] = {
> "TASKLET", "SCHED", "HRTIMER", "RCU"
> };
>
> +struct vector_stat {
> + u64 time;
> + int count;
> +};
> +
> +struct softirq_stat {
> + struct vector_stat stat[NR_SOFTIRQS];
> +};
> +
> +static DEFINE_PER_CPU(struct softirq_stat, softirq_stat_cpu);
> +
> /*
> * we cannot loop indefinitely here to avoid userspace starvation,
> * but we also don't want to introduce a worst case 1/HZ latency
> @@ -203,7 +215,7 @@ EXPORT_SYMBOL(__local_bh_enable_ip);
> * we want to handle softirqs as soon as possible, but they
> * should not be able to lock up the box.
> */
> -#define MAX_SOFTIRQ_TIME msecs_to_jiffies(2)
> +#define MAX_SOFTIRQ_TIME (2 * NSEC_PER_MSEC)
> #define MAX_SOFTIRQ_RESTART 10
>
> #ifdef CONFIG_TRACE_IRQFLAGS
> @@ -241,12 +253,11 @@ static inline void lockdep_softirq_end(bool in_hardirq) { }
>
> asmlinkage __visible void __softirq_entry __do_softirq(void)
> {
> - unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
> + struct softirq_stat *sstat = this_cpu_ptr(&softirq_stat_cpu);
> unsigned long old_flags = current->flags;
> - int max_restart = MAX_SOFTIRQ_RESTART;
> struct softirq_action *h;
> bool in_hardirq;
> - __u32 pending;
> + __u32 pending, overrun = 0;
> int softirq_bit;
>
> /*
> @@ -262,6 +273,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
> __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
> in_hardirq = lockdep_softirq_start();
>
> + memzero_explicit(sstat, sizeof(*sstat));

If you clear sstat here, it means it does not need to be a per cpu
variable, but an automatic one (defined on the stack)

I presume we need a per cpu var to track cpu usage on last time window.

( typical case of 99,000 IRQ per second, one packet delivered per IRQ,
10 usec spent per packet)



> restart:
> /* Reset the pending bitmask before enabling irqs */
> set_softirq_pending(0);
> @@ -271,8 +283,10 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
> h = softirq_vec;
>
> while ((softirq_bit = ffs(pending))) {
> + struct vector_stat *vstat;
> unsigned int vec_nr;
> int prev_count;
> + u64 startime;
>
> h += softirq_bit - 1;
>
> @@ -280,10 +294,18 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
> prev_count = preempt_count();
>
> kstat_incr_softirqs_this_cpu(vec_nr);
> + vstat = &sstat->stat[vec_nr];
>
> trace_softirq_entry(vec_nr);
> + startime = local_clock();
> h->action(h);
> + vstat->time += local_clock() - startime;

You might store local_clock() in a variable, so that we do not call
local_clock() two times per ->action() called.


> + vstat->count++;
> trace_softirq_exit(vec_nr);
> +
> + if (vstat->time > MAX_SOFTIRQ_TIME || vstat->count > MAX_SOFTIRQ_RESTART)

If we trust local_clock() to be precise enough, we do not need to
track vstat->count anymore.

> + overrun |= 1 << vec_nr;
> +
> if (unlikely(prev_count != preempt_count())) {
> pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
> vec_nr, softirq_to_name[vec_nr], h->action,
> @@ -299,11 +321,10 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
>
> pending = local_softirq_pending();
> if (pending) {
> - if (time_before(jiffies, end) && !need_resched() &&
> - --max_restart)
> + if (overrun || need_resched())
> + wakeup_softirqd();
> + else
> goto restart;
> -
> - wakeup_softirqd();
> }
>
> lockdep_softirq_end(in_hardirq);
> --
> 2.7.4
>