Re: [PATCH v6 1/2] rcu: Add RCU stall diagnosis information

From: Leizhen (ThunderTown)
Date: Thu Nov 10 2022 - 01:55:20 EST




On 2022/11/9 23:20, Frederic Weisbecker wrote:
> On Wed, Nov 09, 2022 at 05:37:37PM +0800, Zhen Lei wrote:
>> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
>> index ed93ddb8203d42c..e1ff23b2a14d71d 100644
>> --- a/kernel/rcu/tree.c
>> +++ b/kernel/rcu/tree.c
>> @@ -866,6 +866,23 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
>> rdp->rcu_iw_gp_seq = rnp->gp_seq;
>> irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
>> }
>> +
>> + if (rcu_cpu_stall_cputime && rdp->snap_record.gp_seq != rdp->gp_seq) {
>> + u64 *cpustat;
>> + struct rcu_snap_record *rsrp;
>> +
>> + cpustat = kcpustat_cpu(rdp->cpu).cpustat;
>> +
>> + rsrp = &rdp->snap_record;
>> + rsrp->cputime_irq = cpustat[CPUTIME_IRQ];
>> + rsrp->cputime_softirq = cpustat[CPUTIME_SOFTIRQ];
>> + rsrp->cputime_system = cpustat[CPUTIME_SYSTEM];
>
> You need to use kcpustat_field(), otherwise you'll get stale values on nohz_full CPUs.

OK, I'll update it. Thanks.

>
>> + rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu);
>> + rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu);
>> + rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu);
>> + rsrp->jiffies = jiffies;
>> + rsrp->gp_seq = rdp->gp_seq;
>> + }
>> }
>>
>> return 0;
>> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
>> index 5653560573e22d6..7b6afb9c7b96dbe 100644
>> --- a/kernel/rcu/tree_stall.h
>> +++ b/kernel/rcu/tree_stall.h
>> @@ -428,6 +428,33 @@ static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp
>> return j > 2 * HZ;
>> }
>>
>> +static void print_cpu_stat_info(int cpu)
>> +{
>> + u64 *cpustat;
>> + struct rcu_snap_record *rsrp;
>> + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
>> +
>> + if (!rcu_cpu_stall_cputime)
>> + return;
>> +
>> + rsrp = &rdp->snap_record;
>> + if (rsrp->gp_seq != rdp->gp_seq)
>> + return;
>> +
>> + cpustat = kcpustat_cpu(cpu).cpustat;
>> +
>> + pr_err(" hardirqs softirqs csw/system\n");
>> + pr_err(" number: %8ld %10d %12lld\n",
>> + kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs,
>> + kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs,
>> + nr_context_switches_cpu(cpu) - rsrp->nr_csw);
>> + pr_err("cputime: %8lld %10lld %12lld ==> %lld(ms)\n",
>> + div_u64(cpustat[CPUTIME_IRQ] - rsrp->cputime_irq, NSEC_PER_MSEC),
>> + div_u64(cpustat[CPUTIME_SOFTIRQ] - rsrp->cputime_softirq, NSEC_PER_MSEC),
>> + div_u64(cpustat[CPUTIME_SYSTEM] - rsrp->cputime_system,
>> NSEC_PER_MSEC),
>
> Same here: use kcpustat_field() instead of reading cpustat[] directly.

OK

>
> Thanks.
> .
>

--
Regards,
Zhen Lei