Re: [PATCH v2] riscv: Add tracepoints for SBI calls and returns

From: Samuel Holland
Date: Mon Mar 25 2024 - 23:39:40 EST


Hi Alex,

On 2024-03-22 4:27 AM, Alexandre Ghiti wrote:
> On Fri, Mar 22, 2024 at 12:01 AM Samuel Holland
> <samuel.holland@xxxxxxxxxx> wrote:
>>
>> These are useful for measuring the latency of SBI calls. The SBI HSM
>> extension is excluded because those functions are called from contexts
>> such as cpuidle where instrumentation is not allowed.
>>
>> Reviewed-by: Andrew Jones <ajones@xxxxxxxxxxxxxxxx>
>> Signed-off-by: Samuel Holland <samuel.holland@xxxxxxxxxx>
>> ---
>>
>> Changes in v2:
>> - Remove some blank lines
>>
>> arch/riscv/include/asm/trace.h | 54 ++++++++++++++++++++++++++++++++++
>> arch/riscv/kernel/sbi.c | 7 +++++
>> 2 files changed, 61 insertions(+)
>> create mode 100644 arch/riscv/include/asm/trace.h
>>
>> diff --git a/arch/riscv/include/asm/trace.h b/arch/riscv/include/asm/trace.h
>> new file mode 100644
>> index 000000000000..6151cee5450c
>> --- /dev/null
>> +++ b/arch/riscv/include/asm/trace.h
>> @@ -0,0 +1,54 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM riscv
>> +
>> +#if !defined(_TRACE_RISCV_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_RISCV_H
>> +
>> +#include <linux/tracepoint.h>
>> +
>> +TRACE_EVENT_CONDITION(sbi_call,
>> + TP_PROTO(int ext, int fid),
>> + TP_ARGS(ext, fid),
>> + TP_CONDITION(ext != SBI_EXT_HSM),
>> +
>> + TP_STRUCT__entry(
>> + __field(int, ext)
>> + __field(int, fid)
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->ext = ext;
>> + __entry->fid = fid;
>> + ),
>> +
>> + TP_printk("ext=0x%x fid=%d", __entry->ext, __entry->fid)
>> +);
>> +
>> +TRACE_EVENT_CONDITION(sbi_return,
>> + TP_PROTO(int ext, long error, long value),
>> + TP_ARGS(ext, error, value),
>> + TP_CONDITION(ext != SBI_EXT_HSM),
>> +
>> + TP_STRUCT__entry(
>> + __field(long, error)
>> + __field(long, value)
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->error = error;
>> + __entry->value = value;
>> + ),
>> +
>> + TP_printk("error=%ld value=0x%lx", __entry->error, __entry->value)
>> +);
>> +
>> +#endif /* _TRACE_RISCV_H */
>> +
>> +#undef TRACE_INCLUDE_PATH
>> +#undef TRACE_INCLUDE_FILE
>> +
>> +#define TRACE_INCLUDE_PATH asm
>> +#define TRACE_INCLUDE_FILE trace
>> +
>> +#include <trace/define_trace.h>
>> diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
>> index e66e0999a800..a1d21d8f5293 100644
>> --- a/arch/riscv/kernel/sbi.c
>> +++ b/arch/riscv/kernel/sbi.c
>> @@ -14,6 +14,9 @@
>> #include <asm/smp.h>
>> #include <asm/tlbflush.h>
>>
>> +#define CREATE_TRACE_POINTS
>> +#include <asm/trace.h>
>> +
>> /* default SBI version is 0.1 */
>> unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
>> EXPORT_SYMBOL(sbi_spec_version);
>> @@ -31,6 +34,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
>> {
>> struct sbiret ret;
>>
>> + trace_sbi_call(ext, fid);
>> +
>> register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
>> register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
>> register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
>> @@ -46,6 +51,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
>> ret.error = a0;
>> ret.value = a1;
>>
>> + trace_sbi_return(ext, ret.error, ret.value);
>> +
>> return ret;
>> }
>> EXPORT_SYMBOL(sbi_ecall);
>> --
>> 2.43.1
>>
>
> sbi_ecall() is used by flush_icache_all() so I was a bit scared we
> would need to patch 2 instructions using a tracepoint and then to use
> flush_icache_all() which could bug if only one of the patched
> instruction was seen. But I took a look at the disassembled code, and
> tracepoints happen to only need to patch 1 nop into an unconditional
> jump, which is not a problem then.

Right, tracepoints use the static branch infrastructure, which uses a single
instruction.

> But then when disassembling, I noticed that the addition of the
> tracepoint comes with "quite" some overhead:
>
> Before:
>
> Dump of assembler code for function sbi_ecall:
> 0xffffffff800085e0 <+0>: add sp,sp,-32
> 0xffffffff800085e2 <+2>: sd s0,24(sp)
> 0xffffffff800085e4 <+4>: mv t1,a0
> 0xffffffff800085e6 <+6>: add s0,sp,32
> 0xffffffff800085e8 <+8>: mv t3,a1
> 0xffffffff800085ea <+10>: mv a0,a2
> 0xffffffff800085ec <+12>: mv a1,a3
> 0xffffffff800085ee <+14>: mv a2,a4
> 0xffffffff800085f0 <+16>: mv a3,a5
> 0xffffffff800085f2 <+18>: mv a4,a6
> 0xffffffff800085f4 <+20>: mv a5,a7
> 0xffffffff800085f6 <+22>: mv a6,t3
> 0xffffffff800085f8 <+24>: mv a7,t1
> 0xffffffff800085fa <+26>: ecall
> 0xffffffff800085fe <+30>: ld s0,24(sp)
> 0xffffffff80008600 <+32>: add sp,sp,32
> 0xffffffff80008602 <+34>: ret
>
> After:
>
> Dump of assembler code for function sbi_ecall:
> 0xffffffff8000bbf2 <+0>: add sp,sp,-112
> 0xffffffff8000bbf4 <+2>: sd s0,96(sp)
> 0xffffffff8000bbf6 <+4>: sd s1,88(sp)
> 0xffffffff8000bbf8 <+6>: sd s3,72(sp)
> 0xffffffff8000bbfa <+8>: sd s4,64(sp)
> 0xffffffff8000bbfc <+10>: sd s5,56(sp)
> 0xffffffff8000bbfe <+12>: sd s6,48(sp)
> 0xffffffff8000bc00 <+14>: sd s7,40(sp)
> 0xffffffff8000bc02 <+16>: sd s8,32(sp)
> 0xffffffff8000bc04 <+18>: sd s9,24(sp)
> 0xffffffff8000bc06 <+20>: sd ra,104(sp)
> 0xffffffff8000bc08 <+22>: sd s2,80(sp)
> 0xffffffff8000bc0a <+24>: add s0,sp,112
> 0xffffffff8000bc0c <+26>: mv s1,a0
> 0xffffffff8000bc0e <+28>: mv s3,a1
> 0xffffffff8000bc10 <+30>: mv s9,a2
> 0xffffffff8000bc12 <+32>: mv s8,a3
> 0xffffffff8000bc14 <+34>: mv s7,a4
> 0xffffffff8000bc16 <+36>: mv s6,a5
> 0xffffffff8000bc18 <+38>: mv s5,a6
> 0xffffffff8000bc1a <+40>: mv s4,a7
> 0xffffffff8000bc1c <+42>: nop
> 0xffffffff8000bc20 <+46>: mv a0,s9
> 0xffffffff8000bc22 <+48>: mv a1,s8
> 0xffffffff8000bc24 <+50>: mv a2,s7
> 0xffffffff8000bc26 <+52>: mv a3,s6
> 0xffffffff8000bc28 <+54>: mv a4,s5
> 0xffffffff8000bc2a <+56>: mv a5,s4
> 0xffffffff8000bc2c <+58>: mv a6,s3
> 0xffffffff8000bc2e <+60>: mv a7,s1
> 0xffffffff8000bc30 <+62>: ecall
> 0xffffffff8000bc34 <+66>: mv s4,a0
> 0xffffffff8000bc36 <+68>: mv s3,a1
> 0xffffffff8000bc38 <+70>: nop
> 0xffffffff8000bc3c <+74>: ld ra,104(sp)
> 0xffffffff8000bc3e <+76>: ld s0,96(sp)
> 0xffffffff8000bc40 <+78>: ld s1,88(sp)
> 0xffffffff8000bc42 <+80>: ld s2,80(sp)
> 0xffffffff8000bc44 <+82>: ld s5,56(sp)
> 0xffffffff8000bc46 <+84>: ld s6,48(sp)
> 0xffffffff8000bc48 <+86>: ld s7,40(sp)
> 0xffffffff8000bc4a <+88>: ld s8,32(sp)
> 0xffffffff8000bc4c <+90>: ld s9,24(sp)
> 0xffffffff8000bc4e <+92>: mv a0,s4
> 0xffffffff8000bc50 <+94>: mv a1,s3
> 0xffffffff8000bc52 <+96>: ld s4,64(sp)
> 0xffffffff8000bc54 <+98>: ld s3,72(sp)
> 0xffffffff8000bc56 <+100>: add sp,sp,112
> 0xffffffff8000bc58 <+102>: ret
>
> Not sure this is really problematic though, let me know what you think.

The overhead is only incurred when tracepoints are enabled. But when they are
enabled, I do not think it is avoidable, unless we were to hide this tracepoint
behind some Kconfig option. I don't think the overhead enough to be a problem,
but I suppose it's up to maintainer judgement.

Regards,
Samuel