Re: [PATCH v2 0/4] perf: Make SIGTRAP and __perf_pending_irq() work on RT.

From: Arnaldo Carvalho de Melo
Date: Wed Mar 13 2024 - 09:47:48 EST


On Wed, Mar 13, 2024 at 10:28:44AM -0300, Arnaldo Carvalho de Melo wrote:
> On Wed, Mar 13, 2024 at 09:13:03AM +0100, Sebastian Andrzej Siewior wrote:
> > One part I don't get: did you let it run or did you kill it?

> If I let them run they will finish and exit, no exec_child remains.

> If I instead try to stop the loop that keeps forking 100 of them,
> then the exec_child processes remain spinning.

> > `exec_child' spins until a signal is received or the parent kills it. So
> > it shouldn't remain there forever. And my guess is that it is spinning
> > in userland and not in the kernel.

> Checking that now:

tl;dr: it is the tight loop in userland; full details at the end.

100.00 b6: mov signal_count,%eax
test %eax,%eax
↑ je b6

remove_on_exec.c

/*
 * For exec'd child.
 *
 * Installs sigtrap_handler for SIGTRAP, writes a single int (42) to stdout
 * so the parent knows the child is about to spin, then busy-waits until
 * signal_count becomes non-zero. If either sigaction() or write() fails,
 * the error is reported through perror() and the process leaves via
 * _exit(1). This function never modifies signal_count itself.
 */
static void exec_child(void)
{
struct sigaction action = {};
const int val = 42;

/* Set up sigtrap handler in case we erroneously receive a trap. */
/* SA_SIGINFO selects the sa_sigaction handler form; with SA_NODEFER, SIGTRAP is not blocked while its handler runs. */
action.sa_flags = SA_SIGINFO | SA_NODEFER;
action.sa_sigaction = sigtrap_handler;
sigemptyset(&action.sa_mask);
if (sigaction(SIGTRAP, &action, NULL))
/* Comma expression: perror() runs first, then 1 is passed as the exit status. */
_exit((perror("sigaction failed"), 1));

/* Signal parent that we're starting to spin. */
if (write(STDOUT_FILENO, &val, sizeof(int)) == -1)
_exit((perror("write failed"), 1));

/* Should hang here until killed. */
/*
 * Pure busy-wait: the empty loop body re-tests signal_count with no sleep
 * or yield. signal_count is defined outside this excerpt; presumably
 * sigtrap_handler increments it -- confirm against the full file.
 */
while (!signal_count);
}

So probably just a test needing to be a bit more polished?

Seems like it: on a newer, faster machine I managed to reproduce it on
a non-RT kernel, with one exec_child remaining:

1.44 b6: mov signal_count,%eax
test %eax,%eax
98.56 ↑ je b6

same tight loop:

acme@x1:~/git/perf-tools-next/tools/testing/selftests/perf_events$ pidof exec_child
722300
acme@x1:~/git/perf-tools-next/tools/testing/selftests/perf_events$ ps ax|grep exec_child
722300 pts/2 R 4:08 exec_child
722502 pts/2 S+ 0:00 grep --color=auto exec_child
acme@x1:~/git/perf-tools-next/tools/testing/selftests/perf_events$

- Arnaldo

[root@nine ~]# perf record --call-graph dwarf -p 35785
^C[ perf record: Woken up 48 times to write data ]
[ perf record: Captured and wrote 12.120 MB perf.data (1503 samples) ]

[root@nine ~]# ls -la perf.data
-rw-------. 1 root root 12720152 Mar 13 10:32 perf.data
[root@nine ~]#
[root@nine ~]# perf report --no-child --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 1K of event 'cycles:P'
# Event count (approx.): 926018718
#
# Overhead Command Shared Object Symbol
# ........ ....... ................. ......................................
#
98.48% exe remove_on_exec [.] exec_child
|
---exec_child
main
__libc_start_call_main
__libc_start_main@@GLIBC_2.34
_start

0.33% exe [kernel.kallsyms] [k] arch_scale_freq_tick
0.13% exe [kernel.kallsyms] [k] debug_smp_processor_id
0.13% exe [kernel.kallsyms] [k] check_cpu_stall
0.13% exe [kernel.kallsyms] [k] acct_account_cputime
0.13% exe [kernel.kallsyms] [k] cpuacct_account_field
0.07% exe [kernel.kallsyms] [k] preempt_count_add
0.07% exe [kernel.kallsyms] [k] update_irq_load_avg
0.07% exe [kernel.kallsyms] [k] cgroup_rstat_updated
0.07% exe [kernel.kallsyms] [k] rcu_sched_clock_irq
0.07% exe [kernel.kallsyms] [k] account_user_time
0.07% exe [kernel.kallsyms] [k] __hrtimer_run_queues
0.07% exe [kernel.kallsyms] [k] tick_nohz_highres_handler
0.07% exe [kernel.kallsyms] [k] ktime_get_update_offsets_now
0.06% exe [kernel.kallsyms] [k] __enqueue_entity
0.06% exe [kernel.kallsyms] [k] tick_sched_handle
0.00% exe [kernel.kallsyms] [k] __intel_pmu_enable_all.constprop.0


#
# (Tip: To show assembler sample contexts use perf record -b / perf script -F +brstackinsn --xed)
#
[root@nine ~]#

[root@nine ~]# perf annotate --stdio2 exec_child
Samples: 1K of event 'cycles:P', 4000 Hz, Event count (approx.): 911943256, [percent: local period]
exec_child() /home/acme/git/linux/tools/testing/selftests/perf_events/remove_on_exec
Percent


Disassembly of section .text:

00000000004045cf <exec_child>:
push %rbp
mov %rsp,%rbp
sub $0xb0,%rsp
lea -0xa0(%rbp),%rdx
mov $0x0,%eax
mov $0x13,%ecx
mov %rdx,%rdi
rep stos %rax,%es:(%rdi)
movl $0x2a,-0xa4(%rbp)
movl $0x40000004,-0x18(%rbp)
movq $0x402a2e,-0xa0(%rbp)
lea -0xa0(%rbp),%rax
add $0x8,%rax
mov %rax,%rdi
→ callq sigemptyset@plt
lea -0xa0(%rbp),%rax
mov $0x0,%edx
mov %rax,%rsi
mov $0x5,%edi
→ callq sigaction@plt
test %eax,%eax
↓ je 82
mov $0x4058af,%edi
→ callq perror@plt
mov $0x1,%edi
→ callq _exit@plt
82: lea -0xa4(%rbp),%rax
mov $0x4,%edx
mov %rax,%rsi
mov $0x1,%edi
→ callq write@plt
cmp $0xffffffffffffffff,%rax
↓ jne b5
mov $0x4058c0,%edi
→ callq perror@plt
mov $0x1,%edi
→ callq _exit@plt
b5: nop
100.00 b6: mov signal_count,%eax
test %eax,%eax
↑ je b6
nop
nop
leaveq
← retq
[root@nine ~]#