Re: [PATCH] perf: Fix missing SIGTRAPs

From: Marco Elver
Date: Fri Oct 07 2022 - 05:37:53 EST


On Thu, Oct 06, 2022 at 06:02PM +0200, Peter Zijlstra wrote:

> This can happen if we get two consecutive event_sched_out() and both
> instances will have pending_sigtrap set. This can happen when the event
> that has sigtrap set also triggers in kernel space.
>
> You then get task_work list corruption and *boom*.
>
> I'm thinking the below might be the simplest solution; we can only send
> a single signal after all.

That worked. In addition I had to disable the ctx->task != current check
if we're in task_work, because presumably the event might have already
been disabled/moved??

At least with all the below fixups, things seem to work (tests +
light fuzzing).

Thanks,
-- Marco

------ >8 ------

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9319af6013f1..29ed6e58906b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2285,9 +2285,10 @@ event_sched_out(struct perf_event *event,
*/
local_dec(&event->ctx->nr_pending);
} else {
- WARN_ON_ONCE(event->pending_work);
- event->pending_work = 1;
- task_work_add(current, &event->pending_task, TWA_RESUME);
+ if (!event->pending_work) {
+ event->pending_work = 1;
+ task_work_add(current, &event->pending_task, TWA_RESUME);
+ }
}
}

@@ -6455,18 +6456,19 @@ void perf_event_wakeup(struct perf_event *event)
}
}

-static void perf_sigtrap(struct perf_event *event)
+static void perf_sigtrap(struct perf_event *event, bool in_task_work)
{
/*
* We'd expect this to only occur if the irq_work is delayed and either
* ctx->task or current has changed in the meantime. This can be the
* case on architectures that do not implement arch_irq_work_raise().
*/
- if (WARN_ON_ONCE(event->ctx->task != current))
+ if (WARN_ON_ONCE(!in_task_work && event->ctx->task != current))
return;

/*
- * perf_pending_irq() can race with the task exiting.
+ * Both perf_pending_task() and perf_pending_irq() can race with the
+ * task exiting.
*/
if (current->flags & PF_EXITING)
return;
@@ -6496,7 +6498,7 @@ static void __perf_pending_irq(struct perf_event *event)
if (event->pending_sigtrap) {
event->pending_sigtrap = 0;
local_dec(&event->ctx->nr_pending);
- perf_sigtrap(event);
+ perf_sigtrap(event, false);
}
if (event->pending_disable) {
event->pending_disable = 0;
@@ -6563,16 +6565,18 @@ static void perf_pending_task(struct callback_head *head)
* If we 'fail' here, that's OK, it means recursion is already disabled
* and we won't recurse 'further'.
*/
+ preempt_disable_notrace();
rctx = perf_swevent_get_recursion_context();

if (event->pending_work) {
event->pending_work = 0;
local_dec(&event->ctx->nr_pending);
- perf_sigtrap(event);
+ perf_sigtrap(event, true);
}

if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
+ preempt_enable_notrace();
}

#ifdef CONFIG_GUEST_PERF_EVENTS