Re: [PATCH 2/7] perf: Generalize task_function_call()ers

From: Peter Zijlstra
Date: Thu Dec 03 2015 - 12:34:46 EST


On Thu, Dec 03, 2015 at 12:32:37PM +0200, Alexander Shishkin wrote:
> +static int
> +remote_call_or_ctx_lock(struct perf_event *event, remote_function_f func,
> + void *info, enum perf_event_active_state retry_state)
> +{
> + struct perf_event_context *ctx = event->ctx;
> + struct task_struct *task = ctx->task;
> + unsigned int retry = 1;
> +
> + if (!task) {
> + /*
> + * Per cpu events are removed via an smp call. The removal can
> + * fail if the CPU is currently offline, but in that case we
> + * already called __perf_remove_from_context from
> + * perf_event_exit_cpu.
> + */
> + cpu_function_call(event->cpu, func, info);
> + return 0;
> + }
> +
> + raw_spin_lock_irq(&ctx->lock);
> + do {
> + /*
> + * Reload the task pointer, it might have been changed by
> + * a concurrent perf_event_context_sched_out().
> + */
> + task = ctx->task;
> +
> + /*
> + * If the context is inactive, we don't need a cross call to
> + * fiddle with the event so long as the ctx::lock is held.
> + */
> + if (!ctx->is_active)
> + break;
> +
> + raw_spin_unlock_irq(&ctx->lock);
> +
> + if (!task_function_call(task, func, info))
> + return 0;
> +
> + raw_spin_lock_irq(&ctx->lock);
> +
> + if (retry_state <= PERF_EVENT_STATE_ACTIVE)
> + retry = event->state == retry_state;
> + } while (retry);

OK, so the retry_state thing is clever, but either I'm too tired or it's
not quite right. Nor do I think it's actually required.

/me frobs...

Hmm, I cannot seem to convince myself the current code is correct to
begin with.

In any case, consider the below (on top of my previous collapse patch).
The two 'hard' cases are perf_event_{dis,en}able(), which appear to play
silly games with event->state.

So, starting with perf_event_disable(): we don't strictly need to test
for event->state == ACTIVE; ctx->is_active is enough. If the event is
not scheduled while the ctx is, __perf_event_disable() still does the
right thing. It's a little less efficient to IPI in that case, but
overall simpler.

For perf_event_enable() the same goes, but I think it's actually
broken in its current form. The current retry condition is:
ctx->is_active && event->state == OFF, which means it does nothing when
!ctx->is_active && event->state == OFF. This is wrong: it should still
mark the event INACTIVE in that case, otherwise we won't try to schedule
the event once the context becomes active again.
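
For reference, event_function_call() from the collapse patch looks
roughly like the sketch below. It is the same retry loop as the quoted
hunk, minus the retry_state cleverness: the first callback is
cross-called while the context is active, and the second runs locally
under ctx->lock once !ctx->is_active is observed. (The
on_active/on_inactive parameter names are mine, for illustration;
everything else follows the quoted code.)

static void event_function_call(struct perf_event *event,
				int (*on_active)(void *),
				void (*on_inactive)(void *),
				void *data)
{
	struct perf_event_context *ctx = event->ctx;
	struct task_struct *task = ctx->task;

	if (!task) {
		/* Per-CPU event: IPI the CPU it is on. */
		cpu_function_call(event->cpu, on_active, data);
		return;
	}

again:
	if (!task_function_call(task, on_active, data))
		return;

	raw_spin_lock_irq(&ctx->lock);
	if (ctx->is_active) {
		/*
		 * Reload the task pointer, it might have been changed by
		 * a concurrent perf_event_context_sched_out().
		 */
		task = ctx->task;
		raw_spin_unlock_irq(&ctx->lock);
		goto again;
	}

	/*
	 * The context is inactive; since we hold ctx->lock it cannot get
	 * scheduled in, so we can mutate the event state directly.
	 */
	on_inactive(data);
	raw_spin_unlock_irq(&ctx->lock);
}

With that, _perf_event_{dis,en}able() below reduce to a quick state
check under ctx->lock plus a single event_function_call().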


--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1766,6 +1766,20 @@ int __perf_event_disable(void *info)
return 0;
}

+void ___perf_event_disable(void *info)
+{
+ struct perf_event *event = info;
+
+ /*
+ * Since we have the lock this context can't be scheduled
+ * in, so we can change the state safely.
+ */
+ if (event->state == PERF_EVENT_STATE_INACTIVE) {
+ update_group_times(event);
+ event->state = PERF_EVENT_STATE_OFF;
+ }
+}
+
/*
* Disable a event.
*
@@ -1782,43 +1796,16 @@ int __perf_event_disable(void *info)
static void _perf_event_disable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
- struct task_struct *task = ctx->task;
-
- if (!task) {
- /*
- * Disable the event on the cpu that it's on
- */
- cpu_function_call(event->cpu, __perf_event_disable, event);
- return;
- }
-
-retry:
- if (!task_function_call(task, __perf_event_disable, event))
- return;

raw_spin_lock_irq(&ctx->lock);
- /*
- * If the event is still active, we need to retry the cross-call.
- */
- if (event->state == PERF_EVENT_STATE_ACTIVE) {
+ if (event->state <= PERF_EVENT_STATE_OFF) {
raw_spin_unlock_irq(&ctx->lock);
- /*
- * Reload the task pointer, it might have been changed by
- * a concurrent perf_event_context_sched_out().
- */
- task = ctx->task;
- goto retry;
- }
-
- /*
- * Since we have the lock this context can't be scheduled
- * in, so we can change the state safely.
- */
- if (event->state == PERF_EVENT_STATE_INACTIVE) {
- update_group_times(event);
- event->state = PERF_EVENT_STATE_OFF;
+ return;
}
raw_spin_unlock_irq(&ctx->lock);
+
+ event_function_call(event, __perf_event_disable,
+ ___perf_event_disable, event);
}

/*
@@ -2269,6 +2256,11 @@ static int __perf_event_enable(void *inf
return 0;
}

+void ___perf_event_enable(void *info)
+{
+ __perf_event_mark_enabled((struct perf_event *)info);
+}
+
/*
* Enable a event.
*
@@ -2281,58 +2273,26 @@ static int __perf_event_enable(void *inf
static void _perf_event_enable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
- struct task_struct *task = ctx->task;

- if (!task) {
- /*
- * Enable the event on the cpu that it's on
- */
- cpu_function_call(event->cpu, __perf_event_enable, event);
+ raw_spin_lock_irq(&ctx->lock);
+ if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+ raw_spin_unlock_irq(&ctx->lock);
return;
}

- raw_spin_lock_irq(&ctx->lock);
- if (event->state >= PERF_EVENT_STATE_INACTIVE)
- goto out;
-
/*
* If the event is in error state, clear that first.
- * That way, if we see the event in error state below, we
- * know that it has gone back into error state, as distinct
- * from the task having been scheduled away before the
- * cross-call arrived.
+ *
+ * That way, if we see the event in error state below, we know that it
+ * has gone back into error state, as distinct from the task having
+ * been scheduled away before the cross-call arrived.
*/
if (event->state == PERF_EVENT_STATE_ERROR)
event->state = PERF_EVENT_STATE_OFF;
-
-retry:
- if (!ctx->is_active) {
- __perf_event_mark_enabled(event);
- goto out;
- }
-
raw_spin_unlock_irq(&ctx->lock);

- if (!task_function_call(task, __perf_event_enable, event))
- return;
-
- raw_spin_lock_irq(&ctx->lock);
-
- /*
- * If the context is active and the event is still off,
- * we need to retry the cross-call.
- */
- if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
- /*
- * task could have been flipped by a concurrent
- * perf_event_context_sched_out()
- */
- task = ctx->task;
- goto retry;
- }
-
-out:
- raw_spin_unlock_irq(&ctx->lock);
+ event_function_call(event, __perf_event_enable,
+ ___perf_event_enable, event);
}

/*