Re: [PATCH v2 2/5] perf: Free aux pages in unmap path

From: Peter Zijlstra
Date: Mon Mar 14 2016 - 08:38:49 EST


On Fri, Mar 04, 2016 at 03:42:46PM +0200, Alexander Shishkin wrote:
> @@ -4649,10 +4679,22 @@ static void perf_mmap_close(struct vm_area_struct *vma)
> */
> if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
> atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
> + /*
> + * Stop all aux events that are writing to this here buffer,
> + * so that we can free its aux pages and corresponding pmu
> + * data. Note that after rb::aux_mmap_count dropped to zero,
> + * they won't start any more (see perf_aux_output_begin()).
> + */
> + perf_pmu_output_stop(event);

So to me it seems like we're interested in rb, we don't particularly
care about @event in this case.

> +
> + /* now it's safe to free the pages */
> atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
> vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
>
> + /* this has to be the last one */
> rb_free_aux(rb);
> + WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
> +
> mutex_unlock(&event->mmap_mutex);
> }
>

> +static void __perf_event_output_stop(struct perf_event *event, void *data)
> +{
> + struct perf_event *parent = event->parent;
> + struct remote_output *ro = data;
> + struct ring_buffer *rb = ro->rb;
> +
> + if (!has_aux(event))
> + return;
> +

if (!parent)
parent = event;

> + if (rcu_dereference(event->rb) == rb)
s/event/parent/

> + ro->err = __perf_event_stop(event);

> + else if (parent && rcu_dereference(parent->rb) == rb)
> + ro->err = __perf_event_stop(event);

and these can go.. However..

> +}
> +
> +static int __perf_pmu_output_stop(void *info)
> +{
> + struct perf_event *event = info;
> + struct pmu *pmu = event->pmu;
> + struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
> + struct remote_output ro = {
> + .rb = event->rb,
> + };
> +
> + rcu_read_lock();
> + perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro);
> + if (cpuctx->task_ctx)
> + perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
> + &ro);
> + rcu_read_unlock();
> +
> + return ro.err;
> +}
> +
> +static void perf_pmu_output_stop(struct perf_event *event)
> +{
> + int cpu, err;
> +
> + /* better be thorough */
> + get_online_cpus();
> +restart:
> + for_each_online_cpu(cpu) {
> + err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
> + if (err)
> + goto restart;
> + }
> + put_online_cpus();
> +}

This seems wildly overkill, could we not iterate rb->event_list like we
do for the normal buffer?

Sure, we need to IPI for each event found, but that seems better than
unconditionally sending IPIs to all CPUs.