Re: [RFC v2] perf: Rewrite core context handling

From: Peter Zijlstra
Date: Mon Jun 13 2022 - 14:29:26 EST


On Mon, Jun 13, 2022 at 04:35:11PM +0200, Peter Zijlstra wrote:

> @@ -3652,17 +3697,28 @@ static noinline int visit_groups_merge(s
> .size = ARRAY_SIZE(itrs),
> };
> /* Events not within a CPU context may be on any CPU. */
> - __heap_add(&event_heap, perf_event_groups_first(groups, -1, NULL));
> + __heap_add(&event_heap, perf_event_groups_first(groups, -1, pmu, NULL));
> }
> evt = event_heap.data;
>
> - __heap_add(&event_heap, perf_event_groups_first(groups, cpu, NULL));
> + __heap_add(&event_heap, perf_event_groups_first(groups, cpu, pmu, NULL));
>
> #ifdef CONFIG_CGROUP_PERF
> for (; css; css = css->parent)
> - __heap_add(&event_heap, perf_event_groups_first(groups, cpu, css->cgroup));
> + __heap_add(&event_heap, perf_event_groups_first(groups, cpu, pmu, css->cgroup));
> #endif
>
> + if (event_heap.nr) {
> + /*
> + * XXX: For now, visit_groups_merge() gets called with pmu
> + * pointer never NULL. But these functions need to be called
> + * once for each pmu if I implement pmu=NULL optimization.
> + */
> + __link_epc((*evt)->pmu_ctx);
> + perf_assert_pmu_disabled((*evt)->pmu_ctx->pmu);
> + }
> +
> +
> min_heapify_all(&event_heap, &perf_min_heap);
>
> while (event_heap.nr) {

> @@ -3741,39 +3799,67 @@ static int merge_sched_in(struct perf_ev
> return 0;
> }
>
> -static void
> -ctx_pinned_sched_in(struct perf_event_context *ctx,
> - struct perf_cpu_context *cpuctx)
> +static void ctx_pinned_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
> {
> + struct perf_event_pmu_context *pmu_ctx;
> int can_add_hw = 1;
>
> - if (ctx != &cpuctx->ctx)
> - cpuctx = NULL;
> -
> - visit_groups_merge(cpuctx, &ctx->pinned_groups,
> - smp_processor_id(),
> - merge_sched_in, &can_add_hw);
> + if (pmu) {
> + visit_groups_merge(ctx, &ctx->pinned_groups,
> + smp_processor_id(), pmu,
> + merge_sched_in, &can_add_hw);
> + } else {
> + /*
> + * XXX: This can be optimized for per-task context by calling
> + * visit_groups_merge() only once with:
> + * 1) pmu=NULL
> + * 2) Ignoring pmu in perf_event_groups_cmp() when it's NULL
> + * 3) Making can_add_hw a per-pmu variable
> + *
> + * Though, it cannot be optimized for per-cpu context because
> + * the per-cpu rb-tree consists of pmu-subtrees, and pmu-subtrees
> + * consist of cgroup-subtrees. i.e. cgroup events of the same
> + * cgroup but different pmus are separated out into respective
> + * pmu-subtrees.
> + */
> + list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
> + can_add_hw = 1;
> + visit_groups_merge(ctx, &ctx->pinned_groups,
> + smp_processor_id(), pmu_ctx->pmu,
> + merge_sched_in, &can_add_hw);
> + }
> + }
> }

I'm not sure I follow.. task context can have multiple PMUs just the
same as CPU context can, that's more or less the entire point of the
patch.