Re: [PATCH V12 09/10] arm64/perf: Implement branch records save on task sched out

From: Mark Rutland
Date: Wed Jun 21 2023 - 09:16:25 EST


On Thu, Jun 15, 2023 at 07:02:38PM +0530, Anshuman Khandual wrote:
> This modifies the current armv8pmu_sched_task() to implement a branch record
> save mechanism via armv8pmu_branch_save() when a task is scheduled out of a
> CPU. BRBE is paused and disabled for all exception levels before the branch
> records are captured; these are then concatenated with all existing stored
> records present in the task context, maintaining contiguity. The final length
> of the concatenated buffer does not, however, exceed the implemented BRBE
> length.
>
> Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
> Cc: Will Deacon <will@xxxxxxxxxx>
> Cc: Mark Rutland <mark.rutland@xxxxxxx>
> Cc: linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> Tested-by: James Clark <james.clark@xxxxxxx>
> Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>

Acked-by: Mark Rutland <mark.rutland@xxxxxxx>

Mark.

> ---
> arch/arm64/include/asm/perf_event.h | 2 ++
> drivers/perf/arm_brbe.c | 30 +++++++++++++++++++++++++++++
> drivers/perf/arm_pmuv3.c | 14 ++++++++++++--
> 3 files changed, 44 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
> index b0c12a5882df..36e7dfb466a6 100644
> --- a/arch/arm64/include/asm/perf_event.h
> +++ b/arch/arm64/include/asm/perf_event.h
> @@ -40,6 +40,7 @@ void armv8pmu_branch_probe(struct arm_pmu *arm_pmu);
> void armv8pmu_branch_reset(void);
> int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu);
> void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu);
> +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx);
> #else
> static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
> {
> @@ -66,6 +67,7 @@ static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { }
> static inline void armv8pmu_branch_reset(void) { }
> static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) { return 0; }
> static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) { }
> +static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) { }
> #endif
> #endif
> #endif
> diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c
> index f6693699fade..3bb17ced2b1d 100644
> --- a/drivers/perf/arm_brbe.c
> +++ b/drivers/perf/arm_brbe.c
> @@ -171,6 +171,36 @@ static int stitch_stored_live_entries(struct brbe_regset *stored,
> return min(nr_live + nr_stored, nr_max);
> }
>
> +static int brbe_branch_save(int nr_hw_entries, struct brbe_regset *live)
> +{
> + u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
> + int nr_live;
> +
> + write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
> + isb();
> +
> + nr_live = capture_brbe_regset(nr_hw_entries, live);
> +
> + write_sysreg_s(brbfcr & ~BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
> + isb();
> +
> + return nr_live;
> +}
> +
> +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx)
> +{
> + struct arm64_perf_task_context *task_ctx = ctx;
> + struct brbe_regset live[BRBE_MAX_ENTRIES];
> + int nr_live, nr_store, nr_hw_entries;
> +
> + nr_hw_entries = brbe_get_numrec(arm_pmu->reg_brbidr);
> + nr_live = brbe_branch_save(nr_hw_entries, live);
> + nr_store = task_ctx->nr_brbe_records;
> + nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store,
> + nr_live, nr_hw_entries);
> + task_ctx->nr_brbe_records = nr_store;
> +}
> +
> /*
> * Generic perf branch filters supported on BRBE
> *
> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
> index 3c079051a63a..53f404618891 100644
> --- a/drivers/perf/arm_pmuv3.c
> +++ b/drivers/perf/arm_pmuv3.c
> @@ -907,9 +907,19 @@ static int armv8pmu_user_event_idx(struct perf_event *event)
> static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
> {
> struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
> + void *task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL;
>
> - if (sched_in && armpmu->has_branch_stack)
> - armv8pmu_branch_reset();
> + if (armpmu->has_branch_stack) {
> + /* Save branch records in task_ctx on sched out */
> + if (task_ctx && !sched_in) {
> + armv8pmu_branch_save(armpmu, task_ctx);
> + return;
> + }
> +
> + /* Reset branch records on sched in */
> + if (sched_in)
> + armv8pmu_branch_reset();
> + }
> }
>
> /*
> --
> 2.25.1
>