[tip: perf/urgent] perf/x86/amd/core: Fix overflow reset on hotplug

From: tip-bot2 for Sandipan Das
Date: Fri Sep 22 2023 - 06:14:47 EST


The following commit has been merged into the perf/urgent branch of tip:

Commit-ID: 23d2626b841c2adccdeb477665313c02dff02dc3
Gitweb: https://git.kernel.org/tip/23d2626b841c2adccdeb477665313c02dff02dc3
Author: Sandipan Das <sandipan.das@xxxxxxx>
AuthorDate: Thu, 14 Sep 2023 19:36:04 +05:30
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Fri, 22 Sep 2023 12:05:14 +02:00

perf/x86/amd/core: Fix overflow reset on hotplug

Kernels older than v5.19 do not support PerfMonV2 and the PMI handler
does not clear the overflow bits of the PerfCntrGlobalStatus register.
Because of this, loading a recent kernel using kexec from an older
kernel can result in inconsistent register states on Zen 4 systems.

The PMI handler of the new kernel gets confused and shows a warning when
an overflow occurs because some of the overflow bits are set even if the
corresponding counters are inactive. These are remnants from overflows
that were handled by the older kernel.

During CPU hotplug, the PerfCntrGlobalCtl and PerfCntrGlobalStatus
registers should always be cleared for PerfMonV2-capable processors.
However, a condition used for NB event constaints applicable only to
older processors currently prevents this from happening. Move the reset
sequence to an appropriate place and also clear the LBR Freeze bit.

Fixes: 21d59e3e2c40 ("perf/x86/amd/core: Detect PerfMonV2 support")
Signed-off-by: Sandipan Das <sandipan.das@xxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Link: https://lore.kernel.org/r/882a87511af40792ba69bb0e9026f19a2e71e8a3.1694696888.git.sandipan.das@xxxxxxx
---
arch/x86/events/amd/core.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index abadd5f..ed626bf 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -534,8 +534,12 @@ static void amd_pmu_cpu_reset(int cpu)
/* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);

- /* Clear overflow bits i.e. PerfCntrGLobalStatus.PerfCntrOvfl */
- wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask);
+ /*
+ * Clear freeze and overflow bits i.e. PerfCntrGLobalStatus.LbrFreeze
+ * and PerfCntrGLobalStatus.PerfCntrOvfl
+ */
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
+ GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask);
}

static int amd_pmu_cpu_prepare(int cpu)
@@ -570,6 +574,7 @@ static void amd_pmu_cpu_starting(int cpu)
int i, nb_id;

cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
+ amd_pmu_cpu_reset(cpu);

if (!x86_pmu.amd_nb_constraints)
return;
@@ -591,8 +596,6 @@ static void amd_pmu_cpu_starting(int cpu)

cpuc->amd_nb->nb_id = nb_id;
cpuc->amd_nb->refcnt++;
-
- amd_pmu_cpu_reset(cpu);
}

static void amd_pmu_cpu_dead(int cpu)
@@ -601,6 +604,7 @@ static void amd_pmu_cpu_dead(int cpu)

kfree(cpuhw->lbr_sel);
cpuhw->lbr_sel = NULL;
+ amd_pmu_cpu_reset(cpu);

if (!x86_pmu.amd_nb_constraints)
return;
@@ -613,8 +617,6 @@ static void amd_pmu_cpu_dead(int cpu)

cpuhw->amd_nb = NULL;
}
-
- amd_pmu_cpu_reset(cpu);
}

static inline void amd_pmu_set_global_ctl(u64 ctl)