[PATCH 07/11] KVM: x86/pmu: Use PERF_TYPE_RAW to merge reprogram_{gp, fixed}counter()

From: Like Xu
Date: Mon Feb 21 2022 - 06:53:04 EST


From: Like Xu <likexu@xxxxxxxxxxx>

The code sketch for reprogram_{gp, fixed}_counter() is similar, while the
fixed counter using the PERF_TYPE_HARDWAR type and the gp being
able to use either PERF_TYPE_HARDWAR or PERF_TYPE_RAW type
depending on the pmc->eventsel value.

After 'commit 761875634a5e ("KVM: x86/pmu: Setup pmc->eventsel
for fixed PMCs")', the pmc->eventsel of the fixed counter will also have
been setup with the same semantic value and will not be changed during
the guest runtime. But essentially, "the HARDWARE is just a convenience
wrapper over RAW IIRC", quoated from Peterz. So it could be pretty safe
to use the PERF_TYPE_RAW type only to program both gp and fixed
counters naturally in the reprogram_counter().

To make the gp and fixed counters more semantically symmetrical,
the selection of EVENTSEL_{USER, OS, INT} bits is temporarily translated
via fixed_ctr_ctrl before the pmc_reprogram_counter() call.

Practically, this change drops the guest pmu support on the hosts without
X86_FEATURE_ARCH_PERFMON (the oldest Pentium 4), where the
PERF_TYPE_HARDWAR is intentionally introduced so that hosts can
map the architectural guest PMU events to their own.

Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
---
arch/x86/kvm/pmu.c | 106 +++++++++++------------------------
arch/x86/kvm/vmx/pmu_intel.c | 2 +-
2 files changed, 35 insertions(+), 73 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 5816af6b6494..edd51ec7711d 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -213,35 +213,44 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
return allow_event;
}

-static void reprogram_gp_counter(struct kvm_pmc *pmc)
+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
{
- u64 config;
- u32 type = PERF_TYPE_RAW;
- u64 eventsel = pmc->eventsel;
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);

- if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
- printk_once("kvm pmu: pin control bit is ignored\n");
+ if (pmc_is_fixed(pmc))
+ return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+ pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
+
+ return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
+}
+
+void reprogram_counter(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+ u64 eventsel = pmc->eventsel;
+ u8 fixed_ctr_ctrl;

pmc_pause_counter(pmc);

- if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
+ if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
return;

if (!check_pmu_event_filter(pmc))
return;

- if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
- ARCH_PERFMON_EVENTSEL_INV |
- ARCH_PERFMON_EVENTSEL_CMASK |
- HSW_IN_TX |
- HSW_IN_TX_CHECKPOINTED))) {
- config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
- if (config != PERF_COUNT_HW_MAX)
- type = PERF_TYPE_HARDWARE;
- }
+ if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
+ printk_once("kvm pmu: pin control bit is ignored\n");

- if (type == PERF_TYPE_RAW)
- config = eventsel & AMD64_RAW_EVENT_MASK;
+ if (pmc_is_fixed(pmc)) {
+ fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+ pmc->idx - INTEL_PMC_IDX_FIXED);
+ if (fixed_ctr_ctrl & 0x1)
+ eventsel |= ARCH_PERFMON_EVENTSEL_OS;
+ if (fixed_ctr_ctrl & 0x2)
+ eventsel |= ARCH_PERFMON_EVENTSEL_USR;
+ if (fixed_ctr_ctrl & 0x8)
+ eventsel |= ARCH_PERFMON_EVENTSEL_INT;
+ }

if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
return;
@@ -249,49 +258,13 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc)
pmc_release_perf_event(pmc);

pmc->current_config = eventsel;
- pmc_reprogram_counter(pmc, type, config,
- !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
- !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
- eventsel & ARCH_PERFMON_EVENTSEL_INT,
- (eventsel & HSW_IN_TX),
- (eventsel & HSW_IN_TX_CHECKPOINTED));
-}
-
-static void reprogram_fixed_counter(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- int idx = pmc->idx - INTEL_PMC_IDX_FIXED;
- u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);
- unsigned en_field = ctrl & 0x3;
- bool pmi = ctrl & 0x8;
-
- pmc_pause_counter(pmc);
-
- if (!en_field || !pmc_is_enabled(pmc))
- return;
-
- if (!check_pmu_event_filter(pmc))
- return;
-
- if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
- return;
-
- pmc_release_perf_event(pmc);
-
- pmc->current_config = (u64)ctrl;
- pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
- kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
- !(en_field & 0x2), /* exclude user */
- !(en_field & 0x1), /* exclude kernel */
- pmi, false, false);
-}
-
-void reprogram_counter(struct kvm_pmc *pmc)
-{
- if (pmc_is_gp(pmc))
- reprogram_gp_counter(pmc);
- else
- reprogram_fixed_counter(pmc);
+ pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+ (eventsel & AMD64_RAW_EVENT_MASK),
+ !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+ !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+ eventsel & ARCH_PERFMON_EVENTSEL_INT,
+ (eventsel & HSW_IN_TX),
+ (eventsel & HSW_IN_TX_CHECKPOINTED));
}
EXPORT_SYMBOL_GPL(reprogram_counter);

@@ -449,17 +422,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
kvm_pmu_refresh(vcpu);
}

-static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
- if (pmc_is_fixed(pmc))
- return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
- pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
-
- return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
-}
-
/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a69d2aeb7526..98a01f6a9d5d 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -492,7 +492,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->reserved_bits = 0xffffffff00200000ull;

entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
- if (!entry || !enable_pmu)
+ if (!entry || !enable_pmu || !boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
return;
eax.full = entry->eax;
edx.full = entry->edx;
--
2.35.0