[RFC PATCH 35/41] KVM: x86/pmu: Allow writing to event selector for GP counters if event is allowed

From: Xiong Zhang
Date: Fri Jan 26 2024 - 04:46:27 EST


From: Mingwei Zhang <mizhang@xxxxxxxxxx>

Only allow writing to the event selector if the event is allowed by the
filter. Since the passthrough PMU implementation does the PMU context switch
at the VM Enter/Exit boundary, even if the event selector value passes the
check, it cannot be written directly to HW because the PMU HW is owned by
the host PMU at that moment. Because of that, introduce eventsel_hw to cache
the value, which will be written to HW just before VM entry.

Note that regardless of whether an event value is allowed, the value will
be cached in pmc->eventsel and the guest VM can always read the cached value
back. This behavior is consistent with the CPU hardware design.

Signed-off-by: Xiong Zhang <xiong.y.zhang@xxxxxxxxx>
Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/vmx/pmu_intel.c | 18 ++++++++++++++----
2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ede45c923089..fd1c69371dbf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -503,6 +503,7 @@ struct kvm_pmc {
u64 counter;
u64 prev_counter;
u64 eventsel;
+ u64 eventsel_hw;
struct perf_event *perf_event;
struct kvm_vcpu *vcpu;
/*
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 621922005184..92c5baed8d36 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -458,7 +458,18 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & reserved_bits)
return 1;

- if (data != pmc->eventsel) {
+ if (is_passthrough_pmu_enabled(vcpu)) {
+ pmc->eventsel = data;
+ if (!check_pmu_event_filter(pmc)) {
+ /* When the guest requests an event that the
+ * filter rejects, stop the counter by clearing
+ * the cached hardware event selector value.
+ */
+ pmc->eventsel_hw = 0;
+ return 0;
+ }
+ pmc->eventsel_hw = data;
+ } else if (data != pmc->eventsel) {
pmc->eventsel = data;
kvm_pmu_request_counter_reprogram(pmc);
}
@@ -843,13 +854,12 @@ static void intel_save_pmu_context(struct kvm_vcpu *vcpu)
for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
pmc = &pmu->gp_counters[i];
rdpmcl(i, pmc->counter);
- rdmsrl(i + MSR_ARCH_PERFMON_EVENTSEL0, pmc->eventsel);
/*
* Clear hardware PERFMON_EVENTSELx and its counter to avoid
* leakage and also avoid this guest GP counter get accidentally
* enabled during host running when host enable global ctrl.
*/
- if (pmc->eventsel)
+ if (pmc->eventsel_hw)
wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0);
if (pmc->counter)
wrmsrl(MSR_IA32_PMC0 + i, 0);
@@ -894,7 +904,7 @@ static void intel_restore_pmu_context(struct kvm_vcpu *vcpu)
for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
pmc = &pmu->gp_counters[i];
wrmsrl(MSR_IA32_PMC0 + i, pmc->counter);
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, pmc->eventsel);
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, pmc->eventsel_hw);
}

/*
--
2.34.1