[PATCH 07/10] KVM: x86/pmu: Snapshot event selectors that KVM emulates in software

From: Sean Christopherson
Date: Thu Nov 09 2023 - 21:29:26 EST


Snapshot the event selectors for the events that KVM emulates in software,
which is currently instructions retired and branch instructions retired.
The event selectors a tied to the underlying CPU, i.e. are constant for a
given platform even though perf doesn't manage the mappings as such.

Getting the event selectors from perf isn't exactly cheap, especially if
mitigations are enabled, as at least one indirect call is involved.

Snapshot the values in KVM instead of optimizing perf as working with the
raw event selectors will be required if KVM ever wants to emulate events
that aren't part of perf's uABI, i.e. that don't have an "enum perf_hw_id"
entry.

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/pmu.c | 17 ++++++++---------
arch/x86/kvm/pmu.h | 13 ++++++++++++-
arch/x86/kvm/vmx/nested.c | 2 +-
arch/x86/kvm/x86.c | 6 +++---
4 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 488d21024a92..45cb8b2a024b 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -29,6 +29,9 @@
struct x86_pmu_capability __read_mostly kvm_pmu_cap;
EXPORT_SYMBOL_GPL(kvm_pmu_cap);

+struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
+EXPORT_SYMBOL_GPL(kvm_pmu_eventsel);
+
/* Precise Distribution of Instructions Retired (PDIR) */
static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
@@ -809,13 +812,6 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
kvm_pmu_request_counter_reprogram(pmc);
}

-static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
- unsigned int perf_hw_id)
-{
- return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
- AMD64_RAW_EVENT_MASK_NB);
-}
-
static inline bool cpl_is_matched(struct kvm_pmc *pmc)
{
bool select_os, select_user;
@@ -835,7 +831,7 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
}

-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
{
DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -855,7 +851,10 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
continue;

/* Ignore checks for edge detect, pin control, invert and CMASK bits */
- if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
+ if ((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB)
+ continue;
+
+ if (cpl_is_matched(pmc))
kvm_pmu_incr_counter(pmc);
}
}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index cb62a4e44849..9dc5f549c98c 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -22,6 +22,11 @@

#define KVM_FIXED_PMC_BASE_IDX INTEL_PMC_IDX_FIXED

+struct kvm_pmu_emulated_event_selectors {
+ u64 INSTRUCTIONS_RETIRED;
+ u64 BRANCH_INSTRUCTIONS_RETIRED;
+};
+
struct kvm_pmu_ops {
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
unsigned int idx, u64 *mask);
@@ -171,6 +176,7 @@ static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
}

extern struct x86_pmu_capability kvm_pmu_cap;
+extern struct kvm_pmu_emulated_event_selectors kvm_pmu_eventsel;

static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
{
@@ -212,6 +218,11 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
pmu_ops->MAX_NR_GP_COUNTERS);
kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
KVM_PMC_MAX_FIXED);
+
+ kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
+ perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
+ kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
+ perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
}

static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
@@ -259,7 +270,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id);
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel);

bool is_vmware_backdoor_pmc(u32 pmc_idx);

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c5ec0ef51ff7..cf985085467b 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3564,7 +3564,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}

- kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+ kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);

if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
return nested_vmx_failInvalid(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index efbf52a9dc83..9d9b5f9e4b28 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8839,7 +8839,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
if (unlikely(!r))
return 0;

- kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
+ kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);

/*
* rflags is the old, "raw" value of the flags. The new value has
@@ -9152,9 +9152,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
*/
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
- kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
+ kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
if (ctxt->is_branch)
- kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+ kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
kvm_rip_write(vcpu, ctxt->eip);
if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
r = kvm_vcpu_do_singlestep(vcpu);
--
2.42.0.869.gea05f2083d-goog