[PATCH 4/7] KVM: x86/pmu: Avoid perf_event creation for invalid counter config

From: Like Xu
Date: Fri Nov 12 2021 - 04:52:07 EST


From: Like Xu <likexu@xxxxxxxxxxx>

KVM needs to be fixed to avoid perf_event creation when the requested
hw event on a gp or fixed counter is marked as unavailable in the Intel
guest CPUID 0AH.EBX leaf.

It's proposed to use is_intel_cpuid_event() to distinguish whether the hw
event is an Intel pre-defined architecture event, so that we can decide to
reprogram it with PERF_TYPE_HARDWARE (for fixed and gp) or
PERF_TYPE_RAW (for gp only) perf_event, or just avoid creating perf_event.

If an Intel cpuid event is marked as unavailable in
pmu->available_event_types, intel_find_[fixed|arch]_event() returns
the new special value "PERF_COUNT_HW_MAX + 1" to tell the caller to
avoid creating a perf_event and not to fall back to PERF_TYPE_RAW for gp.

Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
---
arch/x86/kvm/pmu.c | 8 +++++++
arch/x86/kvm/vmx/pmu_intel.c | 45 +++++++++++++++++++++++++++++++-----
2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 7093fc70cd38..3b47bd92e7bb 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -111,6 +111,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
.config = config,
};

+ /*
+ * A PERF_TYPE_HARDWARE "config >= PERF_COUNT_HW_MAX" is not a valid
+ * generic event; it appears when the requested event is marked as
+ * unavailable in the Intel guest architectural event CPUID leaf.
+ */
+ if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
+ return;
+
attr.sample_period = get_sample_period(pmc, pmc->counter);

if (in_tx)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4c04e94ae548..4f58c14efa61 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -68,17 +68,39 @@ static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
reprogram_counter(pmu, bit);
}

+/* UMask and Event Select Encodings for Intel CPUID Events */
+static inline bool is_intel_cpuid_event(u8 event_select, u8 unit_mask)
+{
+ if ((!unit_mask && event_select == 0x3C) ||
+ (!unit_mask && event_select == 0xC0) ||
+ (unit_mask == 0x01 && event_select == 0x3C) ||
+ (unit_mask == 0x4F && event_select == 0x2E) ||
+ (unit_mask == 0x41 && event_select == 0x2E) ||
+ (!unit_mask && event_select == 0xC4) ||
+ (!unit_mask && event_select == 0xC5))
+ return true;
+
+ /* The unimplemented topdown.slots event check is skipped. */
+ return false;
+}
+
static unsigned intel_find_arch_event(struct kvm_pmu *pmu,
u8 event_select,
u8 unit_mask)
{
int i;

- for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
- if (intel_arch_events[i].eventsel == event_select &&
- intel_arch_events[i].unit_mask == unit_mask &&
- ((i > 6) || pmu->available_event_types & (1 << i)))
- break;
+ for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
+ if (intel_arch_events[i].eventsel != event_select ||
+ intel_arch_events[i].unit_mask != unit_mask)
+ continue;
+
+ if (is_intel_cpuid_event(event_select, unit_mask) &&
+ !(pmu->available_event_types & BIT_ULL(i)))
+ return PERF_COUNT_HW_MAX + 1;
+
+ break;
+ }

if (i == ARRAY_SIZE(intel_arch_events))
return PERF_COUNT_HW_MAX;
@@ -90,12 +112,23 @@ static unsigned int intel_find_fixed_event(struct kvm_pmu *pmu, int idx)
{
u32 event;
size_t size = ARRAY_SIZE(fixed_pmc_events);
+ u8 event_select, unit_mask;
+ unsigned int event_type;

if (idx >= size)
return PERF_COUNT_HW_MAX;

event = fixed_pmc_events[array_index_nospec(idx, size)];
- return intel_arch_events[event].event_type;
+
+ event_select = intel_arch_events[event].eventsel;
+ unit_mask = intel_arch_events[event].unit_mask;
+ event_type = intel_arch_events[event].event_type;
+
+ if (is_intel_cpuid_event(event_select, unit_mask) &&
+ !(pmu->available_event_types & BIT_ULL(event_type)))
+ return PERF_COUNT_HW_MAX + 1;
+
+ return event_type;
}

/* check if a PMC is enabled by comparing it with globl_ctrl bits. */
--
2.33.0