[PATCH 3/3] KVM: x86/pmu: Segregate Intel and AMD specific logic

From: Ravi Bangoria
Date: Mon Feb 21 2022 - 02:33:04 EST


The HSW_IN_TX* bits are not supported on AMD, yet the generic code uses
them unconditionally. Worse, these bits overlap with AMD's
EventSelect[11:8], so relying on them in generic code results in
unintended PMU behavior on AMD. For example, if EventSelect[11:8] is
0x2, pmc_reprogram_counter() wrongly assumes that
HSW_IN_TX_CHECKPOINTED is set and thus forces the sampling period to 0.

Fixes: ca724305a2b0 ("KVM: x86/vPMU: Implement AMD vPMU code for KVM")
Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>
---
arch/x86/kvm/pmu.c | 66 +++++++++++++++++++++++-------------
arch/x86/kvm/pmu.h | 4 +--
arch/x86/kvm/svm/pmu.c | 6 +++-
arch/x86/kvm/vmx/pmu_intel.c | 4 +--
4 files changed, 51 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 4a70380f2287..b91dbede87b3 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -97,7 +97,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
u64 config, bool exclude_user,
bool exclude_kernel, bool intr,
- bool in_tx, bool in_tx_cp)
+ bool in_tx, bool in_tx_cp, bool is_intel)
{
struct perf_event *event;
struct perf_event_attr attr = {
@@ -116,16 +116,18 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,

attr.sample_period = get_sample_period(pmc, pmc->counter);

- if (in_tx)
- attr.config |= INTEL_HSW_IN_TX;
- if (in_tx_cp) {
- /*
- * INTEL_HSW_IN_TX_CHECKPOINTED is not supported with nonzero
- * period. Just clear the sample period so at least
- * allocating the counter doesn't fail.
- */
- attr.sample_period = 0;
- attr.config |= INTEL_HSW_IN_TX_CHECKPOINTED;
+ if (is_intel) {
+ if (in_tx)
+ attr.config |= INTEL_HSW_IN_TX;
+ if (in_tx_cp) {
+ /*
+ * INTEL_HSW_IN_TX_CHECKPOINTED is not supported with nonzero
+ * period. Just clear the sample period so at least
+ * allocating the counter doesn't fail.
+ */
+ attr.sample_period = 0;
+ attr.config |= INTEL_HSW_IN_TX_CHECKPOINTED;
+ }
}

event = perf_event_create_kernel_counter(&attr, -1, current,
@@ -179,13 +181,14 @@ static int cmp_u64(const void *a, const void *b)
return *(__u64 *)a - *(__u64 *)b;
}

-void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel, bool is_intel)
{
u64 config;
u32 type = PERF_TYPE_RAW;
struct kvm *kvm = pmc->vcpu->kvm;
struct kvm_pmu_event_filter *filter;
bool allow_event = true;
+ u64 eventsel_mask;

if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
printk_once("kvm pmu: pin control bit is ignored\n");
@@ -210,18 +213,31 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
if (!allow_event)
return;

- if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
- ARCH_PERFMON_EVENTSEL_INV |
- ARCH_PERFMON_EVENTSEL_CMASK |
- INTEL_HSW_IN_TX |
- INTEL_HSW_IN_TX_CHECKPOINTED))) {
+ eventsel_mask = ARCH_PERFMON_EVENTSEL_EDGE |
+ ARCH_PERFMON_EVENTSEL_INV |
+ ARCH_PERFMON_EVENTSEL_CMASK;
+ if (is_intel) {
+ eventsel_mask |= INTEL_HSW_IN_TX | INTEL_HSW_IN_TX_CHECKPOINTED;
+ } else {
+ /*
+ * None of the AMD generalized events has EventSelect[11:8]
+ * set so far.
+ */
+ eventsel_mask |= (0xFULL << 32);
+ }
+
+ if (!(eventsel & eventsel_mask)) {
config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
if (config != PERF_COUNT_HW_MAX)
type = PERF_TYPE_HARDWARE;
}

- if (type == PERF_TYPE_RAW)
- config = eventsel & AMD64_RAW_EVENT_MASK;
+ if (type == PERF_TYPE_RAW) {
+ if (is_intel)
+ config = eventsel & X86_RAW_EVENT_MASK;
+ else
+ config = eventsel & AMD64_RAW_EVENT_MASK;
+ }

if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
return;
@@ -234,11 +250,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
eventsel & ARCH_PERFMON_EVENTSEL_INT,
(eventsel & INTEL_HSW_IN_TX),
- (eventsel & INTEL_HSW_IN_TX_CHECKPOINTED));
+ (eventsel & INTEL_HSW_IN_TX_CHECKPOINTED),
+ is_intel);
}
EXPORT_SYMBOL_GPL(reprogram_gp_counter);

-void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
+void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx, bool is_intel)
{
unsigned en_field = ctrl & 0x3;
bool pmi = ctrl & 0x8;
@@ -270,24 +287,25 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
!(en_field & 0x2), /* exclude user */
!(en_field & 0x1), /* exclude kernel */
- pmi, false, false);
+ pmi, false, false, is_intel);
}
EXPORT_SYMBOL_GPL(reprogram_fixed_counter);

void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{
struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);
+ bool is_intel = !strncmp(kvm_x86_ops.name, "kvm_intel", 9);

if (!pmc)
return;

if (pmc_is_gp(pmc))
- reprogram_gp_counter(pmc, pmc->eventsel);
+ reprogram_gp_counter(pmc, pmc->eventsel, is_intel);
else {
int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);

- reprogram_fixed_counter(pmc, ctrl, idx);
+ reprogram_fixed_counter(pmc, ctrl, idx, is_intel);
}
}
EXPORT_SYMBOL_GPL(reprogram_counter);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7a7b8d5b775e..610a4cbf85a4 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -140,8 +140,8 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
return sample_period;
}

-void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
-void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
+void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel, bool is_intel);
+void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx, bool is_intel);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 5aa45f13b16d..9ad63e940883 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -140,6 +140,10 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,

static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
{
+ /*
+ * None of the AMD generalized events has EventSelect[11:8] set.
+ * Hence 8 bit event_select works for now.
+ */
u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
int i;
@@ -265,7 +269,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data == pmc->eventsel)
return 0;
if (!(data & pmu->reserved_bits)) {
- reprogram_gp_counter(pmc, data);
+ reprogram_gp_counter(pmc, data, false);
return 0;
}
}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 7c64792a9506..ba1fbd37f608 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -50,7 +50,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
continue;

__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
- reprogram_fixed_counter(pmc, new_ctrl, i);
+ reprogram_fixed_counter(pmc, new_ctrl, i, true);
}

pmu->fixed_ctr_ctrl = data;
@@ -444,7 +444,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data == pmc->eventsel)
return 0;
if (!(data & pmu->reserved_bits)) {
- reprogram_gp_counter(pmc, data);
+ reprogram_gp_counter(pmc, data, true);
return 0;
}
} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
--
2.27.0