[PATCH v12 06/11] KVM: vmx/pmu: Expose LBR to guest via MSR_IA32_PERF_CAPABILITIES

From: Like Xu
Date: Sat Jun 13 2020 - 04:12:49 EST


Bits [5:0] of the read-only MSR_IA32_PERF_CAPABILITIES tell about the
format of the records stored in the LBR stack. Userspace can expose LBR
to a guest only when the host supports LBR, the guest's vcpu model is
compatible with the host's, and the LBR format value written to the
guest's MSR_IA32_PERF_CAPABILITIES exactly matches the format supported
by the host.

Signed-off-by: Like Xu <like.xu@xxxxxxxxxxxxxxx>
---
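For reference, here is a minimal userspace sketch of how a VMM could
enable guest LBR on top of this interface. It assumes a kvm fd that
reports KVM_CAP_GET_MSR_FEATURES and a vcpu whose CPUID already
advertises X86_FEATURE_PDCM with a model matching the host's;
enable_guest_lbr() and the two fd parameters are illustrative names,
not part of this patch:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

#define MSR_IA32_PERF_CAPABILITIES 0x345
#define PMU_CAP_LBR_FMT 0x3f

/*
 * Read the MSR_IA32_PERF_CAPABILITIES value that KVM supports (which
 * carries the host's LBR format after this patch) and write the format
 * bits back to the vcpu. KVM rejects the write unless the format bits
 * exactly match the host's.
 */
static int enable_guest_lbr(int kvm_fd, int vcpu_fd)
{
	struct {
		struct kvm_msrs header;
		struct kvm_msr_entry entry;
	} msrs;

	memset(&msrs, 0, sizeof(msrs));
	msrs.header.nmsrs = 1;
	msrs.entry.index = MSR_IA32_PERF_CAPABILITIES;

	/* Feature MSRs are queried with KVM_GET_MSRS on the system fd. */
	if (ioctl(kvm_fd, KVM_GET_MSRS, &msrs) != 1)
		return -1;

	/* Keep only the LBR format bits for this example. */
	msrs.entry.data &= PMU_CAP_LBR_FMT;

	/* Fails on a mismatched format or an incompatible vcpu model. */
	return ioctl(vcpu_fd, KVM_SET_MSRS, &msrs) == 1 ? 0 : -1;
}

Masking the returned value to PMU_CAP_LBR_FMT mirrors the check added
to intel_pmu_set_msr() below: the format bits either match
vmx_get_perf_capabilities() or the write is refused.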
arch/x86/kvm/vmx/capabilities.h | 11 ++++++-
arch/x86/kvm/vmx/pmu_intel.c    | 52 +++++++++++++++++++++++++++++++--
arch/x86/kvm/vmx/vmx.h          |  6 ++++
3 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 4bbd8b448d22..b633a90320ee 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -19,6 +19,7 @@ extern int __read_mostly pt_mode;
#define PT_MODE_HOST_GUEST 1

#define PMU_CAP_FW_WRITES (1ULL << 13)
+#define PMU_CAP_LBR_FMT 0x3f

struct nested_vmx_msrs {
/*
@@ -375,7 +376,15 @@ static inline u64 vmx_get_perf_capabilities(void)
* Since counters are virtualized, KVM would support full
* width counting unconditionally, even if the host lacks it.
*/
- return PMU_CAP_FW_WRITES;
+ u64 perf_cap = PMU_CAP_FW_WRITES;
+ u64 host_perf_cap = 0;
+
+ if (boot_cpu_has(X86_FEATURE_PDCM))
+ rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
+
+ perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+
+ return perf_cap;
}

#endif /* __KVM_X86_VMX_CAPS_H */
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index bdcce65c7a1d..a953c7d633f6 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -168,6 +168,13 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
}

+static inline bool lbr_is_enabled(struct kvm_vcpu *vcpu)
+{
+ struct x86_pmu_lbr *lbr = &to_vmx(vcpu)->lbr_desc.lbr;
+
+ return lbr->nr && (vcpu->arch.perf_capabilities & PMU_CAP_LBR_FMT);
+}
+
static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -251,6 +258,30 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
}

+static inline bool lbr_fmt_is_matched(u64 data)
+{
+ return (data & PMU_CAP_LBR_FMT) ==
+ (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT);
+}
+
+static inline bool lbr_is_compatible(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+ if (pmu->version < 2)
+ return false;
+
+ /*
+ * As a first step, a guest can only enable the LBR feature if its
+ * CPU model is the same as the host's, because the LBR registers
+ * are passed through to the guest and they are model specific.
+ */
+ if (boot_cpu_data.x86_model != guest_cpuid_model(vcpu))
+ return false;
+
+ return true;
+}
+
static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -295,6 +326,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ?
(data & ~vmx_get_perf_capabilities()) : data)
return 1;
+ if (data & PMU_CAP_LBR_FMT) {
+ if (!lbr_fmt_is_matched(data))
+ return 1;
+ if (!lbr_is_compatible(vcpu))
+ return 1;
+ if (x86_perf_get_lbr(&to_vmx(vcpu)->lbr_desc.lbr))
+ return 1;
+ }
vcpu->arch.perf_capabilities = data;
return 0;
default:
@@ -337,6 +376,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
struct kvm_cpuid_entry2 *entry;
union cpuid10_eax eax;
union cpuid10_edx edx;
+ struct lbr_desc *lbr_desc = &to_vmx(vcpu)->lbr_desc;

pmu->nr_arch_gp_counters = 0;
pmu->nr_arch_fixed_counters = 0;
@@ -344,7 +384,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
- vcpu->arch.perf_capabilities = 0;

entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
if (!entry)
@@ -357,8 +396,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
return;

perf_get_x86_pmu_capability(&x86_pmu);
- if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
- vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();

pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
x86_pmu.num_counters_gp);
@@ -397,6 +434,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
bitmap_set(pmu->all_valid_pmc_idx,
INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);

+ if ((vcpu->arch.perf_capabilities & PMU_CAP_LBR_FMT) &&
+ x86_perf_get_lbr(&lbr_desc->lbr))
+ vcpu->arch.perf_capabilities &= ~PMU_CAP_LBR_FMT;
+
nested_vmx_pmu_entry_exit_ctls_update(vcpu);
}

@@ -404,6 +445,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
{
int i;
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct lbr_desc *lbr_desc = &to_vmx(vcpu)->lbr_desc;

for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
pmu->gp_counters[i].type = KVM_PMC_GP;
@@ -418,6 +460,10 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
pmu->fixed_counters[i].current_config = 0;
}
+
+ vcpu->arch.perf_capabilities = guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ?
+ vmx_get_perf_capabilities() : 0;
+ lbr_desc->lbr.nr = 0;
}

static void intel_pmu_reset(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 8a83b5edc820..ef24338b194d 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -91,6 +91,11 @@ struct pt_desc {
struct pt_ctx guest;
};

+struct lbr_desc {
+ /* Basic information about LBR records. */
+ struct x86_pmu_lbr lbr;
+};
+
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -302,6 +307,7 @@ struct vcpu_vmx {
u64 ept_pointer;

struct pt_desc pt_desc;
+ struct lbr_desc lbr_desc;
};

enum ept_pointers_status {
--
2.21.3