[PATCH v1 4/8] KVM/x86/vPMU: support MSR switch on VMX transitions

From: Wei Wang
Date: Thu Nov 01 2018 - 06:36:36 EST


This patch adds support to the Intel vPMU to switch MSRs on VMX
transitions. Currently only one MSR (global ctrl) is switched; the
number can be increased on demand in the future (e.g. PEBS enable).
The old perf_guest_get_msrs() method exposed by the host perf
subsystem is removed accordingly.

Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
---
 arch/x86/events/intel/core.c      | 60 ---------------------------------------
 arch/x86/events/perf_event.h      |  6 ----
 arch/x86/include/asm/kvm_host.h   | 11 +++++++
 arch/x86/include/asm/perf_event.h | 12 --------
 arch/x86/kvm/pmu.h                |  2 ++
 arch/x86/kvm/pmu_intel.c          | 19 +++++++++++++
 arch/x86/kvm/vmx.c                |  6 ++--
7 files changed, 35 insertions(+), 81 deletions(-)
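
Note (not part of the commit message): the vmx.c hunk below only changes
how the MSR list is produced; the consuming loop that programs the VMCS
atomic switch lists is untouched. For context, here is a rough sketch of
the whole function after this patch, assuming the existing
add_atomic_switch_msr()/clear_atomic_switch_msr() helpers in vmx.c keep
their current signatures:

static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
{
	u32 i, nr_msrs;
	struct kvm_perf_switch_msr *msrs;

	msrs = intel_pmu_get_switch_msrs(&vmx->vcpu, &nr_msrs);
	if (!msrs)
		return;

	/* Entries with identical host/guest values need no VMCS switch. */
	for (i = 0; i < nr_msrs; i++)
		if (msrs[i].host == msrs[i].guest)
			clear_atomic_switch_msr(vmx, msrs[i].msr);
		else
			add_atomic_switch_msr(vmx, msrs[i].msr,
					      msrs[i].guest, msrs[i].host,
					      false);
}

Keeping matching host/guest values off the VMCS switch lists means the
atomic switch costs nothing while the guest is not using the PMU.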

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index f5e5191..33c156f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3160,15 +3160,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
return 0;
}

-struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
-{
- if (x86_pmu.guest_get_msrs)
- return x86_pmu.guest_get_msrs(nr);
- *nr = 0;
- return NULL;
-}
-EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
-
void x86_perf_register_pmi_callback(pmi_callback_t callback, void *opaque)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -3189,55 +3180,6 @@ void x86_perf_unregister_pmi_callback(void)
}
EXPORT_SYMBOL_GPL(x86_perf_unregister_pmi_callback);

-static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
-
- arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
- arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
- arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
- /*
- * If PMU counter has PEBS enabled it is not enough to disable counter
- * on a guest entry since PEBS memory write can overshoot guest entry
- * and corrupt guest memory. Disabling PEBS solves the problem.
- */
- arr[1].msr = MSR_IA32_PEBS_ENABLE;
- arr[1].host = cpuc->pebs_enabled;
- arr[1].guest = 0;
-
- *nr = 2;
- return arr;
-}
-
-static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
- int idx;
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct perf_event *event = cpuc->events[idx];
-
- arr[idx].msr = x86_pmu_config_addr(idx);
- arr[idx].host = arr[idx].guest = 0;
-
- if (!test_bit(idx, cpuc->active_mask))
- continue;
-
- arr[idx].host = arr[idx].guest =
- event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
-
- if (event->attr.exclude_host)
- arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- else if (event->attr.exclude_guest)
- arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- }
-
- *nr = x86_pmu.num_counters;
- return arr;
-}
-
static void core_pmu_enable_event(struct perf_event *event)
{
if (!event->attr.exclude_host)
@@ -3641,7 +3583,6 @@ static __initconst const struct x86_pmu core_pmu = {
.get_event_constraints = intel_get_event_constraints,
.put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
- .guest_get_msrs = core_guest_get_msrs,
.format_attrs = intel_arch_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,

@@ -3694,7 +3635,6 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
- .guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
};

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 06404eb..9f818d5 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -227,7 +227,6 @@ struct cpu_hw_events {
*/
u64 intel_ctrl_guest_mask;
u64 intel_ctrl_host_mask;
- struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];

/*
* Intel checkpoint mask
@@ -645,11 +644,6 @@ struct x86_pmu {
*/
struct extra_reg *extra_regs;
unsigned int flags;
-
- /*
- * Intel host/guest support (KVM)
- */
- struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
};

struct x86_perf_task_context {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8bc46d..b66d164 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -453,6 +453,16 @@ struct kvm_pmc {
struct kvm_vcpu *vcpu;
};

+/*
+ * The MSRs below are currently switched on VMX transitions:
+ * - MSR_CORE_PERF_GLOBAL_CTRL
+ */
+#define KVM_PERF_SWITCH_MSR_NUM 1
+struct kvm_perf_switch_msr {
+ unsigned int msr;
+ u64 host, guest;
+};
+
struct kvm_pmu {
unsigned nr_arch_gp_counters;
unsigned nr_arch_fixed_counters;
@@ -470,6 +480,7 @@ struct kvm_pmu {
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
struct irq_work irq_work;
u64 reprogram_pmi;
+ struct kvm_perf_switch_msr switch_msrs[KVM_PERF_SWITCH_MSR_NUM];
};

struct kvm_pmu_ops;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index fd33688..e9cff88 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -270,24 +270,12 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
); \
}

-struct perf_guest_switch_msr {
- unsigned msr;
- u64 host, guest;
-};
-
typedef void (*pmi_callback_t)(void *opaque, u64 status);

-extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
extern int x86_perf_rdpmc_index(struct perf_event *event);
#else
-static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
-{
- *nr = 0;
- return NULL;
-}
-
static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
memset(cap, 0, sizeof(*cap));
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index ba8898e..b3b0238 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -119,6 +119,8 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);

bool is_vmware_backdoor_pmc(u32 pmc_idx);
+struct kvm_perf_switch_msr *intel_pmu_get_switch_msrs(struct kvm_vcpu *vcpu,
+ u32 *nr_msrs);

extern struct kvm_pmu_ops intel_pmu_ops;
extern struct kvm_pmu_ops amd_pmu_ops;
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 8c2d37f..2be31ad 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -501,6 +501,25 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
pmu->assigned_pmc_bitmap = 0;
}

+struct kvm_perf_switch_msr *intel_pmu_get_switch_msrs(struct kvm_vcpu *vcpu,
+ u32 *nr_msrs)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_perf_switch_msr *arr = pmu->switch_msrs;
+
+ arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, arr[0].host);
+ arr[0].host &= ~pmu->assigned_pmc_bitmap;
+ /*
+ * The guest value is written to the hardware MSR on guest entry;
+ * bits of PMCs that are not assigned to the guest remain cleared.
+ */
+ arr[0].guest = pmu->global_ctrl & pmu->assigned_pmc_bitmap;
+ *nr_msrs = KVM_PERF_SWITCH_MSR_NUM;
+
+ return arr;
+}
+
struct kvm_pmu_ops intel_pmu_ops = {
.find_arch_event = intel_find_arch_event,
.find_fixed_event = intel_find_fixed_event,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4555077..714d1f3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11073,10 +11073,10 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu)

static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
{
- int i, nr_msrs;
- struct perf_guest_switch_msr *msrs;
+ u32 i, nr_msrs;
+ struct kvm_perf_switch_msr *msrs;

- msrs = perf_guest_get_msrs(&nr_msrs);
+ msrs = intel_pmu_get_switch_msrs(&vmx->vcpu, &nr_msrs);

if (!msrs)
return;
--
2.7.4