RE: [PATCH v3 5/5] KVM/x86/lbr: lazy save the guest lbr stack

From: Gonglei (Arei)
Date: Thu Sep 20 2018 - 08:07:52 EST



> -----Original Message-----
> From: Wei Wang [mailto:wei.w.wang@xxxxxxxxx]
> Sent: Thursday, September 20, 2018 6:06 PM
> To: linux-kernel@xxxxxxxxxxxxxxx; kvm@xxxxxxxxxxxxxxx; pbonzini@xxxxxxxxxx;
> ak@xxxxxxxxxxxxxxx
> Cc: kan.liang@xxxxxxxxx; peterz@xxxxxxxxxxxxx; mingo@xxxxxxxxxx;
> rkrcmar@xxxxxxxxxx; like.xu@xxxxxxxxx; wei.w.wang@xxxxxxxxx;
> jannh@xxxxxxxxxx; Gonglei (Arei) <arei.gonglei@xxxxxxxxxx>
> Subject: [PATCH v3 5/5] KVM/x86/lbr: lazy save the guest lbr stack
>
> When the vCPU is scheduled in:
> - if the lbr feature was used in the last vCPU time slice, set the lbr
> stack to be interceptible, so that the host can capture whether the
> lbr feature will be used in this time slice;
> - if the lbr feature wasn't used in the last vCPU time slice, disable
> the vCPU support of the guest lbr switching.
>
> Upon the first access to one of the lbr related MSRs (since the vCPU was
> scheduled in):
> - record that the guest has used the lbr;
> - create a host perf event to help save/restore the guest lbr stack if
> the guest uses the user callstack mode lbr stack;
> - pass the stack through to the guest.
>
> Suggested-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> Signed-off-by: Like Xu <like.xu@xxxxxxxxx>
> Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> ---
> arch/x86/events/intel/lbr.c | 16 +++++
> arch/x86/include/asm/kvm_host.h | 4 ++
> arch/x86/include/asm/perf_event.h | 6 ++
> arch/x86/kvm/pmu.h | 5 ++
> arch/x86/kvm/vmx.c | 137
> ++++++++++++++++++++++++++++++++++++++
> 5 files changed, 168 insertions(+)
>
> diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
> index 915fcc3..a260015 100644
> --- a/arch/x86/events/intel/lbr.c
> +++ b/arch/x86/events/intel/lbr.c
> @@ -64,6 +64,7 @@ static const enum {
> #define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT)
>
> #define LBR_PLM (LBR_KERNEL | LBR_USER)
> +#define LBR_USER_CALLSTACK (LBR_CALL_STACK | LBR_USER)
>
> #define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */
> #define LBR_NOT_SUPP -1 /* LBR filter not supported */
> @@ -1283,6 +1284,21 @@ void intel_pmu_lbr_init_knl(void)
> }
>
> /**
> + * lbr_select_user_callstack - check if the user callstack mode is set
> + *
> + * @lbr_select: the lbr select msr
> + *
> + * Returns: true if the msr is configured to the user callstack mode.
> + * Otherwise, false.
> + *
> + */
> +bool lbr_select_user_callstack(u64 lbr_select)
> +{
> + return !!(lbr_select & LBR_USER_CALLSTACK);
> +}
> +EXPORT_SYMBOL_GPL(lbr_select_user_callstack);
> +
> +/**
> * perf_get_lbr_stack - get the lbr stack related MSRs
> *
> * @stack: the caller's memory to get the lbr stack
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index fdcac01..41b4d29 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -730,6 +730,10 @@ struct kvm_vcpu_arch {
>
> /* Flush the L1 Data cache for L1TF mitigation on VMENTER */
> bool l1tf_flush_l1d;
> + /* Indicate if the guest is using lbr with the user callstack mode */
> + bool lbr_user_callstack;
> + /* Indicate if the lbr msrs were accessed in this vCPU time slice */
> + bool lbr_used;
> };
>
> struct kvm_lpage_info {
> diff --git a/arch/x86/include/asm/perf_event.h
> b/arch/x86/include/asm/perf_event.h
> index e893a69..2d7ae55 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -277,6 +277,7 @@ struct perf_lbr_stack {
> unsigned long info;
> };
>
> +extern bool lbr_select_user_callstack(u64 msr_lbr_select);
> extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
> extern int perf_get_lbr_stack(struct perf_lbr_stack *stack);
> extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
> @@ -288,6 +289,11 @@ static inline struct perf_guest_switch_msr
> *perf_guest_get_msrs(int *nr)
> return NULL;
> }
>
> +static bool lbr_select_user_callstack(u64 msr_lbr_select)
> +{
> + return false;
> +}
> +
> static inline int perf_get_lbr_stack(struct perf_lbr_stack *stack)
> {
> return -1;
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index e872aed..94f0624 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -102,6 +102,11 @@ static inline struct kvm_pmc *get_fixed_pmc(struct
> kvm_pmu *pmu, u32 msr)
> return NULL;
> }
>
> +static inline bool intel_pmu_save_guest_lbr_enabled(struct kvm_vcpu *vcpu)
> +{
> + return !!vcpu_to_pmu(vcpu)->guest_lbr_event;
> +}
> +
> void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
> void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
> void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 92705b5..ae20563 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1282,6 +1282,9 @@ static bool
> nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
> static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
> static void __always_inline vmx_disable_intercept_for_msr(unsigned long
> *msr_bitmap,
> u32 msr, int type);
> +static void
> +__always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, u32
> msr,
> + int type, bool value);
>
> static DEFINE_PER_CPU(struct vmcs *, vmxarea);
> static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
> @@ -4056,6 +4059,120 @@ static int vmx_get_msr_feature(struct
> kvm_msr_entry *msr)
> return 0;
> }
>
> +static void vmx_set_intercept_for_lbr_msrs(struct kvm_vcpu *vcpu, bool set)
> +{
> + unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
> + struct perf_lbr_stack *stack = &vcpu->kvm->arch.lbr_stack;
> + int nr = stack->nr;
> + int i;
> +
> + vmx_set_intercept_for_msr(msr_bitmap, stack->tos, MSR_TYPE_RW,
> set);
> + for (i = 0; i < nr; i++) {
> + vmx_set_intercept_for_msr(msr_bitmap, stack->from + i,
> + MSR_TYPE_RW, set);
> + vmx_set_intercept_for_msr(msr_bitmap, stack->to + i,
> + MSR_TYPE_RW, set);
> + if (stack->info)
> + vmx_set_intercept_for_msr(msr_bitmap, stack->info + i,
> + MSR_TYPE_RW, set);
> + }
> +}
> +
> +static inline bool msr_is_lbr_stack(struct kvm_vcpu *vcpu, u32 index)
> +{
> + struct perf_lbr_stack *stack = &vcpu->kvm->arch.lbr_stack;
> + int nr = stack->nr;
> +
> + return !!(index == stack->tos ||
> + (index >= stack->from && index < stack->from + nr) ||
> + (index >= stack->to && index < stack->to + nr) ||
> + (index >= stack->info && index < stack->info));
> +}
> +
> +static bool guest_get_lbr_msr(struct kvm_vcpu *vcpu, struct msr_data
> *msr_info)
> +{
> + u32 index = msr_info->index;
> + bool ret = false;
> +
> + switch (index) {
> + case MSR_IA32_DEBUGCTLMSR:
> + msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> + ret = true;
> + break;
> + case MSR_LBR_SELECT:
> + ret = true;
> + rdmsrl(index, msr_info->data);
> + break;
> + default:
> + if (msr_is_lbr_stack(vcpu, index)) {
> + ret = true;
> + rdmsrl(index, msr_info->data);
> + }
> + }
> +
> + return ret;
> +}
> +
> +static bool guest_set_lbr_msr(struct kvm_vcpu *vcpu, struct msr_data
> *msr_info)
> +{
> + u32 index = msr_info->index;
> + u64 data = msr_info->data;
> + bool ret = false;
> +
> + switch (index) {
> + case MSR_IA32_DEBUGCTLMSR:
> + ret = true;
> + /*
> + * Currently, only FREEZE_LBRS_ON_PMI and DEBUGCTLMSR_LBR
> are
> + * supported.
> + */
> + data &= (DEBUGCTLMSR_FREEZE_LBRS_ON_PMI |
> DEBUGCTLMSR_LBR);
> + vmcs_write64(GUEST_IA32_DEBUGCTL, msr_info->data);
> + break;
> + case MSR_LBR_SELECT:
> + ret = true;
> + if (lbr_select_user_callstack(data))
> + vcpu->arch.lbr_user_callstack = true;
> + else
> + vcpu->arch.lbr_user_callstack = false;
> + wrmsrl(index, msr_info->data);
> + break;
> + default:
> + if (msr_is_lbr_stack(vcpu, index)) {
> + ret = true;
> + wrmsrl(index, msr_info->data);
> + }
> + }
> +
> + return ret;
> +}
> +
> +static bool guest_access_lbr_msr(struct kvm_vcpu *vcpu,
> + struct msr_data *msr_info,
> + bool set)
> +{
> + bool ret = false;
> +
> + if (!vcpu->kvm->arch.guest_lbr_enabled)
> + return false;
> +
> + if (set)
> + ret = guest_set_lbr_msr(vcpu, msr_info);
> + else
> + ret = guest_get_lbr_msr(vcpu, msr_info);
> +
> + if (ret) {
> + vcpu->arch.lbr_used = true;
> + vmx_set_intercept_for_lbr_msrs(vcpu, false);

You could use if (!vcpu->arch.lbr_used) as the condition for these assignments.
They are needed only once.

Thanks,
-Gonglei