Re: [PATCH v3] Enable MSR-BASED TPR shadow even if APICv is inactive

From: Wanpeng Li
Date: Tue Sep 20 2016 - 22:38:58 EST


The subject should be "KVM: VMX: Enable MSR-BASED TPR shadow even if
APICv is inactive"
2016-09-21 10:14 GMT+08:00 Wanpeng Li <kernellwp@xxxxxxxxx>:
> From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
>
> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
> completely if w/o APICv, and the author also told me that windows guest
> can't enter into x2apic mode when he developed the APICv feature several
> years ago. However, it is not truth currently, Interrupt Remapping and
> vIOMMU is added to qemu and the developers from Intel test windows 8 can
> work in x2apic mode w/ Interrupt Remapping enabled recently.
>
> This patch enables TPR shadow for virtual x2apic mode to boost
> windows guest in x2apic mode even if w/o APICv.
>
> Can pass the kvm-unit-test.
>
> Suggested-by: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
> Suggested-by: Wincy Van <fanwenyi0529@xxxxxxxxx>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
> Cc: Wincy Van <fanwenyi0529@xxxxxxxxx>
> Cc: Yang Zhang <yang.zhang.wz@xxxxxxxxx>
> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> ---
> v2 -> v3:
> * introduce a new set of bitmaps and assign them in vmx_set_msr_bitmap()
> v1 -> v2:
> * leverage the cached msr bitmap
>
> arch/x86/kvm/vmx.c | 133 ++++++++++++++++++++++++++++++++++++++---------------
> 1 file changed, 95 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5cede40..4f3042c 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -927,6 +927,8 @@ static unsigned long *vmx_msr_bitmap_legacy;
> static unsigned long *vmx_msr_bitmap_longmode;
> static unsigned long *vmx_msr_bitmap_legacy_x2apic;
> static unsigned long *vmx_msr_bitmap_longmode_x2apic;
> +static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
> +static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
> static unsigned long *vmx_vmread_bitmap;
> static unsigned long *vmx_vmwrite_bitmap;
>
> @@ -2518,10 +2520,18 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
> else if (cpu_has_secondary_exec_ctrls() &&
> (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
> SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
> - if (is_long_mode(vcpu))
> - msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
> - else
> - msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
> + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
> + if (is_long_mode(vcpu))
> + msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
> + else
> + msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
> + } else if ((enable_apicv && !kvm_vcpu_apicv_active(vcpu)) ||
> + !enable_apicv) {
> + if (is_long_mode(vcpu))
> + msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
> + else
> + msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
> + }
> } else {
> if (is_long_mode(vcpu))
> msr_bitmap = vmx_msr_bitmap_longmode;
> @@ -4678,28 +4688,49 @@ static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> msr, MSR_TYPE_R | MSR_TYPE_W);
> }
>
> -static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> +static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
> {
> - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_R);
> - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_R);
> + if (apicv_active) {
> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_R);
> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_R);
> + } else {
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
> + msr, MSR_TYPE_R);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
> + msr, MSR_TYPE_R);
> + }
> }
>
> -static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
> {
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_R);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_R);
> + if (apicv_active) {
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_R);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_R);
> + } else {
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
> + msr, MSR_TYPE_R);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
> + msr, MSR_TYPE_R);
> + }
> }
>
> -static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
> {
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_W);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_W);
> + if (apicv_active) {
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_W);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_W);
> + } else {
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
> + msr, MSR_TYPE_W);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
> + msr, MSR_TYPE_W);
> + }
> }
>
> static bool vmx_get_enable_apicv(void)
> @@ -6360,22 +6391,32 @@ static __init int hardware_setup(void)
> if (!vmx_msr_bitmap_legacy_x2apic)
> goto out2;
>
> + vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
> + (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
> + goto out3;
> +
> vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> if (!vmx_msr_bitmap_longmode)
> - goto out3;
> + goto out4;
>
> vmx_msr_bitmap_longmode_x2apic =
> (unsigned long *)__get_free_page(GFP_KERNEL);
> if (!vmx_msr_bitmap_longmode_x2apic)
> - goto out4;
> + goto out5;
> +
> + vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
> + (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
> + goto out6;
>
> vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> if (!vmx_vmread_bitmap)
> - goto out6;
> + goto out7;
>
> vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> if (!vmx_vmwrite_bitmap)
> - goto out7;
> + goto out8;
>
> memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> @@ -6394,7 +6435,7 @@ static __init int hardware_setup(void)
>
> if (setup_vmcs_config(&vmcs_config) < 0) {
> r = -EIO;
> - goto out8;
> + goto out9;
> }
>
> if (boot_cpu_has(X86_FEATURE_NX))
> @@ -6461,20 +6502,35 @@ static __init int hardware_setup(void)
> vmx_msr_bitmap_legacy, PAGE_SIZE);
> memcpy(vmx_msr_bitmap_longmode_x2apic,
> vmx_msr_bitmap_longmode, PAGE_SIZE);
> + memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
> + vmx_msr_bitmap_legacy, PAGE_SIZE);
> + memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
> + vmx_msr_bitmap_longmode, PAGE_SIZE);
>
> set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
>
> + /*
> + * enable_apicv && kvm_vcpu_apicv_active()
> + */
> for (msr = 0x800; msr <= 0x8ff; msr++)
> - vmx_disable_intercept_msr_read_x2apic(msr);
> + vmx_disable_intercept_msr_read_x2apic(msr, true);
>
> /* TMCCT */
> - vmx_enable_intercept_msr_read_x2apic(0x839);
> + vmx_enable_intercept_msr_read_x2apic(0x839, true);
> /* TPR */
> - vmx_disable_intercept_msr_write_x2apic(0x808);
> + vmx_disable_intercept_msr_write_x2apic(0x808, true);
> /* EOI */
> - vmx_disable_intercept_msr_write_x2apic(0x80b);
> + vmx_disable_intercept_msr_write_x2apic(0x80b, true);
> /* SELF-IPI */
> - vmx_disable_intercept_msr_write_x2apic(0x83f);
> + vmx_disable_intercept_msr_write_x2apic(0x83f, true);
> +
> + /*
> + * (enable_apicv && !kvm_vcpu_apicv_active()) ||
> + * !enable_apicv
> + */
> + /* TPR */
> + vmx_disable_intercept_msr_read_x2apic(0x808, false);
> + vmx_disable_intercept_msr_write_x2apic(0x808, false);
>
> if (enable_ept) {
> kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
> @@ -6521,14 +6577,18 @@ static __init int hardware_setup(void)
>
> return alloc_kvm_area();
>
> -out8:
> +out9:
> free_page((unsigned long)vmx_vmwrite_bitmap);
> -out7:
> +out8:
> free_page((unsigned long)vmx_vmread_bitmap);
> +out7:
> + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
> out6:
> free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> -out4:
> +out5:
> free_page((unsigned long)vmx_msr_bitmap_longmode);
> +out4:
> + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
> out3:
> free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> out2:
> @@ -6544,7 +6604,9 @@ out:
> static __exit void hardware_unsetup(void)
> {
> free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
> free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
> free_page((unsigned long)vmx_msr_bitmap_legacy);
> free_page((unsigned long)vmx_msr_bitmap_longmode);
> free_page((unsigned long)vmx_io_bitmap_b);
> @@ -8435,12 +8497,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
> return;
> }
>
> - /*
> - * There is not point to enable virtualize x2apic without enable
> - * apicv
> - */
> - if (!cpu_has_vmx_virtualize_x2apic_mode() ||
> - !kvm_vcpu_apicv_active(vcpu))
> + if (!cpu_has_vmx_virtualize_x2apic_mode())
> return;
>
> if (!cpu_need_tpr_shadow(vcpu))
> --
> 1.9.1
>