Re: [PATCH] KVM: VMX: remove LFENCE in vmx_spec_ctrl_restore_host()

From: Josh Poimboeuf
Date: Mon Jun 05 2023 - 16:01:29 EST


On Mon, Jun 05, 2023 at 11:31:34AM -0700, Sean Christopherson wrote:
> Is there an actual bug here, or are we just micro-optimizing something that may or
> may not need additional optimization? Unless there's a bug to be fixed, moving
> code into ASM and increasing complexity doesn't seem worthwhile.

I can't really argue with that. FWIW, here's the (completely untested)
patch.

---8<---

From: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Subject: [PATCH] KVM: VMX: Convert vmx_spec_ctrl_restore_host() to assembly

Convert vmx_spec_ctrl_restore_host() to assembly. This allows the
removal of a redundant LFENCE: the barrier_nospec() at the end of the C
version is only needed when the RSB-filling sequence after vmexit
doesn't already do an LFENCE, so the asm version only emits it when
neither X86_FEATURE_RSB_VMEXIT nor X86_FEATURE_RSB_VMEXIT_LITE is set.
It also "feels" safer as it doesn't give the compiler the chance to
insert any surprises. And it's more symmetrical with the pre-vmentry
SPEC_CTRL handling, which is also done in assembly.

Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
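For reference, the vmexit-path asm added below is intended to be roughly
equivalent to the following C-level sketch. It's untested and not meant
to compile on its own; it only mirrors the alternatives in the asm,
using no helpers or symbols beyond those already in the patch, so it can
be compared side by side with the removed vmx_spec_ctrl_restore_host():

        if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) {
                if (flags & VMX_RUN_SAVE_SPEC_CTRL)
                        vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);

                /*
                 * For legacy IBRS, always write the host value after
                 * leaving the less privileged (guest) predictor mode.
                 */
                if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
                    vmx->spec_ctrl != this_cpu_read(x86_spec_ctrl_current))
                        native_wrmsrl(MSR_IA32_SPEC_CTRL,
                                      this_cpu_read(x86_spec_ctrl_current));

                /*
                 * The LFENCE is only emitted when the FILL_RETURN_BUFFER
                 * that follows is patched out and thus won't provide one.
                 */
                if (!cpu_feature_enabled(X86_FEATURE_RSB_VMEXIT) &&
                    !cpu_feature_enabled(X86_FEATURE_RSB_VMEXIT_LITE))
                        barrier_nospec();
        }

The only intended behavioral difference from the removed C function is
that the trailing LFENCE becomes conditional.
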
arch/x86/kvm/vmx/vmenter.S | 71 ++++++++++++++++++++++++++++++++------
arch/x86/kvm/vmx/vmx.c     | 25 --------------
arch/x86/kvm/vmx/vmx.h     |  1 -
3 files changed, 61 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 631fd7da2bc3..977f3487469c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -108,7 +108,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
lea (%_ASM_SP), %_ASM_ARG2
call vmx_update_host_rsp

- ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
+ ALTERNATIVE "jmp .Lguest_spec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL

/*
* SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
@@ -122,13 +122,13 @@ SYM_FUNC_START(__vmx_vcpu_run)
movl VMX_spec_ctrl(%_ASM_DI), %edi
movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
cmp %edi, %esi
- je .Lspec_ctrl_done
+ je .Lguest_spec_ctrl_done
mov $MSR_IA32_SPEC_CTRL, %ecx
xor %edx, %edx
mov %edi, %eax
wrmsr

-.Lspec_ctrl_done:
+.Lguest_spec_ctrl_done:

/*
* Since vmentry is serializing on affected CPUs, there's no need for
@@ -253,9 +253,65 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
#endif

/*
- * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
- * the first unbalanced RET after vmexit!
+ * IMPORTANT: The below SPEC_CTRL handling and RSB filling must be done
+ * before the first RET after vmexit!
+ */
+
+ ALTERNATIVE "jmp .Lhost_spec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
+
+ pop %_ASM_SI /* @flags */
+ pop %_ASM_DI /* @vmx */
+
+ /*
+ * if (flags & VMX_RUN_SAVE_SPEC_CTRL)
+ * vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
+ */
+ test $VMX_RUN_SAVE_SPEC_CTRL, %_ASM_SI
+ jz .Lhost_spec_ctrl_check
+
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+ rdmsr
+ mov %eax, VMX_spec_ctrl(%_ASM_DI)
+
+.Lhost_spec_ctrl_check:
+ /*
+ * If the guest/host SPEC_CTRL values differ, restore the host value.
*
+ * For legacy IBRS, the IBRS bit always needs to be written after
+ * transitioning from a less privileged predictor mode, regardless of
+ * whether the guest/host values differ.
+ *
+ * if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
+ * vmx->spec_ctrl != this_cpu_read(x86_spec_ctrl_current))
+ * native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+ */
+ ALTERNATIVE "", "jmp .Lhost_spec_ctrl_write", X86_FEATURE_KERNEL_IBRS
+ movl VMX_spec_ctrl(%_ASM_DI), %edi
+ movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
+ cmp %edi, %esi
+ je .Lhost_spec_ctrl_done_lfence
+
+.Lhost_spec_ctrl_write:
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+ xor %edx, %edx
+ mov %esi, %eax
+ wrmsr
+
+.Lhost_spec_ctrl_done_lfence:
+ /*
+ * This ensures that speculative execution doesn't incorrectly bypass
+ * the above SPEC_CTRL wrmsr by mispredicting the 'je'.
+ *
+ * It's only needed if the below FILL_RETURN_BUFFER doesn't do an
+ * LFENCE. Thus the X86_FEATURE_RSB_VMEXIT{_LITE} alternatives.
+ */
+ ALTERNATIVE_2 "lfence", \
+ "", X86_FEATURE_RSB_VMEXIT, \
+ "", X86_FEATURE_RSB_VMEXIT_LITE
+
+.Lhost_spec_ctrl_done:
+
+ /*
* For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
* entries and (in some cases) RSB underflow.
*
@@ -267,11 +323,6 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
X86_FEATURE_RSB_VMEXIT_LITE

- pop %_ASM_ARG2 /* @flags */
- pop %_ASM_ARG1 /* @vmx */
-
- call vmx_spec_ctrl_restore_host
-
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 44fb619803b8..d353b0e23918 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7109,31 +7109,6 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
}
}

-void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
- unsigned int flags)
-{
- u64 hostval = this_cpu_read(x86_spec_ctrl_current);
-
- if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
- return;
-
- if (flags & VMX_RUN_SAVE_SPEC_CTRL)
- vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
-
- /*
- * If the guest/host SPEC_CTRL values differ, restore the host value.
- *
- * For legacy IBRS, the IBRS bit always needs to be written after
- * transitioning from a less privileged predictor mode, regardless of
- * whether the guest/host values differ.
- */
- if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
- vmx->spec_ctrl != hostval)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
-
- barrier_nospec();
-}
-
static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
switch (to_vmx(vcpu)->exit_reason.basic) {
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 9e66531861cf..f9fab33f4d76 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -420,7 +420,6 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
-void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
unsigned int flags);
--
2.40.1