[PATCH RFC 2/3] x86/kvm/hyper-v: move VMX controls sanitization out of nested_enable_evmcs()

From: Vitaly Kuznetsov
Date: Wed Jan 15 2020 - 12:10:27 EST


With fine grained VMX feature enablement QEMU>=4.2 tries to do KVM_SET_MSRS
with default (matching CPU model) values and in case eVMCS is also enabled,
fails.

It would be possible to drop VMX feature filtering completely and make
this a guest's responsibility: if it decides to use eVMCS it should know
which fields are available and which are not. Hyper-V mostly complies to
this, however, there is at least one problematic control:
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES
which Hyper-V enables. As there is no 'apic_addr_field' in eVMCS, we
fail to handle this properly in KVM. It is unclear how this is supposed
to work, genuine Hyper-V doesn't expose the control so it is possible that
this is just a bug (in Hyper-V).

Move VMX controls sanitization from nested_enable_evmcs() to vmx_get_msr(),
this allows userspace to keep setting controls it wants and at the same
time hides them from the guest.

Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
---
arch/x86/kvm/vmx/evmcs.c | 38 ++++++++++++++++++++++++++++++++------
arch/x86/kvm/vmx/evmcs.h | 1 +
arch/x86/kvm/vmx/vmx.c | 10 ++++++++--
3 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 89c3e0caf39f..b5d6582ba589 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -346,6 +346,38 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
return 0;
}

+void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
+{
+ u32 ctl_low = (u32)*pdata, ctl_high = (u32)(*pdata >> 32);
+ /*
+ * Enlightened VMCS doesn't have certain fields, make sure we don't
+ * expose unsupported controls to L1.
+ */
+
+ switch (msr_index) {
+ case MSR_IA32_VMX_PINBASED_CTLS:
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+ ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+ break;
+ case MSR_IA32_VMX_EXIT_CTLS:
+ case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+ ctl_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+ break;
+ case MSR_IA32_VMX_ENTRY_CTLS:
+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+ ctl_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+ break;
+ case MSR_IA32_VMX_PROCBASED_CTLS2:
+ ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+ break;
+ case MSR_IA32_VMX_VMFUNC:
+ ctl_low &= ~EVMCS1_UNSUPPORTED_VMFUNC;
+ break;
+ }
+
+ *pdata = ctl_low | ((u64)ctl_high << 32);
+}
+
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version)
{
@@ -356,11 +388,5 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
if (vmcs_version)
*vmcs_version = nested_get_evmcs_version(vcpu);

- vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
- vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
- vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
- vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
- vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;
-
return 0;
}
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index 07ebf6882a45..b88d9807a796 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -201,5 +201,6 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);
+void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata);

#endif /* __KVM_X86_VMX_EVMCS_H */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e3394c839dea..8eb74618b8d8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1849,8 +1849,14 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
return 1;
- return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
- &msr_info->data);
+ if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
+ &msr_info->data))
+ return 1;
+ if (!msr_info->host_initiated &&
+ vmx->nested.enlightened_vmcs_enabled)
+ nested_evmcs_filter_control_msr(msr_info->index,
+ &msr_info->data);
+ break;
case MSR_IA32_RTIT_CTL:
if (pt_mode != PT_MODE_HOST_GUEST)
return 1;
--
2.24.1