Re: [PATCH v7 1/7] KVM: CPUID: Fix IA32_XSS support in CPUID(0xd,i) enumeration

From: Jim Mattson
Date: Wed Oct 02 2019 - 13:26:25 EST


On Thu, Sep 26, 2019 at 7:17 PM Yang Weijiang <weijiang.yang@xxxxxxxxx> wrote:
>
> The control bits in IA32_XSS MSR are being used for new features,
> but current CPUID(0xd,i) enumeration code doesn't support them, so
> fix existing code first.
>
> The supervisor states in IA32_XSS haven't been used in public
> KVM code, so set KVM_SUPPORTED_XSS to 0 now, anyone who's developing
> IA32_XSS related feature may expand the macro to add the CPUID support,
> otherwise, CPUID(0xd,i>1) always reports 0 of the subleaf to guest.
>
> Extracted old code into a new filter and keep it same flavor as others.
>
> This patch passed selftest on a few Intel platforms.
>
> Suggested-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/cpuid.c | 94 +++++++++++++++++++++------------
> arch/x86/kvm/svm.c | 7 +++
> arch/x86/kvm/vmx/vmx.c | 6 +++
> arch/x86/kvm/x86.h | 7 +++
> 5 files changed, 82 insertions(+), 33 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 74e88e5edd9c..d018df8c5f32 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1209,6 +1209,7 @@ struct kvm_x86_ops {
> uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
>
> bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
> + u64 (*supported_xss)(void);
> };
>
> struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 22c2720cd948..9d282fec0a62 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -62,6 +62,11 @@ u64 kvm_supported_xcr0(void)
> return xcr0;
> }
>
> +u64 kvm_supported_xss(void)
> +{
> + return KVM_SUPPORTED_XSS & kvm_x86_ops->supported_xss();
> +}
> +
> #define F(x) bit(X86_FEATURE_##x)
>
> int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> @@ -414,6 +419,50 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
> }
> }
>
> +static inline void do_cpuid_0xd_mask(struct kvm_cpuid_entry2 *entry, int index)
> +{
> + unsigned int f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;

Does Intel have CPUs that support XSAVES but don't support the "enable
XSAVES/XRSTORS" VM-execution control? If so, what is the behavior of
XSAVESXRSTORS on those CPUs in VMX non-root mode? If not, why is this
conditional F(XSAVES) here?

> + /* cpuid 0xD.1.eax */
> + const u32 kvm_cpuid_D_1_eax_x86_features =
> + F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
> + u64 u_supported = kvm_supported_xcr0();
> + u64 s_supported = kvm_supported_xss();
> + u64 supported;
> +
> + switch (index) {
> + case 0:
> + entry->eax &= u_supported;
> + entry->ebx = xstate_required_size(u_supported, false);

EBX could actually be zero, couldn't it? Since this output is
context-dependent, I'm not sure how to interpret it when returned from
KVM_GET_SUPPORTED_CPUID.

> + entry->ecx = entry->ebx;
> + entry->edx = 0;

Shouldn't this be: entry->edx &= u_supported >> 32?

> + break;
> + case 1:
> + supported = u_supported | s_supported;
> + entry->eax &= kvm_cpuid_D_1_eax_x86_features;
> + cpuid_mask(&entry->eax, CPUID_D_1_EAX);
> + entry->ebx = 0;
> + entry->edx = 0;

Shouldn't this be: entry->edx &= s_supported >> 32?

> + entry->ecx &= s_supported;
> + if (entry->eax & (F(XSAVES) | F(XSAVEC)))
> + entry->ebx = xstate_required_size(supported, true);

As above, can't EBX just be zero, since it's context-dependent? What
is the context when processing KVM_GET_SUPPORTED_CPUID? And why do we
only fill this in when XSAVES or XSAVEC is supported?

> + break;
> + default:
> + supported = (entry->ecx & 1) ? s_supported : u_supported;
> + if (!(supported & ((u64)1 << index))) {

Nit: 1ULL << index.

> + entry->eax = 0;
> + entry->ebx = 0;
> + entry->ecx = 0;
> + entry->edx = 0;
> + return;
> + }
> + if (entry->ecx)
> + entry->ebx = 0;

This seems to back up my claims above regarding the EBX output for
cases 0 and 1, but aside from those subleaves, is this correct? For
subleaves > 1, ECX bit 1 can be set for extended state components that
need to be cache-line aligned. Such components could map to a valid
bit in XCR0 and have a non-zero offset from the beginning of the
non-compacted XSAVE area.

> + entry->edx = 0;

This seems too aggressive. See my comments above regarding EDX outputs
for cases 0 and 1.

> + break;
> + }
> +}
> +
> static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> int *nent, int maxnent)
> {
> @@ -428,7 +477,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> unsigned f_lm = 0;
> #endif
> unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> - unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
> unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
>
> /* cpuid 1.edx */
> @@ -482,10 +530,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
> F(PMM) | F(PMM_EN);
>
> - /* cpuid 0xD.1.eax */
> - const u32 kvm_cpuid_D_1_eax_x86_features =
> - F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
> -
> /* all calls to cpuid_count() should be made on the same cpu */
> get_cpu();
>
> @@ -622,38 +666,22 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> break;
> }
> case 0xd: {
> - int idx, i;
> - u64 supported = kvm_supported_xcr0();
> -
> - entry->eax &= supported;
> - entry->ebx = xstate_required_size(supported, false);
> - entry->ecx = entry->ebx;
> - entry->edx &= supported >> 32;
> - if (!supported)
> - break;
> + int i, idx;
>
> - for (idx = 1, i = 1; idx < 64; ++idx) {
> - u64 mask = ((u64)1 << idx);
> + do_cpuid_0xd_mask(&entry[0], 0);
> + for (i = 1, idx = 1; idx < 64; ++idx) {
> if (*nent >= maxnent)
> goto out;
> -
> do_host_cpuid(&entry[i], function, idx);
> - if (idx == 1) {
> - entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
> - cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
> - entry[i].ebx = 0;
> - if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
> - entry[i].ebx =
> - xstate_required_size(supported,
> - true);
> - } else {
> - if (entry[i].eax == 0 || !(supported & mask))
> - continue;
> - if (WARN_ON_ONCE(entry[i].ecx & 1))
> - continue;
> - }
> - entry[i].ecx = 0;
> - entry[i].edx = 0;
> + if (entry[i].eax == 0 && entry[i].ebx == 0 &&
> + entry[i].ecx == 0 && entry[i].edx == 0)
> + continue;
> +
> + do_cpuid_0xd_mask(&entry[i], idx);
> + if (entry[i].eax == 0 && entry[i].ebx == 0 &&
> + entry[i].ecx == 0 && entry[i].edx == 0)
> + continue;
> +
> ++*nent;
> ++i;
> }
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index e0368076a1ef..be967bf9a81d 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -7193,6 +7193,11 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
> return false;
> }
>
> +static u64 svm_supported_xss(void)
> +{
> + return 0;
> +}
> +
> static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> .cpu_has_kvm_support = has_svm,
> .disabled_by_bios = is_disabled,
> @@ -7329,6 +7334,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> .nested_get_evmcs_version = NULL,
>
> .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
> +
> + .supported_xss = svm_supported_xss,
> };
>
> static int __init svm_init(void)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index c6f6b05004d9..a84198cff397 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1651,6 +1651,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
> return !(val & ~valid_bits);
> }
>
> +static inline u64 vmx_supported_xss(void)
> +{
> + return host_xss;
> +}

Do you really need vendor-specific code for this? Can't you just hoist
host_xss into common code (x86.c) and use that? [Note that Aaron Lewis
is currently working on a series that will include that hoisting, if
you want to wait.]


> static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
> {
> switch (msr->index) {
> @@ -7799,6 +7804,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
> .nested_enable_evmcs = NULL,
> .nested_get_evmcs_version = NULL,
> .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
> + .supported_xss = vmx_supported_xss,
> };
>
> static void vmx_cleanup_l1d_flush(void)
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 6594020c0691..fbffabad0370 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -293,6 +293,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
> | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
> | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
> | XFEATURE_MASK_PKRU)
> +
> +/*
> + * Right now, no XSS states are used on x86 platform,
> + * expand the macro for new features.
> + */
> +#define KVM_SUPPORTED_XSS (0)
> +

Nit: superfluous parentheses.

> extern u64 host_xcr0;
>
> extern u64 kvm_supported_xcr0(void);
> --
> 2.17.2
>