Re: [PATCH 6/9] KVM: x86: Update guest cpu_caps at runtime for dynamic CPUID-based features

From: Robert Hoo
Date: Tue Nov 14 2023 - 21:03:39 EST


On 11/14/2023 9:48 PM, Sean Christopherson wrote:
> On Mon, Nov 13, 2023, Robert Hoo wrote:
> > ...
> > 	u32 *caps = vcpu->arch.cpu_caps;
> > and update guest_cpu_cap_set(), guest_cpu_cap_clear(),
> > guest_cpu_cap_change() and guest_cpu_cap_restrict() to pass in
> > vcpu->arch.cpu_caps instead of vcpu, since all of them merely refer to vcpu
> > cap, rather than whole vcpu info.
>
> No, because then every caller would need extra code to pass vcpu->cpu_caps,

Emm, I don't understand this. I tried modifying and compiling; all that's needed is to substitute "vcpu" with "vcpu->arch.cpu_caps" at the call sites (my diff on top of this patch set is at the end).

> and
> passing 'u32 *' provides less type safety than 'struct kvm_vcpu *'. That tradeoff
> isn't worth making this one path slightly easier to read.

My point is also about robustness in the long run: as a principle, we'd better pass a function only the parameters/info it actually needs, e.g. cpuid_entry2_find(). Anyway, this is a minor point and shouldn't distract your focus.
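(I do see the type-safety tradeoff, to be fair. A contrived sketch of the failure mode, assuming my diff at the end is applied and reusing the existing NR_KVM_CPU_CAPS array size:

	u32 scratch[NR_KVM_CPU_CAPS];

	/* Compiles fine with a u32 * parameter, but updates the wrong state. */
	guest_cpu_cap_set(scratch, X86_FEATURE_XSAVE);

	/* What was actually intended. */
	guest_cpu_cap_set(vcpu->arch.cpu_caps, X86_FEATURE_XSAVE);

With a struct kvm_vcpu * parameter, only a real vCPU can be handed in.)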

The whole idea of this patch set is good. I too was confused when initially looking into the vCPU CPUID code, with its complicated interdependencies between CPUID bits and KVM caps (or "governed" features, in your words) (and even what the kernel governs, and HW caps?). With this guest cpu_caps[], the layered relationships become much clearer, along with fast guest cap queries.
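(For reference, what makes guest cap queries fast: guest_cpu_cap_has() boils down to a single array lookup, roughly along the lines of the set/clear helpers in my diff at the end:

	static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
						      unsigned int x86_feature)
	{
		unsigned int x86_leaf = __feature_leaf(x86_feature);

		reverse_cpuid_check(x86_leaf);
		/* One load and one mask; no CPUID entry walk at query time. */
		return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature);
	}
)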

> > Or, for a simple change, rename the variable "caps" --> "vcpu" here, to lessen
> > reading confusion.
>
> @vcpu is already defined and needs to be used in this function. See the comment
> below.

> I'm definitely open to a better name, though I would like to keep the name
> relatively short so that the line lengths of the callers are reasonable, e.g. I
> would prefer not to do vcpu_caps.

> > > +	/*
> > > +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > > +	 * are coming in from userspace!
> > > +	 */
> > > +	if (entries != vcpu->arch.cpuid_entries)
> > > +		caps = NULL;
> > >
> > >  	best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> > > -	if (best) {
> > > -		/* Update OSXSAVE bit */
> > > -		if (boot_cpu_has(X86_FEATURE_XSAVE))
> > > -			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> > > +
> > > +	if (boot_cpu_has(X86_FEATURE_XSAVE))
> > > +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
> > >  					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
> > > -		cpuid_entry_change(best, X86_FEATURE_APIC,
> > > -				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> > > +	kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> > > +				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6407e5c45f20..3e8976705342 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -262,7 +262,7 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
 	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
 }
 
-static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
+static __always_inline void kvm_update_feature_runtime(u32 *guest_caps,
 						       struct kvm_cpuid_entry2 *entry,
 						       unsigned int x86_feature,
 						       bool has_feature)
@@ -270,15 +270,15 @@ static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
 	if (entry)
 		cpuid_entry_change(entry, x86_feature, has_feature);
 
-	if (vcpu)
-		guest_cpu_cap_change(vcpu, x86_feature, has_feature);
+	if (guest_caps)
+		guest_cpu_cap_change(guest_caps, x86_feature, has_feature);
 }
 
 static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
 				       int nent)
 {
 	struct kvm_cpuid_entry2 *best;
-	struct kvm_vcpu *caps = vcpu;
+	u32 *caps = vcpu->arch.cpu_caps;
 
 	/*
 	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
@@ -397,7 +397,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 */
 	allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
 				      guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES);
-	guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages);
+	guest_cpu_cap_change(vcpu->arch.cpu_caps, X86_FEATURE_GBPAGES, allow_gbpages);
 
 	best = kvm_find_cpuid_entry(vcpu, 1);
 	if (best && apic) {
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 98694dfe062e..a3a0482fc514 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -183,39 +183,39 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
 	return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
 }
 
-static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_set(u32 *caps,
 					      unsigned int x86_feature)
 {
 	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
 	reverse_cpuid_check(x86_leaf);
-	vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
+	caps[x86_leaf] |= __feature_bit(x86_feature);
 }
 
-static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_clear(u32 *caps,
 						unsigned int x86_feature)
 {
 	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
 	reverse_cpuid_check(x86_leaf);
-	vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
+	caps[x86_leaf] &= ~__feature_bit(x86_feature);
 }
 
-static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_change(u32 *caps,
 						 unsigned int x86_feature,
 						 bool guest_has_cap)
 {
 	if (guest_has_cap)
-		guest_cpu_cap_set(vcpu, x86_feature);
+		guest_cpu_cap_set(caps, x86_feature);
 	else
-		guest_cpu_cap_clear(vcpu, x86_feature);
+		guest_cpu_cap_clear(caps, x86_feature);
 }
 
-static __always_inline void guest_cpu_cap_restrict(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_restrict(u32 *caps,
 						   unsigned int x86_feature)
 {
 	if (!kvm_cpu_cap_has(x86_feature))
-		guest_cpu_cap_clear(vcpu, x86_feature);
+		guest_cpu_cap_clear(caps, x86_feature);
 }
 
 static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6fe2d7bf4959..dd4ca07c3cd0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4315,14 +4315,14 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 * XSS on VM-Enter/VM-Exit.  Failure to do so would effectively give
 	 * the guest read/write access to the host's XSS.
 	 */
-	guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVE);
-	guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES,
+	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVE);
+	guest_cpu_cap_change(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES,
 			     boot_cpu_has(X86_FEATURE_XSAVES) &&
 			     guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE));
 
-	guest_cpu_cap_restrict(vcpu, X86_FEATURE_NRIPS);
-	guest_cpu_cap_restrict(vcpu, X86_FEATURE_TSCRATEMSR);
-	guest_cpu_cap_restrict(vcpu, X86_FEATURE_LBRV);
+	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_NRIPS);
+	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_TSCRATEMSR);
+	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_LBRV);
 
 	/*
 	 * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
@@ -4330,12 +4330,12 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 * SVM on Intel is bonkers and extremely unlikely to work).
 	 */
 	if (!guest_cpuid_is_intel(vcpu))
-		guest_cpu_cap_restrict(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
+		guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_V_VMSAVE_VMLOAD);
 
-	guest_cpu_cap_restrict(vcpu, X86_FEATURE_PAUSEFILTER);
-	guest_cpu_cap_restrict(vcpu, X86_FEATURE_PFTHRESHOLD);
-	guest_cpu_cap_restrict(vcpu, X86_FEATURE_VGIF);
-	guest_cpu_cap_restrict(vcpu, X86_FEATURE_VNMI);
+	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PAUSEFILTER);
+	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PFTHRESHOLD);
+	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VGIF);
+	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VNMI);
 
 	svm_recalc_instruction_intercepts(vcpu, svm);

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7645945af5c5..c23c96dc24cf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7752,13 +7752,13 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 * to the guest. XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
 	 * set if and only if XSAVE is supported.
 	 */
-	guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVE);
+	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVE);
 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE))
-		guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVES);
+		guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES);
 	else
-		guest_cpu_cap_clear(vcpu, X86_FEATURE_XSAVES);
+		guest_cpu_cap_clear(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES);
 
-	guest_cpu_cap_restrict(vcpu, X86_FEATURE_VMX);
+	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VMX);
 
 	vmx_setup_uret_msrs(vmx);