Re: [PATCH v2 09/18] KVM: x86/mmu: look for a cached PGD when going from 32-bit to 64-bit

From: Sean Christopherson
Date: Fri Feb 18 2022 - 13:09:06 EST


On Thu, Feb 17, 2022, Paolo Bonzini wrote:
> Right now, PGD caching avoids placing a PAE root in the cache by using the
> old value of mmu->root_level and mmu->shadow_root_level; it does not look
> for a cached PGD if the old root is a PAE one, and then frees it using
> kvm_mmu_free_roots.
>
> Change the logic instead to free the uncacheable root early.
> This way, __kvm_mmu_new_pgd is able to look up the cache when going from
> 32-bit to 64-bit (if there is a hit, the invalid root becomes the least
> recently used). An example of this is nested virtualization with shadow
> paging, when a 64-bit L1 runs a 32-bit L2.
>
> As a side effect (which is actually the reason why this patch was
> written), PGD caching does not use the old value of mmu->root_level
> and mmu->shadow_root_level anymore.

Maybe another blurb on 5=>4-level nNPT being broken? I'm also ok omitting it.

> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---

Nits aside,

Reviewed-by: Sean Christopherson <seanjc@xxxxxxxxxx>

> +static bool cached_root_find_and_keep_current(struct kvm *kvm, struct kvm_mmu *mmu,
> + gpa_t new_pgd,
> + union kvm_mmu_page_role new_role)
> {
> uint i;
> - struct kvm_mmu *mmu = vcpu->arch.mmu;
>
> if (is_root_usable(&mmu->root, new_pgd, new_role))
> return true;
>
> for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
> + /*
> + * The swaps end up rotating the cache like this:
> + * C 0 1 2 3 (on entry to the function)
> + * 0 C 1 2 3
> + * 1 C 0 2 3
> + * 2 C 0 1 3
> + * 3 C 0 1 2 (on exit from the loop)
> + */
> swap(mmu->root, mmu->prev_roots[i]);
> -

I'd prefer we keep this whitespace; I like that it separates the swap() and its
comment from the usability check.

> if (is_root_usable(&mmu->root, new_pgd, new_role))
> - break;
> + return true;
> }
>
> - return i < KVM_MMU_NUM_PREV_ROOTS;
> + kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
> + return false;
> }
>
> -static bool fast_pgd_switch(struct kvm_vcpu *vcpu, gpa_t new_pgd,
> - union kvm_mmu_page_role new_role)
> +/*
> + * Find out if a previously cached root matching the new pgd/role is available.
> + * On entry, mmu->root is invalid.
> + * If a matching root is found, it is assigned to kvm_mmu->root, the LRU entry
> + * of the cache becomes invalid, and true is returned.
> + * If no match is found, kvm_mmu->root is left invalid and false is returned.
> + */
> +static bool cached_root_find_without_current(struct kvm *kvm, struct kvm_mmu *mmu,
> + gpa_t new_pgd,
> + union kvm_mmu_page_role new_role)
> {
> - struct kvm_mmu *mmu = vcpu->arch.mmu;
> + uint i;
> +
> + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
> + if (is_root_usable(&mmu->prev_roots[i], new_pgd, new_role))
> + goto hit;

The for-loop needs curly braces.

>
> + return false;
> +
> +hit:
> + swap(mmu->root, mmu->prev_roots[i]);
> + /* Bubble up the remaining roots. */
> + for (; i < KVM_MMU_NUM_PREV_ROOTS - 1; i++)
> + mmu->prev_roots[i] = mmu->prev_roots[i + 1];
> + mmu->prev_roots[i].hpa = INVALID_PAGE;
> + return true;
> +}
> +
> +static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu,
> + gpa_t new_pgd, union kvm_mmu_page_role new_role)
> +{
> /*
> - * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
> + * For now, limit the caching to 64-bit hosts+VMs in order to avoid
> * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
> * later if necessary.
> */
> - if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
> - mmu->root_level >= PT64_ROOT_4LEVEL)
> - return cached_root_available(vcpu, new_pgd, new_role);
> + if (VALID_PAGE(mmu->root.hpa) && !to_shadow_page(mmu->root.hpa))
> + kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
>
> - return false;
> + if (VALID_PAGE(mmu->root.hpa))
> + return cached_root_find_and_keep_current(kvm, mmu, new_pgd, new_role);
> + else
> + return cached_root_find_without_current(kvm, mmu, new_pgd, new_role);
> }
>
> static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
> @@ -4160,8 +4196,8 @@ static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
> {
> struct kvm_mmu *mmu = vcpu->arch.mmu;
>
> - if (!fast_pgd_switch(vcpu, new_pgd, new_role)) {
> - kvm_mmu_free_roots(vcpu->kvm, mmu, KVM_MMU_ROOT_CURRENT);
> + if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role)) {
> + /* kvm_mmu_ensure_valid_pgd will set up a new root. */

The "kvm_mmu_ensure_valid_pgd" part is stale due to the bikeshedding stalemate.
Maybe reference vcpu_enter_guest() instead? E.g.

/*
* If no usable root is found, there's nothing more to do; a new root
* will be set up during vcpu_enter_guest(), prior to the next VM-Enter.
*/
if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role))
return;