Re: [PATCH v2 1/6] KVM: x86/mmu: add a new mmu zap helper to indicate memtype changes

From: Yan Zhao
Date: Tue May 23 2023 - 22:47:21 EST


On Tue, May 23, 2023 at 03:51:49PM -0700, Sean Christopherson wrote:
> Rather than add a helper to zap "for memtype", add a helper to query if KVM honors
> guest MTRRs. The "for memtype" isn't intuitive and ends up being incomplete.
> That will also allow for deduplicating other code (and a comment). Waiting until
> the zap also wastes cycles in the MTRR case, there's no need to compute start and
> end if the zap will ultimately be skipped.
Agreed.

>
> E.g. over two or three patches:
>
> ---
> arch/x86/kvm/mmu.h | 1 +
> arch/x86/kvm/mmu/mmu.c | 19 ++++++++++++++-----
> arch/x86/kvm/mtrr.c | 2 +-
> arch/x86/kvm/x86.c | 2 +-
> 4 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 92d5a1924fc1..c3c50ec9d9db 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -103,6 +103,7 @@ static inline u8 kvm_get_shadow_phys_bits(void)
> void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
> void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
> void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
> +bool kvm_mmu_honors_guest_mtrrs(struct kvm *kvm);
>
> void kvm_init_mmu(struct kvm_vcpu *vcpu);
> void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index c8961f45e3b1..a2b1723bc6a4 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4512,12 +4512,10 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
> }
> #endif
>
> -int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> +bool kvm_mmu_honors_guest_mtrrs(struct kvm *kvm)
> {
> /*
> - * If the guest's MTRRs may be used to compute the "real" memtype,
> - * restrict the mapping level to ensure KVM uses a consistent memtype
> - * across the entire mapping. If the host MTRRs are ignored by TDP
> + * If TDP is enabled, the host MTRRs are ignored by TDP
> * (shadow_memtype_mask is non-zero), and the VM has non-coherent DMA
> * (DMA doesn't snoop CPU caches), KVM's ABI is to honor the memtype
> * from the guest's MTRRs so that guest accesses to memory that is
> @@ -4526,7 +4524,18 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> * Note, KVM may still ultimately ignore guest MTRRs for certain PFNs,
> * e.g. KVM will force UC memtype for host MMIO.
> */
> - if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
> + return tdp_enabled && shadow_memtype_mask &&
> + kvm_arch_has_noncoherent_dma(kvm);
> +}
> +
> +int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> +{
> + /*
> + * If the guest's MTRRs may be used to compute the "real" memtype,
> + * restrict the mapping level to ensure KVM uses a consistent memtype
> + * across the entire mapping.
> + */
> + if (kvm_mmu_honors_guest_mtrrs(vcpu->kvm)) {
> for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
> int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
> gfn_t base = gfn_round_for_level(fault->gfn,
> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
> index 3eb6e7f47e96..a67c28a56417 100644
> --- a/arch/x86/kvm/mtrr.c
> +++ b/arch/x86/kvm/mtrr.c
> @@ -320,7 +320,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
> struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
> gfn_t start, end;
>
> - if (!tdp_enabled || !kvm_arch_has_noncoherent_dma(vcpu->kvm))
> + if (!kvm_mmu_honors_guest_mtrrs(vcpu->kvm))
Could we also add another helper kvm_mmu_cap_honors_guest_mtrrs(), which
does not check kvm_arch_has_noncoherent_dma()?

+static inline bool kvm_mmu_cap_honors_guest_mtrrs(struct kvm *kvm)
+{
+ return !!shadow_memtype_mask;
+}

This is because in patch 4 I plan to do the EPT zap on both transitions of
noncoherent_dma_count between 0 and 1 (i.e. from 0 to 1 and from 1 to 0),
as in the diff below.

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 41d7bb51a297..ad0c43d7f532 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13146,13 +13146,19 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);

void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
{
- atomic_inc(&kvm->arch.noncoherent_dma_count);
+ if (atomic_inc_return(&kvm->arch.noncoherent_dma_count) == 1) {
+ if (kvm_mmu_cap_honors_guest_mtrrs(kvm))
+ kvm_zap_gfn_range(kvm, 0, ~0ULL);
+ }
}
EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);

void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
{
- atomic_dec(&kvm->arch.noncoherent_dma_count);
+ if (!atomic_dec_return(&kvm->arch.noncoherent_dma_count)) {
+ if (kvm_mmu_cap_honors_guest_mtrrs(kvm))
+ kvm_zap_gfn_range(kvm, 0, ~0ULL);
+ }
}
EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);


> return;
>
> if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 076d50f6321b..977dceb7ba7e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -942,7 +942,7 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned lon
> kvm_mmu_reset_context(vcpu);
>
> if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
> - kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
> + kvm_mmu_honors_guest_mtrrs(vcpu->kvm) &&
> !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
> kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
> }
>
> base-commit: 8d4a1b4011c125b1510254b724433aaae746ce14
> --
>