Re: [PATCH RFC v9 04/51] KVM: x86: Determine shared/private faults using a configurable mask

From: Isaku Yamahata
Date: Wed Jun 14 2023 - 12:47:21 EST


On Sun, Jun 11, 2023 at 11:25:12PM -0500,
Michael Roth <michael.roth@xxxxxxx> wrote:

> This will be used to determine whether or not an #NPF should be serviced
> using a normal page vs. a guarded/gmem one.
>
> Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 7 +++++++
> arch/x86/kvm/mmu/mmu_internal.h | 35 ++++++++++++++++++++++++++++++++-
> 2 files changed, 41 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b3bd24f2a390..c26f76641121 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1445,6 +1445,13 @@ struct kvm_arch {
> */
> #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
> struct kvm_mmu_memory_cache split_desc_cache;
> +
> + /*
> + * When set, used to determine whether a fault should be treated as
> + * private in the context of protected VMs which use a separate gmem
> + * pool to back private guest pages.
> + */
> + u64 mmu_private_fault_mask;
> };
>
> struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 780b91e1da9f..9b9e75aa43f4 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -252,6 +252,39 @@ struct kvm_page_fault {
>
> int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
>
> +static bool kvm_mmu_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 err)
> +{
> + struct kvm_memory_slot *slot;
> + bool private_fault = false;
> + gfn_t gfn = gpa_to_gfn(gpa);
> +
> + slot = gfn_to_memslot(kvm, gfn);
> + if (!slot) {
> + pr_debug("%s: no slot, GFN: 0x%llx\n", __func__, gfn);
> + goto out;
> + }
> +
> + if (!kvm_slot_can_be_private(slot)) {
> + pr_debug("%s: slot is not private, GFN: 0x%llx\n", __func__, gfn);
> + goto out;
> + }
> +
> + if (kvm->arch.mmu_private_fault_mask) {
> + private_fault = !!(err & kvm->arch.mmu_private_fault_mask);
> + goto out;
> + }

What's the convention for err? Can we abstract it by introducing a new bit,
PFERR_PRIVATE_MASK? The caller would set it based on the arch-specific value.
Then the logic will be
.is_private = err & PFERR_PRIVATE_MASK;


> +
> + /*
> + * Handling below is for UPM self-tests and guests that treat userspace
> + * as the authority on whether a fault should be private or not.
> + */
> + private_fault = kvm_mem_is_private(kvm, gpa >> PAGE_SHIFT);

This code path is sad. It costs one extra slot lookup and one xarray lookup.
Without the mmu lock held, the result can be changed by another vcpu.
Let's find a better way.

> +
> +out:
> + pr_debug("%s: GFN: 0x%llx, private: %d\n", __func__, gfn, private_fault);
> + return private_fault;
> +}
> +
> /*
> * Return values of handle_mmio_page_fault(), mmu.page_fault(), fast_page_fault(),
> * and of course kvm_mmu_do_page_fault().
> @@ -301,7 +334,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> .max_level = KVM_MAX_HUGEPAGE_LEVEL,
> .req_level = PG_LEVEL_4K,
> .goal_level = PG_LEVEL_4K,
> - .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
> + .is_private = kvm_mmu_fault_is_private(vcpu->kvm, cr2_or_gpa, err),
> };
> int r;
>
> --
> 2.25.1
>

--
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>