[RFC PATCH v4 03/10] KVM: x86/mmu: Pass around full 64-bit error code for the KVM page fault

From: isaku . yamahata
Date: Thu Jul 20 2023 - 19:33:24 EST


From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

Because the full 64-bit error code, or more info about the fault, for the
KVM page fault will be needed for protected VM, TDX and SEV-SNP, update
kvm_mmu_do_page_fault() to accept the 64-bit value so it can pass it to the
callbacks.

The upper 32 bits of error code are discarded at kvm_mmu_page_fault()
by lower_32_bits(). Now it's passed down as full 64 bits.
Currently two hardware defined bits, PFERR_GUEST_FINAL_MASK and
PFERR_GUEST_PAGE_MASK, and one software defined bit, PFERR_IMPLICIT_ACCESS,
is defined.

PFERR_IMPLICIT_ACCESS:
commit 4f4aa80e3b88 ("KVM: X86: Handle implicit supervisor access with SMAP")
introduced a software defined bit PFERR_IMPLICIT_ACCESS at bit 48 to
indicate implicit access for SMAP with instruction emulator. Concretely
emulator_read_std() and emulator_write_std() set the bit.
permission_fault() checks the bit as smap implicit access. The vendor page
fault handler shouldn't pass the bit to kvm_mmu_page_fault().

PFERR_GUEST_FINAL_MASK and PFERR_GUEST_PAGE_MASK:
commit 147277540bbc ("kvm: svm: Add support for additional SVM NPF error codes")
introduced them to optimize the nested page fault handling. Other code
path doesn't use the bits. Those two bits can be safely passed down
without functionality change.

The accesses of fault->error_code are as follows
- FNAME(page_fault): PFERR_IMPLICIT_ACCESS shouldn't be passed down.
PFERR_GUEST_FINAL_MASK and PFERR_GUEST_PAGE_MASK
aren't used.
- kvm_mmu_page_fault(): explicit mask with PFERR_RSVD_MASK, and
PFERR_NESTED_GUEST_PAGE is used outside of the
masking upper 32 bits.
- mmutrace: change u32 -> u64
- pgprintk(): change %x -> %llx

No functional change is intended. This is a preparation to pass on more
info with page fault error code.

Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

---
Changes v2 -> v3:
- Make depends on a patch to clear PFERR_IMPLICIT_ACCESS
- drop clearing the upper 32 bit, instead just pass whole 64 bits
- update commit message to mention about PFERR_IMPLICIT_ACCESS and
PFERR_NESTED_GUEST_PAGE

Changes v1 -> v2:
- no change
---
arch/x86/kvm/mmu/mmu.c | 5 ++---
arch/x86/kvm/mmu/mmu_internal.h | 4 ++--
arch/x86/kvm/mmu/mmutrace.h | 2 +-
arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
4 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a9bbc20c7dfd..a2fe091e327a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4523,7 +4523,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
static int nonpaging_page_fault(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault)
{
- pgprintk("%s: gva %lx error %x\n", __func__, fault->addr, fault->error_code);
+ pgprintk("%s: gva %llx error %llx\n", __func__, fault->addr, fault->error_code);

/* This path builds a PAE pagetable, we can map 2mb pages at maximum. */
fault->max_level = PG_LEVEL_2M;
@@ -5844,8 +5844,7 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
}

if (r == RET_PF_INVALID) {
- r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
- lower_32_bits(error_code), false,
+ r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false,
&emulation_type);
if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
return -EIO;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index f1786698ae00..7f9ec1e5b136 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -191,7 +191,7 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
struct kvm_page_fault {
/* arguments to kvm_mmu_do_page_fault. */
const gpa_t addr;
- const u32 error_code;
+ const u64 error_code;
const bool prefetch;

/* Derived from error_code. */
@@ -283,7 +283,7 @@ enum {
};

static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- u32 err, bool prefetch, int *emulation_type)
+ u64 err, bool prefetch, int *emulation_type)
{
struct kvm_page_fault fault = {
.addr = cr2_or_gpa,
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index 2d7555381955..2e77883c92f6 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -261,7 +261,7 @@ TRACE_EVENT(
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(gpa_t, cr2_or_gpa)
- __field(u32, error_code)
+ __field(u64, error_code)
__field(u64 *, sptep)
__field(u64, old_spte)
__field(u64, new_spte)
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 0662e0278e70..42d48b1ec7b3 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -758,7 +758,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
struct guest_walker walker;
int r;

- pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
+ pgprintk("%s: addr %llx err %llx\n", __func__, fault->addr, fault->error_code);
WARN_ON_ONCE(fault->is_tdp);

/*
--
2.25.1