[PATCH RFC v9 40/51] KVM: SVM: Add support to handle RMP nested page faults

From: Michael Roth
Date: Mon Jun 12 2023 - 00:49:33 EST


From: Brijesh Singh <brijesh.singh@xxxxxxx>

When SEV-SNP is enabled in the guest, the hardware places restrictions
on all memory accesses based on the contents of the RMP table. When the
hardware encounters an RMP check failure caused by a guest memory
access, it raises a #NPF. The error code contains additional information
on the access type. See APM Volume 2 for additional information.
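
For reference, the relevant bits land in the upper half of the 64-bit
error code. A minimal sketch of the check this patch keys off of,
assuming the APM Volume 2 #NPF error-code layout (RMP check failure at
bit 31, size mismatch at bit 35); the mask names mirror the definitions
this series relies on, while npf_is_rmp_size_mismatch() is invented
here purely for illustration:

#define PFERR_GUEST_RMP_MASK	BIT_ULL(31)	/* fault was an RMP check failure */
#define PFERR_GUEST_SIZEM_MASK	BIT_ULL(35)	/* page size mismatch vs. the RMP entry */

static bool npf_is_rmp_size_mismatch(u64 error_code)
{
	/* Only size-mismatch RMP faults are expected to be handled in-kernel. */
	return (error_code & PFERR_GUEST_RMP_MASK) &&
	       (error_code & PFERR_GUEST_SIZEM_MASK);
}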

When using gmem, RMP faults caused by mismatches between the state in
the RMP table and what the guest expects via its page tables are
forwarded to userspace as KVM_EXIT_MEMORY_FAULT exits. This means the
only case expected to need in-kernel handling is when the page size of
the entry in the RMP table is larger than the mapping in the nested
page table, in which case a PSMASH instruction needs to be issued to
split the large RMP entry into individual 4K entries so that subsequent
accesses can succeed.
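
To make the split concrete: PSMASH operates on the base PFN of the 2M
region containing the faulting page, and a 2M page spans 512 4K PFNs
(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) == 512). A minimal sketch of that
alignment, with rmp_2m_base_pfn() invented here for illustration (the
snp_rmptable_psmash() helper below does the same masking):

/*
 * Round the faulting PFN down to the 2M boundary; PSMASH then rewrites
 * that single 2M RMP entry as 512 individual 4K RMP entries.
 */
static inline u64 rmp_2m_base_pfn(u64 pfn)
{
	return pfn & ~(512ULL - 1);	/* e.g. PFN 0x12345 -> 0x12200 */
}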

Co-developed-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
---
arch/x86/kvm/svm/sev.c | 85 ++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.c | 21 +++++++++--
arch/x86/kvm/svm/svm.h | 1 +
3 files changed, 103 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 9b9dff7728c8..1ba49c5ebaed 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3234,6 +3234,13 @@ static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
 	svm->vmcb->control.ghcb_gpa = value;
 }
 
+static int snp_rmptable_psmash(struct kvm *kvm, kvm_pfn_t pfn)
+{
+	pfn = pfn & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
+
+	return psmash(pfn);
+}
+
 static int snp_complete_psc_msr_protocol(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3696,3 +3703,81 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
 
 	return p;
 }
+
+void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
+{
+	struct kvm_memory_slot *slot;
+	struct kvm *kvm = vcpu->kvm;
+	int order, rmp_level, ret;
+	bool assigned;
+	kvm_pfn_t pfn;
+	gfn_t gfn;
+
+	/*
+	 * Private memslots forward handling of implicit page state changes
+	 * to userspace, so the only RMP faults expected here are for
+	 * PFERR_GUEST_SIZEM_MASK. Anything else generally suggests that the
+	 * RMP table has gotten out of sync with the private memslot.
+	 *
+	 * However, there is a transient case where an access to an NPT
+	 * mapping that has just been split/PSMASH'd can generate an RMP
+	 * fault without the PFERR_GUEST_SIZEM bit set. In these cases it
+	 * should be safe to ignore the fault and let the guest retry, but
+	 * allow for it to be optionally logged to diagnose exceptional cases.
+	 */
+	if (!(error_code & PFERR_GUEST_SIZEM_MASK)) {
+		pr_debug_ratelimited("Unexpected RMP fault for GPA 0x%llx, error_code 0x%llx\n",
+				     gpa, error_code);
+		return;
+	}
+
+	gfn = gpa >> PAGE_SHIFT;
+
+	/*
+	 * Only RMPADJUST/PVALIDATE should cause PFERR_GUEST_SIZEM.
+	 *
+	 * For PVALIDATE, this should only happen if a guest PVALIDATEs a 4K GFN
+	 * that is backed by a huge page in the host whose RMP entry has the
+	 * hugepage/assigned bits set. With UPM, that should only ever happen
+	 * for private pages.
+	 *
+	 * For RMPADJUST, this assumption might not hold, in which case handling
+	 * for obtaining the PFN from HVA-backed memory may be needed. For now,
+	 * just print warnings.
+	 */
+	if (!kvm_mem_is_private(kvm, gfn)) {
+		pr_warn_ratelimited("Unexpected RMP fault, size-mismatch for non-private GPA 0x%llx\n",
+				    gpa);
+		return;
+	}
+
+	slot = gfn_to_memslot(kvm, gfn);
+	if (!kvm_slot_can_be_private(slot)) {
+		pr_warn_ratelimited("Unexpected RMP fault, non-private slot for GPA 0x%llx\n",
+				    gpa);
+		return;
+	}
+
+	ret = kvm_gmem_get_pfn(kvm, slot, gfn, &pfn, &order);
+	if (ret) {
+		pr_warn_ratelimited("Unexpected RMP fault, no private backing page for GPA 0x%llx\n",
+				    gpa);
+		return;
+	}
+
+	ret = snp_lookup_rmpentry(pfn, &assigned, &rmp_level);
+	if (ret || !assigned) {
+		pr_warn_ratelimited("Unexpected RMP fault, no assigned RMP entry found for GPA 0x%llx PFN 0x%llx error %d\n",
+				    gpa, pfn, ret);
+		goto out;
+	}
+
+	ret = snp_rmptable_psmash(kvm, pfn);
+	if (ret)
+		pr_err_ratelimited("Unable to split RMP entries for GPA 0x%llx PFN 0x%llx ret %d\n",
+				   gpa, pfn, ret);
+
+out:
+	kvm_zap_gfn_range(kvm, gfn, gfn + PTRS_PER_PMD);
+	put_page(pfn_to_page(pfn));
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 065167b42f90..0cff050bf5bb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1995,15 +1995,28 @@ static int pf_interception(struct kvm_vcpu *vcpu)
 static int npf_interception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	int rc;
 
 	u64 fault_address = svm->vmcb->control.exit_info_2;
 	u64 error_code = svm->vmcb->control.exit_info_1;
 
 	trace_kvm_page_fault(vcpu, fault_address, error_code);
-	return kvm_mmu_page_fault(vcpu, fault_address, error_code,
-				  static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
-				  svm->vmcb->control.insn_bytes : NULL,
-				  svm->vmcb->control.insn_len);
+	rc = kvm_mmu_page_fault(vcpu, fault_address, error_code,
+				static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+				svm->vmcb->control.insn_bytes : NULL,
+				svm->vmcb->control.insn_len);
+
+	/*
+	 * rc == 0 indicates a userspace exit is needed to handle page
+	 * transitions, so do that first before updating the RMP table.
+	 */
+	if (error_code & PFERR_GUEST_RMP_MASK) {
+		if (rc == 0)
+			return rc;
+		handle_rmp_page_fault(vcpu, fault_address, error_code);
+	}
+
+	return rc;
 }
 
 static int db_interception(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 02edbdd443e4..4cf9dbc442e9 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -762,6 +762,7 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
 void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
+void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
 
 /* vmenter.S */

--
2.25.1