[PATCH v12 11/29] KVM: SEV: Add KVM_SEV_SNP_LAUNCH_UPDATE command

From: Michael Roth
Date: Fri Mar 29 2024 - 19:00:43 EST


From: Brijesh Singh <brijesh.singh@xxxxxxx>

A key aspect of launching an SNP guest is initializing it with a
known/measured payload which is then encrypted into guest memory as
pre-validated private pages and then measured into the cryptographic
launch context created with KVM_SEV_SNP_LAUNCH_START so that the guest
can attest itself after booting.

Since all private pages are provided by guest_memfd, make use of the
kvm_gmem_populate() interface to handle this. The general flow is that
guest_memfd will handle allocating the pages associated with the GPA
ranges being initialized by each particular call of
KVM_SEV_SNP_LAUNCH_UPDATE, copying data from userspace into those pages,
and then the post_populate callback will do the work of setting the
RMP entries for these pages to private and issuing the SNP firmware
calls to encrypt/measure them.

For more information see the SEV-SNP specification.

Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Co-developed-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
---
.../virt/kvm/x86/amd-memory-encryption.rst | 39 ++++
arch/x86/include/uapi/asm/kvm.h | 15 ++
arch/x86/kvm/svm/sev.c | 211 ++++++++++++++++++
3 files changed, 265 insertions(+)

diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
index a10b817c162d..4268aa5c380e 100644
--- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
@@ -478,6 +478,45 @@ Returns: 0 on success, -negative on error

See the SEV-SNP spec [snp-fw-abi]_ for further detail on the launch input.

+19. KVM_SEV_SNP_LAUNCH_UPDATE
+-----------------------------
+
+The KVM_SEV_SNP_LAUNCH_UPDATE command is used for loading userspace-provided
+data into a guest GPA range, measuring the contents into the SNP guest context
+created by KVM_SEV_SNP_LAUNCH_START, and then encrypting/validating that GPA
+range so that it will be immediately readable using the encryption key
+associated with the guest context once the guest is booted. From that point
+on, the guest can attest the measurement associated with its context before
+unlocking any secrets.
+
+It is required that the GPA ranges initialized by this command have had the
+KVM_MEMORY_ATTRIBUTE_PRIVATE attribute set in advance. See the documentation
+for KVM_SET_MEMORY_ATTRIBUTES for more details on this aspect.
+
+Parameters (in): struct kvm_sev_snp_launch_update
+
+Returns: 0 on success, -negative on error
+
+::
+
+ struct kvm_sev_snp_launch_update {
+ __u64 gfn_start; /* Guest page number to load/encrypt data into. */
+ __u64 uaddr; /* Userspace address of data to be loaded/encrypted. */
+ __u32 len; /* 4k-aligned length in bytes to copy into guest memory. */
+ __u8 type; /* The type of the guest pages being initialized. */
+ };
+
+where the allowed values for the ``type`` field are #define'd as::
+
+ KVM_SEV_SNP_PAGE_TYPE_NORMAL
+ KVM_SEV_SNP_PAGE_TYPE_ZERO
+ KVM_SEV_SNP_PAGE_TYPE_UNMEASURED
+ KVM_SEV_SNP_PAGE_TYPE_SECRETS
+ KVM_SEV_SNP_PAGE_TYPE_CPUID
+
+See the SEV-SNP spec [snp-fw-abi]_ for further details on how each page type is
+used/measured.
+
Device attribute API
====================

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 350ddd5264ea..956eb548c08e 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -695,6 +695,7 @@ enum sev_cmd_id {

/* SNP-specific commands */
KVM_SEV_SNP_LAUNCH_START,
+ KVM_SEV_SNP_LAUNCH_UPDATE,

KVM_SEV_NR_MAX,
};
@@ -826,6 +827,20 @@ struct kvm_sev_snp_launch_start {
__u8 gosvw[16];
};

+/*
+ * Page types accepted in kvm_sev_snp_launch_update.type.
+ * Kept in sync with firmware values for simplicity.
+ */
+#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
+#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
+#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
+#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5
+#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6
+
+struct kvm_sev_snp_launch_update {
+ __u64 gfn_start; /* Guest page number to load/encrypt data into. */
+ __u64 uaddr; /* Userspace address of data to be loaded/encrypted. */
+ __u32 len; /* 4k-aligned length in bytes to copy into guest memory. */
+ __u8 type; /* One of the KVM_SEV_SNP_PAGE_TYPE_* values. */
+};
+
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6c7c77e33e62..a8a8a285b4a4 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -247,6 +247,35 @@ static void sev_decommission(unsigned int handle)
sev_guest_decommission(&decommission, NULL);
}

+/*
+ * Issue SNP_PAGE_RECLAIM to firmware for the single page at @pfn.
+ *
+ * Returns the result of sev_do_cmd(). A non-zero result triggers a one-time
+ * warning and the page is handed to snp_leak_pages(), since a page that could
+ * not be reclaimed is no longer safe to use.
+ */
+static int snp_page_reclaim(u64 pfn)
+{
+	struct sev_data_snp_page_reclaim reclaim = {0};
+	int fw_err;
+	int rc;
+
+	reclaim.paddr = __sme_set(pfn << PAGE_SHIFT);
+
+	rc = sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &reclaim, &fw_err);
+	if (!WARN_ON_ONCE(rc))
+		return rc;
+
+	/* Not expected under normal circumstances; give up on the page. */
+	snp_leak_pages(pfn, 1);
+
+	return rc;
+}
+
+/*
+ * Switch the RMP entry covering @pfn at page-table level @level back to the
+ * shared state via rmp_make_shared().
+ *
+ * When the conversion fails and @leak is true, every 4K page spanned by
+ * @level is passed to snp_leak_pages(). The rmp_make_shared() result is
+ * returned unchanged in all cases.
+ */
+static int host_rmp_make_shared(u64 pfn, enum pg_level level, bool leak)
+{
+	int rc = rmp_make_shared(pfn, level);
+
+	if (!rc || !leak)
+		return rc;
+
+	snp_leak_pages(pfn, page_level_size(level) >> PAGE_SHIFT);
+
+	return rc;
+}
+
static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
{
struct sev_data_deactivate deactivate;
@@ -2075,6 +2104,185 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
return rc;
}

+struct sev_gmem_populate_args {
+ __u8 type; /* KVM_SEV_SNP_PAGE_TYPE_* value, forwarded to firmware as page_type */
+ int sev_fd; /* fd handed to __sev_issue_cmd(), taken from kvm_sev_cmd.sev_fd */
+ int fw_error; /* error slot passed to __sev_issue_cmd(); copied to argp->error on failure */
+};
+
+/*
+ * Post-populate callback handed to kvm_gmem_populate() by
+ * snp_launch_update().
+ *
+ * For each of the (1 << @order) 4K pages starting at @gfn_start / @pfn:
+ *   1) require that the GFN has the private memory attribute set,
+ *   2) require that the PFN is not already assigned in the RMP table,
+ *   3) convert the PFN to a private page owned by this guest's ASID,
+ *   4) issue SNP_LAUNCH_UPDATE so firmware processes the page according to
+ *      the requested page type.
+ *
+ * @src is the userspace source buffer; it is only written to here, to hand
+ * back the CPUID page contents after a firmware CPUID validation failure.
+ * @opaque points to a struct sev_gmem_populate_args supplying the page type
+ * and SEV fd and receiving the firmware error code.
+ *
+ * Returns 0 if every page was processed. On any failure a non-zero error is
+ * returned and every page this call converted to private is switched back to
+ * shared (or leaked if that is not possible).
+ */
+static int sev_gmem_post_populate(struct kvm *kvm, struct kvm_memory_slot *slot,
+				  gfn_t gfn_start, kvm_pfn_t pfn, void __user *src,
+				  int order, void *opaque)
+{
+	struct sev_gmem_populate_args *sev_populate_args = opaque;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	int npages = (1 << order);
+	int n_private = 0;
+	int ret, i;
+	gfn_t gfn;
+
+	pr_debug("%s: gfn_start %llx pfn_start %llx npages %d\n",
+		 __func__, gfn_start, pfn, npages);
+
+	for (gfn = gfn_start, i = 0; gfn < gfn_start + npages; gfn++, i++) {
+		struct sev_data_snp_launch_update fw_args = {0};
+		bool assigned;
+		int level;
+
+		if (!kvm_mem_is_private(kvm, gfn)) {
+			pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n",
+				 __func__, gfn);
+			ret = -EINVAL;
+			goto err;
+		}
+
+		ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level);
+		if (ret || assigned) {
+			pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n",
+				 __func__, gfn, ret, assigned);
+			/*
+			 * A successful lookup of an already-assigned entry
+			 * leaves ret == 0. Turn that into an error so the
+			 * caller is not told the populate succeeded.
+			 */
+			if (!ret)
+				ret = -EEXIST;
+			goto err;
+		}
+
+		ret = rmp_make_private(pfn + i, gfn << PAGE_SHIFT, PG_LEVEL_4K,
+				       sev_get_asid(kvm), true);
+		if (ret) {
+			pr_debug("%s: Failed to convert GFN 0x%llx to private, ret: %d\n",
+				 __func__, gfn, ret);
+			goto err;
+		}
+
+		n_private++;
+
+		fw_args.gctx_paddr = __psp_pa(sev->snp_context);
+		fw_args.address = __sme_set(pfn_to_hpa(pfn + i));
+		fw_args.page_size = PG_LEVEL_TO_RMP(PG_LEVEL_4K);
+		fw_args.page_type = sev_populate_args->type;
+		ret = __sev_issue_cmd(sev_populate_args->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE,
+				      &fw_args, &sev_populate_args->fw_error);
+		if (ret)
+			goto fw_err;
+	}
+
+	return 0;
+
+fw_err:
+	pr_debug("%s: SEV-SNP launch update failed, ret: 0x%x, fw_error: 0x%x\n",
+		 __func__, ret, sev_populate_args->fw_error);
+
+	/*
+	 * The page the firmware command failed on (pfn + i) is fully dealt
+	 * with here, so take it out of the rollback below. Otherwise a page
+	 * that was already leaked by a failed reclaim, or already made
+	 * shared, would be processed a second time.
+	 */
+	n_private--;
+
+	if (!snp_page_reclaim(pfn + i)) {
+		host_rmp_make_shared(pfn + i, PG_LEVEL_4K, true);
+
+		/*
+		 * When invalid CPUID function entries are detected, firmware
+		 * writes the expected values into the page and leaves it
+		 * unencrypted so it can be used for debugging and
+		 * error-reporting.
+		 *
+		 * Copy this page back into the source buffer so userspace can
+		 * report which CPUID leaves/fields failed CPUID validation.
+		 */
+		if (sev_populate_args->type == KVM_SEV_SNP_PAGE_TYPE_CPUID &&
+		    sev_populate_args->fw_error == SEV_RET_INVALID_PARAM) {
+			void *vaddr = kmap_local_pfn(pfn + i);
+
+			if (copy_to_user(src + i * PAGE_SIZE, vaddr, PAGE_SIZE))
+				pr_debug("Failed to write CPUID page back to userspace\n");
+
+			kunmap_local(vaddr);
+		}
+	}
+
+err:
+	pr_debug("%s: exiting with error ret %d, undoing %d populated gmem pages.\n",
+		 __func__, ret, n_private);
+	for (i = 0; i < n_private; i++)
+		host_rmp_make_shared(pfn + i, PG_LEVEL_4K, true);
+
+	return ret;
+}
+
+/*
+ * Handler for the KVM_SEV_SNP_LAUNCH_UPDATE command.
+ *
+ * Copies a struct kvm_sev_snp_launch_update from userspace, validates its
+ * length alignment and page type, and then asks kvm_gmem_populate() to fill
+ * the requested GFN range, with sev_gmem_post_populate() as the per-range
+ * callback.
+ *
+ * Returns 0 on success, -EINVAL for a non-SNP guest, a missing SNP context,
+ * bad parameters or an unsuitable memslot, -EFAULT if the parameters cannot
+ * be read, or whatever kvm_gmem_populate() returns. On a populate failure the
+ * firmware error code is stored in argp->error.
+ */
+static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	struct sev_gmem_populate_args snp_args = {0};
+	struct kvm_gmem_populate_args gmem_args = {0};
+	struct kvm_sev_snp_launch_update params;
+	struct kvm_memory_slot *slot;
+	unsigned int nr_pages;
+	int ret;
+
+	if (!sev_snp_guest(kvm) || !sev->snp_context)
+		return -EINVAL;
+
+	if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
+		return -EFAULT;
+
+	if (!IS_ALIGNED(params.len, PAGE_SIZE))
+		return -EINVAL;
+
+	switch (params.type) {
+	case KVM_SEV_SNP_PAGE_TYPE_NORMAL:
+	case KVM_SEV_SNP_PAGE_TYPE_ZERO:
+	case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED:
+	case KVM_SEV_SNP_PAGE_TYPE_SECRETS:
+	case KVM_SEV_SNP_PAGE_TYPE_CPUID:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	nr_pages = params.len / PAGE_SIZE;
+
+	pr_debug("%s: GFN range 0x%llx-0x%llx type %d\n", __func__,
+		 params.gfn_start, params.gfn_start + nr_pages, params.type);
+
+	/*
+	 * Every GFN prepared as part of the initial guest state must satisfy
+	 * these pre-conditions:
+	 *
+	 *   1) The backing memslot is a valid private memslot.
+	 *   2) The GFN was set to private via KVM_SET_MEMORY_ATTRIBUTES
+	 *      beforehand.
+	 *   3) The guest_memfd PFN has not already been set to private in the
+	 *      RMP table.
+	 *
+	 * The KVM MMU uses kvm->mmu_invalidate_seq to retry nested page faults
+	 * that race with an attribute update via KVM_SET_MEMORY_ATTRIBUTES, and
+	 * the same scheme could be used here. kvm->slots_lock, however, guards
+	 * against that race as well as against concurrent memslot updates
+	 * while the checks run, so take it instead: it makes the expected
+	 * initial state easier to reason about and protects better against
+	 * unexpected situations.
+	 */
+	mutex_lock(&kvm->slots_lock);
+
+	slot = gfn_to_memslot(kvm, params.gfn_start);
+	if (!kvm_slot_can_be_private(slot)) {
+		mutex_unlock(&kvm->slots_lock);
+		return -EINVAL;
+	}
+
+	snp_args.type = params.type;
+	snp_args.sev_fd = argp->sev_fd;
+
+	gmem_args.gfn = params.gfn_start;
+	gmem_args.npages = nr_pages;
+	gmem_args.src = u64_to_user_ptr(params.uaddr);
+	/* Zero pages carry no payload, so there is nothing to copy in. */
+	gmem_args.do_memcpy = params.type != KVM_SEV_SNP_PAGE_TYPE_ZERO;
+	gmem_args.post_populate = sev_gmem_post_populate;
+	gmem_args.opaque = &snp_args;
+
+	ret = kvm_gmem_populate(kvm, slot, &gmem_args);
+	if (ret) {
+		argp->error = snp_args.fw_error;
+		pr_debug("%s: kvm_gmem_populate failed, ret %d\n", __func__, ret);
+	}
+
+	mutex_unlock(&kvm->slots_lock);
+
+	return ret;
+}
+
int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@@ -2165,6 +2373,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
case KVM_SEV_SNP_LAUNCH_START:
r = snp_launch_start(kvm, &sev_cmd);
break;
+ case KVM_SEV_SNP_LAUNCH_UPDATE:
+ r = snp_launch_update(kvm, &sev_cmd);
+ break;
default:
r = -EINVAL;
goto out;
--
2.25.1