[PATCH v10 08/16] KVM: X86: Introduce KVM_HC_PAGE_ENC_STATUS hypercall

From: Ashish Kalra
Date: Wed Feb 03 2021 - 19:40:37 EST


From: Brijesh Singh <brijesh.singh@xxxxxxx>

This hypercall is used by the SEV guest to notify the hypervisor of a
change in the page encryption status. The hypercall should be invoked
only when the encryption attribute is changed from encrypted -> decrypted
and vice versa. By default, all guest pages are considered encrypted.

The patch introduces a new shared pages list, implemented as a sorted
linked list, to track the shared/unencrypted regions marked by the guest
through this hypercall.
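
For illustration, the guest side of this interface boils down to a single
hypercall when a range of pages is flipped to shared (decrypted). The
helper below is a hypothetical sketch and is not part of this patch; it
uses the generic kvm_hypercall3() wrapper from asm/kvm_para.h purely to
show the argument layout (an SEV guest ultimately needs a VMMCALL-based
wrapper rather than the default VMCALL):

  #include <linux/kvm_para.h>

  /* Hypothetical guest-side sketch, not part of this patch. */
  static void notify_range_shared(unsigned long paddr, unsigned long npages)
  {
          /* a0 = start GPA, a1 = number of pages, a2 = 0 (attribute cleared) */
          kvm_hypercall3(KVM_HC_PAGE_ENC_STATUS, paddr, npages, 0);
  }

Flipping the same range back to encrypted would pass 1 in a2 instead.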

Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: "Radim Krčmář" <rkrcmar@xxxxxxxxxx>
Cc: Joerg Roedel <joro@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: kvm@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Co-developed-by: Ashish Kalra <ashish.kalra@xxxxxxx>
Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
---
Documentation/virt/kvm/hypercalls.rst | 15 +++
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/kvm/svm/sev.c | 150 ++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.c | 2 +
arch/x86/kvm/svm/svm.h | 5 +
arch/x86/kvm/vmx/vmx.c | 1 +
arch/x86/kvm/x86.c | 6 ++
include/uapi/linux/kvm_para.h | 1 +
8 files changed, 182 insertions(+)

diff --git a/Documentation/virt/kvm/hypercalls.rst b/Documentation/virt/kvm/hypercalls.rst
index ed4fddd364ea..7aff0cebab7c 100644
--- a/Documentation/virt/kvm/hypercalls.rst
+++ b/Documentation/virt/kvm/hypercalls.rst
@@ -169,3 +169,18 @@ a0: destination APIC ID

:Usage example: When sending a call-function IPI-many to vCPUs, yield if
any of the IPI target vCPUs was preempted.
+
+
+8. KVM_HC_PAGE_ENC_STATUS
+-------------------------
+:Architecture: x86
+:Status: active
+:Purpose: Notify the hypervisor of page encryption status changes in the guest page table (SEV guest)
+
+a0: the guest physical address of the start page
+a1: the number of pages
+a2: encryption attribute
+
+ Where:
+ * 1: Encryption attribute is set
+ * 0: Encryption attribute is cleared
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3d6616f6f6ef..2da5f5e2a10e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1301,6 +1301,8 @@ struct kvm_x86_ops {
int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);

void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
+ int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
+ unsigned long sz, unsigned long mode);
};

struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 25eaf35ba51d..55c628df5155 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -45,6 +45,11 @@ struct enc_region {
unsigned long size;
};

+struct shared_region {
+ struct list_head list;
+ unsigned long gfn_start, gfn_end;
+};
+
static int sev_flush_asids(void)
{
int ret, error = 0;
@@ -196,6 +201,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
sev->active = true;
sev->asid = asid;
INIT_LIST_HEAD(&sev->regions_list);
+ INIT_LIST_HEAD(&sev->shared_pages_list);
+ sev->shared_pages_list_count = 0;

return 0;

@@ -1473,6 +1480,148 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
return ret;
}

+static int remove_shared_region(unsigned long start, unsigned long end,
+ struct list_head *head)
+{
+ struct shared_region *pos;
+
+ list_for_each_entry(pos, head, list) {
+ if (pos->gfn_start == start &&
+ pos->gfn_end == end) {
+ list_del(&pos->list);
+ kfree(pos);
+ return -1;
+ } else if (start >= pos->gfn_start && end <= pos->gfn_end) {
+ if (start == pos->gfn_start)
+ pos->gfn_start = end + 1;
+ else if (end == pos->gfn_end)
+ pos->gfn_end = start - 1;
+ else {
+ /* Do a de-merge -- split linked list nodes */
+ unsigned long tmp;
+ struct shared_region *shrd_region;
+
+ tmp = pos->gfn_end;
+ pos->gfn_end = start - 1;
+ shrd_region = kzalloc(sizeof(*shrd_region), GFP_KERNEL_ACCOUNT);
+ if (!shrd_region)
+ return -ENOMEM;
+ shrd_region->gfn_start = end + 1;
+ shrd_region->gfn_end = tmp;
+ list_add(&shrd_region->list, &pos->list);
+ return 1;
+ }
+ return 0;
+ }
+ }
+ return 0;
+}
+
+static int add_shared_region(unsigned long start, unsigned long end,
+ struct list_head *shared_pages_list)
+{
+ struct list_head *head = shared_pages_list;
+ struct shared_region *shrd_region;
+ struct shared_region *pos;
+
+ if (list_empty(head)) {
+ shrd_region = kzalloc(sizeof(*shrd_region), GFP_KERNEL_ACCOUNT);
+ if (!shrd_region)
+ return -ENOMEM;
+ shrd_region->gfn_start = start;
+ shrd_region->gfn_end = end;
+ list_add_tail(&shrd_region->list, head);
+ return 1;
+ }
+
+ /*
+ * The shared pages list is kept sorted in ascending order of
+ * guest PAs, and consecutive guest PA ranges are merged.
+ */
+ list_for_each_entry(pos, head, list) {
+ if (pos->gfn_end < start)
+ continue;
+ /* merge consecutive guest PA(s) */
+ if (pos->gfn_start <= start && pos->gfn_end >= start) {
+ pos->gfn_end = end;
+ return 0;
+ }
+ break;
+ }
+ /*
+ * Add a new node. Nodes are allocated with GFP_KERNEL_ACCOUNT so
+ * that this kernel memory is tracked/throttled in case a malicious
+ * guest issues an unbounded number of hypercalls to exhaust host
+ * kernel memory and mount a DoS attack.
+ */
+ shrd_region = kzalloc(sizeof(*shrd_region), GFP_KERNEL_ACCOUNT);
+ if (!shrd_region)
+ return -ENOMEM;
+ shrd_region->gfn_start = start;
+ shrd_region->gfn_end = end;
+ list_add_tail(&shrd_region->list, &pos->list);
+ return 1;
+}
+
+int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
+ unsigned long npages, unsigned long enc)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ kvm_pfn_t pfn_start, pfn_end;
+ gfn_t gfn_start, gfn_end;
+ int ret = 0;
+
+ if (!sev_guest(kvm))
+ return -EINVAL;
+
+ if (!npages)
+ return 0;
+
+ gfn_start = gpa_to_gfn(gpa);
+ gfn_end = gfn_start + npages;
+
+ /* Reject a GFN range that wraps around (overflow check) */
+ if (gfn_end <= gfn_start)
+ return -EINVAL;
+
+ /* Make sure the boundary GPAs are backed by a memslot (or are MMIO) */
+ pfn_start = gfn_to_pfn(kvm, gfn_start);
+ pfn_end = gfn_to_pfn(kvm, gfn_end);
+
+ if (is_error_noslot_pfn(pfn_start) && !is_noslot_pfn(pfn_start)) {
+ /*
+ * Reject real translation errors for the start of the range;
+ * a pfn without a memslot (guest MMIO) is still allowed to be
+ * added to the shared pages list.
+ */
+ return -EINVAL;
+ }
+
+ if (is_error_noslot_pfn(pfn_end) && !is_noslot_pfn(pfn_end)) {
+ /*
+ * Likewise for the end of the range: reject real translation
+ * errors, but allow guest MMIO ranges to be added to the
+ * shared pages list.
+ */
+ return -EINVAL;
+ }
+
+ mutex_lock(&kvm->lock);
+
+ if (enc) {
+ ret = remove_shared_region(gfn_start, gfn_end,
+ &sev->shared_pages_list);
+ if (ret != -ENOMEM)
+ sev->shared_pages_list_count += ret;
+ } else {
+ ret = add_shared_region(gfn_start, gfn_end,
+ &sev->shared_pages_list);
+ if (ret > 0)
+ sev->shared_pages_list_count++;
+ }
+
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@@ -1693,6 +1842,7 @@ void sev_vm_destroy(struct kvm *kvm)

sev_unbind_asid(kvm, sev->handle);
sev_asid_free(sev->asid);
+ sev->shared_pages_list_count = 0;
}

void __init sev_hardware_setup(void)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f923e14e87df..bb249ec625fc 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4536,6 +4536,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.complete_emulated_msr = svm_complete_emulated_msr,

.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
+
+ .page_enc_status_hc = svm_page_enc_status_hc,
};

static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 0fe874ae5498..6437c1fa1f24 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -79,6 +79,9 @@ struct kvm_sev_info {
unsigned long pages_locked; /* Number of pages locked */
struct list_head regions_list; /* List of registered regions */
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
+ /* List and count of shared pages */
+ int shared_pages_list_count;
+ struct list_head shared_pages_list;
};

struct kvm_svm {
@@ -472,6 +475,8 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
void sync_nested_vmcb_control(struct vcpu_svm *svm);
+int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
+ unsigned long npages, unsigned long enc);

extern struct kvm_x86_nested_ops svm_nested_ops;

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cc60b1fc3ee7..bcbf53851612 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7705,6 +7705,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.can_emulate_instruction = vmx_can_emulate_instruction,
.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
.migrate_timers = vmx_migrate_timers,
+ .page_enc_status_hc = NULL,

.msr_filter_changed = vmx_msr_filter_changed,
.complete_emulated_msr = kvm_complete_insn_gp,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 76bce832cade..2f17f0f9ace7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8162,6 +8162,12 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
kvm_sched_yield(vcpu->kvm, a0);
ret = 0;
break;
+ case KVM_HC_PAGE_ENC_STATUS:
+ ret = -KVM_ENOSYS;
+ if (kvm_x86_ops.page_enc_status_hc)
+ ret = kvm_x86_ops.page_enc_status_hc(vcpu->kvm,
+ a0, a1, a2);
+ break;
default:
ret = -KVM_ENOSYS;
break;
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 8b86609849b9..847b83b75dc8 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -29,6 +29,7 @@
#define KVM_HC_CLOCK_PAIRING 9
#define KVM_HC_SEND_IPI 10
#define KVM_HC_SCHED_YIELD 11
+#define KVM_HC_PAGE_ENC_STATUS 12

/*
* hypercalls use architecture specific
--
2.17.1