[RFC PATCH 34/47] kvm: asi: Unmap guest memory from ASI address space when using nested virt

From: Junaid Shahid
Date: Wed Feb 23 2022 - 00:28:26 EST


L1 guest memory as a whole cannot be considered non-sensitive when an
L2 is running. Even if L1 is using its own mitigations, L2 VM Exits
could, in theory, bring into the cache some sensitive L1 memory without
L1 getting a chance to flush it.

For simplicity, we just unmap the entire L1 memory from the ASI
restricted address space when nested virtualization is turned on.
However, this unmapping is skipped if the
treat_all_userspace_as_nonsensitive flag is enabled.

In the future, we could potentially map some portions of L1 memory
which are known to be non-sensitive, which would reduce ASI overhead
during nested virtualization.

Note that unmapping the guest memory still leaves a slight hole because
L2 could also potentially access copies of L1 VCPU registers stored in
L0 kernel structures. In the future, this could be mitigated by having
a separate ASI address space for each VCPU and treating the associated
structures as locally non-sensitive only within that VCPU's ASI address
space.

Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>


---
arch/x86/include/asm/kvm_host.h | 6 ++++++
arch/x86/kvm/mmu/mmu.c | 10 ++++++++++
arch/x86/kvm/vmx/nested.c | 22 ++++++++++++++++++++++
3 files changed, 38 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e63a2f244d7b..8ba88bbcf895 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1200,6 +1200,12 @@ struct kvm_arch {
*/
struct list_head tdp_mmu_pages;

+ /*
+ * Number of VCPUs that have enabled nested virtualization.
+ * Currently only maintained when ASI is enabled.
+ */
+ int nested_virt_enabled_count;
+
/*
* Protects accesses to the following fields when the MMU lock
* is held in read mode:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 485c0ba3ce8b..5785a0d02558 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -94,6 +94,7 @@ module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644);
#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
bool __ro_after_init treat_all_userspace_as_nonsensitive;
module_param(treat_all_userspace_as_nonsensitive, bool, 0444);
+EXPORT_SYMBOL_GPL(treat_all_userspace_as_nonsensitive);
#endif

/*
@@ -2769,6 +2770,15 @@ static void asi_map_gfn_range(struct kvm_vcpu *vcpu,
int err;
size_t hva = __gfn_to_hva_memslot(slot, gfn);

+ /*
+ * For now, we just don't map any guest memory when using nested
+ * virtualization. In the future, we could potentially map some
+ * portions of guest memory which are known to contain only memory
+ * which would be considered non-sensitive.
+ */
+ if (vcpu->kvm->arch.nested_virt_enabled_count)
+ return;
+
err = asi_map_user(vcpu->kvm->asi, (void *)hva, PAGE_SIZE * npages,
&vcpu->arch.asi_pgtbl_pool, slot->userspace_addr,
slot->userspace_addr + slot->npages * PAGE_SIZE);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 9c941535f78c..0a0092e4102d 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -318,6 +318,14 @@ static void free_nested(struct kvm_vcpu *vcpu)
nested_release_evmcs(vcpu);

free_loaded_vmcs(&vmx->nested.vmcs02);
+
+ if (cpu_feature_enabled(X86_FEATURE_ASI) &&
+ !treat_all_userspace_as_nonsensitive) {
+ write_lock(&vcpu->kvm->mmu_lock);
+ WARN_ON(vcpu->kvm->arch.nested_virt_enabled_count <= 0);
+ vcpu->kvm->arch.nested_virt_enabled_count--;
+ write_unlock(&vcpu->kvm->mmu_lock);
+ }
}

/*
@@ -4876,6 +4884,20 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
pt_update_intercept_for_msr(vcpu);
}

+ if (cpu_feature_enabled(X86_FEATURE_ASI) &&
+ !treat_all_userspace_as_nonsensitive) {
+ /*
+ * We do the increment under the MMU lock in order to prevent
+ * it from happening concurrently with asi_map_gfn_range().
+ */
+ write_lock(&vcpu->kvm->mmu_lock);
+ WARN_ON(vcpu->kvm->arch.nested_virt_enabled_count < 0);
+ vcpu->kvm->arch.nested_virt_enabled_count++;
+ write_unlock(&vcpu->kvm->mmu_lock);
+
+ asi_unmap_user(vcpu->kvm->asi, 0, TASK_SIZE_MAX);
+ }
+
return 0;

out_shadow_vmcs:
--
2.35.1.473.g83b2b277ed-goog