[PATCH 8/8] RFC: KVM: gmem: Guarantee the order of destruction

From: isaku.yamahata
Date: Tue Aug 15 2023 - 13:20:09 EST


From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

Call kvm_flush_shadow_all() before releasing the kvm gmem file on guest
destruction.

The current gmem implementation doesn't guarantee the destruction order
between kvm gmem and kvm_mmu_notifier_release(), which calls
kvm_flush_shadow_all(). When destroying a TD guest, it is more efficient to
call kvm_flush_shadow_all() before kvm_gmem_issue_arch_invalidate() is called
when releasing its struct file, because kvm_flush_shadow_all() releases the
guest's host key ID (HKID). Once the HKID is released, the TDX module no
longer has to validate the consistency of the Secure-EPT structure.

One way to guarantee the order would be to make struct kvm reference the kvm
gmem file. However, the current gmem implementation makes the kvm gmem file
reference struct kvm, so a reference from struct kvm back to the kvm gmem
file would create a circular reference. Use kvm_mmu_notifier_release() to
break the cycle.
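
As a rough model of why the notifier callback is needed, here is a
self-contained sketch with hypothetical names (struct vm, struct gmem_file,
vm_put(), file_put(), mmu_notifier_release()), not KVM's actual data
structures. The release callback runs when the process's mm goes away
regardless of refcounts, so it can drop the vm -> file reference and let both
objects reach zero.

#include <stdio.h>
#include <stdlib.h>

struct vm;

/* gmem file: holds a reference to the VM (what gmem already does). */
struct gmem_file {
        int refcount;
        struct vm *vm;
};

/* VM: this patch adds a reference from the VM to the gmem file. */
struct vm {
        int refcount;
        struct gmem_file *file;
};

static void vm_put(struct vm *vm)
{
        if (--vm->refcount == 0) {
                printf("vm freed\n");
                free(vm);
        }
}

/* Models fput()/kvm_gmem_release(): dropping the last file reference drops
 * the file's reference on the VM. */
static void file_put(struct gmem_file *f)
{
        if (--f->refcount == 0) {
                printf("gmem file released\n");
                vm_put(f->vm);
                free(f);
        }
}

/* Models kvm_mmu_notifier_release(): it runs when the mm goes away,
 * independent of the file refcount, so it can break the
 * vm -> file -> vm cycle. */
static void mmu_notifier_release(struct vm *vm)
{
        if (vm->file) {
                struct gmem_file *f = vm->file;

                vm->file = NULL;
                file_put(f);
        }
}

int main(void)
{
        struct vm *vm = calloc(1, sizeof(*vm));
        struct gmem_file *f = calloc(1, sizeof(*f));

        f->vm = vm;
        vm->refcount = 1;       /* held by the gmem file */
        vm->file = f;
        f->refcount = 2;        /* one for the user's fd, one for the VM */

        file_put(f);            /* userspace closes its fd: file survives */

        /* Without this, vm and f would keep each other alive forever. */
        mmu_notifier_release(vm);
        return 0;
}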

Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
---
 include/linux/kvm_host.h | 24 ++++++++++++++++++++++++
 virt/kvm/kvm_main.c      | 23 ++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 349b0bf81fa5..d717945702a8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -594,6 +594,10 @@ struct kvm_memory_slot {
u16 as_id;

#ifdef CONFIG_KVM_PRIVATE_MEM
+#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
+ struct file *file;
+#endif
+ /* Private guest_mem */
struct {
struct file __rcu *file;
pgoff_t pgoff;
@@ -601,6 +605,26 @@ struct kvm_memory_slot {
#endif
};

+static inline int kvm_memslot_gmem_fget(struct kvm_memory_slot *memslot, int fd)
+{
+#if defined(CONFIG_KVM_PRIVATE_MEM) && defined(CONFIG_KVM_GENERIC_MMU_NOTIFIER)
+ memslot->file = fget(fd);
+ if (!memslot->file)
+ return -EBADF;
+#endif
+ return 0;
+}
+
+static inline void kvm_memslot_gmem_fput(struct kvm_memory_slot *memslot)
+{
+#if defined(CONFIG_KVM_PRIVATE_MEM) && defined(CONFIG_KVM_GENERIC_MMU_NOTIFIER)
+ if (memslot->file) {
+ fput(memslot->file);
+ memslot->file = NULL;
+ }
+#endif
+}
+
static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
{
return slot && (slot->flags & KVM_MEM_PRIVATE);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4855d0b7a859..35bc3b64b7e4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -926,6 +926,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int idx;
+ int i;

/*
* Avoid race with kvm_gmem_release().
@@ -936,6 +937,18 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
idx = srcu_read_lock(&kvm->srcu);
kvm_flush_shadow_all(kvm);
srcu_read_unlock(&kvm->srcu, idx);
+
+ /* Break circular reference count: kvm->gmem, gmem->kvm. */
+ for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
+ struct kvm_memslots *slots = __kvm_memslots(kvm, i);
+ struct kvm_memory_slot *memslot;
+ struct hlist_node *tmp;
+ int bkt;
+
+ hash_for_each_safe(slots->id_hash, bkt, tmp, memslot, id_node[slots->node_idx])
+ kvm_memslot_gmem_fput(memslot);
+ }
+
mutex_unlock(&kvm->slots_lock);
}

@@ -1008,8 +1021,10 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
/* This does not remove the slot from struct kvm_memslots data structures */
static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
- if (slot->flags & KVM_MEM_PRIVATE)
+ if (slot->flags & KVM_MEM_PRIVATE) {
kvm_gmem_unbind(slot);
+ kvm_memslot_gmem_fput(slot);
+ }

kvm_destroy_dirty_bitmap(slot);

@@ -1734,6 +1749,8 @@ static void kvm_commit_memory_region(struct kvm *kvm,
if (old->dirty_bitmap && !new->dirty_bitmap)
kvm_destroy_dirty_bitmap(old);

+ kvm_memslot_gmem_fput(old);
+
/*
* The final quirk. Free the detached, old slot, but only its
* memory, not any metadata. Metadata, including arch specific
@@ -2088,6 +2105,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
new->flags = mem->flags;
new->userspace_addr = mem->userspace_addr;
if (mem->flags & KVM_MEM_PRIVATE) {
+ r = kvm_memslot_gmem_fget(new, mem->gmem_fd);
+ if (r)
+ goto out;
r = kvm_gmem_bind(kvm, new, mem->gmem_fd, mem->gmem_offset);
if (r)
goto out;
@@ -2103,6 +2123,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (mem->flags & KVM_MEM_PRIVATE)
kvm_gmem_unbind(new);
out:
+ kvm_memslot_gmem_fput(new);
kfree(new);
return r;
}
--
2.25.1