[RFC PATCH v3 11/11] KVM: x86: Add gmem hook for invalidating private memory

From: isaku . yamahata
Date: Wed Jun 28 2023 - 18:45:26 EST


From: Michael Roth <michael.roth@xxxxxxx>

TODO: add a CONFIG option that can be used to completely skip the arch
invalidation loop and avoid __weak references for arch/platforms that
don't need an additional invalidation hook.

In some cases, like with SEV-SNP, guest memory needs to be updated in a
platform-specific manner before it can be safely freed back to the host.
Add hooks to wire up handling of this sort when freeing memory in
response to FALLOC_FL_PUNCH_HOLE operations.

Also issue invalidations of all allocated pages when releasing the gmem
file so that the pages are not left in an unusable state when they get
freed back to the host.

Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
Link: https://lore.kernel.org/r/20230612042559.375660-3-michael.roth@xxxxxxx

---
Changes v2 -> v3:
- Newly added
---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/x86.c | 6 ++++
include/linux/kvm_host.h | 3 ++
virt/kvm/guest_mem.c | 48 ++++++++++++++++++++++++++++--
5 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index cce8621e3216..a864a2093002 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -136,6 +136,7 @@ KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
+KVM_X86_OP_OPTIONAL(gmem_invalidate)

#undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 653f208979cf..a91d17fa2fe8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1741,6 +1741,7 @@ struct kvm_x86_ops {

int (*gmem_prepare)(struct kvm *kvm, struct kvm_memory_slot *slot,
kvm_pfn_t pfn, gfn_t gfn, u8 *max_level);
+ void (*gmem_invalidate)(struct kvm *kvm, kvm_pfn_t start, kvm_pfn_t end);
};

struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b5f865f39a00..e722ace8150d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13260,6 +13260,12 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);

+#ifdef CONFIG_KVM_PRIVATE_MEM
+void kvm_arch_gmem_invalidate(struct kvm *kvm, kvm_pfn_t start, kvm_pfn_t end)
+{
+ static_call_cond(kvm_x86_gmem_invalidate)(kvm, start, end);
+}
+#endif

int kvm_spec_ctrl_test_value(u64 value)
{
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5ca0c8ee4292..cfd98572d8be 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2346,6 +2346,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
#ifdef CONFIG_KVM_PRIVATE_MEM
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn, int *order);
+void kvm_arch_gmem_invalidate(struct kvm *kvm, kvm_pfn_t start, kvm_pfn_t end);
#else
static inline int kvm_gmem_get_pfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,
@@ -2354,6 +2355,8 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
KVM_BUG_ON(1, kvm);
return -EIO;
}
+
+static inline void kvm_arch_gmem_invalidate(struct kvm *kvm, kvm_pfn_t start, kvm_pfn_t end) { }
#endif /* CONFIG_KVM_PRIVATE_MEM */

#endif
diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
index 63ac006db7ee..5b8e11760d28 100644
--- a/virt/kvm/guest_mem.c
+++ b/virt/kvm/guest_mem.c
@@ -142,16 +142,58 @@ static void kvm_gmem_invalidate_end(struct kvm *kvm, struct kvm_gmem *gmem,
KVM_MMU_UNLOCK(kvm);
}

+void __weak kvm_arch_gmem_invalidate(struct kvm *kvm, kvm_pfn_t start, kvm_pfn_t end)
+{
+}
+
+/* Handle arch-specific hooks needed before releasing guarded pages. */
+static void kvm_gmem_issue_arch_invalidate(struct kvm *kvm, struct file *file,
+ pgoff_t start, pgoff_t end)
+{
+ pgoff_t file_end = i_size_read(file_inode(file)) >> PAGE_SHIFT;
+ pgoff_t index = start;
+
+ end = min(end, file_end);
+
+ while (index < end) {
+ struct folio *folio;
+ unsigned int order;
+ struct page *page;
+ kvm_pfn_t pfn;
+
+ folio = __filemap_get_folio(file->f_mapping, index,
+ FGP_LOCK, 0);
+ if (!folio) {
+ index++;
+ continue;
+ }
+
+ page = folio_file_page(folio, index);
+ pfn = page_to_pfn(page);
+ order = folio_order(folio);
+
+ kvm_arch_gmem_invalidate(kvm, pfn, pfn + min((1ul << order), end - index));
+
+ index = folio_next_index(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+
+ cond_resched();
+ }
+}
+
static long kvm_gmem_punch_hole(struct file *file, loff_t offset, loff_t len)
{
struct kvm_gmem *gmem = file->private_data;
- pgoff_t start = offset >> PAGE_SHIFT;
- pgoff_t end = (offset + len) >> PAGE_SHIFT;
struct kvm *kvm = gmem->kvm;
+ pgoff_t start, end;

if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
return 0;

+ start = offset >> PAGE_SHIFT;
+ end = (offset + len) >> PAGE_SHIFT;
+
/*
* Bindings must be stable across invalidation to ensure the start+end
* are balanced.
@@ -160,6 +202,7 @@ static long kvm_gmem_punch_hole(struct file *file, loff_t offset, loff_t len)

kvm_gmem_invalidate_begin(kvm, gmem, start, end);

+ kvm_gmem_issue_arch_invalidate(kvm, file, start, end);
truncate_inode_pages_range(file->f_mapping, offset, offset + len - 1);

kvm_gmem_invalidate_end(kvm, gmem, start, end);
@@ -266,6 +309,7 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
* pointed at this file.
*/
kvm_gmem_invalidate_begin(kvm, gmem, 0, -1ul);
+ kvm_gmem_issue_arch_invalidate(gmem->kvm, file, 0, -1ul);
truncate_inode_pages_final(file->f_mapping);
kvm_gmem_invalidate_end(kvm, gmem, 0, -1ul);

--
2.25.1