[PATCH v1 01/11] KVM: s390: pv: leak the ASCE page when destroy fails

From: Claudio Imbrenda
Date: Mon May 17 2021 - 16:08:10 EST


When the destroy configuration UVC fails, the page pointed to by the
ASCE of the VM becomes poisoned and, to avoid issues, it must not be
used again.

Since the page becomes unusable in practice, we set it aside and leak it.

Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
---
arch/s390/kvm/pv.c | 53 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 813b6e93dc83..e0532ab725bf 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -150,6 +150,55 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
return -ENOMEM;
}

+/*
+ * Remove the topmost level of page tables (the page backing
+ * kvm->arch.gmap->table) from the list of page tables of the gmap.
+ * This means that it will not be freed when the VM is torn down, and needs
+ * to be handled separately by the caller, unless the leak is intentional.
+ * Only the list linkage is changed; gmap->table and the ASCE are not touched.
+ */
+static void kvm_s390_pv_remove_old_asce(struct kvm *kvm)
+{
+ struct page *old;
+
+ old = virt_to_page(kvm->arch.gmap->table);
+ list_del(&old->lru);
+ /* reset the entry, in case the ASCE needs to be "removed" multiple times */
+ INIT_LIST_HEAD(&old->lru);
+}
+
+/*
+ * Try to replace the current ASCE with an equivalent one that points to a
+ * newly allocated copy of the top level page table; the sub-page bits
+ * (~PAGE_MASK) of the old ASCE are kept. Returns 0 on success; if the
+ * allocation fails, the ASCE is not replaced and -ENOMEM is returned.
+ * In any case, the old ASCE is removed from the list, therefore the caller
+ * has to save a pointer to it beforehand, unless the leak is intentional.
+ */
+static int kvm_s390_pv_replace_asce(struct kvm *kvm)
+{
+ unsigned long asce;
+ struct page *page;
+ void *table;
+
+ kvm_s390_pv_remove_old_asce(kvm);
+
+ page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+ if (!page)
+ return -ENOMEM;
+ list_add(&page->lru, &kvm->arch.gmap->crst_list);
+
+ table = page_to_virt(page);
+ memcpy(table, kvm->arch.gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
+
+ asce = (kvm->arch.gmap->asce & ~PAGE_MASK) | __pa(table);
+ WRITE_ONCE(kvm->arch.gmap->asce, asce);
+ WRITE_ONCE(kvm->mm->context.gmap_asce, asce);
+ WRITE_ONCE(kvm->arch.gmap->table, table);
+
+ return 0;
+}
+
/* this should not fail, but if it does, we must not free the donated memory */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
@@ -164,9 +213,11 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
atomic_set(&kvm->mm->context.is_protected, 0);
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
- /* Inteded memory leak on "impossible" error */
+ /* Intended memory leak on "impossible" error */
if (!cc)
kvm_s390_pv_dealloc_vm(kvm);
+ else
+ kvm_s390_pv_replace_asce(kvm);
return cc ? -EIO : 0;
}

--
2.31.1