Re: [PATCH v7 11/18] mm: x86: Invoke hypercall when page encryption status is changed

From: Jürgen Groß
Date: Thu Apr 30 2020 - 11:37:29 EST


On 30.04.20 17:21, Brijesh Singh wrote:

On 4/30/20 4:49 AM, Jürgen Groß wrote:
On 30.04.20 10:45, Ashish Kalra wrote:
From: Brijesh Singh <Brijesh.Singh@xxxxxxx>

Invoke a hypercall when a memory region is changed from encrypted ->
decrypted and vice versa. Hypervisor needs to know the page encryption
status during the guest migration.

Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: "Radim Krčmář" <rkrcmar@xxxxxxxxxx>
Cc: Joerg Roedel <joro@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: kvm@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Reviewed-by: Venu Busireddy <venu.busireddy@xxxxxxxxxx>
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
---
 arch/x86/include/asm/paravirt.h | 10 +++++
 arch/x86/include/asm/paravirt_types.h | 2 +
 arch/x86/kernel/paravirt.c | 1 +
 arch/x86/mm/mem_encrypt.c | 58 ++++++++++++++++++++++++++-
 arch/x86/mm/pat/set_memory.c | 7 ++++
 5 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/paravirt.h
b/arch/x86/include/asm/paravirt.h
index 694d8daf4983..8127b9c141bf 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -78,6 +78,12 @@ static inline void paravirt_arch_exit_mmap(struct
mm_struct *mm)
ÂÂÂÂÂ PVOP_VCALL1(mmu.exit_mmap, mm);
 }
 +static inline void page_encryption_changed(unsigned long vaddr,
int npages,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ bool enc)
+{
+ÂÂÂ PVOP_VCALL3(mmu.page_encryption_changed, vaddr, npages, enc);
+}
+
 #ifdef CONFIG_PARAVIRT_XXL
 static inline void load_sp0(unsigned long sp0)
 {
@@ -946,6 +952,10 @@ static inline void paravirt_arch_dup_mmap(struct
mm_struct *oldmm,
 static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
 {
 }
+
+static inline void page_encryption_changed(unsigned long vaddr, int
npages, bool enc)
+{
+}
 #endif
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/paravirt_types.h
b/arch/x86/include/asm/paravirt_types.h
index 732f62e04ddb..03bfd515c59c 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -215,6 +215,8 @@ struct pv_mmu_ops {
 Â /* Hook for intercepting the destruction of an mm_struct. */
ÂÂÂÂÂ void (*exit_mmap)(struct mm_struct *mm);
+ÂÂÂ void (*page_encryption_changed)(unsigned long vaddr, int npages,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ bool enc);
  #ifdef CONFIG_PARAVIRT_XXL
ÂÂÂÂÂ struct paravirt_callee_save read_cr2;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c131ba4e70ef..840c02b23aeb 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -367,6 +367,7 @@ struct paravirt_patch_template pv_ops = {
ÂÂÂÂÂÂÂÂÂÂÂÂÂ (void (*)(struct mmu_gather *, void *))tlb_remove_page,
 Â .mmu.exit_mmap = paravirt_nop,
+ÂÂÂ .mmu.page_encryption_changedÂÂÂ = paravirt_nop,
  #ifdef CONFIG_PARAVIRT_XXL
ÂÂÂÂÂ .mmu.read_cr2ÂÂÂÂÂÂÂ = __PV_IS_CALLEE_SAVE(native_read_cr2),
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index f4bd4b431ba1..603f5abf8a78 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -19,6 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/dma-mapping.h>
+#include <linux/kvm_para.h>
  #include <asm/tlbflush.h>
 #include <asm/fixmap.h>
@@ -29,6 +30,7 @@
 #include <asm/processor-flags.h>
 #include <asm/msr.h>
 #include <asm/cmdline.h>
+#include <asm/kvm_para.h>
  #include "mm_internal.h"
 @@ -196,6 +198,48 @@ void __init sme_early_init(void)
ÂÂÂÂÂÂÂÂÂ swiotlb_force = SWIOTLB_FORCE;
 }
 +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int
npages,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ bool enc)
+{
+ÂÂÂ unsigned long sz = npages << PAGE_SHIFT;
+ÂÂÂ unsigned long vaddr_end, vaddr_next;
+
+ÂÂÂ vaddr_end = vaddr + sz;
+
+ÂÂÂ for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+ÂÂÂÂÂÂÂ int psize, pmask, level;
+ÂÂÂÂÂÂÂ unsigned long pfn;
+ÂÂÂÂÂÂÂ pte_t *kpte;
+
+ÂÂÂÂÂÂÂ kpte = lookup_address(vaddr, &level);
+ÂÂÂÂÂÂÂ if (!kpte || pte_none(*kpte))
+ÂÂÂÂÂÂÂÂÂÂÂ return;
+
+ÂÂÂÂÂÂÂ switch (level) {
+ÂÂÂÂÂÂÂ case PG_LEVEL_4K:
+ÂÂÂÂÂÂÂÂÂÂÂ pfn = pte_pfn(*kpte);
+ÂÂÂÂÂÂÂÂÂÂÂ break;
+ÂÂÂÂÂÂÂ case PG_LEVEL_2M:
+ÂÂÂÂÂÂÂÂÂÂÂ pfn = pmd_pfn(*(pmd_t *)kpte);
+ÂÂÂÂÂÂÂÂÂÂÂ break;
+ÂÂÂÂÂÂÂ case PG_LEVEL_1G:
+ÂÂÂÂÂÂÂÂÂÂÂ pfn = pud_pfn(*(pud_t *)kpte);
+ÂÂÂÂÂÂÂÂÂÂÂ break;
+ÂÂÂÂÂÂÂ default:
+ÂÂÂÂÂÂÂÂÂÂÂ return;
+ÂÂÂÂÂÂÂ }
+
+ÂÂÂÂÂÂÂ psize = page_level_size(level);
+ÂÂÂÂÂÂÂ pmask = page_level_mask(level);
+
+ÂÂÂÂÂÂÂ if (x86_platform.hyper.sev_migration_hcall)
+ÂÂÂÂÂÂÂÂÂÂÂ x86_platform.hyper.sev_migration_hcall(pfn << PAGE_SHIFT,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ psize >> PAGE_SHIFT,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enc);

Why do you need two indirections? One via pv.mmu_ops and then another
via x86_platform.hyper? Isn't one enough?

Currently, there is no strong reason to have two indirections other than
building a flexibility for the future expansion, e.g when we add SEV
support for the Xen then hypercall invocation may be slightly different
but the code to walk the page table to find the GPA will be same for
both KVM and Xen. The pv.mmu_ops provides a generic indirection which
can be used by set_memory_{decrypted,encrypted}. I will look into
removing the extra indirection in next version. thanks.


And if x86_platform.hyper.sev_migration_hcall isn't set the whole loop
is basically a nop.

Yes, this double indirection has a draw back that we will be executing
the unnecessary code when x86_platform.hyper.sev_migration_hcall isn't
set. I will look into improving it.


+ÂÂÂÂÂÂÂ vaddr_next = (vaddr & pmask) + psize;
+ÂÂÂ }
+}
+
 static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 {
ÂÂÂÂÂ pgprot_t old_prot, new_prot;
@@ -253,12 +297,13 @@ static void __init __set_clr_pte_enc(pte_t
*kpte, int level, bool enc)
 static int __init early_set_memory_enc_dec(unsigned long vaddr,
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ unsigned long size, bool enc)
 {
-ÂÂÂ unsigned long vaddr_end, vaddr_next;
+ÂÂÂ unsigned long vaddr_end, vaddr_next, start;
ÂÂÂÂÂ unsigned long psize, pmask;
ÂÂÂÂÂ int split_page_size_mask;
ÂÂÂÂÂ int level, ret;
ÂÂÂÂÂ pte_t *kpte;
 + start = vaddr;
ÂÂÂÂÂ vaddr_next = vaddr;
ÂÂÂÂÂ vaddr_end = vaddr + size;
 @@ -313,6 +358,8 @@ static int __init
early_set_memory_enc_dec(unsigned long vaddr,
 Â ret = 0;
 + set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >>
PAGE_SHIFT,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enc);
 out:
ÂÂÂÂÂ __flush_tlb_all();
ÂÂÂÂÂ return ret;
@@ -451,6 +498,15 @@ void __init mem_encrypt_init(void)
ÂÂÂÂÂ if (sev_active())
ÂÂÂÂÂÂÂÂÂ static_branch_enable(&sev_enable_key);
 +#ifdef CONFIG_PARAVIRT
+ÂÂÂ /*
+ÂÂÂÂ * With SEV, we need to make a hypercall when page encryption
state is
+ÂÂÂÂ * changed.
+ÂÂÂÂ */
+ÂÂÂ if (sev_active())
+ÂÂÂÂÂÂÂ pv_ops.mmu.page_encryption_changed =
set_memory_enc_dec_hypercall;
+#endif
+
ÂÂÂÂÂ pr_info("AMD %s active\n",
ÂÂÂÂÂÂÂÂÂ sev_active() ? "Secure Encrypted Virtualization (SEV)"
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ : "Secure Memory Encryption (SME)");
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 59eca6a94ce7..9aaf1b6f5a1b 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -27,6 +27,7 @@
 #include <asm/proto.h>
 #include <asm/memtype.h>
 #include <asm/set_memory.h>
+#include <asm/paravirt.h>
  #include "../mm_internal.h"
 @@ -2003,6 +2004,12 @@ static int __set_memory_enc_dec(unsigned
long addr, int numpages, bool enc)
ÂÂÂÂÂÂ */
ÂÂÂÂÂ cpa_flush(&cpa, 0);
 + /* Notify hypervisor that a given memory range is mapped
encrypted
+ÂÂÂÂ * or decrypted. The hypervisor will use this information during
the
+ÂÂÂÂ * VM migration.
+ÂÂÂÂ */
+ÂÂÂ page_encryption_changed(addr, numpages, enc);

Is this operation really so performance critical that a pv-op is
needed? Wouldn't a static key be sufficient here?

Well, in a typical Linux kernel boot it does not get called so many
times. We noticed that some drivers (mainly nvme) calls it more often
than others. I am open for the suggestions, we went with the pv-op path
based on the previous feedbacks. A static key maybe sufficient as well.


I'm fine with a pv-op if this is seen to be the way to go. But I would
skip the x86_platform.hyper indirection and call the kvm specific
function directly. In case Xen gains SEV capability we can still either
add the indirection again or just copy the KVM function and replace the
hypercall with the Xen variant.


Juergen