RE: [PATCHv3 0/3] x86/tdx: Fix one more load_unaligned_zeropad() issue

From: Michael Kelley (LINUX)
Date: Sat Jul 08 2023 - 19:53:17 EST


From: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx> Sent: Friday, July 7, 2023 7:07 AM
>
> On Thu, Jul 06, 2023 at 04:48:32PM +0000, Michael Kelley (LINUX) wrote:
> > From: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Sent: Tuesday, June 6, 2023 2:56 AM

[snip]

>
> It only addresses the problem that happens on transition, but
> load_unaligned_zeropad() is still a problem for the shared mappings in
> general, after transition is complete. Like if load_unaligned_zeropad()
> steps from private to shared mapping and shared mapping triggers #VE,
> kernel should be able to handle it.

I'm showing my ignorance of TDX architectural details, but what's the
situation where shared mappings in general can trigger a #VE? How
do such situations get handled for references that aren't from
load_unaligned_zeropad()?

>
> The testcase that triggers the problem (It is ugly, but useful.):
>
> #include <linux/mm.h>
> #include <asm/word-at-a-time.h>
>
> static int f(pte_t *pte, unsigned long addr, void *data)
> {
> pte_t ***p = data;
>
> if (p) {
> *(*p) = pte;
> (*p)++;
> }
> return 0;
> }
>
> static struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
> {
> struct vm_struct *area;
>
> area = get_vm_area_caller(size, VM_IOREMAP,
> __builtin_return_address(0));
> if (area == NULL)
> return NULL;
>
> if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
> size, f, ptes ? &ptes : NULL)) {
> free_vm_area(area);
> return NULL;
> }
>
> return area;
> }
>
> #define SHARED_PFN_TRIGGERS_VE 0x80000
>
> static int test_zeropad(void)
> {
> struct vm_struct *area;
> pte_t *pte[2];
> unsigned long a;
> struct page *page;
>
> page = alloc_page(GFP_KERNEL);
> area = alloc_vm_area(2 * PAGE_SIZE, pte);
>
> set_pte_at(&init_mm, (unsigned long)area->addr, pte[0],
> pfn_pte(page_to_pfn(page), PAGE_KERNEL));
> set_pte_at(&init_mm, (unsigned long)(area->addr + PAGE_SIZE), pte[1],
> pfn_pte(SHARED_PFN_TRIGGERS_VE,
> pgprot_decrypted(PAGE_KERNEL)));
>
> a = load_unaligned_zeropad(area->addr + PAGE_SIZE - 1);
> printk("a: %#lx\n", a);
> for(;;);
> return 0;
> }
> late_initcall(test_zeropad);
>
> Below is a patch that provides fixup code with the address it wants.
>
> Any comments?

This looks good to me. I applied the diff to a TDX VM running on
Hyper-V. When a load_unaligned_zeropad() occurs on a page that is
transitioning between private and shared, the zeropad fixup is now
done correctly via the #VE handler. (This is *without* my RFC patch to
mark the pages invalid during a transition.)

Michael

>
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 58b1f208eff5..4a817d20ce3b 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -697,9 +697,10 @@ static bool try_fixup_enqcmd_gp(void)
> }
>
> static bool gp_try_fixup_and_notify(struct pt_regs *regs, int trapnr,
> - unsigned long error_code, const char *str)
> + unsigned long error_code, const char *str,
> + unsigned long address)
> {
> - if (fixup_exception(regs, trapnr, error_code, 0))
> + if (fixup_exception(regs, trapnr, error_code, address))
> return true;
>
> current->thread.error_code = error_code;
> @@ -759,7 +760,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
> goto exit;
> }
>
> - if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc))
> + if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc, 0))
> goto exit;
>
> if (error_code)
> @@ -1357,17 +1358,20 @@ DEFINE_IDTENTRY(exc_device_not_available)
>
> #define VE_FAULT_STR "VE fault"
>
> -static void ve_raise_fault(struct pt_regs *regs, long error_code)
> +static void ve_raise_fault(struct pt_regs *regs, long error_code,
> + unsigned long address)
> {
> if (user_mode(regs)) {
> gp_user_force_sig_segv(regs, X86_TRAP_VE, error_code,
> VE_FAULT_STR);
> return;
> }
>
> - if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, VE_FAULT_STR))
> + if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code,
> + VE_FAULT_STR, address)) {
> return;
> + }
>
> - die_addr(VE_FAULT_STR, regs, error_code, 0);
> + die_addr(VE_FAULT_STR, regs, error_code, address);
> }
>
> /*
> @@ -1431,7 +1435,7 @@ DEFINE_IDTENTRY(exc_virtualization_exception)
> * it successfully, treat it as #GP(0) and handle it.
> */
> if (!tdx_handle_virt_exception(regs, &ve))
> - ve_raise_fault(regs, 0);
> + ve_raise_fault(regs, 0, ve.gla);
>
> cond_local_irq_disable(regs);
> }
> --
> Kiryl Shutsemau / Kirill A. Shutemov