Re: [PATCH v11 05/20] x86/virt/tdx: Add SEAMCALL infrastructure

From: Dave Hansen
Date: Wed Jun 07 2023 - 10:24:32 EST


On 6/4/23 07:27, Kai Huang wrote:
> TDX introduces a new CPU mode: Secure Arbitration Mode (SEAM). This
> mode runs only the TDX module itself or other code to load the TDX
> module.
>
> The host kernel communicates with SEAM software via a new SEAMCALL
> instruction. This is conceptually similar to a guest->host hypercall,
> except it is made from the host to SEAM software instead. The TDX
> module establishes a new SEAMCALL ABI which allows the host to
> initialize the module and to manage VMs.
>
> Add infrastructure to make SEAMCALLs. The SEAMCALL ABI is very similar
> to the TDCALL ABI and leverages much TDCALL infrastructure.
>
> SEAMCALL instruction causes #GP when TDX isn't BIOS enabled, and #UD
> when CPU is not in VMX operation. Currently, only KVM code mocks with

"mocks"? Did you mean "mucks"?

> VMX enabling, and KVM is the only user of TDX. This implementation
> chooses to make KVM itself responsible for enabling VMX before using
> TDX and let the rest of the kernel stay blissfully unaware of VMX.
>
> The current TDX_MODULE_CALL macro handles neither #GP nor #UD. The
> kernel would hit Oops if SEAMCALL were mistakenly made w/o enabling VMX
> first. Architecturally, there is no CPU flag to check whether the CPU
> is in VMX operation. Also, if a BIOS were buggy, it could still report
> valid TDX private KeyIDs when TDX actually couldn't be enabled.

I'm not sure this is a great justification. If the BIOS is lying to the
OS, we _should_ oops.

How else can this happen other than silly kernel bugs. It's OK to oops
in the face of silly kernel bugs.

...
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index 4dfe2e794411..b489b5b9de5d 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -8,6 +8,8 @@
> #include <asm/ptrace.h>
> #include <asm/shared/tdx.h>
>
> +#include <asm/trapnr.h>
> +
> /*
> * SW-defined error codes.
> *
> @@ -18,6 +20,9 @@
> #define TDX_SW_ERROR (TDX_ERROR | GENMASK_ULL(47, 40))
> #define TDX_SEAMCALL_VMFAILINVALID (TDX_SW_ERROR | _UL(0xFFFF0000))
>
> +#define TDX_SEAMCALL_GP (TDX_SW_ERROR | X86_TRAP_GP)
> +#define TDX_SEAMCALL_UD (TDX_SW_ERROR | X86_TRAP_UD)
> +
> #ifndef __ASSEMBLY__
>
> /* TDX supported page sizes from the TDX module ABI. */
> diff --git a/arch/x86/virt/vmx/tdx/Makefile b/arch/x86/virt/vmx/tdx/Makefile
> index 93ca8b73e1f1..38d534f2c113 100644
> --- a/arch/x86/virt/vmx/tdx/Makefile
> +++ b/arch/x86/virt/vmx/tdx/Makefile
> @@ -1,2 +1,2 @@
> # SPDX-License-Identifier: GPL-2.0-only
> -obj-y += tdx.o
> +obj-y += tdx.o seamcall.o
> diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S
> new file mode 100644
> index 000000000000..f81be6b9c133
> --- /dev/null
> +++ b/arch/x86/virt/vmx/tdx/seamcall.S
> @@ -0,0 +1,52 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#include <linux/linkage.h>
> +#include <asm/frame.h>
> +
> +#include "tdxcall.S"
> +
> +/*
> + * __seamcall() - Host-side interface functions to SEAM software module
> + * (the P-SEAMLDR or the TDX module).
> + *
> + * Transform function call register arguments into the SEAMCALL register
> + * ABI. Return TDX_SEAMCALL_VMFAILINVALID if the SEAMCALL itself fails,
> + * or the completion status of the SEAMCALL leaf function. Additional
> + * output operands are saved in @out (if it is provided by the caller).
> + *
> + *-------------------------------------------------------------------------
> + * SEAMCALL ABI:
> + *-------------------------------------------------------------------------
> + * Input Registers:
> + *
> + * RAX - SEAMCALL Leaf number.
> + * RCX,RDX,R8-R9 - SEAMCALL Leaf specific input registers.
> + *
> + * Output Registers:
> + *
> + * RAX - SEAMCALL completion status code.
> + * RCX,RDX,R8-R11 - SEAMCALL Leaf specific output registers.
> + *
> + *-------------------------------------------------------------------------
> + *
> + * __seamcall() function ABI:
> + *
> + * @fn (RDI) - SEAMCALL Leaf number, moved to RAX
> + * @rcx (RSI) - Input parameter 1, moved to RCX
> + * @rdx (RDX) - Input parameter 2, moved to RDX
> + * @r8 (RCX) - Input parameter 3, moved to R8
> + * @r9 (R8) - Input parameter 4, moved to R9
> + *
> + * @out (R9) - struct tdx_module_output pointer
> + * stored temporarily in R12 (not
> + * used by the P-SEAMLDR or the TDX
> + * module). It can be NULL.
> + *
> + * Return (via RAX) the completion status of the SEAMCALL, or
> + * TDX_SEAMCALL_VMFAILINVALID.
> + */
> +SYM_FUNC_START(__seamcall)
> + FRAME_BEGIN
> + TDX_MODULE_CALL host=1
> + FRAME_END
> + RET
> +SYM_FUNC_END(__seamcall)
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index 2d91e7120c90..e82713dd5d54 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -12,14 +12,70 @@
> #include <linux/init.h>
> #include <linux/errno.h>
> #include <linux/printk.h>
> +#include <linux/smp.h>
> #include <asm/msr-index.h>
> #include <asm/msr.h>
> #include <asm/tdx.h>
> +#include "tdx.h"
>
> static u32 tdx_global_keyid __ro_after_init;
> static u32 tdx_guest_keyid_start __ro_after_init;
> static u32 tdx_nr_guest_keyids __ro_after_init;
>
> +/*
> + * Wrapper of __seamcall() to convert SEAMCALL leaf function error code
> + * to kernel error code. @seamcall_ret and @out contain the SEAMCALL
> + * leaf function return code and the additional output respectively if
> + * not NULL.
> + */
> +static int __always_unused seamcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
> + u64 *seamcall_ret,
> + struct tdx_module_output *out)
> +{
> + int cpu, ret = 0;
> + u64 sret;
> +
> + /* Need a stable CPU id for printing error message */
> + cpu = get_cpu();
> +
> + sret = __seamcall(fn, rcx, rdx, r8, r9, out);
> +
> + /* Save SEAMCALL return code if the caller wants it */
> + if (seamcall_ret)
> + *seamcall_ret = sret;
> +
> + /* SEAMCALL was successful */
> + if (!sret)
> + goto out;
> +
> + switch (sret) {
> + case TDX_SEAMCALL_GP:
> + pr_err_once("[firmware bug]: TDX is not enabled by BIOS.\n");
> + ret = -ENODEV;
> + break;
> + case TDX_SEAMCALL_VMFAILINVALID:
> + pr_err_once("TDX module is not loaded.\n");
> + ret = -ENODEV;
> + break;
> + case TDX_SEAMCALL_UD:
> + pr_err_once("SEAMCALL failed: CPU %d is not in VMX operation.\n",
> + cpu);
> + ret = -EINVAL;
> + break;
> + default:
> + pr_err_once("SEAMCALL failed: CPU %d: leaf %llu, error 0x%llx.\n",
> + cpu, fn, sret);
> + if (out)
> + pr_err_once("additional output: rcx 0x%llx, rdx 0x%llx, r8 0x%llx, r9 0x%llx, r10 0x%llx, r11 0x%llx.\n",
> + out->rcx, out->rdx, out->r8,
> + out->r9, out->r10, out->r11);
> + ret = -EIO;
> + }
> +out:
> + put_cpu();
> + return ret;
> +}

This fails to distinguish two very different things:

* A SEAMCALL error
and
* A SEAMCALL *failure*

"Errors" are normal. Hypercalls can return errors and so can SEAMCALLs.
No biggie.

But SEAMCALL failures are another matter. They mean that something
really fundamental is *BROKEN*.

Just saying "SEAMCALL was successful" is a bit ambiguous for me.

> diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
> new file mode 100644
> index 000000000000..48ad1a1ba737
> --- /dev/null
> +++ b/arch/x86/virt/vmx/tdx/tdx.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _X86_VIRT_TDX_H
> +#define _X86_VIRT_TDX_H
> +
> +#include <linux/types.h>
> +
> +struct tdx_module_output;
> +u64 __seamcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
> + struct tdx_module_output *out);
> +#endif
> diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S
> index 49a54356ae99..757b0c34be10 100644
> --- a/arch/x86/virt/vmx/tdx/tdxcall.S
> +++ b/arch/x86/virt/vmx/tdx/tdxcall.S
> @@ -1,6 +1,7 @@
> /* SPDX-License-Identifier: GPL-2.0 */
> #include <asm/asm-offsets.h>
> #include <asm/tdx.h>
> +#include <asm/asm.h>
>
> /*
> * TDCALL and SEAMCALL are supported in Binutils >= 2.36.
> @@ -45,6 +46,7 @@
> /* Leave input param 2 in RDX */
>
> .if \host
> +1:
> seamcall
> /*
> * SEAMCALL instruction is essentially a VMExit from VMX root
> @@ -57,10 +59,23 @@
> * This value will never be used as actual SEAMCALL error code as
> * it is from the Reserved status code class.
> */
> - jnc .Lno_vmfailinvalid
> + jnc .Lseamcall_out
> mov $TDX_SEAMCALL_VMFAILINVALID, %rax
> -.Lno_vmfailinvalid:
> + jmp .Lseamcall_out
> +2:
> + /*
> + * SEAMCALL caused #GP or #UD. By reaching here %eax contains
> + * the trap number. Convert the trap number to the TDX error
> + * code by setting TDX_SW_ERROR to the high 32-bits of %rax.
> + *
> + * Note cannot OR TDX_SW_ERROR directly to %rax as OR instruction
> + * only accepts 32-bit immediate at most.
> + */
> + mov $TDX_SW_ERROR, %r12
> + orq %r12, %rax

I think the justification for doing the #UD/#GP handling is a bit weak.
In the end, it gets us a nicer error message. Is that error message
*REALLY* needed? Or is an oops OK in the very rare circumstance that
the BIOS is totally buggy?