Re: [PATCH v12 20/22] x86/virt/tdx: Allow SEAMCALL to handle #UD and #GP

From: Peter Zijlstra
Date: Wed Jun 28 2023 - 17:18:29 EST


On Wed, Jun 28, 2023 at 10:38:23PM +0200, Peter Zijlstra wrote:
> On Wed, Jun 28, 2023 at 05:29:01PM +0200, Peter Zijlstra wrote:
> > On Tue, Jun 27, 2023 at 02:12:50AM +1200, Kai Huang wrote:
> > > diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S
> > > index 49a54356ae99..757b0c34be10 100644
> > > --- a/arch/x86/virt/vmx/tdx/tdxcall.S
> > > +++ b/arch/x86/virt/vmx/tdx/tdxcall.S
> > > @@ -1,6 +1,7 @@
> > > /* SPDX-License-Identifier: GPL-2.0 */
> > > #include <asm/asm-offsets.h>
> > > #include <asm/tdx.h>
> > > +#include <asm/asm.h>
> > >
> > > /*
> > > * TDCALL and SEAMCALL are supported in Binutils >= 2.36.
> > > @@ -45,6 +46,7 @@
> > > /* Leave input param 2 in RDX */
> > >
> > > .if \host
> > > +1:
> > > seamcall
> >
> > So what registers are actually clobbered by SEAMCALL ? There's a
> > distinct lack of it in SDM Vol.2 instruction list :-(
>
> With the exception of the abomination that is TDH.VP.ENTER all SEAMCALLs
> seem to be limited to the set presented here (c,d,8,9,10,11) and all
> other registers should be available.
>
> Can we please make that a hard requirement, SEAMCALL must not use
> registers outside this? We can hardly program to random future
> extentions; we need hard ABI guarantees here.
>
> That also means we should be able to use si,di for the cmovc below.
>
> Kirill, back when we did __tdx_hypercall() we got bp removed as a valid
> register, the 1.0 spec still lists that, and it is also listed in
> TDH.VP.ENTER, I'm assuming it will be removed there too?
>
> bp must not be used -- it violates the pre-existing calling convention.

How's this then? Utterly untested. Not been near a compiler even.

--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -109,10 +109,26 @@ EXPORT_SYMBOL_GPL(tdx_kvm_hypercall);
* should only be used for calls that have no legitimate reason to fail
* or where the kernel can not survive the call failing.
*/
-static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
- struct tdx_module_output *out)
+static inline void _tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9)
{
- if (__tdx_module_call(fn, rcx, rdx, r8, r9, out))
+ struct tdx_module_args args = {
+ .rcx = rcx,
+ .rdx = rdx,
+ .r8 = r8,
+ .r9 = r9,
+ };
+ return __tdx_module_call(fn, &args);
+}
+
+static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9)
+{
+ if (_tdx_module_call(fn, rcx, rdx, r8, r9))
+ panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
+}
+
+static inline void tdx_module_call_ret(u64 fn, struct tdx_module_args *args)
+{
+ if (__tdx_module_call(fn, args))
panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}

@@ -134,9 +150,9 @@ int tdx_mcall_get_report0(u8 *reportdata
{
u64 ret;

- ret = __tdx_module_call(TDX_GET_REPORT, virt_to_phys(tdreport),
- virt_to_phys(reportdata), TDREPORT_SUBTYPE_0,
- 0, NULL);
+ ret = _tdx_module_call(TDX_GET_REPORT,
+ virt_to_phys(tdreport), virt_to_phys(reportdata),
+ TDREPORT_SUBTYPE_0, 0);
if (ret) {
if (TDCALL_RETURN_CODE(ret) == TDCALL_INVALID_OPERAND)
return -EINVAL;
@@ -184,7 +200,7 @@ static void __noreturn tdx_panic(const c

static void tdx_parse_tdinfo(u64 *cc_mask)
{
- struct tdx_module_output out;
+ struct tdx_module_args args = {};
unsigned int gpa_width;
u64 td_attr;

@@ -195,7 +211,7 @@ static void tdx_parse_tdinfo(u64 *cc_mas
* Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
* [TDG.VP.INFO].
*/
- tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);
+ tdx_module_call_ret(TDX_GET_INFO, &args);

/*
* The highest bit of a guest physical address is the "sharing" bit.
@@ -204,7 +220,7 @@ static void tdx_parse_tdinfo(u64 *cc_mas
* The GPA width that comes out of this call is critical. TDX guests
* can not meaningfully run without it.
*/
- gpa_width = out.rcx & GENMASK(5, 0);
+ gpa_width = args.rcx & GENMASK(5, 0);
*cc_mask = BIT_ULL(gpa_width - 1);

/*
@@ -212,7 +228,7 @@ static void tdx_parse_tdinfo(u64 *cc_mas
* memory. Ensure that no #VE will be delivered for accesses to
* TD-private memory. Only VMM-shared memory (MMIO) will #VE.
*/
- td_attr = out.rdx;
+ td_attr = args.rdx;
if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";

@@ -620,7 +636,7 @@ __init bool tdx_early_handle_ve(struct p

void tdx_get_ve_info(struct ve_info *ve)
{
- struct tdx_module_output out;
+ struct tdx_module_args args = {};

/*
* Called during #VE handling to retrieve the #VE info from the
@@ -637,15 +653,15 @@ void tdx_get_ve_info(struct ve_info *ve)
* Note, the TDX module treats virtual NMIs as inhibited if the #VE
* valid flag is set. It means that NMI=>#VE will not result in a #DF.
*/
- tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out);
+ tdx_module_call_ret(TDX_GET_VEINFO, &args);

/* Transfer the output parameters */
- ve->exit_reason = out.rcx;
- ve->exit_qual = out.rdx;
- ve->gla = out.r8;
- ve->gpa = out.r9;
- ve->instr_len = lower_32_bits(out.r10);
- ve->instr_info = upper_32_bits(out.r10);
+ ve->exit_reason = args.rcx;
+ ve->exit_qual = args.rdx;
+ ve->gla = args.r8;
+ ve->gpa = args.r9;
+ ve->instr_len = lower_32_bits(args.r10);
+ ve->instr_info = upper_32_bits(args.r10);
}

/*
@@ -779,7 +795,7 @@ static bool try_accept_one(phys_addr_t *
}

tdcall_rcx = *start | page_size;
- if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
+ if (_tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0))
return false;

*start += accept_size;
@@ -857,7 +873,7 @@ void __init tdx_early_init(void)
cc_set_mask(cc_mask);

/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
- tdx_module_call(TDX_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL, NULL);
+ tdx_module_call(TDX_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL);

/*
* All bits above GPA width are reserved and kernel treats shared bit
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -37,7 +37,7 @@
*
* This is a software only structure and not part of the TDX module/VMM ABI.
*/
-struct tdx_module_output {
+struct tdx_module_args {
u64 rcx;
u64 rdx;
u64 r8;
@@ -67,8 +67,8 @@ struct ve_info {
void __init tdx_early_init(void);

/* Used to communicate with the TDX module */
-u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
- struct tdx_module_output *out);
+u64 __tdx_module_call(u64 fn, struct tdx_module_args *args);
+u64 __tdx_module_call_ret(u64 fn, struct tdx_module_args *args);

void tdx_get_ve_info(struct ve_info *ve);

--- a/arch/x86/virt/vmx/tdx/tdxcall.S
+++ b/arch/x86/virt/vmx/tdx/tdxcall.S
@@ -17,37 +17,44 @@
* TDX module and hypercalls to the VMM.
* SEAMCALL - used by TDX hosts to make requests to the
* TDX module.
+ *
+ *-------------------------------------------------------------------------
+ * TDCALL/SEAMCALL ABI:
+ *-------------------------------------------------------------------------
+ * Input Registers:
+ *
+ * RAX - TDCALL Leaf number.
+ * RCX,RDX,R8-R9 - TDCALL Leaf specific input registers.
+ *
+ * Output Registers:
+ *
+ * RAX - TDCALL instruction error code.
+ * RCX,RDX,R8-R11 - TDCALL Leaf specific output registers.
+ *
+ *-------------------------------------------------------------------------
+ *
+ * __tdx_module_call() function ABI:
+ *
+ * @fn (RDI) - TDCALL Leaf ID, moved to RAX
+ * @regs (RSI) - struct tdx_regs pointer
+ *
+ * Return status of TDCALL via RAX.
*/
-.macro TDX_MODULE_CALL host:req
- /*
- * R12 will be used as temporary storage for struct tdx_module_output
- * pointer. Since R12-R15 registers are not used by TDCALL/SEAMCALL
- * services supported by this function, it can be reused.
- */
+.macro TDX_MODULE_CALL host:req ret:req
+ FRAME_BEGIN

- /* Callee saved, so preserve it */
- push %r12
+ mov %rdi, %rax
+ mov $TDX_SEAMCALL_VMFAILINVALID, %rdi

- /*
- * Push output pointer to stack.
- * After the operation, it will be fetched into R12 register.
- */
- push %r9
+ mov TDX_MODULE_rcx(%rsi), %rcx
+ mov TDX_MODULE_rdx(%rsi), %rdx
+ mov TDX_MODULE_r8(%rsi), %r8
+ mov TDX_MODULE_r9(%rsi), %r9
+// mov TDX_MODULE_r10(%rsi), %r10
+// mov TDX_MODULE_r11(%rsi), %r11

- /* Mangle function call ABI into TDCALL/SEAMCALL ABI: */
- /* Move Leaf ID to RAX */
- mov %rdi, %rax
- /* Move input 4 to R9 */
- mov %r8, %r9
- /* Move input 3 to R8 */
- mov %rcx, %r8
- /* Move input 1 to RCX */
- mov %rsi, %rcx
- /* Leave input param 2 in RDX */
-
- .if \host
-1:
- seamcall
+.if \host
+1: seamcall
/*
* SEAMCALL instruction is essentially a VMExit from VMX root
* mode to SEAM VMX root mode. VMfailInvalid (CF=1) indicates
@@ -59,53 +66,30 @@
* This value will never be used as actual SEAMCALL error code as
* it is from the Reserved status code class.
*/
- jnc .Lseamcall_out
- mov $TDX_SEAMCALL_VMFAILINVALID, %rax
- jmp .Lseamcall_out
+ cmovc %rdi, %rax
2:
- /*
- * SEAMCALL caused #GP or #UD. By reaching here %eax contains
- * the trap number. Convert the trap number to the TDX error
- * code by setting TDX_SW_ERROR to the high 32-bits of %rax.
- *
- * Note cannot OR TDX_SW_ERROR directly to %rax as OR instruction
- * only accepts 32-bit immediate at most.
- */
- mov $TDX_SW_ERROR, %r12
- orq %r12, %rax
-
- _ASM_EXTABLE_FAULT(1b, 2b)
-.Lseamcall_out:
- .else
+.else
tdcall
- .endif
-
- /*
- * Fetch output pointer from stack to R12 (It is used
- * as temporary storage)
- */
- pop %r12
+.endif

- /*
- * Since this macro can be invoked with NULL as an output pointer,
- * check if caller provided an output struct before storing output
- * registers.
- *
- * Update output registers, even if the call failed (RAX != 0).
- * Other registers may contain details of the failure.
- */
- test %r12, %r12
- jz .Lno_output_struct
+.if \ret
+ movq %rcx, TDX_MODULE_rcx(%rsi)
+ movq %rdx, TDX_MODULE_rdx(%rsi)
+ movq %r8, TDX_MODULE_r8(%rsi)
+ movq %r9, TDX_MODULE_r9(%rsi)
+ movq %r10, TDX_MODULE_r10(%rsi)
+ movq %r11, TDX_MODULE_r11(%rsi)
+.endif
+
+ FRAME_END
+ RET
+
+.if \host
+3:
+ mov $TDX_SW_ERROR, %rdi
+ or %rdi, %rax
+ jmp 2b

- /* Copy result registers to output struct: */
- movq %rcx, TDX_MODULE_rcx(%r12)
- movq %rdx, TDX_MODULE_rdx(%r12)
- movq %r8, TDX_MODULE_r8(%r12)
- movq %r9, TDX_MODULE_r9(%r12)
- movq %r10, TDX_MODULE_r10(%r12)
- movq %r11, TDX_MODULE_r11(%r12)
-
-.Lno_output_struct:
- /* Restore the state of R12 register */
- pop %r12
+ _ASM_EXTABLE_FAULT(1b, 3b)
+.endif
.endm