Re: [PATCH v19 078/130] KVM: TDX: Implement TDX vcpu enter/exit path

From: Sean Christopherson
Date: Fri Mar 15 2024 - 13:26:45 EST


On Mon, Feb 26, 2024, isaku.yamahata@xxxxxxxxx wrote:
> +static noinstr void tdx_vcpu_enter_exit(struct vcpu_tdx *tdx)
> +{
> + struct tdx_module_args args;
> +
> + /*
> + * Avoid section mismatch with to_tdx() with KVM_VM_BUG(). The caller
> + * should call to_tdx().

C'mon. I don't think it's unreasonable to expect that at least one of the many
people working on TDX would figure out why to_vmx() is __always_inline.

> + */
> + struct kvm_vcpu *vcpu = &tdx->vcpu;
> +
> + guest_state_enter_irqoff();
> +
> + /*
> + * TODO: optimization:
> + * - Eliminate copy between args and vcpu->arch.regs.
> + * - copyin/copyout registers only if (tdx->tdvmvall.regs_mask != 0)
> + * which means TDG.VP.VMCALL.
> + */
> + args = (struct tdx_module_args) {
> + .rcx = tdx->tdvpr_pa,
> +#define REG(reg, REG) .reg = vcpu->arch.regs[VCPU_REGS_ ## REG]

Organizing tdx_module_args's registers by volatile vs. non-volatile is asinine.
This code should not need to exist.

> + WARN_ON_ONCE(!kvm_rebooting &&
> + (tdx->exit_reason.full & TDX_SW_ERROR) == TDX_SW_ERROR);
> +
> + guest_state_exit_irqoff();
> +}
> +
> +fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_tdx *tdx = to_tdx(vcpu);
> +
> + if (unlikely(!tdx->initialized))
> + return -EINVAL;
> + if (unlikely(vcpu->kvm->vm_bugged)) {
> + tdx->exit_reason.full = TDX_NON_RECOVERABLE_VCPU;
> + return EXIT_FASTPATH_NONE;
> + }
> +
> + trace_kvm_entry(vcpu);
> +
> + tdx_vcpu_enter_exit(tdx);
> +
> + vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
> + trace_kvm_exit(vcpu, KVM_ISA_VMX);
> +
> + return EXIT_FASTPATH_NONE;
> +}
> +
> void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
> {
> WARN_ON_ONCE(root_hpa & ~PAGE_MASK);
> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
> index d822e790e3e5..81d301fbe638 100644
> --- a/arch/x86/kvm/vmx/tdx.h
> +++ b/arch/x86/kvm/vmx/tdx.h
> @@ -27,6 +27,37 @@ struct kvm_tdx {
> struct page *source_page;
> };
>
> +union tdx_exit_reason {
> + struct {
> + /* 31:0 mirror the VMX Exit Reason format */

Then use "union vmx_exit_reason", having to maintain duplicate copies of the same
union is not something I want to do.

I'm honestly not even convinced that "union tdx_exit_reason" needs to exist. I
added vmx_exit_reason because we kept having bugs where KVM would fail to strip
bits 31:16, and because nested VMX needs to stuff failed_vmentry, but I don't
see a similar need for TDX.

I would even go so far as to say the vcpu_tdx field shouldn't be exit_reason,
and instead should be "return_code" or something. E.g. if the TDX module refuses
to run the vCPU, there's no VM-Enter and thus no VM-Exit (unless you count the
SEAMCALL itself, har har). Ditto for #GP or #UD on the SEAMCALL (or any other
reason that generates TDX_SW_ERROR).

Ugh, I'm doubling down on that suggesting. This:

WARN_ON_ONCE(!kvm_rebooting &&
(tdx->vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR);

if ((u16)tdx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
is_nmi(tdexit_intr_info(vcpu))) {
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
vmx_do_nmi_irqoff();
kvm_after_interrupt(vcpu);
}

is heinous. If there's an error that leaves bits 15:0 zero, KVM will synthesize
a spurious NMI. I don't know whether or not that can happen, but it's not
something that should even be possible in KVM, i.e. the exit reason should be
processed if and only if KVM *knows* there was a sane VM-Exit from non-root mode.

tdx_vcpu_run() has a similar issue, though it's probably benign. If there's an
error in bits 15:0 that happens to collide with EXIT_REASON_TDCALL, weird things
will happen.

if (tdx->exit_reason.basic == EXIT_REASON_TDCALL)
tdx->tdvmcall.rcx = vcpu->arch.regs[VCPU_REGS_RCX];
else
tdx->tdvmcall.rcx = 0;

I vote for something like the below, with much more robust checking of vp_enter_ret
before it's converted to a VMX exit reason.

static __always_inline union vmx_exit_reason tdexit_exit_reason(struct kvm_vcpu *vcpu)
{
return (u32)vcpu->vp_enter_ret;
}

diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index af3a2b8afee8..b9b40b2eaccb 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -43,37 +43,6 @@ struct kvm_tdx {
struct page *source_page;
};

-union tdx_exit_reason {
- struct {
- /* 31:0 mirror the VMX Exit Reason format */
- u64 basic : 16;
- u64 reserved16 : 1;
- u64 reserved17 : 1;
- u64 reserved18 : 1;
- u64 reserved19 : 1;
- u64 reserved20 : 1;
- u64 reserved21 : 1;
- u64 reserved22 : 1;
- u64 reserved23 : 1;
- u64 reserved24 : 1;
- u64 reserved25 : 1;
- u64 bus_lock_detected : 1;
- u64 enclave_mode : 1;
- u64 smi_pending_mtf : 1;
- u64 smi_from_vmx_root : 1;
- u64 reserved30 : 1;
- u64 failed_vmentry : 1;
-
- /* 63:32 are TDX specific */
- u64 details_l1 : 8;
- u64 class : 8;
- u64 reserved61_48 : 14;
- u64 non_recoverable : 1;
- u64 error : 1;
- };
- u64 full;
-};
-
struct vcpu_tdx {
struct kvm_vcpu vcpu;

@@ -103,7 +72,8 @@ struct vcpu_tdx {
};
u64 rcx;
} tdvmcall;
- union tdx_exit_reason exit_reason;
+
+ u64 vp_enter_ret;

bool initialized;