Re: [PATCH v14 004/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module

From: Isaku Yamahata
Date: Wed Jun 14 2023 - 12:05:18 EST


On Wed, Jun 14, 2023 at 09:41:58AM +0000,
"Huang, Kai" <kai.huang@xxxxxxxxx> wrote:

> On Tue, 2023-06-13 at 10:38 -0700, Isaku Yamahata wrote:
> > On Mon, Jun 12, 2023 at 11:55:14PM +0000,
> > "Huang, Kai" <kai.huang@xxxxxxxxx> wrote:
> >
> > > On Wed, 2023-06-07 at 11:06 -0700, Isaku Yamahata wrote:
> > > > Thanks for pointing it out. The following is the fix.
> > > >
> > > > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > > > index 8a1d0755d275..b0d3f646afb1 100644
> > > > --- a/arch/x86/kvm/vmx/tdx.c
> > > > +++ b/arch/x86/kvm/vmx/tdx.c
> > > > @@ -4499,26 +4499,39 @@ u64 tdx_non_arch_field_switch(u64 field)
> > > >   }
> > > >  }
> > > >  
> > > > -static void __init vmx_tdx_on(void *info)
> > > > +struct vmx_tdx_enabled {
> > > > + cpumask_var_t vmx_enabled;
> > > > + atomic_t *err;
> > > > +};
> > > > +
> > >
> > > Sorry for late reply.
> > >
> > > I think you just need to mimic hardware_enable_all() -- using a per-cpu
> > > variable. In this way you can get rid of this structure.
> > >
> > > But again, we have listed a couple of options in the v13 discussion [1]:
> > >
> > > 1) Call kvm_ops_update() twice before and after hardware_setup() in order to use
> > > hardware_enable_all() directly.
> > >
> > > 2) Expose kvm_x86_ops as symbol so VMX can set hardware_{enable|disable}()
> > > callback before hardware_setup() in order to use hardware_enable_all().
> > >
> > > 3) Implement VMX's own hardware_enable_all() logic as shown in this patch.
> > >
> > > 4) ???
> > >
> > > I think it would be better if Sean can provide some comments here, but until he
> > > does, we can keep using option 3) (this patch).
> > >
> > > [1]
> > > https://lore.kernel.org/lkml/5dc84a2601a47ccc29ef43200cf3ec0d1b485d23.camel@xxxxxxxxx/
> >
> > Ok, makes sense. Here is the updated version with the fix for the error you
> > pointed out. It introduces a cpu bitmap to track which CPUs enabled VMX (VMXON)
> > successfully, and turns VMX off only on the CPUs whose bit is set.
> >
> >
> [...]
>
> > +struct vmx_tdx_enabled {
> > + cpumask_var_t vmx_enabled;
> > + atomic_t err;
> > +};
> > +
>
> Again (and again), why not just mimic hardware_enable_all() to use a per-cpu
> variable instead of a cpumask, so that you can get rid of this structure?

Do you mean the way __hardware_enable_nolock() uses a per-cpu variable?
Because hardware setup is a one-shot operation at initialization, we don't want
to allocate the variable statically. Anyway, the following is a patch that uses
a dynamically allocated per-cpu variable instead. Which version do you prefer?
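
(For reference, the kvm_main.c pattern I understand you are referring to is the
statically defined per-cpu flag used by __hardware_enable_nolock(), roughly as
below -- quoting from memory, so the details may not match the tree exactly:)

static DEFINE_PER_CPU(bool, hardware_enabled);

static int __hardware_enable_nolock(void)
{
	/* Sketch from memory of the generic KVM pattern; actual code may differ. */
	if (__this_cpu_read(hardware_enabled))
		return 0;

	if (kvm_arch_hardware_enable())
		return -EIO;

	__this_cpu_write(hardware_enabled, true);
	return 0;
}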


diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 95be6b2fba83..40a3c9c01ac6 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -4511,38 +4511,34 @@ u64 tdx_non_arch_field_switch(u64 field)
}

struct vmx_tdx_enabled {
- cpumask_var_t vmx_enabled;
- atomic_t err;
+ bool vmx_enabled;
+ int err;
};

static void __init vmx_tdx_on(void *_vmx_tdx)
{
- struct vmx_tdx_enabled *vmx_tdx = _vmx_tdx;
- int r;
+ struct vmx_tdx_enabled *vmx_tdx = this_cpu_ptr(_vmx_tdx);

- r = vmx_hardware_enable();
- if (!r) {
- cpumask_set_cpu(smp_processor_id(), vmx_tdx->vmx_enabled);
- r = tdx_cpu_enable();
+ vmx_tdx->err = vmx_hardware_enable();
+ if (!vmx_tdx->err) {
+ vmx_tdx->vmx_enabled = true;
+ vmx_tdx->err = tdx_cpu_enable();
}
- if (r)
- atomic_set(&vmx_tdx->err, r);
}

-static void __init vmx_off(void *_vmx_enabled)
+static void __init vmx_off(void *_vmx_tdx)
{
- cpumask_var_t *vmx_enabled = (cpumask_var_t *)_vmx_enabled;
+ struct vmx_tdx_enabled *vmx_tdx = this_cpu_ptr(_vmx_tdx);

- if (cpumask_test_cpu(smp_processor_id(), *vmx_enabled))
+ if (vmx_tdx->vmx_enabled)
vmx_hardware_disable();
}

int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
{
- struct vmx_tdx_enabled vmx_tdx = {
- .err = ATOMIC_INIT(0),
- };
+ struct vmx_tdx_enabled __percpu *vmx_tdx_enabled;
int max_pkgs;
+ int cpu;
int r = 0;
int i;

@@ -4603,7 +4599,8 @@ int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
for (i = 0; i < max_pkgs; i++)
mutex_init(&tdx_mng_key_config_lock[i]);

- if (!zalloc_cpumask_var(&vmx_tdx.vmx_enabled, GFP_KERNEL)) {
+ vmx_tdx_enabled = alloc_percpu_gfp(struct vmx_tdx_enabled, GFP_KERNEL | __GFP_ZERO);
+ if (!vmx_tdx_enabled) {
r = -ENOMEM;
goto out;
}
@@ -4618,15 +4615,21 @@ int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
*/
if (!cpumask_equal(cpu_online_mask, cpu_present_mask))
pr_warn("The old TDX module requires all present CPUs to be online to initialize.\n");
- on_each_cpu(vmx_tdx_on, &vmx_tdx, true); /* TDX requires vmxon. */
- r = atomic_read(&vmx_tdx.err);
+ on_each_cpu(vmx_tdx_on, vmx_tdx_enabled, true); /* TDX requires vmxon. */
+ for_each_present_cpu(cpu) {
+ struct vmx_tdx_enabled *vmx_tdx = per_cpu_ptr(vmx_tdx_enabled, cpu);
+ if (vmx_tdx->err) {
+ r = vmx_tdx->err;
+ break;
+ }
+ }
if (!r)
r = tdx_module_setup();
else
r = -EIO;
- on_each_cpu(vmx_off, &vmx_tdx.vmx_enabled, true);
+ on_each_cpu(vmx_off, vmx_tdx_enabled, true);
cpus_read_unlock();
- free_cpumask_var(vmx_tdx.vmx_enabled);
+ free_percpu(vmx_tdx_enabled);
if (r)
goto out;

--
2.25.1
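
For comparison, a statically allocated variant (untested sketch, not what the
patch above does) would look something like the following, at the cost of
keeping the per-cpu data around for the module lifetime even though it is only
needed during setup:

/* Untested sketch of the statically allocated alternative. */
static DEFINE_PER_CPU(struct vmx_tdx_enabled, vmx_tdx_enabled_pcpu);

static void __init vmx_tdx_on(void *unused)
{
	struct vmx_tdx_enabled *vmx_tdx = this_cpu_ptr(&vmx_tdx_enabled_pcpu);

	vmx_tdx->err = vmx_hardware_enable();
	if (!vmx_tdx->err) {
		vmx_tdx->vmx_enabled = true;
		vmx_tdx->err = tdx_cpu_enable();
	}
}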




--
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>