Re: [PATCH v17 023/116] KVM: TDX: Refuse to unplug the last cpu on the package

From: Binbin Wu
Date: Wed Dec 13 2023 - 03:04:43 EST




On 11/7/2023 10:55 PM, isaku.yamahata@xxxxxxxxx wrote:
From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

In order to reclaim TDX HKID, (i.e. when deleting guest TD), needs to call
TDH.PHYMEM.PAGE.WBINVD on all packages. If we have active TDX HKID, refuse
to offline the last online cpu to guarantee at least one CPU online per
package. Add arch callback for cpu offline.
Because TDX doesn't support suspend, this also refuses suspend if TDs are
running. If no TD is running, suspend is allowed.

Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

Reviewed-by: Binbin Wu <binbin.wu@xxxxxxxxxxxxxxx>

---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/vmx/main.c | 1 +
arch/x86/kvm/vmx/tdx.c | 41 ++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/x86_ops.h | 2 ++
arch/x86/kvm/x86.c | 5 ++++
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 12 +++++++--
8 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index d05a829254ea..0fbafb287839 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -18,6 +18,7 @@ KVM_X86_OP(check_processor_compatibility)
KVM_X86_OP(hardware_enable)
KVM_X86_OP(hardware_disable)
KVM_X86_OP(hardware_unsetup)
+KVM_X86_OP_OPTIONAL_RET0(offline_cpu)
KVM_X86_OP(has_emulated_msr)
KVM_X86_OP(vcpu_after_set_cpuid)
KVM_X86_OP(is_vm_type_supported)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 742ac63e1992..56d0ba5793cf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1570,6 +1570,7 @@ struct kvm_x86_ops {
int (*hardware_enable)(void);
void (*hardware_disable)(void);
void (*hardware_unsetup)(void);
+ int (*offline_cpu)(void);
bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 7082e9ea8492..c8213d6ea301 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -121,6 +121,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.check_processor_compatibility = vmx_check_processor_compat,
.hardware_unsetup = vt_hardware_unsetup,
+ .offline_cpu = tdx_offline_cpu,
.hardware_enable = vt_hardware_enable,
.hardware_disable = vmx_hardware_disable,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 6017e0feac1e..51aa114feb86 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -64,6 +64,7 @@ static struct tdx_info tdx_info __ro_after_init;
*/
static DEFINE_MUTEX(tdx_lock);
static struct mutex *tdx_mng_key_config_lock;
+static atomic_t nr_configured_hkid;
static __always_inline hpa_t set_hkid_to_hpa(hpa_t pa, u16 hkid)
{
@@ -79,6 +80,7 @@ static inline void tdx_hkid_free(struct kvm_tdx *kvm_tdx)
{
tdx_guest_keyid_free(kvm_tdx->hkid);
kvm_tdx->hkid = -1;
+ atomic_dec(&nr_configured_hkid);
}
static inline bool is_hkid_assigned(struct kvm_tdx *kvm_tdx)
@@ -562,6 +564,7 @@ static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params,
if (ret < 0)
return ret;
kvm_tdx->hkid = ret;
+ atomic_inc(&nr_configured_hkid);
va = __get_free_page(GFP_KERNEL_ACCOUNT);
if (!va)
@@ -932,3 +935,41 @@ void tdx_hardware_unsetup(void)
/* kfree accepts NULL. */
kfree(tdx_mng_key_config_lock);
}
+
+int tdx_offline_cpu(void)
+{
+ int curr_cpu = smp_processor_id();
+ cpumask_var_t packages;
+ int ret = 0;
+ int i;
+
+ /* No TD is running. Allow any cpu to be offline. */
+ if (!atomic_read(&nr_configured_hkid))
+ return 0;
+
+ /*
+ * In order to reclaim TDX HKID, (i.e. when deleting guest TD), need to
+ * call TDH.PHYMEM.PAGE.WBINVD on all packages to program all memory
+ * controller with pconfig. If we have active TDX HKID, refuse to
+ * offline the last online cpu.
+ */
+ if (!zalloc_cpumask_var(&packages, GFP_KERNEL))
+ return -ENOMEM;
+ for_each_online_cpu(i) {
+ if (i != curr_cpu)
+ cpumask_set_cpu(topology_physical_package_id(i), packages);
+ }
+ /* Check if this cpu is the last online cpu of this package. */
+ if (!cpumask_test_cpu(topology_physical_package_id(curr_cpu), packages))
+ ret = -EBUSY;
+ free_cpumask_var(packages);
+ if (ret)
+ /*
+ * Because it's hard for human operator to understand the
+ * reason, warn it.
+ */
+#define MSG_ALLPKG_ONLINE \
+ "TDX requires all packages to have an online CPU. Delete all TDs in order to offline all CPUs of a package.\n"
+ pr_warn_ratelimited(MSG_ALLPKG_ONLINE);
+ return ret;
+}
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index feee8c1e737f..ffba64008682 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -138,6 +138,7 @@ void vmx_setup_mce(struct kvm_vcpu *vcpu);
int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
void tdx_hardware_unsetup(void);
bool tdx_is_vm_type_supported(unsigned long type);
+int tdx_offline_cpu(void);
int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
int tdx_vm_init(struct kvm *kvm);
@@ -148,6 +149,7 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -EOPNOTSUPP; }
static inline void tdx_hardware_unsetup(void) {}
static inline bool tdx_is_vm_type_supported(unsigned long type) { return false; }
+static inline int tdx_offline_cpu(void) { return 0; }
static inline int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9ef81d235406..191ac1e0d96d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12421,6 +12421,11 @@ void kvm_arch_hardware_disable(void)
drop_user_return_notifiers();
}
+int kvm_arch_offline_cpu(unsigned int cpu)
+{
+ return static_call(kvm_x86_offline_cpu)();
+}
+
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 687589ce9f63..96ff951bdd29 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1502,6 +1502,7 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
int kvm_arch_hardware_enable(void);
void kvm_arch_hardware_disable(void);
#endif
+int kvm_arch_offline_cpu(unsigned int cpu);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ba2f993c9655..29fdb39976e0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5578,13 +5578,21 @@ static void hardware_disable_nolock(void *junk)
__this_cpu_write(hardware_enabled, false);
}
+__weak int kvm_arch_offline_cpu(unsigned int cpu)
+{
+ return 0;
+}
+
static int kvm_offline_cpu(unsigned int cpu)
{
+ int r = 0;
+
mutex_lock(&kvm_lock);
- if (kvm_usage_count)
+ r = kvm_arch_offline_cpu(cpu);
+ if (!r && kvm_usage_count)
hardware_disable_nolock(NULL);
mutex_unlock(&kvm_lock);
- return 0;
+ return r;
}
static void hardware_disable_all_nolock(void)