Re: [PATCH v2 3/4] KVM: Rename and move CPUHP_AP_KVM_STARTING to ONLINE section

From: Sean Christopherson
Date: Tue Feb 08 2022 - 19:30:10 EST


On Tue, Jan 18, 2022, Chao Gao wrote:
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 148f7169b431..528741601122 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4856,13 +4856,25 @@ static void hardware_enable_nolock(void *junk)
> }
> }
>
> -static int kvm_starting_cpu(unsigned int cpu)
> +static int kvm_online_cpu(unsigned int cpu)
> {
> + int ret = 0;
> +
> raw_spin_lock(&kvm_count_lock);
> - if (kvm_usage_count)
> + /*
> + * Abort the CPU online process if hardware virtualization cannot
> + * be enabled. Otherwise running VMs would encounter unrecoverable
> + * errors when scheduled to this CPU.
> + */
> + if (kvm_usage_count) {


> hardware_enable_nolock(NULL);
> + if (atomic_read(&hardware_enable_failed)) {

This needs:

atomic_set(&hardware_enable_failed, 0);

otherwise the counter stays non-zero after the first failure, and failure to
online one CPU will prevent onlining other non-broken CPUs.
It's probably worth adding a WARN_ON_ONCE above this too, e.g.

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 70e034cbe813..b25a00c76b3a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4863,8 +4863,11 @@ static int kvm_online_cpu(unsigned int cpu)
* errors when scheduled to this CPU.
*/
if (kvm_usage_count) {
+ WARN_ON_ONCE(atomic_read(&hardware_enable_failed));
+
hardware_enable_nolock(NULL);
if (atomic_read(&hardware_enable_failed)) {
+ atomic_set(&hardware_enable_failed, 0);
ret = -EIO;
pr_warn("kvm: abort onlining CPU%d", cpu);
}


> + ret = -EIO;
> + pr_warn("kvm: abort onlining CPU%d", cpu);

This is somewhat redundant with the pr_info() message in hardware_enable_nolock().
What about adding the below as a prep patch? I think/hope it would then be obvious
to the user/admin that onlining the CPU failed. E.g. the output would look like this:

kvm: enabling virtualization on CPU2 failed during hardware_enable_all()

From: Sean Christopherson <seanjc@xxxxxxxxxx>
Date: Tue, 8 Feb 2022 13:26:19 -0800
Subject: [PATCH] KVM: Provide more information in kernel log if hardware
enabling fails

Provide the name of the calling function to hardware_enable_nolock() and
include it in the error message to provide additional information on
exactly what path failed.

Opportunistically bump the pr_info() to pr_warn(); failure to enable
virtualization support is warn-worthy as _something_ is wrong with the
system.

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
virt/kvm/kvm_main.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index be614a6325e4..23481fd746aa 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4833,7 +4833,7 @@ static struct miscdevice kvm_dev = {
&kvm_chardev_ops,
};

-static void hardware_enable_nolock(void *junk)
+static void hardware_enable_nolock(void *caller_name)
{
int cpu = raw_smp_processor_id();
int r;
@@ -4848,7 +4848,8 @@ static void hardware_enable_nolock(void *junk)
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
atomic_inc(&hardware_enable_failed);
- pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu);
+ pr_warn("kvm: enabling virtualization on CPU%d failed during %s()\n",
+ cpu, (const char *)caller_name);
}
}

@@ -4856,7 +4857,7 @@ static int kvm_starting_cpu(unsigned int cpu)
{
raw_spin_lock(&kvm_count_lock);
if (kvm_usage_count)
- hardware_enable_nolock(NULL);
+ hardware_enable_nolock((void *)__func__);
raw_spin_unlock(&kvm_count_lock);
return 0;
}
@@ -4905,7 +4906,7 @@ static int hardware_enable_all(void)
kvm_usage_count++;
if (kvm_usage_count == 1) {
atomic_set(&hardware_enable_failed, 0);
- on_each_cpu(hardware_enable_nolock, NULL, 1);
+ on_each_cpu(hardware_enable_nolock, (void *)__func__, 1);

if (atomic_read(&hardware_enable_failed)) {
hardware_disable_all_nolock();
@@ -5530,7 +5531,7 @@ static void kvm_resume(void)
#ifdef CONFIG_LOCKDEP
WARN_ON(lockdep_is_held(&kvm_count_lock));
#endif
- hardware_enable_nolock(NULL);
+ hardware_enable_nolock((void *)__func__);
}
}


base-commit: 357ef9d9c0728bc2bbb9810c662263bba6b8dbc7
--