[PATCH 2/5] xen/pvh*: Support > 32 VCPUs at domain restore

From: Ankur Arora
Date: Fri Jun 02 2017 - 20:06:53 EST


When Xen restores a PVHVM or PVH guest, its shared_info only holds
up to 32 CPUs. The hypercall VCPUOP_register_vcpu_info allows
us to set up per-page areas for VCPUs. This means we can boot
PVH* guests with more than 32 VCPUs. During restore the per-cpu
structure is allocated freshly by the hypervisor (vcpu_info_mfn is
set to INVALID_MFN) so that the newly restored guest can make a
VCPUOP_register_vcpu_info hypercall.

However, we end up triggering this condition in Xen:
/* Run this command on yourself or on other offline VCPUS. */
if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )

which means we are unable to set up the per-cpu VCPU structures
for running VCPUs. The Linux PV code path makes this work by
iterating over cpu_possible in xen_vcpu_restore() with:

1) is the target CPU up (VCPUOP_is_up hypercall)?
2) if yes, then VCPUOP_down to pause it
3) VCPUOP_register_vcpu_info
4) if it was up in step 1, then VCPUOP_up to bring it back up

With Xen commit 192df6f9122d ("xen/x86: allow HVM guests to use
hypercalls to bring up vCPUs") this is available for non-PV guests.
As such, first check whether VCPUOP_is_up is actually possible before
trying this dance.

As most of this dance code is done already in xen_vcpu_restore()
let's make it callable on PV, PVH and PVHVM.

Based-on-patch-by: Konrad Wilk <konrad.wilk@xxxxxxxxxx>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
arch/x86/xen/enlighten.c | 45 +++++++++++++++++++++++++++++++-------------
arch/x86/xen/enlighten_hvm.c | 20 +++++++-------------
arch/x86/xen/smp_hvm.c | 10 ++++++++++
arch/x86/xen/suspend_hvm.c | 11 +++--------
include/xen/xen-ops.h | 2 ++
5 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 96b745e3f56c..276cc21619ec 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -106,6 +106,21 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
return rc >= 0 ? 0 : rc;
}

+static void xen_vcpu_setup_restore(int cpu)
+{
+ /* Any per_cpu(xen_vcpu) is stale, so reset it */
+ xen_vcpu_info_reset(cpu);
+
+ /*
+ * For PVH and PVHVM, setup online VCPUs only. The rest will
+ * be handled by hotplug.
+ */
+ if (xen_pv_domain() ||
+ (xen_hvm_domain() && cpu_online(cpu))) {
+ xen_vcpu_setup(cpu);
+ }
+}
+
/*
* On restore, set the vcpu placement up again.
* If it fails, then we're in a bad state, since
@@ -117,17 +132,23 @@ void xen_vcpu_restore(void)

for_each_possible_cpu(cpu) {
bool other_cpu = (cpu != smp_processor_id());
- bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu),
- NULL);
+ bool is_up;
+
+ if (xen_vcpu_nr(cpu) == XEN_VCPU_ID_INVALID)
+ continue;
+
+ /* Only Xen 4.5 and higher support this. */
+ is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up,
+ xen_vcpu_nr(cpu), NULL) > 0;

if (other_cpu && is_up &&
HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
BUG();

- xen_setup_runstate_info(cpu);
+ if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
+ xen_setup_runstate_info(cpu);

- if (xen_have_vcpu_info_placement)
- xen_vcpu_setup(cpu);
+ xen_vcpu_setup_restore(cpu);

if (other_cpu && is_up &&
HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
@@ -163,11 +184,11 @@ void xen_vcpu_setup(int cpu)
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);

/*
- * This path is called twice on PVHVM - first during bootup via
- * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
- * hotplugged: cpu_up -> xen_hvm_cpu_notify.
- * As we can only do the VCPUOP_register_vcpu_info once lets
- * not over-write its result.
+ * This path is called on PVHVM at bootup (xen_hvm_smp_prepare_boot_cpu)
+ * and at restore (xen_vcpu_restore). Also called for hotplugged
+ * VCPUs (cpu_init -> xen_hvm_cpu_prepare_hvm).
+ * However, the hypercall can only be done once (see below) so if a VCPU
+ * is offlined and comes back online then let's not redo the hypercall.
*
* For PV it is called during restore (xen_vcpu_restore) and bootup
* (xen_setup_vcpu_info_placement). The hotplug mechanism does not
@@ -178,8 +199,6 @@ void xen_vcpu_setup(int cpu)
return;
}

- xen_vcpu_info_reset(cpu);
-
if (xen_have_vcpu_info_placement) {
vcpup = &per_cpu(xen_vcpu_info, cpu);
info.mfn = arbitrary_virt_to_mfn(vcpup);
@@ -214,7 +233,7 @@ void xen_vcpu_setup(int cpu)
if (!xen_have_vcpu_info_placement) {
if (cpu >= MAX_VIRT_CPUS)
clamp_max_cpus();
- return;
+ xen_vcpu_info_reset(cpu);
}
}

diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index eb53da6547ee..ba1afadb2512 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -20,7 +20,6 @@

void __ref xen_hvm_init_shared_info(void)
{
- int cpu;
struct xen_add_to_physmap xatp;
static struct shared_info *shared_info_page;

@@ -35,18 +34,6 @@ void __ref xen_hvm_init_shared_info(void)
BUG();

HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
-
- /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
- * page, we use it in the event channel upcall and in some pvclock
- * related functions. We don't need the vcpu_info placement
- * optimizations because we don't use any pv_mmu or pv_irq op on
- * HVM.
- * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
- * online but xen_hvm_init_shared_info is run at resume time too and
- * in that case multiple vcpus might be online. */
- for_each_online_cpu(cpu) {
- xen_vcpu_info_reset(cpu);
- }
}

static void __init init_hvm_pv_info(void)
@@ -150,6 +137,13 @@ static void __init xen_hvm_guest_init(void)

xen_hvm_init_shared_info();

+ /*
+ * xen_vcpu is a pointer to the vcpu_info struct in the shared_info
+ * page, we use it in the event channel upcall and in some pvclock
+ * related functions.
+ */
+ xen_vcpu_info_reset(0);
+
xen_panic_handler_init();

if (xen_feature(XENFEAT_hvm_callback_vector))
diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
index 9e0fb9a015d4..6c8a805819ff 100644
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -28,10 +28,20 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void)

static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
+ int cpu;
+
native_smp_prepare_cpus(max_cpus);
WARN_ON(xen_smp_intr_init(0));

xen_init_lock_cpu(0);
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == 0)
+ continue;
+
+ /* Set default vcpu_id to make sure that we don't use cpu-0's */
+ per_cpu(xen_vcpu_id, cpu) = XEN_VCPU_ID_INVALID;
+ }
}

#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c
index 01afcadde50a..484999416d8b 100644
--- a/arch/x86/xen/suspend_hvm.c
+++ b/arch/x86/xen/suspend_hvm.c
@@ -8,15 +8,10 @@

void xen_hvm_post_suspend(int suspend_cancelled)
{
- int cpu;
-
- if (!suspend_cancelled)
+ if (!suspend_cancelled) {
xen_hvm_init_shared_info();
+ xen_vcpu_restore();
+ }
xen_callback_vector();
xen_unplug_emulated_devices();
- if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
- for_each_online_cpu(cpu) {
- xen_setup_runstate_info(cpu);
- }
- }
}
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index c44a2ee8c8f8..218e6aae5433 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -15,6 +15,8 @@ static inline uint32_t xen_vcpu_nr(int cpu)
return per_cpu(xen_vcpu_id, cpu);
}

+#define XEN_VCPU_ID_INVALID U32_MAX
+
void xen_arch_pre_suspend(void);
void xen_arch_post_suspend(int suspend_cancelled);

--
2.7.4