[RFC PATCH 07/16] x86/xen: make vcpu_info part of xenhost_t

From: Ankur Arora
Date: Thu May 09 2019 - 13:28:04 EST


Abstract out xen_vcpu_id probing via (*probe_vcpu_id)(). Once that is
availab,e the vcpu_info registration happens via the VCPUOP hypercall.

Note that for the nested case, there are two vcpu_ids, and two vcpu_info
areas, one each for the default xenhost and the remote xenhost.
The vcpu_info is used via pv_irq_ops, and evtchn signaling.

The other VCPUOP hypercalls are used for management (and scheduling)
which is expected to be done purely in the default hypervisor.
However, scheduling of L1-guest does imply L0-Xen-vcpu_info switching,
which might mean that the remote hypervisor needs some visibility
into related events/hypercalls in the default hypervisor.

TODO:
- percpu data structures for xen_vcpu

Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
arch/x86/xen/enlighten.c | 93 +++++++++++++-------------------
arch/x86/xen/enlighten_hvm.c | 87 ++++++++++++++++++------------
arch/x86/xen/enlighten_pv.c | 60 ++++++++++++++-------
arch/x86/xen/enlighten_pvh.c | 3 +-
arch/x86/xen/irq.c | 10 ++--
arch/x86/xen/mmu_pv.c | 6 +--
arch/x86/xen/pci-swiotlb-xen.c | 1 +
arch/x86/xen/setup.c | 1 +
arch/x86/xen/smp.c | 9 +++-
arch/x86/xen/smp_hvm.c | 17 +++---
arch/x86/xen/smp_pv.c | 12 ++---
arch/x86/xen/time.c | 23 ++++----
arch/x86/xen/xen-ops.h | 5 +-
drivers/xen/events/events_base.c | 14 ++---
drivers/xen/events/events_fifo.c | 2 +-
drivers/xen/evtchn.c | 2 +-
drivers/xen/time.c | 2 +-
include/xen/xen-ops.h | 7 +--
include/xen/xenhost.h | 47 ++++++++++++++++
19 files changed, 240 insertions(+), 161 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 20e0de844442..0dafbbc838ef 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -20,35 +20,6 @@
#include "smp.h"
#include "pmu.h"

-/*
- * Pointer to the xen_vcpu_info structure or
- * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info
- * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info
- * but if the hypervisor supports VCPUOP_register_vcpu_info then it can point
- * to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to
- * acknowledge pending events.
- * Also more subtly it is used by the patched version of irq enable/disable
- * e.g. xen_irq_enable_direct and xen_iret in PV mode.
- *
- * The desire to be able to do those mask/unmask operations as a single
- * instruction by using the per-cpu offset held in %gs is the real reason
- * vcpu info is in a per-cpu pointer and the original reason for this
- * hypercall.
- *
- */
-DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
-
-/*
- * Per CPU pages used if hypervisor supports VCPUOP_register_vcpu_info
- * hypercall. This can be used both in PV and PVHVM mode. The structure
- * overrides the default per_cpu(xen_vcpu, cpu) value.
- */
-DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
-
-/* Linux <-> Xen vCPU id mapping */
-DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
-EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
-
enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);

@@ -112,12 +83,12 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
return rc >= 0 ? 0 : rc;
}

-static int xen_vcpu_setup_restore(int cpu)
+static int xen_vcpu_setup_restore(xenhost_t *xh, int cpu)
{
int rc = 0;

/* Any per_cpu(xen_vcpu) is stale, so reset it */
- xen_vcpu_info_reset(cpu);
+ xen_vcpu_info_reset(xh, cpu);

/*
* For PVH and PVHVM, setup online VCPUs only. The rest will
@@ -125,7 +96,7 @@ static int xen_vcpu_setup_restore(int cpu)
*/
if (xen_pv_domain() ||
(xen_hvm_domain() && cpu_online(cpu))) {
- rc = xen_vcpu_setup(cpu);
+ rc = xen_vcpu_setup(xh, cpu);
}

return rc;
@@ -138,30 +109,42 @@ static int xen_vcpu_setup_restore(int cpu)
*/
void xen_vcpu_restore(void)
{
- int cpu, rc;
+ int cpu, rc = 0;

+ /*
+ * VCPU management is primarily the responsibility of xh_default and
+ * xh_remote only needs VCPUOP_register_vcpu_info.
+ * So, we do VPUOP_down and VCPUOP_up only on xh_default.
+ *
+ * (Currently, however, VCPUOP_register_vcpu_info is allowed only
+ * on VCPUs that are self or down, so we might need a new model
+ * there.)
+ */
for_each_possible_cpu(cpu) {
bool other_cpu = (cpu != smp_processor_id());
bool is_up;
+ xenhost_t **xh;

- if (xen_vcpu_nr(cpu) == XEN_VCPU_ID_INVALID)
+ if (xen_vcpu_nr(xh_default, cpu) == XEN_VCPU_ID_INVALID)
continue;

/* Only Xen 4.5 and higher support this. */
is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up,
- xen_vcpu_nr(cpu), NULL) > 0;
+ xen_vcpu_nr(xh_default, cpu), NULL) > 0;

if (other_cpu && is_up &&
- HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
+ HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(xh_default, cpu), NULL))
BUG();

if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_runstate_info(cpu);

- rc = xen_vcpu_setup_restore(cpu);
- if (rc)
- pr_emerg_once("vcpu restore failed for cpu=%d err=%d. "
- "System will hang.\n", cpu, rc);
+ for_each_xenhost(xh) {
+ rc = xen_vcpu_setup_restore(*xh, cpu);
+ if (rc)
+ pr_emerg_once("vcpu restore failed for cpu=%d err=%d. "
+ "System will hang.\n", cpu, rc);
+ }
/*
* In case xen_vcpu_setup_restore() fails, do not bring up the
* VCPU. This helps us avoid the resulting OOPS when the VCPU
@@ -172,29 +155,29 @@ void xen_vcpu_restore(void)
* VCPUs to come up.
*/
if (other_cpu && is_up && (rc == 0) &&
- HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
+ HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(xh_default, cpu), NULL))
BUG();
}
}

-void xen_vcpu_info_reset(int cpu)
+void xen_vcpu_info_reset(xenhost_t *xh, int cpu)
{
- if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) {
- per_cpu(xen_vcpu, cpu) =
- &xh_default->HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
+ if (xen_vcpu_nr(xh, cpu) < MAX_VIRT_CPUS) {
+ xh->xen_vcpu[cpu] =
+ &xh->HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(xh, cpu)];
} else {
/* Set to NULL so that if somebody accesses it we get an OOPS */
- per_cpu(xen_vcpu, cpu) = NULL;
+ xh->xen_vcpu[cpu] = NULL;
}
}

-int xen_vcpu_setup(int cpu)
+int xen_vcpu_setup(xenhost_t *xh, int cpu)
{
struct vcpu_register_vcpu_info info;
int err;
struct vcpu_info *vcpup;

- BUG_ON(xh_default->HYPERVISOR_shared_info == &xen_dummy_shared_info);
+ BUG_ON(xh->HYPERVISOR_shared_info == &xen_dummy_shared_info);

/*
* This path is called on PVHVM at bootup (xen_hvm_smp_prepare_boot_cpu)
@@ -208,12 +191,12 @@ int xen_vcpu_setup(int cpu)
* use this function.
*/
if (xen_hvm_domain()) {
- if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
+ if (xh->xen_vcpu[cpu] == &xh->xen_vcpu_info[cpu])
return 0;
}

if (xen_have_vcpu_info_placement) {
- vcpup = &per_cpu(xen_vcpu_info, cpu);
+ vcpup = &xh->xen_vcpu_info[cpu];
info.mfn = arbitrary_virt_to_mfn(vcpup);
info.offset = offset_in_page(vcpup);

@@ -227,8 +210,8 @@ int xen_vcpu_setup(int cpu)
* hypercall does not allow to over-write info.mfn and
* info.offset.
*/
- err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info,
- xen_vcpu_nr(cpu), &info);
+ err = hypervisor_vcpu_op(xh, VCPUOP_register_vcpu_info,
+ xen_vcpu_nr(xh, cpu), &info);

if (err) {
pr_warn_once("register_vcpu_info failed: cpu=%d err=%d\n",
@@ -239,14 +222,14 @@ int xen_vcpu_setup(int cpu)
* This cpu is using the registered vcpu info, even if
* later ones fail to.
*/
- per_cpu(xen_vcpu, cpu) = vcpup;
+ xh->xen_vcpu[cpu] = vcpup;
}
}

if (!xen_have_vcpu_info_placement)
- xen_vcpu_info_reset(cpu);
+ xen_vcpu_info_reset(xh, cpu);

- return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0);
+ return ((xh->xen_vcpu[cpu] == NULL) ? -ENODEV : 0);
}

void xen_reboot(int reason)
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 0e53363f9d1f..c1981a3e4989 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -5,6 +5,7 @@
#include <linux/kexec.h>
#include <linux/memblock.h>

+#include <xen/interface/xen.h>
#include <xen/xenhost.h>
#include <xen/features.h>
#include <xen/events.h>
@@ -72,22 +73,22 @@ static void __init xen_hvm_init_mem_mapping(void)
{
xenhost_t **xh;

- for_each_xenhost(xh)
+ for_each_xenhost(xh) {
xenhost_reset_shared_info(*xh);

- /*
- * The virtual address of the shared_info page has changed, so
- * the vcpu_info pointer for VCPU 0 is now stale.
- *
- * The prepare_boot_cpu callback will re-initialize it via
- * xen_vcpu_setup, but we can't rely on that to be called for
- * old Xen versions (xen_have_vector_callback == 0).
- *
- * It is, in any case, bad to have a stale vcpu_info pointer
- * so reset it now.
- * For now, this uses xh_default implictly.
- */
- xen_vcpu_info_reset(0);
+ /*
+ * The virtual address of the shared_info page has changed, so
+ * the vcpu_info pointer for VCPU 0 is now stale.
+ *
+ * The prepare_boot_cpu callback will re-initialize it via
+ * xen_vcpu_setup, but we can't rely on that to be called for
+ * old Xen versions (xen_have_vector_callback == 0).
+ *
+ * It is, in any case, bad to have a stale vcpu_info pointer
+ * so reset it now.
+ */
+ xen_vcpu_info_reset(*xh, 0);
+ }
}

extern uint32_t xen_pv_cpuid_base(xenhost_t *xh);
@@ -103,11 +104,32 @@ void xen_hvm_setup_hypercall_page(xenhost_t *xh)
xh->hypercall_page = xen_hypercall_page;
}

+static void xen_hvm_probe_vcpu_id(xenhost_t *xh, int cpu)
+{
+ uint32_t eax, ebx, ecx, edx, base;
+
+ base = xenhost_cpuid_base(xh);
+
+ if (cpu == 0) {
+ cpuid(base + 4, &eax, &ebx, &ecx, &edx);
+ if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
+ xh->xen_vcpu_id[cpu] = ebx;
+ else
+ xh->xen_vcpu_id[cpu] = smp_processor_id();
+ } else {
+ if (cpu_acpi_id(cpu) != U32_MAX)
+ xh->xen_vcpu_id[cpu] = cpu_acpi_id(cpu);
+ else
+ xh->xen_vcpu_id[cpu] = cpu;
+ }
+}
+
xenhost_ops_t xh_hvm_ops = {
.cpuid_base = xen_pv_cpuid_base,
.setup_hypercall_page = xen_hvm_setup_hypercall_page,
.setup_shared_info = xen_hvm_init_shared_info,
.reset_shared_info = xen_hvm_reset_shared_info,
+ .probe_vcpu_id = xen_hvm_probe_vcpu_id,
};

xenhost_ops_t xh_hvm_nested_ops = {
@@ -116,7 +138,7 @@ xenhost_ops_t xh_hvm_nested_ops = {
static void __init init_hvm_pv_info(void)
{
int major, minor;
- uint32_t eax, ebx, ecx, edx, base;
+ uint32_t eax, base;
xenhost_t **xh;

base = xenhost_cpuid_base(xh_default);
@@ -147,11 +169,8 @@ static void __init init_hvm_pv_info(void)
if (xen_validate_features() == false)
__xenhost_unregister(xenhost_r2);

- cpuid(base + 4, &eax, &ebx, &ecx, &edx);
- if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
- this_cpu_write(xen_vcpu_id, ebx);
- else
- this_cpu_write(xen_vcpu_id, smp_processor_id());
+ for_each_xenhost(xh)
+ xenhost_probe_vcpu_id(*xh, smp_processor_id());
}

#ifdef CONFIG_KEXEC_CORE
@@ -172,6 +191,7 @@ static void xen_hvm_crash_shutdown(struct pt_regs *regs)
static int xen_cpu_up_prepare_hvm(unsigned int cpu)
{
int rc = 0;
+ xenhost_t **xh;

/*
* This can happen if CPU was offlined earlier and
@@ -182,13 +202,12 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
xen_uninit_lock_cpu(cpu);
}

- if (cpu_acpi_id(cpu) != U32_MAX)
- per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
- else
- per_cpu(xen_vcpu_id, cpu) = cpu;
- rc = xen_vcpu_setup(cpu);
- if (rc)
- return rc;
+ for_each_xenhost(xh) {
+ xenhost_probe_vcpu_id(*xh, cpu);
+ rc = xen_vcpu_setup(*xh, cpu);
+ if (rc)
+ return rc;
+ }

if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_timer(cpu);
@@ -229,15 +248,15 @@ static void __init xen_hvm_guest_init(void)
for_each_xenhost(xh) {
reserve_shared_info(*xh);
xenhost_setup_shared_info(*xh);
+
+ /*
+ * xen_vcpu is a pointer to the vcpu_info struct in the
+ * shared_info page, we use it in the event channel upcall
+ * and in some pvclock related functions.
+ */
+ xen_vcpu_info_reset(*xh, 0);
}

- /*
- * xen_vcpu is a pointer to the vcpu_info struct in the shared_info
- * page, we use it in the event channel upcall and in some pvclock
- * related functions.
- * For now, this uses xh_default implictly.
- */
- xen_vcpu_info_reset(0);

xen_panic_handler_init();

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 1a9eded4b76b..5f6a1475ec0c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -36,8 +36,8 @@

#include <xen/xen.h>
#include <xen/events.h>
-#include <xen/xenhost.h>
#include <xen/interface/xen.h>
+#include <xen/xenhost.h>
#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
@@ -126,12 +126,12 @@ static void __init xen_pv_init_platform(void)

populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));

- for_each_xenhost(xh)
+ for_each_xenhost(xh) {
xenhost_setup_shared_info(*xh);

- /* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */
- /* For now this uses xh_default implicitly. */
- xen_vcpu_info_reset(0);
+ /* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */
+ xen_vcpu_info_reset(*xh, 0);
+ }

/* pvclock is in shared info area */
xen_init_time_ops();
@@ -973,28 +973,31 @@ static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
/* This is called once we have the cpu_possible_mask */
void __init xen_setup_vcpu_info_placement(void)
{
+ xenhost_t **xh;
int cpu;

for_each_possible_cpu(cpu) {
- /* Set up direct vCPU id mapping for PV guests. */
- per_cpu(xen_vcpu_id, cpu) = cpu;
+ for_each_xenhost(xh) {
+ xenhost_probe_vcpu_id(*xh, cpu);

- /*
- * xen_vcpu_setup(cpu) can fail -- in which case it
- * falls back to the shared_info version for cpus
- * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS.
- *
- * xen_cpu_up_prepare_pv() handles the rest by failing
- * them in hotplug.
- */
- (void) xen_vcpu_setup(cpu);
+ /*
+ * xen_vcpu_setup(cpu) can fail -- in which case it
+ * falls back to the shared_info version for cpus
+ * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS.
+ *
+ * xen_cpu_up_prepare_pv() handles the rest by failing
+ * them in hotplug.
+ */
+ (void) xen_vcpu_setup(*xh, cpu);
+ }
}

/*
* xen_vcpu_setup managed to place the vcpu_info within the
* percpu area for all cpus, so make use of it.
*/
- if (xen_have_vcpu_info_placement) {
+ if (xen_have_vcpu_info_placement && false) {
+ /* Disable direct access until we have proper pcpu data structures. */
pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
pv_ops.irq.restore_fl =
__PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
@@ -1110,6 +1113,11 @@ static unsigned char xen_get_nmi_reason(void)
{
unsigned char reason = 0;

+ /*
+ * We could get this information from all the xenhosts and OR it.
+ * But, the remote xenhost isn't really expected to send us NMIs.
+ */
+
/* Construct a value which looks like it came from port 0x61. */
if (test_bit(_XEN_NMIREASON_io_error,
&xh_default->HYPERVISOR_shared_info->arch.nmi_reason))
@@ -1222,6 +1230,12 @@ static void xen_pv_reset_shared_info(xenhost_t *xh)
BUG();
}

+void xen_pv_probe_vcpu_id(xenhost_t *xh, int cpu)
+{
+ /* Set up direct vCPU id mapping for PV guests. */
+ xh->xen_vcpu_id[cpu] = cpu;
+}
+
xenhost_ops_t xh_pv_ops = {
.cpuid_base = xen_pv_cpuid_base,

@@ -1229,6 +1243,8 @@ xenhost_ops_t xh_pv_ops = {

.setup_shared_info = xen_pv_setup_shared_info,
.reset_shared_info = xen_pv_reset_shared_info,
+
+ .probe_vcpu_id = xen_pv_probe_vcpu_id,
};

xenhost_ops_t xh_pv_nested_ops = {
@@ -1283,7 +1299,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
* Don't do the full vcpu_info placement stuff until we have
* the cpu_possible_mask and a non-dummy shared_info.
*/
- xen_vcpu_info_reset(0);
+ for_each_xenhost(xh) {
+ xen_vcpu_info_reset(*xh, 0);
+ }

x86_platform.get_nmi_reason = xen_get_nmi_reason;

@@ -1328,7 +1346,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
get_cpu_address_sizes(&boot_cpu_data);

/* Let's presume PV guests always boot on vCPU with id 0. */
- per_cpu(xen_vcpu_id, 0) = 0;
+ /* Note: we should be doing this before xen_vcpu_info_reset above. */
+ for_each_xenhost(xh)
+ xenhost_probe_vcpu_id(*xh, 0);

idt_setup_early_handler();

@@ -1485,7 +1505,7 @@ static int xen_cpu_up_prepare_pv(unsigned int cpu)
{
int rc;

- if (per_cpu(xen_vcpu, cpu) == NULL)
+ if (xh_default->xen_vcpu[cpu] == NULL)
return -ENODEV;

xen_setup_timer(cpu);
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 50277dfbdf30..3f98526dd041 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -2,13 +2,14 @@
#include <linux/acpi.h>

#include <xen/hvc-console.h>
+#include <xen/interface/xen.h>

#include <asm/io_apic.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>

-#include <xen/xen.h>
#include <xen/xenhost.h>
+#include <xen/xen.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 850c93f346c7..38ad1a1c4763 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -29,7 +29,7 @@ asmlinkage __visible unsigned long xen_save_fl(void)
struct vcpu_info *vcpu;
unsigned long flags;

- vcpu = this_cpu_read(xen_vcpu);
+ vcpu = xh_default->xen_vcpu[smp_processor_id()];

/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
@@ -51,7 +51,7 @@ __visible void xen_restore_fl(unsigned long flags)

/* See xen_irq_enable() for why preemption must be disabled. */
preempt_disable();
- vcpu = this_cpu_read(xen_vcpu);
+ vcpu = xh_default->xen_vcpu[smp_processor_id()];
vcpu->evtchn_upcall_mask = flags;

if (flags == 0) {
@@ -70,7 +70,7 @@ asmlinkage __visible void xen_irq_disable(void)
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
- this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
+ xh_default->xen_vcpu[smp_processor_id()]->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
@@ -86,7 +86,7 @@ asmlinkage __visible void xen_irq_enable(void)
*/
preempt_disable();

- vcpu = this_cpu_read(xen_vcpu);
+ vcpu = xh_default->xen_vcpu[smp_processor_id()];
vcpu->evtchn_upcall_mask = 0;

/* Doesn't matter if we get preempted here, because any
@@ -111,7 +111,7 @@ static void xen_halt(void)
{
if (irqs_disabled())
HYPERVISOR_vcpu_op(VCPUOP_down,
- xen_vcpu_nr(smp_processor_id()), NULL);
+ xen_vcpu_nr(xh_default, smp_processor_id()), NULL);
else
xen_safe_halt();
}
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 0f4fe206dcc2..e99af51ab481 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1304,17 +1304,17 @@ static void __init xen_pagetable_init(void)
}
static void xen_write_cr2(unsigned long cr2)
{
- this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
+ xh_default->xen_vcpu[smp_processor_id()]->arch.cr2 = cr2;
}

static unsigned long xen_read_cr2(void)
{
- return this_cpu_read(xen_vcpu)->arch.cr2;
+ return xh_default->xen_vcpu[smp_processor_id()]->arch.cr2;
}

unsigned long xen_read_cr2_direct(void)
{
- return this_cpu_read(xen_vcpu_info.arch.cr2);
+ return xh_default->xen_vcpu_info[smp_processor_id()].arch.cr2;
}

static noinline void xen_flush_tlb(void)
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 33293ce01d8d..04f9b2e92f06 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -4,6 +4,7 @@

#include <linux/dma-mapping.h>
#include <linux/pci.h>
+#include <xen/interface/xen.h>
#include <xen/swiotlb-xen.h>

#include <asm/xen/hypervisor.h>
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d5f303c0e656..ec8f22a54f6e 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -19,6 +19,7 @@
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/numa.h>
+#include <xen/interface/xen.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7a43b2ae19f1..867524be0065 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -6,6 +6,7 @@
#include <linux/percpu.h>

#include <xen/events.h>
+#include <xen/xenhost.h>

#include <xen/hvc-console.h>
#include "xen-ops.h"
@@ -129,7 +130,10 @@ void __init xen_smp_cpus_done(unsigned int max_cpus)
return;

for_each_online_cpu(cpu) {
- if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
+ xenhost_t **xh;
+
+ if ((xen_vcpu_nr(xh_default, cpu) < MAX_VIRT_CPUS) &&
+ (!xh_remote || (xen_vcpu_nr(xh_remote, cpu) < MAX_VIRT_CPUS)))
continue;

rc = cpu_down(cpu);
@@ -138,7 +142,8 @@ void __init xen_smp_cpus_done(unsigned int max_cpus)
/*
* Reset vcpu_info so this cpu cannot be onlined again.
*/
- xen_vcpu_info_reset(cpu);
+ for_each_xenhost(xh)
+ xen_vcpu_info_reset(*xh, cpu);
count++;
} else {
pr_warn("%s: failed to bring CPU %d down, error %d\n",
diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
index f8d39440b292..5e7f591bfdd9 100644
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -9,6 +9,7 @@

static void __init xen_hvm_smp_prepare_boot_cpu(void)
{
+ xenhost_t **xh;
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();

@@ -16,7 +17,8 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void)
* Setup vcpu_info for boot CPU. Secondary CPUs get their vcpu_info
* in xen_cpu_up_prepare_hvm().
*/
- xen_vcpu_setup(0);
+ for_each_xenhost(xh)
+ xen_vcpu_setup(*xh, 0);

/*
* The alternative logic (which patches the unlock/lock) runs before
@@ -29,6 +31,7 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void)

static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
+ xenhost_t **xh;
int cpu;

native_smp_prepare_cpus(max_cpus);
@@ -36,12 +39,14 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)

xen_init_lock_cpu(0);

- for_each_possible_cpu(cpu) {
- if (cpu == 0)
- continue;
+ for_each_xenhost(xh) {
+ for_each_possible_cpu(cpu) {
+ if (cpu == 0)
+ continue;

- /* Set default vcpu_id to make sure that we don't use cpu-0's */
- per_cpu(xen_vcpu_id, cpu) = XEN_VCPU_ID_INVALID;
+ /* Set default vcpu_id to make sure that we don't use cpu-0's */
+ (*xh)->xen_vcpu_id[cpu] = XEN_VCPU_ID_INVALID;
+ }
}
}

diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 145506f9fdbe..6d9c3e6611ef 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -350,7 +350,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);

ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
- if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
+ if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(xh_default, cpu), ctxt))
BUG();

kfree(ctxt);
@@ -374,7 +374,7 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
return rc;

/* make sure interrupts start blocked */
- per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
+ xh_default->xen_vcpu[cpu]->evtchn_upcall_mask = 1;

rc = cpu_initialize_context(cpu, idle);
if (rc)
@@ -382,7 +382,7 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)

xen_pmu_init(cpu);

- rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
+ rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(xh_default, cpu), NULL);
BUG_ON(rc);

while (cpu_report_state(cpu) != CPU_ONLINE)
@@ -407,7 +407,7 @@ static int xen_pv_cpu_disable(void)
static void xen_pv_cpu_die(unsigned int cpu)
{
while (HYPERVISOR_vcpu_op(VCPUOP_is_up,
- xen_vcpu_nr(cpu), NULL)) {
+ xen_vcpu_nr(xh_default, cpu), NULL)) {
__set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(HZ/10);
}
@@ -423,7 +423,7 @@ static void xen_pv_cpu_die(unsigned int cpu)
static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
{
play_dead_common();
- HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
+ HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(xh_default, smp_processor_id()), NULL);
cpu_bringup();
/*
* commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
@@ -464,7 +464,7 @@ static void stop_self(void *v)

set_cpu_online(cpu, false);

- HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL);
+ HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(xh_default, cpu), NULL);
BUG();
}

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index d4bb1f8b4f58..217bc4de07ee 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -18,12 +18,12 @@
#include <linux/timekeeper_internal.h>

#include <asm/pvclock.h>
+#include <xen/interface/xen.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/features.h>
-#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"
@@ -48,7 +48,7 @@ static u64 xen_clocksource_read(void)
u64 ret;

preempt_disable_notrace();
- src = &__this_cpu_read(xen_vcpu)->time;
+ src = &xh_default->xen_vcpu[smp_processor_id()]->time;
ret = pvclock_clocksource_read(src);
preempt_enable_notrace();
return ret;
@@ -70,9 +70,10 @@ static void xen_read_wallclock(struct timespec64 *ts)
struct pvclock_wall_clock *wall_clock = &(s->wc);
struct pvclock_vcpu_time_info *vcpu_time;

- vcpu_time = &get_cpu_var(xen_vcpu)->time;
+ preempt_disable_notrace();
+ vcpu_time = &xh_default->xen_vcpu[smp_processor_id()]->time;
pvclock_read_wallclock(wall_clock, vcpu_time, ts);
- put_cpu_var(xen_vcpu);
+ preempt_enable_notrace();
}

static void xen_get_wallclock(struct timespec64 *now)
@@ -233,9 +234,9 @@ static int xen_vcpuop_shutdown(struct clock_event_device *evt)
{
int cpu = smp_processor_id();

- if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(xh_default, cpu),
NULL) ||
- HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
+ HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(xh_default, cpu),
NULL))
BUG();

@@ -246,7 +247,7 @@ static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
{
int cpu = smp_processor_id();

- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(xh_default, cpu),
NULL))
BUG();

@@ -266,7 +267,7 @@ static int xen_vcpuop_set_next_event(unsigned long delta,
/* Get an event anyway, even if the timeout is already expired */
single.flags = 0;

- ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
+ ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(xh_default, cpu),
&single);
BUG_ON(ret != 0);

@@ -366,7 +367,7 @@ void xen_timer_resume(void)

for_each_online_cpu(cpu) {
if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
- xen_vcpu_nr(cpu), NULL))
+ xen_vcpu_nr(xh_default, cpu), NULL))
BUG();
}
}
@@ -482,7 +483,7 @@ static void __init xen_time_init(void)

clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(xh_default, cpu),
NULL) == 0) {
/* Successfully turned off 100Hz tick, so we have the
vcpuop-based timer interface */
@@ -500,7 +501,7 @@ static void __init xen_time_init(void)
* We check ahead on the primary time info if this
* bit is supported hence speeding up Xen clocksource.
*/
- pvti = &__this_cpu_read(xen_vcpu)->time;
+ pvti = &xh_default->xen_vcpu[smp_processor_id()]->time;
if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
xen_setup_vsyscall_time_info();
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 5085ce88a8d7..96fd7edea7e9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -22,7 +22,6 @@ extern void *xen_initial_gdt;
struct trap_info;
void xen_copy_trap_info(struct trap_info *traps);

-DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info);
DECLARE_PER_CPU(unsigned long, xen_cr3);
DECLARE_PER_CPU(unsigned long, xen_current_cr3);

@@ -76,8 +75,8 @@ bool xen_vcpu_stolen(int vcpu);

extern int xen_have_vcpu_info_placement;

-int xen_vcpu_setup(int cpu);
-void xen_vcpu_info_reset(int cpu);
+int xen_vcpu_setup(xenhost_t *xh, int cpu);
+void xen_vcpu_info_reset(xenhost_t *xh, int cpu);
void xen_setup_vcpu_info_placement(void);

#ifdef CONFIG_SMP
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 117e76b2f939..ae497876fe41 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -884,7 +884,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
handle_percpu_irq, "ipi");

- bind_ipi.vcpu = xen_vcpu_nr(cpu);
+ bind_ipi.vcpu = xen_vcpu_nr(xh_default, cpu);
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
&bind_ipi) != 0)
BUG();
@@ -937,7 +937,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)
continue;
if (status.status != EVTCHNSTAT_virq)
continue;
- if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
+ if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(xh_default, cpu)) {
rc = port;
break;
}
@@ -980,7 +980,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
handle_edge_irq, "virq");

bind_virq.virq = virq;
- bind_virq.vcpu = xen_vcpu_nr(cpu);
+ bind_virq.vcpu = xen_vcpu_nr(xh_default, cpu);
ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq);
if (ret == 0)
@@ -1200,7 +1200,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)

#ifdef CONFIG_X86
if (unlikely(vector == XEN_NMI_VECTOR)) {
- int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
+ int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(xh_default, cpu),
NULL);
if (rc < 0)
printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
@@ -1306,7 +1306,7 @@ int xen_rebind_evtchn_to_cpu(int evtchn, unsigned tcpu)

/* Send future instances of this interrupt to other vcpu. */
bind_vcpu.port = evtchn;
- bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
+ bind_vcpu.vcpu = xen_vcpu_nr(xh_default, tcpu);

/*
* Mask the event while changing the VCPU binding to prevent
@@ -1451,7 +1451,7 @@ static void restore_cpu_virqs(unsigned int cpu)

/* Get a new binding from Xen. */
bind_virq.virq = virq;
- bind_virq.vcpu = xen_vcpu_nr(cpu);
+ bind_virq.vcpu = xen_vcpu_nr(xh_default, cpu);
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq) != 0)
BUG();
@@ -1475,7 +1475,7 @@ static void restore_cpu_ipis(unsigned int cpu)
BUG_ON(ipi_from_irq(irq) != ipi);

/* Get a new binding from Xen. */
- bind_ipi.vcpu = xen_vcpu_nr(cpu);
+ bind_ipi.vcpu = xen_vcpu_nr(xh_default, cpu);
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
&bind_ipi) != 0)
BUG();
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index 76b318e88382..eed766219dd0 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -113,7 +113,7 @@ static int init_control_block(int cpu,

init_control.control_gfn = virt_to_gfn(control_block);
init_control.offset = 0;
- init_control.vcpu = xen_vcpu_nr(cpu);
+ init_control.vcpu = xen_vcpu_nr(xh_default, cpu);

return HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
}
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 6d1a5e58968f..66622109f2be 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -475,7 +475,7 @@ static long evtchn_ioctl(struct file *file,
break;

bind_virq.virq = bind.virq;
- bind_virq.vcpu = xen_vcpu_nr(0);
+ bind_virq.vcpu = xen_vcpu_nr(xh_default, 0);
rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq);
if (rc != 0)
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 0968859c29d0..feee74bbab0a 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -164,7 +164,7 @@ void xen_setup_runstate_info(int cpu)
area.addr.v = &per_cpu(xen_runstate, cpu);

if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
- xen_vcpu_nr(cpu), &area))
+ xen_vcpu_nr(xh_default, cpu), &area))
BUG();
}

diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 4969817124a8..75be9059893f 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -9,12 +9,9 @@
#include <asm/xen/interface.h>
#include <xen/interface/vcpu.h>

-DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
-
-DECLARE_PER_CPU(uint32_t, xen_vcpu_id);
-static inline uint32_t xen_vcpu_nr(int cpu)
+static inline uint32_t xen_vcpu_nr(xenhost_t *xh, int cpu)
{
- return per_cpu(xen_vcpu_id, cpu);
+ return xh->xen_vcpu_id[cpu];
}

#define XEN_VCPU_ID_INVALID U32_MAX
diff --git a/include/xen/xenhost.h b/include/xen/xenhost.h
index 7c19c361d16e..f6092a8987f1 100644
--- a/include/xen/xenhost.h
+++ b/include/xen/xenhost.h
@@ -90,6 +90,28 @@ typedef struct {
struct shared_info *HYPERVISOR_shared_info;
unsigned long shared_info_pfn;
};
+
+ struct {
+ /*
+ * Events on xen-evtchn ports show up in struct vcpu_info.
+ * With multiple xenhosts, the evtchn-port numbering space that
+ * was global so far is now attached to a xenhost.
+ *
+ * So, now we allocate vcpu_info for each processor (we had space
+ * for only MAX_VIRT_CPUS in the shared_info above.)
+ *
+ * FIXME we statically allocate for NR_CPUS because alloc_percpu()
+ * isn't available at PV boot time but this is slow.
+ */
+ struct vcpu_info xen_vcpu_info[NR_CPUS];
+ struct vcpu_info *xen_vcpu[NR_CPUS];
+
+ /*
+ * Different xenhosts might have different Linux <-> Xen vCPU-id
+ * mapping.
+ */
+ uint32_t xen_vcpu_id[NR_CPUS];
+ };
} xenhost_t;

typedef struct xenhost_ops {
@@ -139,6 +161,26 @@ typedef struct xenhost_ops {
*/
void (*setup_shared_info)(xenhost_t *xenhost);
void (*reset_shared_info)(xenhost_t *xenhost);
+
+ /*
+ * vcpu_info, vcpu_id: needs to be setup early -- all IRQ code accesses
+ * relevant bits.
+ *
+ * vcpu_id is probed on PVH/PVHVM via xen_cpuid(). For PV, its direct
+ * mapped to smp_processor_id().
+ *
+ * This is part of xenhost_t because we might be registered with two
+ * different xenhosts and both of those might have their own vcpu
+ * numbering.
+ *
+ * After the vcpu numbering is identified, we can go ahead and register
+ * vcpu_info with the xenhost; on the default xenhost this happens via
+ * the register_vcpu_info hypercall.
+ *
+ * Once vcpu_info is setup (this or the shared_info version), it would
+ * get accessed via pv_ops.irq.* and the evtchn logic.
+ */
+ void (*probe_vcpu_id)(xenhost_t *xenhost, int cpu);
} xenhost_ops_t;

extern xenhost_t *xh_default, *xh_remote;
@@ -185,4 +227,9 @@ static inline void xenhost_reset_shared_info(xenhost_t *xh)
(xh->ops->reset_shared_info)(xh);
}

+static inline void xenhost_probe_vcpu_id(xenhost_t *xh, int cpu)
+{
+ (xh->ops->probe_vcpu_id)(xh, cpu);
+}
+
#endif /* __XENHOST_H */
--
2.20.1