Re: [PATCH v7 2/5] Drivers: hv: Setup synic registers in case of nested root partition

From: Jinank Jain
Date: Fri Dec 02 2022 - 02:05:22 EST



On 12/2/2022 9:30 AM, Michael Kelley (LINUX) wrote:
From: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx> Sent: Thursday, December 1, 2022 3:04 AM
Child partitions are free to allocate SynIC message and event page but in
case of root partition it must use the pages allocated by Microsoft
Hypervisor (MSHV). Base address for these pages can be found using
synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
for nested vs non-nested root partition.

Signed-off-by: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx>
---
arch/x86/include/asm/hyperv-tlfs.h | 11 ++++
arch/x86/include/asm/mshyperv.h | 30 ++-------
arch/x86/kernel/cpu/mshyperv.c | 69 +++++++++++++++++++++
drivers/hv/hv.c | 99 ++++++++++++++++++++++--------
include/asm-generic/mshyperv.h | 5 +-
5 files changed, 165 insertions(+), 49 deletions(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 58c03d18c235..b5019becb618 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -225,6 +225,17 @@ enum hv_isolation_type {
#define HV_REGISTER_SINT14 0x4000009E
#define HV_REGISTER_SINT15 0x4000009F

+/*
+ * Define synthetic interrupt controller model specific registers for
+ * nested hypervisor.
+ */
+#define HV_REGISTER_NESTED_SCONTROL 0x40001080
+#define HV_REGISTER_NESTED_SVERSION 0x40001081
+#define HV_REGISTER_NESTED_SIEFP 0x40001082
+#define HV_REGISTER_NESTED_SIMP 0x40001083
+#define HV_REGISTER_NESTED_EOM 0x40001084
+#define HV_REGISTER_NESTED_SINT0 0x40001090
+
/*
* Synthetic Timer MSRs. Four timers per vcpu.
*/
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 61f0c206bff0..3197d49c888c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
return false;
}

-static inline u64 hv_get_register(unsigned int reg)
-{
- u64 value;
-
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
- hv_ghcb_msr_read(reg, &value);
- else
- rdmsrl(reg, value);
- return value;
-}
-
-static inline void hv_set_register(unsigned int reg, u64 value)
-{
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
- hv_ghcb_msr_write(reg, value);
-
- /* Write proxy bit via wrmsl instruction */
- if (reg >= HV_REGISTER_SINT0 &&
- reg <= HV_REGISTER_SINT15)
- wrmsrl(reg, value | 1 << 20);
- } else {
- wrmsrl(reg, value);
- }
-}
+u64 hv_get_register(unsigned int reg);
+void hv_set_register(unsigned int reg, u64 value);
+u64 hv_get_nested_register(unsigned int reg);
+void hv_set_nested_register(unsigned int reg, u64 value);

#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
@@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
}
static inline void hv_set_register(unsigned int reg, u64 value) { }
static inline u64 hv_get_register(unsigned int reg) { return 0; }
+static inline void hv_set_nested_register(unsigned int reg, u64 value) { }
+static inline u64 hv_get_nested_register(unsigned int reg) { return 0; }
static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
bool visible)
{
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f9b78d4829e3..f2f6e10301a8 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -41,7 +41,76 @@ bool hv_root_partition;
bool hv_nested;
struct ms_hyperv_info ms_hyperv;

+static inline unsigned int hv_get_nested_reg(unsigned int reg)
+{
+ switch (reg) {
+ case HV_REGISTER_SIMP:
+ return HV_REGISTER_NESTED_SIMP;
+ case HV_REGISTER_NESTED_SIEFP:
+ return HV_REGISTER_SIEFP;
+ case HV_REGISTER_SCONTROL:
+ return HV_REGISTER_NESTED_SCONTROL;
+ case HV_REGISTER_SINT0:
+ return HV_REGISTER_NESTED_SINT0;
+ case HV_REGISTER_EOM:
+ return HV_REGISTER_NESTED_EOM;
+ default:
+ return reg;
+ }
Just a question: You added #defines for 6 nested registers. But
the switch statement above maps only 5 registers. Is it intentional
that there's not a mapping for HV_REGISTER_SVERSION?

Good catch! Will fix it in the next revision.


+}
+
#if IS_ENABLED(CONFIG_HYPERV)
+static u64 _hv_get_register(unsigned int reg, bool nested)
+{
+ u64 value;
+
+ if (nested)
+ reg = hv_get_nested_reg(reg);
+
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
+ hv_ghcb_msr_read(reg, &value);
+ else
+ rdmsrl(reg, value);
+ return value;
+}
+
+static void _hv_set_register(unsigned int reg, u64 value, bool nested)
+{
+ if (nested)
+ reg = hv_get_nested_reg(reg);
+
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
+ hv_ghcb_msr_write(reg, value);
+
+ /* Write proxy bit via wrmsl instruction */
+ if (reg >= HV_REGISTER_SINT0 &&
+ reg <= HV_REGISTER_SINT15)
+ wrmsrl(reg, value | 1 << 20);
+ } else {
+ wrmsrl(reg, value);
+ }
+}
+
+u64 hv_get_register(unsigned int reg)
+{
+ return _hv_get_register(reg, false);
+}
+
+void hv_set_register(unsigned int reg, u64 value)
+{
+ _hv_set_register(reg, value, false);
+}
+
+u64 hv_get_nested_register(unsigned int reg)
+{
+ return _hv_get_register(reg, true);
+}
+
+void hv_set_nested_register(unsigned int reg, u64 value)
+{
+ _hv_set_register(reg, value, true);
+}
+
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 4d6480d57546..0ed052f2423e 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -147,7 +147,7 @@ int hv_synic_alloc(void)
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
- if (!hv_isolation_type_snp()) {
+ if (!hv_isolation_type_snp() && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -188,8 +188,16 @@ void hv_synic_free(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);

- free_page((unsigned long)hv_cpu->synic_event_page);
- free_page((unsigned long)hv_cpu->synic_message_page);
+ if (hv_root_partition) {
+ if (hv_cpu->synic_event_page != NULL)
+ memunmap(hv_cpu->synic_event_page);
+
+ if (hv_cpu->synic_message_page != NULL)
+ memunmap(hv_cpu->synic_message_page);
+ } else {
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ }
free_page((unsigned long)hv_cpu->post_msg_page);
}

@@ -213,10 +221,12 @@ void hv_synic_enable_regs(unsigned int cpu)
union hv_synic_scontrol sctrl;

/* Setup the Synic's message page */
- simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+ simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
+ hv_get_register(HV_REGISTER_SIMP);
Unfortunately, this code and the similar places below will run into
problems on ARM64. Drivers/hv/hv.c is common code on all architectures
so it needs to compile and run on ARM64 as well as x86/x64. But there's
no hv_get_nested_register() defined or implemented on the ARM64 side,
so the code will fail to compile.

I think there's a better way to do this. Based on Nuno's comments, it
seems like there are two hv_get_register() functions needed:

1) Get the value of the register or its nested cousin, based on the value
of hv_nested. That's what you are explicitly coding here.
2) Get the value of the register. Don't access the nested cousin, regardless
of the value of hv_nested.

Based on how you coded things earlier, I'm assuming #1 is what you want to
use in most cases, and specifically here in drivers/hv/hv.c. That's good,
because #1 can hide the testing of hv_nested in the x86-specific
implementation of hv_get_register(), while the ARM64 version of
hv_get_register() continues to do whatever it does now with no changes.

I'm also assuming that #2 may be used in particular cases in the code
that is specifically related to nesting. Give the #2 version a different
name --- maybe hv_get_nonnested_register(), or something like that --
and use it only in code under arch/x86 that is related to nesting. That
way, ARM64 won't be affected.

Of course, the same approach applies to hv_set_register().

hv_get_register() and hv_get_nonnested_register() will obviously
share some code. But rather than calling a common function starting
with underscore like you've done above, let me suggest that
hv_get_register() test hv_nested and potentially do the translation,
then call hv_get_nonnested_register(). That way you'll end up
with just two functions instead of three as above with
hv_get_register(), hv_get_nested_register(), and _hv_get_register().

I tried the way you suggested and it worked for ARM64 this time. But still I would have three functions. Because the base function _hv_get_register() would still be required in order to avoid code duplication in hv_get_non_nested_register().


I haven't coded up any of this, so take it as a suggestion. There
could be some problem with it that I haven't seen, or my assumptions
might be wrong. But give it a try and see if it works out. I'm hoping
it can all be handled on the x86 side without having to add complexity
on the ARM64 side.

Michael

+
simp.simp_enabled = 1;

- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_message_page
= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -227,13 +237,18 @@ void hv_synic_enable_regs(unsigned int cpu)
>> HV_HYP_PAGE_SHIFT;
}

- hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

/* Setup the Synic's event page */
- siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
+ siefp.as_uint64 = hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SIEFP) :
+ hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 1;

- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_event_page =
memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -245,13 +260,19 @@ void hv_synic_enable_regs(unsigned int cpu)
>> HV_HYP_PAGE_SHIFT;
}

- hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

/* Setup the shared SINT. */
if (vmbus_irq != -1)
enable_percpu_irq(vmbus_irq, 0);
- shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
- VMBUS_MESSAGE_SINT);
+ shared_sint.as_uint64 =
+ hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SINT0 +
+ VMBUS_MESSAGE_SINT) :
+ hv_get_register(HV_REGISTER_SINT0 +
VMBUS_MESSAGE_SINT);

shared_sint.vector = vmbus_interrupt;
shared_sint.masked = false;
@@ -266,14 +287,22 @@ void hv_synic_enable_regs(unsigned int cpu)
#else
shared_sint.auto_eoi = 0;
#endif
- hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SINT0 +
VMBUS_MESSAGE_SINT,
+ shared_sint.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
shared_sint.as_uint64);
-
/* Enable the global synic bit */
- sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
+ sctrl.as_uint64 = hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SCONTROL) :
+ hv_get_register(HV_REGISTER_SCONTROL);
sctrl.enable = 1;

- hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
}

int hv_synic_init(unsigned int cpu)
@@ -297,17 +326,25 @@ void hv_synic_disable_regs(unsigned int cpu)
union hv_synic_siefp siefp;
union hv_synic_scontrol sctrl;

- shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
- VMBUS_MESSAGE_SINT);
+ shared_sint.as_uint64 =
+ hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SINT0 +
+ VMBUS_MESSAGE_SINT) :
+ hv_get_register(HV_REGISTER_SINT0 +
VMBUS_MESSAGE_SINT);

shared_sint.masked = 1;

/* Need to correctly cleanup in the case of SMP!!! */
/* Disable the interrupt */
- hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SINT0 +
VMBUS_MESSAGE_SINT,
+ shared_sint.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
shared_sint.as_uint64);

- simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+ simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
+ hv_get_register(HV_REGISTER_SIMP);
/*
* In Isolation VM, sim and sief pages are allocated by
* paravisor. These pages also will be used by kdump
@@ -320,9 +357,14 @@ void hv_synic_disable_regs(unsigned int cpu)
else
simp.base_simp_gpa = 0;

- hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

- siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
+ siefp.as_uint64 = hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SIEFP) :
+ hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 0;

if (hv_isolation_type_snp())
@@ -330,12 +372,21 @@ void hv_synic_disable_regs(unsigned int cpu)
else
siefp.base_siefp_gpa = 0;

- hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

/* Disable the global synic bit */
- sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
+ sctrl.as_uint64 = hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SCONTROL) :
+ hv_get_register(HV_REGISTER_SCONTROL);
sctrl.enable = 0;
- hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);

if (vmbus_irq != -1)
disable_percpu_irq(vmbus_irq);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index f131027830c3..db0b5be1e087 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -147,7 +147,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg,
u32 old_msg_type)
* possibly deliver another msg from the
* hypervisor
*/
- hv_set_register(HV_REGISTER_EOM, 0);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_EOM, 0);
+ else
+ hv_set_register(HV_REGISTER_EOM, 0);
}
}

--
2.25.1


Regards,

Jinank