[PATCH v3 5/6] Drivers: hv: vmbus: Support TDX guests

From: Dexuan Cui
Date: Mon Feb 06 2023 - 14:27:57 EST


Add Hyper-V specific code so that a TDX guest can run on Hyper-V:
No need to use hv_vp_assist_page.
Don't use the unsafe Hyper-V TSC page.
Don't try to use HV_REGISTER_CRASH_CTL.
Don't trust Hyper-V's TLB-flushing hypercalls.
Don't use lazy EOI.
Share SynIC Event/Message pages and VMBus Monitor pages with the host.
Use pgprot_decrypted(PAGE_KERNEL)in hv_ringbuffer_init().

Signed-off-by: Dexuan Cui <decui@xxxxxxxxxxxxx>

---

Changes in v2:
Used a new function hv_set_memory_enc_dec_needed() in
__set_memory_enc_pgtable().
Added the missing set_memory_encrypted() in hv_synic_free().

Changes in v3:
Use pgprot_decrypted(PAGE_KERNEL)in hv_ringbuffer_init().
(Do not use PAGE_KERNEL_NOENC, which doesn't exist for ARM64).

Used cc_mkdec() in hv_synic_enable_regs().

ms_hyperv_init_platform():
Explicitly do not use HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED.
Explicitly do not use HV_X64_APIC_ACCESS_RECOMMENDED.

Enabled __send_ipi_mask() and __send_ipi_one() for TDX guests.

arch/x86/hyperv/hv_apic.c | 6 ++--
arch/x86/hyperv/hv_init.c | 19 ++++++++---
arch/x86/hyperv/ivm.c | 5 +++
arch/x86/kernel/cpu/mshyperv.c | 23 ++++++++++++-
arch/x86/mm/pat/set_memory.c | 2 +-
drivers/hv/connection.c | 4 ++-
drivers/hv/hv.c | 62 +++++++++++++++++++++++++++++++---
drivers/hv/hv_common.c | 6 ++++
drivers/hv/ring_buffer.c | 2 +-
include/asm-generic/mshyperv.h | 2 ++
10 files changed, 116 insertions(+), 15 deletions(-)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index fb8b2c088681..16919c7b3196 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -173,7 +173,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
(exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask)))
return true;

- if (!hv_hypercall_pg)
+ /* A TDX guest doesn't use hv_hypercall_pg. */
+ if (!hv_isolation_type_tdx() && !hv_hypercall_pg)
return false;

if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
@@ -227,7 +228,8 @@ static bool __send_ipi_one(int cpu, int vector)

trace_hyperv_send_ipi_one(cpu, vector);

- if (!hv_hypercall_pg || (vp == VP_INVAL))
+ /* A TDX guest doesn't use hv_hypercall_pg. */
+ if ((!hv_isolation_type_tdx() && !hv_hypercall_pg) || (vp == VP_INVAL))
return false;

if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 6a0bcbd18306..d641c9808c31 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void)
static int hv_cpu_init(unsigned int cpu)
{
union hv_vp_assist_msr_contents msr = { 0 };
- struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
+ struct hv_vp_assist_page **hvp;
int ret;

ret = hv_common_cpu_init(cpu);
@@ -87,6 +87,7 @@ static int hv_cpu_init(unsigned int cpu)
if (!hv_vp_assist_page)
return 0;

+ hvp = &hv_vp_assist_page[cpu];
if (hv_root_partition) {
/*
* For root partition we get the hypervisor provided VP assist
@@ -396,11 +397,21 @@ void __init hyperv_init(void)
if (hv_common_init())
return;

- hv_vp_assist_page = kcalloc(num_possible_cpus(),
- sizeof(*hv_vp_assist_page), GFP_KERNEL);
+ /*
+ * The VP assist page is useless to a TDX guest: the only use we
+ * would have for it is lazy EOI, which can not be used with TDX.
+ */
+ if (hv_isolation_type_tdx())
+ hv_vp_assist_page = NULL;
+ else
+ hv_vp_assist_page = kcalloc(num_possible_cpus(),
+ sizeof(*hv_vp_assist_page),
+ GFP_KERNEL);
if (!hv_vp_assist_page) {
ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
- goto common_free;
+
+ if (!hv_isolation_type_tdx())
+ goto common_free;
}

if (hv_isolation_type_snp()) {
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 07e4253b5809..4398042f10d5 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -258,6 +258,11 @@ bool hv_is_isolation_supported(void)
return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
}

+bool hv_set_memory_enc_dec_needed(void)
+{
+ return hv_is_isolation_supported() && !hv_isolation_type_tdx();
+}
+
DEFINE_STATIC_KEY_FALSE(isolation_type_snp);

/*
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 941372449ff2..6a57af60ec9f 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -345,8 +345,29 @@ static void __init ms_hyperv_init_platform(void)
}

if (IS_ENABLED(CONFIG_INTEL_TDX_GUEST) &&
- hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX)
+ hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
static_branch_enable(&isolation_type_tdx);
+
+ /*
+ * The GPAs of SynIC Event/Message pages and VMBus
+ * Moniter pages need to be added by this offset.
+ */
+ ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
+
+ /* Don't use the unsafe Hyper-V TSC page */
+ ms_hyperv.features &=
+ ~HV_MSR_REFERENCE_TSC_AVAILABLE;
+
+ /* HV_REGISTER_CRASH_CTL is unsupported */
+ ms_hyperv.misc_features &=
+ ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+
+ /* Don't trust Hyper-V's TLB-flushing hypercalls */
+ ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
+
+ /* A TDX VM must use x2APIC and doesn't use lazy EOI */
+ ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
+ }
}

if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 356758b7d4b4..e069bf28d683 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2175,7 +2175,7 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)

static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
- if (hv_is_isolation_supported())
+ if (hv_set_memory_enc_dec_needed())
return hv_set_mem_host_visibility(addr, numpages, !enc);

if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 9dc27e5d367a..1ecc3c29e3f7 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -250,12 +250,14 @@ int vmbus_connect(void)
* Isolation VM with AMD SNP needs to access monitor page via
* address space above shared gpa boundary.
*/
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_isolation_type_tdx()) {
vmbus_connection.monitor_pages_pa[0] +=
ms_hyperv.shared_gpa_boundary;
vmbus_connection.monitor_pages_pa[1] +=
ms_hyperv.shared_gpa_boundary;
+ }

+ if (hv_isolation_type_snp()) {
vmbus_connection.monitor_pages[0]
= memremap(vmbus_connection.monitor_pages_pa[0],
HV_HYP_PAGE_SIZE,
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 4d6480d57546..76a19cc56894 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -18,6 +18,7 @@
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/set_memory.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
@@ -119,6 +120,7 @@ int hv_synic_alloc(void)
{
int cpu;
struct hv_per_cpu_context *hv_cpu;
+ int ret = -ENOMEM;

/*
* First, zero all per-cpu memory areas so hv_synic_free() can
@@ -168,6 +170,30 @@ int hv_synic_alloc(void)
pr_err("Unable to allocate post msg page\n");
goto err;
}
+
+
+ if (hv_isolation_type_tdx()) {
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->synic_message_page, 1);
+ if (ret) {
+ pr_err("Failed to decrypt SYNIC msg page\n");
+ goto err;
+ }
+
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->synic_event_page, 1);
+ if (ret) {
+ pr_err("Failed to decrypt SYNIC event page\n");
+ goto err;
+ }
+
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->post_msg_page, 1);
+ if (ret) {
+ pr_err("Failed to decrypt post msg page\n");
+ goto err;
+ }
+ }
}

return 0;
@@ -176,18 +202,42 @@ int hv_synic_alloc(void)
* Any memory allocations that succeeded will be freed when
* the caller cleans up by calling hv_synic_free()
*/
- return -ENOMEM;
+ return ret;
}


void hv_synic_free(void)
{
int cpu;
+ int ret;

for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);

+ if (hv_isolation_type_tdx()) {
+ ret = set_memory_encrypted(
+ (unsigned long)hv_cpu->synic_message_page, 1);
+ if (ret) {
+ pr_err("Failed to encrypt SYNIC msg page\n");
+ continue;
+ }
+
+ ret = set_memory_encrypted(
+ (unsigned long)hv_cpu->synic_event_page, 1);
+ if (ret) {
+ pr_err("Failed to encrypt SYNIC event page\n");
+ continue;
+ }
+
+ ret = set_memory_encrypted(
+ (unsigned long)hv_cpu->post_msg_page, 1);
+ if (ret) {
+ pr_err("Failed to encrypt post msg page\n");
+ continue;
+ }
+ }
+
free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
free_page((unsigned long)hv_cpu->post_msg_page);
@@ -223,8 +273,9 @@ void hv_synic_enable_regs(unsigned int cpu)
if (!hv_cpu->synic_message_page)
pr_err("Fail to map syinc message page.\n");
} else {
- simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
- >> HV_HYP_PAGE_SHIFT;
+ simp.base_simp_gpa =
+ cc_mkdec(virt_to_phys(hv_cpu->synic_message_page)) >>
+ HV_HYP_PAGE_SHIFT;
}

hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
@@ -241,8 +292,9 @@ void hv_synic_enable_regs(unsigned int cpu)
if (!hv_cpu->synic_event_page)
pr_err("Fail to map syinc event page.\n");
} else {
- siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
- >> HV_HYP_PAGE_SHIFT;
+ siefp.base_siefp_gpa =
+ cc_mkdec(virt_to_phys(hv_cpu->synic_event_page)) >>
+ HV_HYP_PAGE_SHIFT;
}

hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 219c3f235c50..42f9274a3f82 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -283,6 +283,12 @@ bool __weak hv_is_isolation_supported(void)
}
EXPORT_SYMBOL_GPL(hv_is_isolation_supported);

+bool __weak hv_set_memory_enc_dec_needed(void)
+{
+ return false;
+}
+EXPORT_SYMBOL_GPL(hv_set_memory_enc_dec_needed);
+
bool __weak hv_isolation_type_snp(void)
{
return false;
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index c6692fd5ab15..ae3b3cd7ddbe 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -233,7 +233,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,

ring_info->ring_buffer = (struct hv_ring_buffer *)
vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
- PAGE_KERNEL);
+ pgprot_decrypted(PAGE_KERNEL));

kfree(pages_wraparound);
if (!ring_info->ring_buffer)
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index d55d2833a37b..c8c16b3df68d 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -263,6 +263,7 @@ bool hv_is_hyperv_initialized(void);
bool hv_is_hibernation_supported(void);
enum hv_isolation_type hv_get_isolation_type(void);
bool hv_is_isolation_supported(void);
+bool hv_set_memory_enc_dec_needed(void);
bool hv_isolation_type_snp(void);
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
void hyperv_cleanup(void);
@@ -275,6 +276,7 @@ static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline bool hv_is_hibernation_supported(void) { return false; }
static inline void hyperv_cleanup(void) {}
static inline bool hv_is_isolation_supported(void) { return false; }
+static inline bool hv_set_memory_enc_dec_needed(void) { return false; }
static inline enum hv_isolation_type hv_get_isolation_type(void)
{
return HV_ISOLATION_TYPE_NONE;
--
2.25.1