RE: [PATCH v4 5/6] Drivers: hv: vmbus: Support TDX guests

From: Michael Kelley (LINUX)
Date: Wed Apr 12 2023 - 10:05:23 EST


From: Dexuan Cui <decui@xxxxxxxxxxxxx> Sent: Saturday, April 8, 2023 1:48 PM
>
> Add Hyper-V specific code so that a TDX guest can run on Hyper-V:
> No need to use hv_vp_assist_page.
> Don't use the unsafe Hyper-V TSC page.
> Don't try to use HV_REGISTER_CRASH_CTL.
> Don't trust Hyper-V's TLB-flushing hypercalls.
> Don't use lazy EOI.
> Share SynIC Event/Message pages and VMBus Monitor pages with the host.
> Use pgprot_decrypted(PAGE_KERNEL) in hv_ringbuffer_init().
>
> Signed-off-by: Dexuan Cui <decui@xxxxxxxxxxxxx>
> ---
>
> Changes in v2:
> Used a new function hv_set_memory_enc_dec_needed() in
> __set_memory_enc_pgtable().
> Added the missing set_memory_encrypted() in hv_synic_free().
>
> Changes in v3:
> Use pgprot_decrypted(PAGE_KERNEL) in hv_ringbuffer_init().
> (Do not use PAGE_KERNEL_NOENC, which doesn't exist for ARM64).
>
> Used cc_mkdec() in hv_synic_enable_regs().
>
> ms_hyperv_init_platform():
> Explicitly do not use HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED.
> Explicitly do not use HV_X64_APIC_ACCESS_RECOMMENDED.
>
> Enabled __send_ipi_mask() and __send_ipi_one() for TDX guests.
>
> Changes in v4:
> A minor rebase to Michael's v7 DDA patchset. I'm very happy that
> I can drop my v3 change to arch/x86/mm/pat/set_memory.c due to
> Michael's work.
>
> arch/x86/hyperv/hv_apic.c | 6 ++--
> arch/x86/hyperv/hv_init.c | 19 ++++++++---
> arch/x86/kernel/cpu/mshyperv.c | 21 +++++++++++-
> drivers/hv/hv.c | 62 +++++++++++++++++++++++++++++++---
> 4 files changed, 96 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
> index fb8b2c088681a..16919c7b3196e 100644
> --- a/arch/x86/hyperv/hv_apic.c
> +++ b/arch/x86/hyperv/hv_apic.c
> @@ -173,7 +173,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
> (exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask)))
> return true;
>
> - if (!hv_hypercall_pg)
> + /* A TDX guest doesn't use hv_hypercall_pg. */
> + if (!hv_isolation_type_tdx() && !hv_hypercall_pg)
> return false;
>
> if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
> @@ -227,7 +228,8 @@ static bool __send_ipi_one(int cpu, int vector)
>
> trace_hyperv_send_ipi_one(cpu, vector);
>
> - if (!hv_hypercall_pg || (vp == VP_INVAL))
> + /* A TDX guest doesn't use hv_hypercall_pg. */
> + if ((!hv_isolation_type_tdx() && !hv_hypercall_pg) || (vp == VP_INVAL))
> return false;
>
> if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index f175e0de821c3..f28357ecad7d9 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -79,7 +79,7 @@ static int hyperv_init_ghcb(void)
> static int hv_cpu_init(unsigned int cpu)
> {
> union hv_vp_assist_msr_contents msr = { 0 };
> - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
> + struct hv_vp_assist_page **hvp;
> int ret;
>
> ret = hv_common_cpu_init(cpu);
> @@ -89,6 +89,7 @@ static int hv_cpu_init(unsigned int cpu)
> if (!hv_vp_assist_page)
> return 0;
>
> + hvp = &hv_vp_assist_page[cpu];
> if (hv_root_partition) {
> /*
> * For root partition we get the hypervisor provided VP assist
> @@ -398,11 +399,21 @@ void __init hyperv_init(void)
> if (hv_common_init())
> return;
>
> - hv_vp_assist_page = kcalloc(num_possible_cpus(),
> - sizeof(*hv_vp_assist_page), GFP_KERNEL);
> + /*
> + * The VP assist page is useless to a TDX guest: the only use we
> + * would have for it is lazy EOI, which can not be used with TDX.
> + */
> + if (hv_isolation_type_tdx())
> + hv_vp_assist_page = NULL;
> + else
> + hv_vp_assist_page = kcalloc(num_possible_cpus(),
> + sizeof(*hv_vp_assist_page),
> + GFP_KERNEL);
> if (!hv_vp_assist_page) {
> ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
> - goto common_free;
> +
> + if (!hv_isolation_type_tdx())
> + goto common_free;
> }
>
> if (hv_isolation_type_snp()) {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index a87fb934cd4b4..e9106c9d92f81 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -405,8 +405,27 @@ static void __init ms_hyperv_init_platform(void)
>
> if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)
> static_branch_enable(&isolation_type_snp);
> - else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX)
> + else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
> static_branch_enable(&isolation_type_tdx);
> +
> + /*
> + * The GPAs of SynIC Event/Message pages and VMBus
> + * Monitor pages need to have this offset added.
> + */
> + ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
> +
> + /* Don't use the unsafe Hyper-V TSC page */
> + ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE;
> +
> + /* HV_REGISTER_CRASH_CTL is unsupported */
> + ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
> +
> + /* Don't trust Hyper-V's TLB-flushing hypercalls */
> + ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
> +
> + /* A TDX VM must use x2APIC and doesn't use lazy EOI */
> + ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
> + }
> }
>
> if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 008234894d287..22ecb79d21efd 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -18,6 +18,7 @@
> #include <linux/clockchips.h>
> #include <linux/delay.h>
> #include <linux/interrupt.h>
> +#include <linux/set_memory.h>
> #include <clocksource/hyperv_timer.h>
> #include <asm/mshyperv.h>
> #include "hyperv_vmbus.h"
> @@ -119,6 +120,7 @@ int hv_synic_alloc(void)
> {
> int cpu;
> struct hv_per_cpu_context *hv_cpu;
> + int ret = -ENOMEM;
>
> /*
> * First, zero all per-cpu memory areas so hv_synic_free() can
> @@ -168,6 +170,30 @@ int hv_synic_alloc(void)
> pr_err("Unable to allocate post msg page\n");
> goto err;
> }
> +
> +
> + if (hv_isolation_type_tdx()) {
> + ret = set_memory_decrypted(
> + (unsigned long)hv_cpu->synic_message_page, 1);
> + if (ret) {
> + pr_err("Failed to decrypt SYNIC msg page\n");
> + goto err;
> + }
> +
> + ret = set_memory_decrypted(
> + (unsigned long)hv_cpu->synic_event_page, 1);
> + if (ret) {
> + pr_err("Failed to decrypt SYNIC event page\n");
> + goto err;
> + }
> +
> + ret = set_memory_decrypted(
> + (unsigned long)hv_cpu->post_msg_page, 1);
> + if (ret) {
> + pr_err("Failed to decrypt post msg page\n");
> + goto err;
> + }
> + }

One other comment: The memory for the synic_message_page,
synic_event_page, and post_msg_page is obtained using get_zeroed_page().
But after the decryption, the memory contents will be random garbage that
isn't all zeroes. You'll need to do a memset() after the decryption to get the
contents back to zero. Compare with Patch 6 in Tianyu's fully enlightened
SNP patch series.

Michael

> }
>
> return 0;
> @@ -176,18 +202,42 @@ int hv_synic_alloc(void)
> * Any memory allocations that succeeded will be freed when
> * the caller cleans up by calling hv_synic_free()
> */
> - return -ENOMEM;
> + return ret;
> }
>
>
> void hv_synic_free(void)
> {
> int cpu;
> + int ret;
>
> for_each_present_cpu(cpu) {
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
>
> + if (hv_isolation_type_tdx()) {
> + ret = set_memory_encrypted(
> + (unsigned long)hv_cpu->synic_message_page, 1);
> + if (ret) {
> + pr_err("Failed to encrypt SYNIC msg page\n");
> + continue;
> + }
> +
> + ret = set_memory_encrypted(
> + (unsigned long)hv_cpu->synic_event_page, 1);
> + if (ret) {
> + pr_err("Failed to encrypt SYNIC event page\n");
> + continue;
> + }
> +
> + ret = set_memory_encrypted(
> + (unsigned long)hv_cpu->post_msg_page, 1);
> + if (ret) {
> + pr_err("Failed to encrypt post msg page\n");
> + continue;
> + }
> + }
> +
> free_page((unsigned long)hv_cpu->synic_event_page);
> free_page((unsigned long)hv_cpu->synic_message_page);
> free_page((unsigned long)hv_cpu->post_msg_page);
> @@ -225,8 +275,9 @@ void hv_synic_enable_regs(unsigned int cpu)
> if (!hv_cpu->synic_message_page)
> pr_err("Fail to map synic message page.\n");
> } else {
> - simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
> - >> HV_HYP_PAGE_SHIFT;
> + simp.base_simp_gpa =
> + cc_mkdec(virt_to_phys(hv_cpu->synic_message_page)) >>
> + HV_HYP_PAGE_SHIFT;
> }
>
> hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
> @@ -244,8 +295,9 @@ void hv_synic_enable_regs(unsigned int cpu)
> if (!hv_cpu->synic_event_page)
> pr_err("Fail to map synic event page.\n");
> } else {
> - siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
> - >> HV_HYP_PAGE_SHIFT;
> + siefp.base_siefp_gpa =
> + cc_mkdec(virt_to_phys(hv_cpu->synic_event_page)) >>
> + HV_HYP_PAGE_SHIFT;
> }
>
> hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> --
> 2.25.1