RE: [PATCH v6 5/6] Drivers: hv: vmbus: Support TDX guests

From: Michael Kelley (LINUX)
Date: Fri May 05 2023 - 12:22:51 EST


From: Dexuan Cui <decui@xxxxxxxxxxxxx>
>
> Add Hyper-V specific code so that a TDX guest can run on Hyper-V:
> No need to use hv_vp_assist_page.
> Don't use the unsafe Hyper-V TSC page.
> Don't try to use HV_REGISTER_CRASH_CTL.
> Don't trust Hyper-V's TLB-flushing hypercalls.
> Don't use lazy EOI.

Nit: Actually, you overdid the cleanup. :-( The line in v5 about
"Share SynIC Event/Message pages" was correct. It was only the
part about VMBus Monitor pages that no longer applied.

>
> Signed-off-by: Dexuan Cui <decui@xxxxxxxxxxxxx>
> ---
>
> Changes in v2:
> Used a new function hv_set_memory_enc_dec_needed() in
> __set_memory_enc_pgtable().
> Added the missing set_memory_encrypted() in hv_synic_free().
>
> Changes in v3:
> Use pgprot_decrypted(PAGE_KERNEL) in hv_ringbuffer_init().
> (Do not use PAGE_KERNEL_NOENC, which doesn't exist for ARM64).
>
> Used cc_mkdec() in hv_synic_enable_regs().
>
> ms_hyperv_init_platform():
> Explicitly do not use HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED.
> Explicitly do not use HV_X64_APIC_ACCESS_RECOMMENDED.
>
> Enabled __send_ipi_mask() and __send_ipi_one() for TDX guests.
>
> Changes in v4:
> A minor rebase to Michael's v7 DDA patchset. I'm very happy that
> I can drop my v3 change to arch/x86/mm/pat/set_memory.c due to
> Michael's work.
>
> Changes in v5:
> Added memset() to clear synic_message_page and synic_event_page
> after set_memory_decrypted().
> Rebased the patch since "post_msg_page" has been removed in
> hyperv-next.
> Improved the error handling in hv_synic_alloc()/free() [Michael
> Kelley]
>
> Changes in v6:
> Addressed Michael Kelley's comments on patch 5:
> Removed 2 unnecessary lines of messages from the commit log.
> Fixed the error handling path for hv_synic_alloc()/free().
> Printed the 'ret' in hv_synic_alloc()/free().
>
> arch/x86/hyperv/hv_apic.c | 6 ++--
> arch/x86/hyperv/hv_init.c | 19 +++++++---
> arch/x86/kernel/cpu/mshyperv.c | 21 ++++++++++-
> drivers/hv/hv.c | 65 ++++++++++++++++++++++++++++++++--
> 4 files changed, 101 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
> index 1fbda2f94184..b28da8b41b45 100644
> --- a/arch/x86/hyperv/hv_apic.c
> +++ b/arch/x86/hyperv/hv_apic.c
> @@ -177,7 +177,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int
> vector,
> (exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask)))
> return true;
>
> - if (!hv_hypercall_pg)
> + /* A TDX guest doesn't use hv_hypercall_pg. */
> + if (!hv_isolation_type_tdx() && !hv_hypercall_pg)
> return false;
>
> if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
> @@ -231,7 +232,8 @@ static bool __send_ipi_one(int cpu, int vector)
>
> trace_hyperv_send_ipi_one(cpu, vector);
>
> - if (!hv_hypercall_pg || (vp == VP_INVAL))
> + /* A TDX guest doesn't use hv_hypercall_pg. */
> + if ((!hv_isolation_type_tdx() && !hv_hypercall_pg) || (vp == VP_INVAL))
> return false;
>
> if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index f175e0de821c..f28357ecad7d 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -79,7 +79,7 @@ static int hyperv_init_ghcb(void)
> static int hv_cpu_init(unsigned int cpu)
> {
> union hv_vp_assist_msr_contents msr = { 0 };
> - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
> + struct hv_vp_assist_page **hvp;
> int ret;
>
> ret = hv_common_cpu_init(cpu);
> @@ -89,6 +89,7 @@ static int hv_cpu_init(unsigned int cpu)
> if (!hv_vp_assist_page)
> return 0;
>
> + hvp = &hv_vp_assist_page[cpu];
> if (hv_root_partition) {
> /*
> * For root partition we get the hypervisor provided VP assist
> @@ -398,11 +399,21 @@ void __init hyperv_init(void)
> if (hv_common_init())
> return;
>
> - hv_vp_assist_page = kcalloc(num_possible_cpus(),
> - sizeof(*hv_vp_assist_page), GFP_KERNEL);
> + /*
> + * The VP assist page is useless to a TDX guest: the only use we
> + * would have for it is lazy EOI, which can not be used with TDX.
> + */
> + if (hv_isolation_type_tdx())
> + hv_vp_assist_page = NULL;
> + else
> + hv_vp_assist_page = kcalloc(num_possible_cpus(),
> + sizeof(*hv_vp_assist_page),
> + GFP_KERNEL);
> if (!hv_vp_assist_page) {
> ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
> - goto common_free;
> +
> + if (!hv_isolation_type_tdx())
> + goto common_free;
> }
>
> if (hv_isolation_type_snp()) {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 2fd687a80033..b95b689efa07 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -404,8 +404,27 @@ static void __init ms_hyperv_init_platform(void)
>
> if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)
> static_branch_enable(&isolation_type_snp);
> - else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX)
> + else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
> static_branch_enable(&isolation_type_tdx);
> +
> + /*
> + * The GPAs of SynIC Event/Message pages and VMBus
> + * Monitor pages need to be added by this offset.
> + */
> + ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
> +
> + /* Don't use the unsafe Hyper-V TSC page */
> + ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE;
> +
> + /* HV_REGISTER_CRASH_CTL is unsupported */
> + ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
> +
> + /* Don't trust Hyper-V's TLB-flushing hypercalls */
> + ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
> +
> + /* A TDX VM must use x2APIC and doesn't use lazy EOI */
> + ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
> + }
> }
>
> if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index de6708dbe0df..af959e87b6e7 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -18,6 +18,7 @@
> #include <linux/clockchips.h>
> #include <linux/delay.h>
> #include <linux/interrupt.h>
> +#include <linux/set_memory.h>
> #include <clocksource/hyperv_timer.h>
> #include <asm/mshyperv.h>
> #include "hyperv_vmbus.h"
> @@ -80,6 +81,7 @@ int hv_synic_alloc(void)
> {
> int cpu;
> struct hv_per_cpu_context *hv_cpu;
> + int ret = -ENOMEM;
>
> /*
> * First, zero all per-cpu memory areas so hv_synic_free() can
> @@ -120,9 +122,42 @@ int hv_synic_alloc(void)
> (void *)get_zeroed_page(GFP_ATOMIC);
> if (hv_cpu->synic_event_page == NULL) {
> pr_err("Unable to allocate SYNIC event page\n");
> +
> + free_page((unsigned long)hv_cpu->synic_message_page);
> + hv_cpu->synic_message_page = NULL;
> +
> goto err;
> }
> }
> +
> + /* It's better to leak the page if the decryption fails. */
> + if (hv_isolation_type_tdx()) {
> + ret = set_memory_decrypted(
> + (unsigned long)hv_cpu->synic_message_page, 1);
> + if (ret) {
> + pr_err("Failed to decrypt SYNIC msg page: %d\n", ret);
> + hv_cpu->synic_message_page = NULL;
> +
> + /*
> + * Free the event page so that a TDX VM won't
> + * try to encrypt the page in hv_synic_free().
> + */
> + free_page((unsigned long)hv_cpu->synic_event_page);
> + hv_cpu->synic_event_page = NULL;
> + goto err;
> + }
> +
> + ret = set_memory_decrypted(
> + (unsigned long)hv_cpu->synic_event_page, 1);
> + if (ret) {
> + pr_err("Failed to decrypt SYNIC event page: %d\n", ret);
> + hv_cpu->synic_event_page = NULL;
> + goto err;
> + }
> +
> + memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
> + memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
> + }
> }
>
> return 0;
> @@ -131,18 +166,40 @@ int hv_synic_alloc(void)
> * Any memory allocations that succeeded will be freed when
> * the caller cleans up by calling hv_synic_free()
> */
> - return -ENOMEM;
> + return ret;
> }
>
>
> void hv_synic_free(void)
> {
> int cpu;
> + int ret;
>
> for_each_present_cpu(cpu) {
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
>
> + /* It's better to leak the page if the encryption fails. */
> + if (hv_isolation_type_tdx()) {
> + if (hv_cpu->synic_message_page) {
> + ret = set_memory_encrypted((unsigned long)
> + hv_cpu->synic_message_page, 1);
> + if (ret) {
> + pr_err("Failed to encrypt SYNIC msg page: %d\n", ret);
> + hv_cpu->synic_message_page = NULL;
> + }
> + }
> +
> + if (hv_cpu->synic_event_page) {
> + ret = set_memory_encrypted((unsigned long)
> + hv_cpu->synic_event_page, 1);
> + if (ret) {
> + pr_err("Failed to encrypt SYNIC event page: %d\n", ret);
> + hv_cpu->synic_event_page = NULL;
> + }
> + }
> + }
> +
> free_page((unsigned long)hv_cpu->synic_event_page);
> free_page((unsigned long)hv_cpu->synic_message_page);
> }
> @@ -179,7 +236,8 @@ void hv_synic_enable_regs(unsigned int cpu)
> if (!hv_cpu->synic_message_page)
> pr_err("Fail to map synic message page.\n");
> } else {
> - simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
> + simp.base_simp_gpa =
> + cc_mkdec(virt_to_phys(hv_cpu->synic_message_page))
> >> HV_HYP_PAGE_SHIFT;
> }
>
> @@ -198,7 +256,8 @@ void hv_synic_enable_regs(unsigned int cpu)
> if (!hv_cpu->synic_event_page)
> pr_err("Fail to map synic event page.\n");
> } else {
> - siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
> + siefp.base_siefp_gpa =
> + cc_mkdec(virt_to_phys(hv_cpu->synic_event_page))
> >> HV_HYP_PAGE_SHIFT;
> }
>
> --
> 2.25.1

Commit message nit notwithstanding --

Reviewed-by: Michael Kelley <mikelley@xxxxxxxxxxxxx>