RE: [PATCH v2 8/9] x86/hyperv: Use TDX GHCI to access some MSRs in a TDX VM with the paravisor

From: Michael Kelley (LINUX)
Date: Mon Aug 21 2023 - 15:33:35 EST


From: Dexuan Cui <decui@xxxxxxxxxxxxx> Sent: Sunday, August 20, 2023 1:27 PM
>
> When the paravisor is present, a SNP VM must use GHCB to access some
> special MSRs, including HV_X64_MSR_GUEST_OS_ID and some SynIC MSRs.
>
> Similarly, when the paravisor is present, a TDX VM must use TDX GHCI
> to access the same MSRs.
>
> Implement hv_tdx_read_msr() and hv_tdx_write_msr(), and use the helper
> functions hv_ivm_msr_read() and hv_ivm_msr_write() to access the MSRs
> in a unified way for SNP/TDX VMs with the paravisor.
>
> Signed-off-by: Dexuan Cui <decui@xxxxxxxxxxxxx>
> ---
>
> Changes in v2: None
>
> arch/x86/hyperv/hv_init.c | 8 ++--
> arch/x86/hyperv/ivm.c | 72 +++++++++++++++++++++++++++++++--
> arch/x86/include/asm/mshyperv.h | 8 ++--
> arch/x86/kernel/cpu/mshyperv.c | 8 ++--
> 4 files changed, 80 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 892e52afa37cd..18afbb10edc64 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -500,8 +500,8 @@ void __init hyperv_init(void)
> guest_id = hv_generate_guest_id(LINUX_VERSION_CODE);
> wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
>
> - /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
> - hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
> + /* With the paravisor, the VM must also write the ID via GHCB/GHCI */
> + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
>
> /* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg */
> if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> @@ -590,7 +590,7 @@ void __init hyperv_init(void)
>
> clean_guest_os_id:
> wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
> - hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
> + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
> cpuhp_remove_state(cpuhp);
> free_ghcb_page:
> free_percpu(hv_ghcb_pg);
> @@ -611,7 +611,7 @@ void hyperv_cleanup(void)
>
> /* Reset our OS id */
> wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
> - hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
> + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
>
> /*
> * Reset hypercall page reference before reset the page,
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 920ecb85802b8..93d54d8ef12e1 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -186,7 +186,49 @@ bool hv_ghcb_negotiate_protocol(void)
> return true;
> }
>
> -void hv_ghcb_msr_write(u64 msr, u64 value)
> +#define EXIT_REASON_MSR_READ 31
> +#define EXIT_REASON_MSR_WRITE 32

These exit reasons are already defined in arch/x86/include/uapi/asm/vmx.h.
Are they conceptually the same thing, and if so, should those existing
definitions be reused instead of being redefined here?

> +
> +static void hv_tdx_read_msr(u64 msr, u64 *val)

Could you name this function hv_tdx_msr_read() so that it
matches hv_ghcb_msr_read() and hv_ivm_msr_read()? :-)

> +{
> + struct tdx_hypercall_args args = {
> + .r10 = TDX_HYPERCALL_STANDARD,
> + .r11 = EXIT_REASON_MSR_READ,
> + .r12 = msr,
> + };
> +
> +#ifdef CONFIG_INTEL_TDX_GUEST
> + u64 ret = __tdx_hypercall_ret(&args);
> +#else
> + u64 ret = HV_STATUS_INVALID_PARAMETER;
> +#endif
> +
> + if (WARN_ONCE(ret, "Failed to emulate MSR read: %lld\n", ret))
> + *val = 0;
> + else
> + *val = args.r11;
> +}
> +
> +static void hv_tdx_write_msr(u64 msr, u64 val)

Same here on the function name.

> +{
> + struct tdx_hypercall_args args = {
> + .r10 = TDX_HYPERCALL_STANDARD,
> + .r11 = EXIT_REASON_MSR_WRITE,
> + .r12 = msr,
> + .r13 = val,
> + };
> +
> +#ifdef CONFIG_INTEL_TDX_GUEST
> + u64 ret = __tdx_hypercall(&args);
> +#else
> + u64 ret = HV_STATUS_INVALID_PARAMETER;
> + (void)args;
> +#endif
> +
> + WARN_ONCE(ret, "Failed to emulate MSR write: %lld\n", ret);
> +}
> +
> +static void hv_ghcb_msr_write(u64 msr, u64 value)
> {
> union hv_ghcb *hv_ghcb;
> void **ghcb_base;
> @@ -214,9 +256,20 @@ void hv_ghcb_msr_write(u64 msr, u64 value)
>
> local_irq_restore(flags);
> }
> -EXPORT_SYMBOL_GPL(hv_ghcb_msr_write);
>
> -void hv_ghcb_msr_read(u64 msr, u64 *value)
> +void hv_ivm_msr_write(u64 msr, u64 value)
> +{
> + if (!hyperv_paravisor_present)
> + return;
> +
> + if (hv_isolation_type_tdx())
> + hv_tdx_write_msr(msr, value);
> + else if (hv_isolation_type_snp())
> + hv_ghcb_msr_write(msr, value);
> +}
> +EXPORT_SYMBOL_GPL(hv_ivm_msr_write);
> +
> +static void hv_ghcb_msr_read(u64 msr, u64 *value)
> {
> union hv_ghcb *hv_ghcb;
> void **ghcb_base;
> @@ -246,7 +299,18 @@ void hv_ghcb_msr_read(u64 msr, u64 *value)
> | ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
> local_irq_restore(flags);
> }
> -EXPORT_SYMBOL_GPL(hv_ghcb_msr_read);
> +
> +void hv_ivm_msr_read(u64 msr, u64 *value)
> +{
> + if (!hyperv_paravisor_present)
> + return;
> +
> + if (hv_isolation_type_tdx())
> + hv_tdx_read_msr(msr, value);
> + else if (hv_isolation_type_snp())
> + hv_ghcb_msr_read(msr, value);
> +}
> +EXPORT_SYMBOL_GPL(hv_ivm_msr_read);
>
> /*
> * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 2a4c7dcf64038..18f569c44c39d 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -280,15 +280,15 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
> int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
>
> #ifdef CONFIG_AMD_MEM_ENCRYPT
> -void hv_ghcb_msr_write(u64 msr, u64 value);
> -void hv_ghcb_msr_read(u64 msr, u64 *value);
> +void hv_ivm_msr_write(u64 msr, u64 value);
> +void hv_ivm_msr_read(u64 msr, u64 *value);

These declarations are under CONFIG_AMD_MEM_ENCRYPT, which
is problematic for TDX if the kernel is built with CONFIG_INTEL_TDX_GUEST
but not CONFIG_AMD_MEM_ENCRYPT. Presumably we want to make
sure that combination builds and works correctly.

I think there's a bigger problem in that arch/x86/hyperv/ivm.c has
a big #ifdef CONFIG_AMD_MEM_ENCRYPT in it, and TDX with paravisor
wants to use the "vtom" functions that are under that #ifdef.

> bool hv_ghcb_negotiate_protocol(void);
> void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason);
> void hv_vtom_init(void);
> int hv_snp_boot_ap(int cpu, unsigned long start_ip);
> #else
> -static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
> -static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
> +static inline void hv_ivm_msr_write(u64 msr, u64 value) {}
> +static inline void hv_ivm_msr_read(u64 msr, u64 *value) {}
> static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
> static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
> static inline void hv_vtom_init(void) {}
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 3dff2ef43bc73..a196760afa7a1 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -72,8 +72,8 @@ u64 hv_get_non_nested_register(unsigned int reg)
> {
> u64 value;
>
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> - hv_ghcb_msr_read(reg, &value);
> + if (hv_is_synic_reg(reg) && hyperv_paravisor_present)
> + hv_ivm_msr_read(reg, &value);
> else
> rdmsrl(reg, value);
> return value;
> @@ -82,8 +82,8 @@ EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
>
> void hv_set_non_nested_register(unsigned int reg, u64 value)
> {
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> - hv_ghcb_msr_write(reg, value);
> + if (hv_is_synic_reg(reg) && hyperv_paravisor_present) {
> + hv_ivm_msr_write(reg, value);
>
> /* Write proxy bit via wrmsl instruction */
> if (hv_is_sint_reg(reg))
> --
> 2.25.1