Re: [RFC PATCH V5 09/15] x86/hyperv: Add smp support for sev-snp guest

From: Tom Lendacky
Date: Mon May 01 2023 - 11:47:05 EST


On 5/1/23 03:57, Tianyu Lan wrote:
From: Tianyu Lan <tiala@xxxxxxxxxxxxx>

The wakeup_secondary_cpu callback was populated with wakeup_
cpu_via_vmgexit() which doesn't work for Hyper-V and Hyper-V
requires to call Hyper-V specific hvcall to start APs. So override
it with Hyper-V specific hook to start AP sev_es_save_area data
structure.

Signed-off-by: Tianyu Lan <tiala@xxxxxxxxxxxxx>
---
Change sicne RFC v3:
* Replace struct sev_es_save_area with struct
vmcb_save_area
* Move code from mshyperv.c to ivm.c

Change since RFC v2:
* Add helper function to initialize segment
* Fix some coding style
---
arch/x86/hyperv/ivm.c | 89 +++++++++++++++++++++++++++++++
arch/x86/include/asm/mshyperv.h | 18 +++++++
arch/x86/include/asm/sev.h | 13 +++++
arch/x86/include/asm/svm.h | 15 +++++-
arch/x86/kernel/cpu/mshyperv.c | 13 ++++-
arch/x86/kernel/sev.c | 4 +-
include/asm-generic/hyperv-tlfs.h | 19 +++++++
7 files changed, 166 insertions(+), 5 deletions(-)

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 522eab55c0dd..0ef46f1874e6 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -22,11 +22,15 @@
#include <asm/sev.h>
#include <asm/realmode.h>
#include <asm/e820/api.h>
+#include <asm/desc.h>
#ifdef CONFIG_AMD_MEM_ENCRYPT
#define GHCB_USAGE_HYPERV_CALL 1
+static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE);
+
union hv_ghcb {
struct ghcb ghcb;
struct {
@@ -442,6 +446,91 @@ __init void hv_sev_init_mem_and_cpu(void)
}
}
+#define hv_populate_vmcb_seg(seg, gdtr_base) \
+do { \
+ if (seg.selector) { \
+ seg.base = 0; \
+ seg.limit = HV_AP_SEGMENT_LIMIT; \
+ seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5); \
+ seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \
+ } \
+} while (0) \
+
+int hv_snp_boot_ap(int cpu, unsigned long start_ip)
+{
+ struct sev_es_save_area *vmsa = (struct sev_es_save_area *)
+ __get_free_page(GFP_KERNEL | __GFP_ZERO);
+ struct desc_ptr gdtr;
+ u64 ret, retry = 5;
+ struct hv_start_virtual_processor_input *start_vp_input;
+ union sev_rmp_adjust rmp_adjust;
+ unsigned long flags;
+
+ native_store_gdt(&gdtr);
+
+ vmsa->gdtr.base = gdtr.address;
+ vmsa->gdtr.limit = gdtr.size;
+
+ asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector));
+ hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base);
+
+ asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector));
+ hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base);
+
+ asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector));
+ hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base);
+
+ asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector));
+ hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base);
+
+ vmsa->efer = native_read_msr(MSR_EFER);
+
+ asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4));
+ asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3));
+ asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0));
+
+ vmsa->xcr0 = 1;
+ vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT;
+ vmsa->rip = (u64)secondary_startup_64_no_verify;
+ vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE];
+
+ vmsa->sev_features.snp = 1;
+ vmsa->sev_features.restrict_injection = 1;

So this means that any other feature bits set in the BSP won't be set in the AP? Shouldn't you be using the BSP value and checking to be sure what you really want set is set?

+
+ rmp_adjust.as_uint64 = 0;
+ rmp_adjust.target_vmpl = 1;
+ rmp_adjust.vmsa = 1;
+ ret = rmpadjust((unsigned long)vmsa, RMP_PG_SIZE_4K,
+ rmp_adjust.as_uint64);
+ if (ret != 0) {
+ pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret);
+ return ret;
+ }
+
+ local_irq_save(flags);
+ start_vp_input =
+ (struct hv_start_virtual_processor_input *)ap_start_input_arg;
+ memset(start_vp_input, 0, sizeof(*start_vp_input));
+ start_vp_input->partitionid = -1;
+ start_vp_input->vpindex = cpu;
+ start_vp_input->targetvtl = ms_hyperv.vtl;
+ *(u64 *)&start_vp_input->context[0] = __pa(vmsa) | 1;
+
+ do {
+ ret = hv_do_hypercall(HVCALL_START_VP,
+ start_vp_input, NULL);
+ } while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--);
+
+ if (!hv_result_success(ret)) {
+ pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret);
+ goto done;
+ }
+
+done:
+ local_irq_restore(flags);
+ return ret;
+}
+
void __init hv_vtom_init(void)
{
/*


diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 13dc2a9d23c1..0d57cb1c1bb4 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -88,6 +88,19 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
#define RMPADJUST_VMSA_PAGE_BIT BIT(16)
+union sev_rmp_adjust {
+ u64 as_uint64;

Why not "as_u64" or "val" like below ?

+ struct {
+ unsigned long target_vmpl : 8;
+ unsigned long enable_read : 1;
+ unsigned long enable_write : 1;
+ unsigned long enable_user_execute : 1;
+ unsigned long enable_kernel_execute : 1;
+ unsigned long reserved1 : 4;
+ unsigned long vmsa : 1;

Please do these as
u64 target_vmpl : 8,
enable_read : 1,
...
vmsa : 1;

But why not continue to use the bit definition format that already exists in this file (just above what you added)? If you want to do these union type changes (here and below), it should be a separate patch that converts all the usage sites and then this patch can be purely functional.

+ };
+};
+
/* SNP Guest message request */
struct snp_req_data {
unsigned long req_gpa;
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 770dcf75eaa9..cd6bf989d918 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -425,7 +425,20 @@ struct sev_es_save_area {
u64 guest_exit_info_2;
u64 guest_exit_int_info;
u64 guest_nrip;
- u64 sev_features;
+ union {
+ struct {
+ u64 snp : 1;
+ u64 vtom : 1;
+ u64 reflectvc : 1;
+ u64 restrict_injection : 1;
+ u64 alternate_injection : 1;
+ u64 full_debug : 1;
+ u64 reserved1 : 1;
+ u64 snpbtb_isolation : 1;
+ u64 resrved2 : 56;

Ditto here for the bit definitions and fix the spelling of resrved2. Although, as above, why aren't you expanding the use of the bit definitions that are in here (SVM_SEV_FEAT_SNP_ACTIVE) instead of modifying the structure?

+ };
+ u64 val;

This should be consistent with what you do above in the rmp_adjust union.

+ } sev_features;
u64 vintr_ctrl;
u64 guest_exit_code;
u64 virtual_tom;
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index dea9b881180b..0c5f9f7bd7ba 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -295,6 +295,16 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
native_smp_prepare_cpus(max_cpus);
+ /*
+ * Override wakeup_secondary_cpu_64 callback for SEV-SNP
+ * enlightened guest.
+ */
+ if (hv_isolation_type_en_snp())
+ apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap;
+
+ if (!hv_root_partition)
+ return;
+
#ifdef CONFIG_X86_64
for_each_present_cpu(i) {
if (i == 0)
@@ -502,8 +512,7 @@ static void __init ms_hyperv_init_platform(void)
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
- if (hv_root_partition)
- smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
+ smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
# endif
/*
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index b031244d6d2d..20f3fd8ade2f 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1082,7 +1082,7 @@ static int wakeup_cpu_via_vmgexit(int apic_id, unsigned long start_ip)
* SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
*/
vmsa->vmpl = 0;
- vmsa->sev_features = sev_status >> 2;
+ vmsa->sev_features.val = sev_status >> 2;

Looks like maybe the tab character was lost here between val and the "=" ?

Thanks,
Tom

/* Switch the page over to a VMSA page now that it is initialized */
ret = snp_set_vmsa(vmsa, true);
@@ -1099,7 +1099,7 @@ static int wakeup_cpu_via_vmgexit(int apic_id, unsigned long start_ip)
ghcb = __sev_get_ghcb(&state);
vc_ghcb_invalidate(ghcb);
- ghcb_set_rax(ghcb, vmsa->sev_features);
+ ghcb_set_rax(ghcb, vmsa->sev_features.val);
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE);
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index f4e4cc4f965f..959b075591b2 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -149,6 +149,7 @@ union hv_reference_tsc_msr {
#define HVCALL_ENABLE_VP_VTL 0x000f
#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
#define HVCALL_SEND_IPI 0x000b
+#define HVCALL_ENABLE_VP_VTL 0x000f
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
#define HVCALL_SEND_IPI_EX 0x0015
@@ -168,6 +169,7 @@ union hv_reference_tsc_msr {
#define HVCALL_RETARGET_INTERRUPT 0x007e
#define HVCALL_START_VP 0x0099
#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a
+#define HVCALL_START_VIRTUAL_PROCESSOR 0x0099
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
@@ -223,6 +225,7 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_STATUS_INVALID_PORT_ID 17
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+#define HV_STATUS_TIME_OUT 120
#define HV_STATUS_VTL_ALREADY_ENABLED 134
/*
@@ -783,6 +786,22 @@ struct hv_input_unmap_device_interrupt {
struct hv_interrupt_entry interrupt_entry;
} __packed;
+struct hv_enable_vp_vtl_input {
+ u64 partitionid;
+ u32 vpindex;
+ u8 targetvtl;
+ u8 padding[3];
+ u8 context[0xe0];
+} __packed;
+
+struct hv_start_virtual_processor_input {
+ u64 partitionid;
+ u32 vpindex;
+ u8 targetvtl;
+ u8 padding[3];
+ u8 context[0xe0];
+} __packed;
+
#define HV_SOURCE_SHADOW_NONE 0x0
#define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1

Attachment: OpenPGP_0xDEFF9AE44F304D53.asc
Description: OpenPGP public key