Re: [PATCH v6 13/14] KVM: x86: Introduce new KVM_FEATURE_SEV_LIVE_MIGRATION feature & Custom MSR.

From: Ashish Kalra
Date: Mon Mar 30 2020 - 12:42:52 EST


Hello Brijesh,

On Mon, Mar 30, 2020 at 10:52:16AM -0500, Brijesh Singh wrote:
>
> On 3/30/20 1:23 AM, Ashish Kalra wrote:
> > From: Ashish Kalra <ashish.kalra@xxxxxxx>
> >
> > Add new KVM_FEATURE_SEV_LIVE_MIGRATION feature for guest to check
> > for host-side support for SEV live migration. Also add a new custom
> > MSR_KVM_SEV_LIVE_MIG_EN for guest to enable the SEV live migration
> > feature.
> >
> > Also, ensure that the __bss_decrypted section is marked as decrypted in the
> > page encryption bitmap.
> >
> > Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
> > ---
> > Documentation/virt/kvm/cpuid.rst | 4 ++++
> > Documentation/virt/kvm/msr.rst | 10 ++++++++++
> > arch/x86/include/asm/kvm_host.h | 3 +++
> > arch/x86/include/uapi/asm/kvm_para.h | 5 +++++
> > arch/x86/kernel/kvm.c | 4 ++++
> > arch/x86/kvm/cpuid.c | 3 ++-
> > arch/x86/kvm/svm.c | 5 +++++
> > arch/x86/kvm/x86.c | 7 +++++++
> > arch/x86/mm/mem_encrypt.c | 14 +++++++++++++-
> > 9 files changed, 53 insertions(+), 2 deletions(-)
>
>
> IMHO, this patch should be broken into multiple patches, as it touches
> the guest and the hypervisor at the same time. The first patch can introduce
> the feature flag in KVM, the second patch can make the changes specific
> to SVM, and the third patch can focus on how to make use of that feature
> inside the guest. Additionally, invoking the HC to clear the
> __bss_decrypted section should either be squashed into Patch 10/14 or be a
> separate patch itself.
>
>

Ok.

I will also move the __bss_decrypted section HC to a separate patch.

> > diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst
> > index 01b081f6e7ea..fcb191bb3016 100644
> > --- a/Documentation/virt/kvm/cpuid.rst
> > +++ b/Documentation/virt/kvm/cpuid.rst
> > @@ -86,6 +86,10 @@ KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit
> > before using paravirtualized
> > sched yield.
> >
> > +KVM_FEATURE_SEV_LIVE_MIGRATION 14 guest checks this feature bit
> > + before enabling SEV live
> > + migration feature.
> > +
> > KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side
> > per-cpu warps are expeced in
> > kvmclock
> > diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst
> > index 33892036672d..7cd7786bbb03 100644
> > --- a/Documentation/virt/kvm/msr.rst
> > +++ b/Documentation/virt/kvm/msr.rst
> > @@ -319,3 +319,13 @@ data:
> >
> > KVM guests can request the host not to poll on HLT, for example if
> > they are performing polling themselves.
> > +
> > +MSR_KVM_SEV_LIVE_MIG_EN:
> > + 0x4b564d06
> > +
> > + Control SEV Live Migration features.
> > +
> > +data:
> > + Bit 0 enables (1) or disables (0) host-side SEV Live Migration feature.
> > + Bit 1 enables (1) or disables (0) support for SEV Live Migration extensions.
> > + All other bits are reserved.
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index a96ef6338cd2..ad5faaed43c0 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -780,6 +780,9 @@ struct kvm_vcpu_arch {
> >
> > u64 msr_kvm_poll_control;
> >
> > + /* SEV Live Migration MSR (AMD only) */
> > + u64 msr_kvm_sev_live_migration_flag;
> > +
> > /*
> > * Indicates the guest is trying to write a gfn that contains one or
> > * more of the PTEs used to translate the write itself, i.e. the access
> > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> > index 2a8e0b6b9805..d9d4953b42ad 100644
> > --- a/arch/x86/include/uapi/asm/kvm_para.h
> > +++ b/arch/x86/include/uapi/asm/kvm_para.h
> > @@ -31,6 +31,7 @@
> > #define KVM_FEATURE_PV_SEND_IPI 11
> > #define KVM_FEATURE_POLL_CONTROL 12
> > #define KVM_FEATURE_PV_SCHED_YIELD 13
> > +#define KVM_FEATURE_SEV_LIVE_MIGRATION 14
> >
> > #define KVM_HINTS_REALTIME 0
> >
> > @@ -50,6 +51,7 @@
> > #define MSR_KVM_STEAL_TIME 0x4b564d03
> > #define MSR_KVM_PV_EOI_EN 0x4b564d04
> > #define MSR_KVM_POLL_CONTROL 0x4b564d05
> > +#define MSR_KVM_SEV_LIVE_MIG_EN 0x4b564d06
> >
> > struct kvm_steal_time {
> > __u64 steal;
> > @@ -122,4 +124,7 @@ struct kvm_vcpu_pv_apf_data {
> > #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> > #define KVM_PV_EOI_DISABLED 0x0
> >
> > +#define KVM_SEV_LIVE_MIGRATION_ENABLED (1 << 0)
> > +#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED (1 << 1)
> > +
> > #endif /* _UAPI_ASM_X86_KVM_PARA_H */
> > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > index 6efe0410fb72..8fcee0b45231 100644
> > --- a/arch/x86/kernel/kvm.c
> > +++ b/arch/x86/kernel/kvm.c
> > @@ -418,6 +418,10 @@ static void __init sev_map_percpu_data(void)
> > if (!sev_active())
> > return;
> >
> > + if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION)) {
> > + wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED);
> > + }
> > +
> > for_each_possible_cpu(cpu) {
> > __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
> > __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index b1c469446b07..74c8b2a7270c 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -716,7 +716,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> > (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
> > (1 << KVM_FEATURE_PV_SEND_IPI) |
> > (1 << KVM_FEATURE_POLL_CONTROL) |
> > - (1 << KVM_FEATURE_PV_SCHED_YIELD);
> > + (1 << KVM_FEATURE_PV_SCHED_YIELD) |
> > + (1 << KVM_FEATURE_SEV_LIVE_MIGRATION);
>
>
> Do we want to enable this feature unconditionally? Who will clear the
> feature flags for the non-SEV guest?
>

The guest only enables/activates this feature if SEV is active.

> >
> > if (sched_info_on())
> > entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index c99b0207a443..60ddc242a133 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -7632,6 +7632,7 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
> > unsigned long npages, unsigned long enc)
> > {
> > struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > + struct kvm_vcpu *vcpu = kvm->vcpus[0];
> > kvm_pfn_t pfn_start, pfn_end;
> > gfn_t gfn_start, gfn_end;
> > int ret;
> > @@ -7639,6 +7640,10 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
> > if (!sev_guest(kvm))
> > return -EINVAL;
> >
> > + if (!(vcpu->arch.msr_kvm_sev_live_migration_flag &
> > + KVM_SEV_LIVE_MIGRATION_ENABLED))
> > + return -ENOTTY;
> > +
> > if (!npages)
> > return 0;
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 2127ed937f53..82867b8798f8 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -2880,6 +2880,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > vcpu->arch.msr_kvm_poll_control = data;
> > break;
> >
> > + case MSR_KVM_SEV_LIVE_MIG_EN:
> > + vcpu->arch.msr_kvm_sev_live_migration_flag = data;
> > + break;
> > +
> > case MSR_IA32_MCG_CTL:
> > case MSR_IA32_MCG_STATUS:
> > case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
> > @@ -3126,6 +3130,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > case MSR_KVM_POLL_CONTROL:
> > msr_info->data = vcpu->arch.msr_kvm_poll_control;
> > break;
> > + case MSR_KVM_SEV_LIVE_MIG_EN:
> > + msr_info->data = vcpu->arch.msr_kvm_sev_live_migration_flag;
> > + break;
> > case MSR_IA32_P5_MC_ADDR:
> > case MSR_IA32_P5_MC_TYPE:
> > case MSR_IA32_MCG_CAP:
> > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> > index c9800fa811f6..f6a841494845 100644
> > --- a/arch/x86/mm/mem_encrypt.c
> > +++ b/arch/x86/mm/mem_encrypt.c
> > @@ -502,8 +502,20 @@ void __init mem_encrypt_init(void)
> > * With SEV, we need to make a hypercall when page encryption state is
> > * changed.
> > */
> > - if (sev_active())
> > + if (sev_active()) {
> > + unsigned long nr_pages;
> > +
> > pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall;
> > +
> > + /*
> > + * Ensure that _bss_decrypted section is marked as decrypted in the
> > + * page encryption bitmap.
> > + */
> > + nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
> > + PAGE_SIZE);
> > + set_memory_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
> > + nr_pages, 0);
> > + }
>
>
> Isn't this too late? Shouldn't we be making the hypercall at the same
> time we clear the encryption bit?
>
>

Actually, this is being done somewhat lazily, after the guest
enables/activates the live migration feature. It should be fine to do it
here, or it can be moved into sev_map_percpu_data(), where the first
hypercalls are done; in both cases the __bss_decrypted section will be
marked before the live migration process is initiated.

> > #endif
> >
> > pr_info("AMD %s active\n",

Thanks,
Ashish