Re: [PATCH] KVM: SEV: Allow SEV intra-host migration of VM with mirrors

From: Peter Gonda
Date: Fri Feb 11 2022 - 14:38:47 EST


On Mon, Feb 7, 2022 at 4:19 PM Sean Christopherson <seanjc@xxxxxxxxxx> wrote:
>
> On Tue, Jan 11, 2022, Peter Gonda wrote:
> > @@ -1623,22 +1624,41 @@ static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
> > }
> > }
> >
> > -static void sev_migrate_from(struct kvm_sev_info *dst,
> > - struct kvm_sev_info *src)
> > +static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
> > {
> > + struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info;
> > + struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info;
> > + struct kvm_sev_info *mirror, *tmp;
> > +
> > dst->active = true;
> > dst->asid = src->asid;
> > dst->handle = src->handle;
> > dst->pages_locked = src->pages_locked;
> > dst->enc_context_owner = src->enc_context_owner;
> > + dst->num_mirrored_vms = src->num_mirrored_vms;
> >
> > src->asid = 0;
> > src->active = false;
> > src->handle = 0;
> > src->pages_locked = 0;
> > src->enc_context_owner = NULL;
> > + src->num_mirrored_vms = 0;
> >
> > list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
> > + list_cut_before(&dst->mirror_vms, &src->mirror_vms, &src->mirror_vms);
> > +
> > + /*
> > + * If this VM has mirrors we need to update the KVM refcounts from the
> > + * source to the destination.
> > + */
>
> It's worth calling out that a reference is being taken on behalf of the mirror,
> that detail is easy to miss. And maybe call out that the caller holds a reference
> to @src_kvm?
>
> /*
> * If this VM has mirrors, "transfer" each mirror's refcount of the
> * source to the destination (this KVM). The caller holds a reference
> * to the source, so there's no danger of use-after-free.
> */

Thanks took your comment.

>
> > + if (dst->num_mirrored_vms > 0) {
> > + list_for_each_entry_safe(mirror, tmp, &dst->mirror_vms,
> > + mirror_entry) {
> > + kvm_get_kvm(dst_kvm);
> > + kvm_put_kvm(src_kvm);
> > + mirror->enc_context_owner = dst_kvm;
> > + }
> > + }
> > }
> >
> > static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
>
> ...
>
> > @@ -2050,10 +2062,17 @@ void sev_vm_destroy(struct kvm *kvm)
> > if (is_mirroring_enc_context(kvm)) {
> > struct kvm *owner_kvm = sev->enc_context_owner;
> > struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info;
> > + struct kvm_sev_info *mirror, *tmp;
> >
> > mutex_lock(&owner_kvm->lock);
> > if (!WARN_ON(!owner_sev->num_mirrored_vms))
> > owner_sev->num_mirrored_vms--;
> > +
> > + list_for_each_entry_safe(mirror, tmp, &owner_sev->mirror_vms,
> > + mirror_entry)
> > + if (mirror == sev)
> > + list_del(&mirror->mirror_entry);
> > +
>
> There's no need to walk the list just to find the entry you already have. Maaaaybe
> if you were sanity checking, but it's not like we can do anything helpful if the
> sanity check fails, so eating a #GP due to consuming e.g. LIST_POISON1 is just as
> good as anything else.
>
> if (is_mirroring_enc_context(kvm)) {
> struct kvm *owner_kvm = sev->enc_context_owner;
>
> mutex_lock(&owner_kvm->lock);
> list_del(&->mirror_entry);
> mutex_unlock(&owner_kvm->lock);
> kvm_put_kvm(owner_kvm);
> return;
> }

Thats better thanks.

>
> > mutex_unlock(&owner_kvm->lock);
> > kvm_put_kvm(owner_kvm);
> > return;
> > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> > index daa8ca84afcc..b9f5e33d5232 100644
> > --- a/arch/x86/kvm/svm/svm.h
> > +++ b/arch/x86/kvm/svm/svm.h
> > @@ -81,6 +81,10 @@ struct kvm_sev_info {
> > u64 ap_jump_table; /* SEV-ES AP Jump Table address */
> > struct kvm *enc_context_owner; /* Owner of copied encryption context */
> > unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */
> > + union {
> > + struct list_head mirror_vms; /* List of VMs mirroring */
> > + struct list_head mirror_entry; /* Use as a list entry of mirrors */
> > + };
>
>
> Whoops. IIRC, I suggested a union for tracking mirrors vs mirrored. After seeing
> the code, that was a bad suggestion. Memory isn't at a premimum for a per-VM
> object, so storing an extra list_head is a non-issue.
>
> If we split the two, then num_mirrored_vms goes away, and more importantly we won't
> have to deal with bugs where we inevitably forget to guard access to the union with
> a check against num_mirrored_vms.
>
> E.g. (completely untested and probably incomplete)

Done. Thats more readable I think now.

>
> ---
> arch/x86/kvm/svm/sev.c | 32 +++++++++-----------------------
> arch/x86/kvm/svm/svm.h | 7 ++-----
> 2 files changed, 11 insertions(+), 28 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 369cf8c4da61..41f7e733c33e 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -1635,29 +1635,25 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
> dst->handle = src->handle;
> dst->pages_locked = src->pages_locked;
> dst->enc_context_owner = src->enc_context_owner;
> - dst->num_mirrored_vms = src->num_mirrored_vms;
>
> src->asid = 0;
> src->active = false;
> src->handle = 0;
> src->pages_locked = 0;
> src->enc_context_owner = NULL;
> - src->num_mirrored_vms = 0;
>
> list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
> list_cut_before(&dst->mirror_vms, &src->mirror_vms, &src->mirror_vms);
>
> /*
> - * If this VM has mirrors we need to update the KVM refcounts from the
> - * source to the destination.
> + * If this VM has mirrors, "transfer" each mirror's refcount from the
> + * source to the destination (this KVM). The caller holds a reference
> + * to the source, so there's no danger of use-after-free.
> */
> - if (dst->num_mirrored_vms > 0) {
> - list_for_each_entry_safe(mirror, tmp, &dst->mirror_vms,
> - mirror_entry) {
> - kvm_get_kvm(dst_kvm);
> - kvm_put_kvm(src_kvm);
> - mirror->enc_context_owner = dst_kvm;
> - }
> + list_for_each_entry_safe(mirror, tmp, &dst->mirror_vms, mirror_entry) {
> + kvm_get_kvm(dst_kvm);
> + kvm_put_kvm(src_kvm);
> + mirror->enc_context_owner = dst_kvm;
> }
> }
>
> @@ -2019,7 +2015,6 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
> */
> source_sev = &to_kvm_svm(source_kvm)->sev_info;
> kvm_get_kvm(source_kvm);
> - source_sev->num_mirrored_vms++;
> mirror_sev = &to_kvm_svm(kvm)->sev_info;
> list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
>
> @@ -2053,7 +2048,7 @@ void sev_vm_destroy(struct kvm *kvm)
> struct list_head *head = &sev->regions_list;
> struct list_head *pos, *q;
>
> - WARN_ON(sev->num_mirrored_vms);
> + WARN_ON(!list_empty(&sev->mirror_vms));
>
> if (!sev_guest(kvm))
> return;
> @@ -2061,18 +2056,9 @@ void sev_vm_destroy(struct kvm *kvm)
> /* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
> if (is_mirroring_enc_context(kvm)) {
> struct kvm *owner_kvm = sev->enc_context_owner;
> - struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info;
> - struct kvm_sev_info *mirror, *tmp;
>
> mutex_lock(&owner_kvm->lock);
> - if (!WARN_ON(!owner_sev->num_mirrored_vms))
> - owner_sev->num_mirrored_vms--;
> -
> - list_for_each_entry_safe(mirror, tmp, &owner_sev->mirror_vms,
> - mirror_entry)
> - if (mirror == sev)
> - list_del(&mirror->mirror_entry);
> -
> + list_del(&mirror->mirror_entry);
> mutex_unlock(&owner_kvm->lock);
> kvm_put_kvm(owner_kvm);
> return;
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 0876329f273d..79bf568c2558 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -79,11 +79,8 @@ struct kvm_sev_info {
> struct list_head regions_list; /* List of registered regions */
> u64 ap_jump_table; /* SEV-ES AP Jump Table address */
> struct kvm *enc_context_owner; /* Owner of copied encryption context */
> - unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */
> - union {
> - struct list_head mirror_vms; /* List of VMs mirroring */
> - struct list_head mirror_entry; /* Use as a list entry of mirrors */
> - };
> + struct list_head mirror_vms; /* List of VMs mirroring */
> + struct list_head mirror_entry; /* Use as a list entry of mirrors */
> struct misc_cg *misc_cg; /* For misc cgroup accounting */
> atomic_t migration_in_progress;
> };
>
> base-commit: 618a9a6fda17f48d86a1ce9851bd8ceffdc57d75

I added a base commit to my V2 patch.

> --
>