Re: [PATCH v4 09/13] KVM: xen: automatically use the vcpu_info embedded in shared_info

From: David Woodhouse
Date: Tue Sep 19 2023 - 10:18:47 EST


On Tue, 2023-09-19 at 13:41 +0000, Paul Durrant wrote:
> --- a/arch/x86/kvm/xen.c
> +++ b/arch/x86/kvm/xen.c
> @@ -491,6 +491,21 @@ static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
>  
>  static struct gfn_to_pfn_cache *get_vcpu_info_cache(struct kvm_vcpu *v, unsigned long *offset)
>  {
> +       if (!v->arch.xen.vcpu_info_cache.active && v->arch.xen.vcpu_id < MAX_VIRT_CPUS) {
> +               struct kvm *kvm = v->kvm;
> +
> +               if (offset) {
> +                       if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
> +                               *offset = offsetof(struct shared_info,
> +                                                  vcpu_info[v->arch.xen.vcpu_id]);
> +                       else
> +                               *offset = offsetof(struct compat_shared_info,
> +                                                  vcpu_info[v->arch.xen.vcpu_id]);
> +               }
> +
> +               return &kvm->arch.xen.shinfo_cache;
> +       }
> +
>         if (offset)
>                 *offset = 0;
>  
> @@ -764,6 +779,92 @@ static int kvm_xen_set_vcpu_id(struct kvm_vcpu *vcpu, unsigned int vcpu_id)
>         return 0;
>  }
>  
> +static int kvm_xen_set_vcpu_info(struct kvm_vcpu *vcpu, gpa_t gpa)
> +{
> +       struct kvm *kvm = vcpu->kvm;
> +       struct gfn_to_pfn_cache *si_gpc = &kvm->arch.xen.shinfo_cache;
> +       struct gfn_to_pfn_cache *vi_gpc = &vcpu->arch.xen.vcpu_info_cache;
> +       unsigned long flags;
> +       unsigned long offset;
> +       int ret;
> +
> +       if (gpa == KVM_XEN_INVALID_GPA) {
> +               kvm_gpc_deactivate(vi_gpc);
> +               return 0;
> +       }
> +
> +       /*
> +        * In Xen it is not possible for an explicit vcpu_info to be set
> +        * before the shared_info exists since the former is done in response
> +        * to a hypercall and the latter is set up as part of domain creation.
> +        * The first 32 vCPUs have a default vcpu_info embedded in shared_info
> +        * the content of which is copied across when an explicit vcpu_info is
> +        * set, which can also clearly not be done if we don't know where the
> +        * shared_info is. Hence we need to enforce that the shared_info cache
> +        * is active here.
> +        */
> +       if (!si_gpc->active)
> +               return -EINVAL;
> +
> +       /* Setting an explicit vcpu_info is a one-off operation */
> +       if (vi_gpc->active)
> +               return -EINVAL;

Is that the errno that Xen will return to the hypercall if a guest
tries it? I.e. if the VMM simply returns the errno that it gets from
the kernel, is that OK?

> +       ret = kvm_gpc_activate(vi_gpc, gpa, sizeof(struct vcpu_info));

From this moment, can't interrupts be delivered to the new vcpu_info,
even though the memcpy hasn't happened yet?

I think we need to ensure that any kvm_xen_set_evtchn_fast() which
happens at this point cannot proceed, and falls back to the slow path.

Can we set a flag before we activate the vcpu_info and clear it after
the memcpy is done, then make kvm_xen_set_evtchn_fast() return
EWOULDBLOCK whenever that flag is set?

The slow path in kvm_xen_set_evtchn() takes kvm->arch.xen.xen_lock and
I think kvm_xen_vcpu_set_attr() has taken that same lock before you get
to this code, so it works out nicely?



> +       if (ret)
> +               return ret;
> +
> +       /* Nothing more to do if the vCPU is not among the first 32 */
> +       if (vcpu->arch.xen.vcpu_id >= MAX_VIRT_CPUS)
> +               return 0;
> +
> +       /*
> +        * It's possible that the vcpu_info cache has been invalidated since
> +        * we activated it so we need to go through the check-refresh dance.
> +        */
> +       read_lock_irqsave(&vi_gpc->lock, flags);
> +       while (!kvm_gpc_check(vi_gpc, sizeof(struct vcpu_info))) {
> +               read_unlock_irqrestore(&vi_gpc->lock, flags);
> +
> +               ret = kvm_gpc_refresh(vi_gpc, sizeof(struct vcpu_info));
> +               if (ret) {
> +                       kvm_gpc_deactivate(vi_gpc);
> +                       return ret;
> +               }
> +
> +               read_lock_irqsave(&vi_gpc->lock, flags);
> +       }
> +
> +       /* Now lock the shared_info cache so we can copy the vcpu_info */
> +       read_lock(&si_gpc->lock);

This adds a new lock ordering rule of the vcpu_info lock(s) before the
shared_info lock. I don't know that it's *wrong* but it seems weird to
me; I expected the shared_info to come first?

I avoided taking both at once in kvm_xen_set_evtchn_fast(), although
maybe if we are going to have a rule that allows both, we could revisit
that. Suspect it isn't needed.

Either way it is worth a clear comment somewhere to document the lock
ordering, and I'd also like to know this has been tested with lockdep,
which is often cleverer than me.

> +       while (!kvm_gpc_check(si_gpc, PAGE_SIZE)) {
> +               read_unlock(&si_gpc->lock);
> +
> +               ret = kvm_gpc_refresh(si_gpc, PAGE_SIZE);
> +               if (ret) {
> +                       read_unlock_irqrestore(&vi_gpc->lock, flags);
> +                       kvm_gpc_deactivate(vi_gpc);
> +                       return ret;
> +               }
> +
> +               read_lock(&si_gpc->lock);
> +       }
> +
> +       if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
> +               offset = offsetof(struct shared_info,
> +                                 vcpu_info[vcpu->arch.xen.vcpu_id]);
> +       else
> +               offset = offsetof(struct compat_shared_info,
> +                                 vcpu_info[vcpu->arch.xen.vcpu_id]);
> +
> +       memcpy(vi_gpc->khva, si_gpc->khva + offset, sizeof(struct vcpu_info));
> +
> +       read_unlock(&si_gpc->lock);
> +       read_unlock_irqrestore(&vi_gpc->lock, flags);
> +
> +       return 0;
> +}
> +
>  int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
>  {
>         int idx, r = -ENOENT;
> @@ -779,14 +880,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
>                 BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
>                              offsetof(struct compat_vcpu_info, time));
>  
> -               if (data->u.gpa == KVM_XEN_INVALID_GPA) {
> -                       kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
> -                       r = 0;
> -                       break;
> -               }
> -
> -               r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache,
> -                                    data->u.gpa, sizeof(struct vcpu_info));
> +               r = kvm_xen_set_vcpu_info(vcpu, data->u.gpa);
>                 if (!r)
>                         kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
>  

Attachment: smime.p7s
Description: S/MIME cryptographic signature