Re: [PATCH v3 09/11] KVM: x86/mmu: serialize vCPUs to zap gfn when guest MTRRs are honored

From: Yuan Yao
Date: Fri Jun 16 2023 - 04:09:35 EST


On Fri, Jun 16, 2023 at 03:37:29PM +0800, Yan Zhao wrote:
> On Fri, Jun 16, 2023 at 03:45:50PM +0800, Yuan Yao wrote:
> > > +/*
> > > + * Add @range into kvm->arch.mtrr_zap_list and sort the list in
> > > + * "length" ascending + "start" descending order, so that
> > > + * ranges consuming more zap cycles can be dequeued later and their
> > > + * chances of being found duplicated are increased.
> > > + */
> > > +static void kvm_add_mtrr_zap_list(struct kvm *kvm, struct mtrr_zap_range *range)
> > > +{
> > > + struct list_head *head = &kvm->arch.mtrr_zap_list;
> > > + u64 len = range->end - range->start;
> > > + struct mtrr_zap_range *cur, *n;
> > > + bool added = false;
> > > +
> > > + spin_lock(&kvm->arch.mtrr_zap_list_lock);
> > > +
> > > + if (list_empty(head)) {
> > > + list_add(&range->node, head);
> > > + spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > > + return;
> > > + }
> > > +
> > > + list_for_each_entry_safe(cur, n, head, node) {
> > > + u64 cur_len = cur->end - cur->start;
> > > +
> > > + if (len < cur_len)
> > > + break;
> > > +
> > > + if (len > cur_len)
> > > + continue;
> > > +
> > > + if (range->start > cur->start)
> > > + break;
> > > +
> > > + if (range->start < cur->start)
> > > + continue;
> > > +
> > > + /* equal len & start, no need to add */
> > > + added = true;
> >
> > Possible/worth to ignore the range already covered
> > by queued range ?
>
> I may not get you correctly, but
> the "added" here means an queued range with exactly same start + len
> found, so free and drop adding the new range here.

I mean drop adding three B below if A already in the queue:

|------A--------|
|----B----|

|------A--------|
|----B----|

|------A--------|
|----B----|

>
> >
> > > + kfree(range);
> > > + break;
> > > + }
> > > +
> > > + if (!added)
> > > + list_add_tail(&range->node, &cur->node);
> > > +
> > > + spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > > +}
> > > +
> > > +static void kvm_zap_mtrr_zap_list(struct kvm *kvm)
> > > +{
> > > + struct list_head *head = &kvm->arch.mtrr_zap_list;
> > > + struct mtrr_zap_range *cur = NULL;
> > > +
> > > + spin_lock(&kvm->arch.mtrr_zap_list_lock);
> > > +
> > > + while (!list_empty(head)) {
> > > + u64 start, end;
> > > +
> > > + cur = list_first_entry(head, typeof(*cur), node);
> > > + start = cur->start;
> > > + end = cur->end;
> > > + list_del(&cur->node);
> > > + kfree(cur);
> > > + spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > > +
> > > + kvm_zap_gfn_range(kvm, start, end);
> > > +
> > > + spin_lock(&kvm->arch.mtrr_zap_list_lock);
> > > + }
> > > +
> > > + spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > > +}
> > > +
> > > +static void kvm_zap_or_wait_mtrr_zap_list(struct kvm *kvm)
> > > +{
> > > + if (atomic_cmpxchg_acquire(&kvm->arch.mtrr_zapping, 0, 1) == 0) {
> > > + kvm_zap_mtrr_zap_list(kvm);
> > > + atomic_set_release(&kvm->arch.mtrr_zapping, 0);
> > > + return;
> > > + }
> > > +
> > > + while (atomic_read(&kvm->arch.mtrr_zapping))
> > > + cpu_relax();
> > > +}
> > > +
> > > +static void kvm_mtrr_zap_gfn_range(struct kvm_vcpu *vcpu,
> > > + gfn_t gfn_start, gfn_t gfn_end)
> > > +{
> > > + struct mtrr_zap_range *range;
> > > +
> > > + range = kmalloc(sizeof(*range), GFP_KERNEL_ACCOUNT);
> > > + if (!range)
> > > + goto fail;
> > > +
> > > + range->start = gfn_start;
> > > + range->end = gfn_end;
> > > +
> > > + kvm_add_mtrr_zap_list(vcpu->kvm, range);
> > > +
> > > + kvm_zap_or_wait_mtrr_zap_list(vcpu->kvm);
> > > + return;
> > > +
> > > +fail:
> > > + kvm_clear_mtrr_zap_list(vcpu->kvm);
> > A very small chance race condition that incorrectly
> > clear the queued ranges which have not been zapped by another thread ?
> > Like below:
> >
> > Thread A | Thread B
> > kvm_add_mtrr_zap_list() |
> > | kvm_clear_mtrr_zap_list()
> > kvm_zap_or_wait_mtrr_zap_list() |
> >
> > Call kvm_clear_mtrr_zap_list() here looks unnecessary, other
> > threads(B here) who put thing in the queue will take care them well.
>
> > > + kvm_zap_gfn_range(vcpu->kvm, gfn_start, gfn_end);
>
> Yes, if gfn_start and gfn_end here are not 0 and ~0ULL, the
> kvm_clear_mtrr_zap_list() is not necessary.
> Though in reality, they are always 0-~0ULL, I agree dropping the
> kvm_clear_mtrr_zap_list() here is better.
>
> Thanks!