Re: [PATCH v3 09/11] KVM: x86/mmu: serialize vCPUs to zap gfn when guest MTRRs are honored

From: Yan Zhao
Date: Fri Jun 16 2023 - 04:03:12 EST


On Fri, Jun 16, 2023 at 03:45:50PM +0800, Yuan Yao wrote:
> > +/*
> > + * Add @range into kvm->arch.mtrr_zap_list and sort the list in
> > + * "length" ascending + "start" descending order, so that
> > + * ranges consuming more zap cycles can be dequeued later and their
> > + * chances of being found duplicated are increased.
> > + */
> > +static void kvm_add_mtrr_zap_list(struct kvm *kvm, struct mtrr_zap_range *range)
> > +{
> > +        struct list_head *head = &kvm->arch.mtrr_zap_list;
> > +        u64 len = range->end - range->start;
> > +        struct mtrr_zap_range *cur, *n;
> > +        bool added = false;
> > +
> > +        spin_lock(&kvm->arch.mtrr_zap_list_lock);
> > +
> > +        if (list_empty(head)) {
> > +                list_add(&range->node, head);
> > +                spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > +                return;
> > +        }
> > +
> > +        list_for_each_entry_safe(cur, n, head, node) {
> > +                u64 cur_len = cur->end - cur->start;
> > +
> > +                if (len < cur_len)
> > +                        break;
> > +
> > +                if (len > cur_len)
> > +                        continue;
> > +
> > +                if (range->start > cur->start)
> > +                        break;
> > +
> > +                if (range->start < cur->start)
> > +                        continue;
> > +
> > +                /* equal len & start, no need to add */
> > +                added = true;
>
> Is it possible/worthwhile to also ignore a range that is already covered
> by a queued range?

I may not have understood you correctly, but
"added" here means a queued range with exactly the same start + len was
found, so the new range is freed and not added here.
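
To illustrate the ordering and the duplicate-drop behavior, below is a
stand-alone user-space sketch (not kernel code; demo_range/demo_add are
made-up names for the demo only) of the same insert policy:

#include <stdio.h>
#include <stdlib.h>

struct demo_range {
        unsigned long long start;
        unsigned long long end;
        struct demo_range *next;
};

/*
 * Keep the list sorted by length ascending, then start descending,
 * and drop an exact duplicate (same start and length) instead of
 * queueing it twice, mirroring kvm_add_mtrr_zap_list() above.
 */
static void demo_add(struct demo_range **head, unsigned long long start,
                     unsigned long long end)
{
        unsigned long long len = end - start;
        struct demo_range **pos = head;
        struct demo_range *r;

        for (; *pos; pos = &(*pos)->next) {
                unsigned long long cur_len = (*pos)->end - (*pos)->start;

                if (len < cur_len)
                        break;          /* shorter ranges first */
                if (len > cur_len)
                        continue;
                if (start > (*pos)->start)
                        break;          /* same length: larger start first */
                if (start < (*pos)->start)
                        continue;
                return;                 /* exact duplicate: drop it */
        }

        r = malloc(sizeof(*r));
        if (!r)
                return;
        r->start = start;
        r->end = end;
        r->next = *pos;
        *pos = r;
}

int main(void)
{
        struct demo_range *head = NULL, *r;

        demo_add(&head, 0x000, 0x1000);         /* long range, dequeued last */
        demo_add(&head, 0x100, 0x200);          /* short range */
        demo_add(&head, 0x300, 0x400);          /* same length, larger start */
        demo_add(&head, 0x100, 0x200);          /* exact duplicate: dropped */

        for (r = head; r; r = r->next)
                printf("[0x%llx, 0x%llx)\n", r->start, r->end);
        return 0;
}

This prints [0x300, 0x400), [0x100, 0x200), [0x0, 0x1000): the full-size
range queued first still ends up at the tail of the list, and the
duplicate is not queued twice.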

>
> > +                kfree(range);
> > +                break;
> > +        }
> > +
> > +        if (!added)
> > +                list_add_tail(&range->node, &cur->node);
> > +
> > +        spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > +}
> > +
> > +static void kvm_zap_mtrr_zap_list(struct kvm *kvm)
> > +{
> > +        struct list_head *head = &kvm->arch.mtrr_zap_list;
> > +        struct mtrr_zap_range *cur = NULL;
> > +
> > +        spin_lock(&kvm->arch.mtrr_zap_list_lock);
> > +
> > +        while (!list_empty(head)) {
> > +                u64 start, end;
> > +
> > +                cur = list_first_entry(head, typeof(*cur), node);
> > +                start = cur->start;
> > +                end = cur->end;
> > +                list_del(&cur->node);
> > +                kfree(cur);
> > +                spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > +
> > +                kvm_zap_gfn_range(kvm, start, end);
> > +
> > +                spin_lock(&kvm->arch.mtrr_zap_list_lock);
> > +        }
> > +
> > +        spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > +}
> > +
> > +static void kvm_zap_or_wait_mtrr_zap_list(struct kvm *kvm)
> > +{
> > +        if (atomic_cmpxchg_acquire(&kvm->arch.mtrr_zapping, 0, 1) == 0) {
> > +                kvm_zap_mtrr_zap_list(kvm);
> > +                atomic_set_release(&kvm->arch.mtrr_zapping, 0);
> > +                return;
> > +        }
> > +
> > +        while (atomic_read(&kvm->arch.mtrr_zapping))
> > +                cpu_relax();
> > +}
> > +
> > +static void kvm_mtrr_zap_gfn_range(struct kvm_vcpu *vcpu,
> > +                                   gfn_t gfn_start, gfn_t gfn_end)
> > +{
> > +        struct mtrr_zap_range *range;
> > +
> > +        range = kmalloc(sizeof(*range), GFP_KERNEL_ACCOUNT);
> > +        if (!range)
> > +                goto fail;
> > +
> > +        range->start = gfn_start;
> > +        range->end = gfn_end;
> > +
> > +        kvm_add_mtrr_zap_list(vcpu->kvm, range);
> > +
> > +        kvm_zap_or_wait_mtrr_zap_list(vcpu->kvm);
> > +        return;
> > +
> > +fail:
> > +        kvm_clear_mtrr_zap_list(vcpu->kvm);
> Isn't there a very small chance of a race condition that incorrectly
> clears queued ranges which have not yet been zapped by another thread?
> Like below:
>
> Thread A                          |  Thread B
> kvm_add_mtrr_zap_list()           |
>                                   |  kvm_clear_mtrr_zap_list()
> kvm_zap_or_wait_mtrr_zap_list()   |
>
> Calling kvm_clear_mtrr_zap_list() here looks unnecessary; the other
> threads (B here) who put things in the queue will take care of them.

> > +        kvm_zap_gfn_range(vcpu->kvm, gfn_start, gfn_end);

Yes, if gfn_start and gfn_end here are not 0 and ~0ULL, then
kvm_clear_mtrr_zap_list() is not necessary.
Though in reality they are always 0 and ~0ULL, I agree that dropping
kvm_clear_mtrr_zap_list() here is better.
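
I.e., the failure path would then just zap the requested range directly.
A sketch of how kvm_mtrr_zap_gfn_range() could look with the clear
dropped (untested, just to show the intent):

static void kvm_mtrr_zap_gfn_range(struct kvm_vcpu *vcpu,
                                   gfn_t gfn_start, gfn_t gfn_end)
{
        struct mtrr_zap_range *range;

        range = kmalloc(sizeof(*range), GFP_KERNEL_ACCOUNT);
        if (!range)
                goto fail;

        range->start = gfn_start;
        range->end = gfn_end;

        kvm_add_mtrr_zap_list(vcpu->kvm, range);

        kvm_zap_or_wait_mtrr_zap_list(vcpu->kvm);
        return;

fail:
        /*
         * No kvm_clear_mtrr_zap_list(): ranges queued by other threads
         * are zapped by whoever queued them; only zap this range here.
         */
        kvm_zap_gfn_range(vcpu->kvm, gfn_start, gfn_end);
}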

Thanks!