Re: [RFC PATCH 2/8] KVM: Add KVM_MAP_MEMORY vcpu ioctl to pre-populate guest memory

From: Isaku Yamahata
Date: Mon Mar 11 2024 - 18:19:47 EST


On Mon, Mar 11, 2024 at 10:23:28AM -0700,
Sean Christopherson <seanjc@xxxxxxxxxx> wrote:

> On Fri, Mar 01, 2024, isaku.yamahata@xxxxxxxxx wrote:
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index d1fd9cb5d037..d77c9b79d76b 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -4419,6 +4419,69 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
> > return fd;
> > }
> >
> > +__weak int kvm_arch_vcpu_pre_map_memory(struct kvm_vcpu *vcpu)
> > +{
> > + return -EOPNOTSUPP;
> > +}
> > +
> > +__weak int kvm_arch_vcpu_map_memory(struct kvm_vcpu *vcpu,
> > + struct kvm_memory_mapping *mapping)
> > +{
> > + return -EOPNOTSUPP;
> > +}
> > +
> > +static int kvm_vcpu_map_memory(struct kvm_vcpu *vcpu,
> > + struct kvm_memory_mapping *mapping)
> > +{
> > + bool added = false;
> > + int idx, r = 0;
>
> Pointless initialization of 'r'.
>
> > +
> > + if (mapping->flags & ~(KVM_MEMORY_MAPPING_FLAG_WRITE |
> > + KVM_MEMORY_MAPPING_FLAG_EXEC |
> > + KVM_MEMORY_MAPPING_FLAG_USER |
> > + KVM_MEMORY_MAPPING_FLAG_PRIVATE))
> > + return -EINVAL;
> > + if ((mapping->flags & KVM_MEMORY_MAPPING_FLAG_PRIVATE) &&
> > + !kvm_arch_has_private_mem(vcpu->kvm))
> > + return -EINVAL;
> > +
> > + /* Sanity check */
>
> Pointless comment.
>
> > + if (!IS_ALIGNED(mapping->source, PAGE_SIZE) ||
> > + !mapping->nr_pages ||
>
> > + mapping->base_gfn + mapping->nr_pages <= mapping->base_gfn)
> > + return -EINVAL;
> > +
> > + vcpu_load(vcpu);
> > + idx = srcu_read_lock(&vcpu->kvm->srcu);
> > + r = kvm_arch_vcpu_pre_map_memory(vcpu);
>
> This hooks is unnecessary, x86's kvm_mmu_reload() is optimized for the happy path
> where the MMU is already loaded. Just make the call from kvm_arch_vcpu_map_memory().
>
> > + if (r)
> > + return r;
>
> Which is a good thing, because this leaks the SRCU lock.
>
> > +
> > + while (mapping->nr_pages) {
> > + if (signal_pending(current)) {
> > + r = -ERESTARTSYS;
>
> Why -ERESTARTSYS instead of -EINTR? The latter is KVM's typical response to a
> pending signal.
>
> > + break;
> > + }
> > +
> > + if (need_resched())
>
> No need to manually check need_resched(), the below is a _conditional_ resched.
> The reason KVM explicitly checks need_resched() in MMU flows is because KVM needs
> to drop mmu_lock before rescheduling, i.e. calling cond_resched() directly would
> try to schedule() while holding a spinlock.
>
> > + cond_resched();
> > +
> > + r = kvm_arch_vcpu_map_memory(vcpu, mapping);
> > + if (r)
> > + break;
> > +
> > + added = true;
> > + }
> > +
> > + srcu_read_unlock(&vcpu->kvm->srcu, idx);
> > + vcpu_put(vcpu);
> > +
> > + if (added && mapping->nr_pages > 0)
> > + r = -EAGAIN;
>
> No, this clobbers 'r', which might hold a fatal error code. I don't see any
> reason for common code to ever force -EAGAIN, it can't possibly know if trying
> again is reasonable.

Thanks for review. With those included, the hunk is as follows.


diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d1fd9cb5d037..342269ef9f13 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4419,6 +4419,47 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
return fd;
}

+__weak int kvm_arch_vcpu_map_memory(struct kvm_vcpu *vcpu,
+ struct kvm_memory_mapping *mapping)
+{
+ return -EOPNOTSUPP;
+}
+
+static int kvm_vcpu_map_memory(struct kvm_vcpu *vcpu,
+ struct kvm_memory_mapping *mapping)
+{
+ int idx, r;
+
+ if (mapping->flags)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(mapping->source, PAGE_SIZE) ||
+ mapping->base_gfn + mapping->nr_pages <= mapping->base_gfn)
+ return -EINVAL;
+
+ vcpu_load(vcpu);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ r = 0;
+ while (mapping->nr_pages) {
+ if (signal_pending(current)) {
+ r = -EINTR;
+ break;
+ }
+
+ r = kvm_arch_vcpu_map_memory(vcpu, mapping);
+ if (r)
+ break;
+
+ cond_resched();
+ }
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ vcpu_put(vcpu);
+
+ return r;
+}
+
static long kvm_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
--
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>