Re: [RFC PATCH v2 2/3] KVM: selftests: add selftest driver for KVM to test memory slots for MMIO BARs

From: Yan Zhao
Date: Fri Jan 05 2024 - 05:16:56 EST


On Thu, Jan 04, 2024 at 04:16:04PM +0800, Yuan Yao wrote:
> On Wed, Jan 03, 2024 at 04:44:57PM +0800, Yan Zhao wrote:
> > This driver is for testing KVM memory slots for device MMIO BARs that are
> > mapped to pages serving as device resources.
> >
> > This driver implements a mock device whose device resources are pages
> > array that can be mmaped into user space. It provides ioctl interface to
> > users to configure whether the pages are allocated as a compound huge page
> > or not.
>
> I just think that it can be used in other scenarios, not only KVM.
>
Right. But I just want to make it to serve only KVM specific tests :)

> >
> > KVM selftest code can then map the mock device resource to KVM memslots
> > to check if any error encountered. After VM shutdown, mock device
> > resource's page reference counters are checked to ensure KVM does not hold
> > extra reference count during memslot add/removal.
> >
> > Signed-off-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
...

> > +struct kvm_mock_device {
> > + bool compound;
> > + struct page *resource;
> > + u64 bar_size;
> > + int order;
>
> Do you have plan to allow user to change the bar_size via IOCTL ?
> If no "order" and "bar_size" can be removed.
>
Currently no. But this structure is private to the test driver.
What the benefit to remove the two?

> > + int *ref_array;
> > + struct mutex lock;
> > + bool prepared;
> > +};
> > +
> > +static bool opened;
> > +
> > +#define BAR_SIZE 0x200000UL
> > +#define DEFAULT_COMPOUND true
>
> "kmdev->compound = true;" is more easy to understand,
> but "kmdev->compound = DEFAULT_COMPOUND;" not.
>
Ok. But I want to make the state that "default compound state is true"
more explicit and configurable by a macro.


> > +
> > +static vm_fault_t kvm_mock_device_mmap_fault(struct vm_fault *vmf)
> > +{
> > + struct vm_area_struct *vma = vmf->vma;
> > + struct kvm_mock_device *kmdev = vma->vm_private_data;
> > + struct page *p = kmdev->resource;
> > + vm_fault_t ret = VM_FAULT_NOPAGE;
> > + unsigned long addr;
> > + int i;
> > +
> > + for (addr = vma->vm_start, i = vma->vm_pgoff; addr < vma->vm_end;
> > + addr += PAGE_SIZE, i++) {
>
> Just question:
> Will it be enough if only map the accessed page for the testing purpose ?
>
It should be enough.
But as VFIO usually maps the whole BAR in a single fault, I want to
keep align with it :)

> > +
> > + ret = vmf_insert_pfn(vma, addr, page_to_pfn(p + i));
> > + if (ret == VM_FAULT_NOPAGE)
> > + continue;
> > +
> > + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > + return ret;
> > +
> > + }
> > + return ret;
> > +}
> > +
> > +static const struct vm_operations_struct kvm_mock_device_mmap_ops = {
> > + .fault = kvm_mock_device_mmap_fault,
> > +};
> > +
> > +static int kvm_mock_device_fops_mmap(struct file *file, struct vm_area_struct *vma)
> > +{
> > + struct kvm_mock_device *kmdev = file->private_data;
> > + u64 offset, req_len;
> > + int ret = 0;
> > +
> > + mutex_lock(&kmdev->lock);
> > + if (!kmdev->prepared) {
> > + ret = -ENODEV;
> > + goto out;
> > + }
> > +
> > + offset = vma->vm_pgoff << PAGE_SHIFT;
> > + req_len = vma->vm_end - vma->vm_start;
> > + if (offset + req_len > BAR_SIZE) {
> > + ret = -EINVAL;
> > + goto out;
> > + }
> > +
> > + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
> > + vma->vm_ops = &kvm_mock_device_mmap_ops;
> > + vma->vm_private_data = kmdev;
> > +out:
> > + mutex_unlock(&kmdev->lock);
> > + return ret;
> > +}
> > +
> > +static int kvm_mock_device_prepare_resource(struct kvm_mock_device *kmdev)
> > +{
> > + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
> > + unsigned int order = kmdev->order;
> > + unsigned long count = 1 << order;
> > + unsigned long i;
> > + struct page *p;
> > + int ret;
> > +
> > + mutex_lock(&kmdev->lock);
> > + if (kmdev->prepared) {
> > + ret = -EBUSY;
> > + goto out;
> > + }
> > +
> > + if (kmdev->compound)
> > + gfp_flags |= __GFP_COMP;
> > +
> > + p = alloc_pages_node(0, gfp_flags, order);
>
> Please alloc_pages() to honor the memory policy of current task.
> Hardcode to node 0 just works, but not hard to do this better.
>
Ok, will do it. thanks!

> > + if (!p) {
> > + ret = -ENOMEM;
> > + goto out;
> > + }
> > +
> > + kmdev->ref_array = kmalloc_array(count, sizeof(kmdev->ref_array),
> > + GFP_KERNEL_ACCOUNT);
> > + if (!kmdev->ref_array) {
> > + __free_pages(p, order);
> > + ret = -ENOMEM;
> > + goto out;
> > + }
> > +
> > + for (i = 0; i < count; i++)
> > + kmdev->ref_array[i] = page_ref_count(p + i);
> > +
> > + kmdev->resource = p;
> > + kmdev->prepared = true;
> > +out:
> > + mutex_unlock(&kmdev->lock);
> > + return ret;
> > +}
> > +
> > +static int kvm_mock_device_check_resource_ref(struct kvm_mock_device *kmdev)
> > +{
> > + u32 i, count = 1 << kmdev->order;
> > + struct page *p = kmdev->resource;
> > + int inequal = 0;
> > +
> > + mutex_lock(&kmdev->lock);
> > + if (!kmdev->prepared) {
> > + mutex_unlock(&kmdev->lock);
> > + return -ENODEV;
> > + }
> > +
> > + for (i = 0; i < count; i++) {
> > + if (kmdev->ref_array[i] == page_ref_count(p + i))
> > + continue;
> > +
> > + pr_err("kvm test device check resource page %d old ref=%d new ref=%d\n",
> > + i, kmdev->ref_array[i], page_ref_count(p + i));
>
> How about just return a bitmap to userspace for each page ineuqal ?
> Or if inequal number itself is enough then just remove this output, in worst case
> it prints 512 times for 2MB bar case, which looks just useless.
>
Right, print for 512 times is too much though it will only appear in the
worst failure case.
But I do think the info of "old ref" and "new ref" are useful for debugging.
So, instead of printing bitmap, what about only printing the error message
for once for the first error page?

> > + inequal++;
> > + }
> > + mutex_unlock(&kmdev->lock);
> > +
> > + return inequal;
> > +}
> > +
> > +static int kvm_mock_device_fops_open(struct inode *inode, struct file *filp)
> > +{
> > + struct kvm_mock_device *kmdev;
> > +
> > + if (opened)
> > + return -EBUSY;
>
> It can't work in case of 2 who open the device file at *real* same time, at least
> you need atomic helpers for that purpose.
>
Ah, right. Will turn it to atomic.

> BTW I saw "kvm_mock_devie" instance is per file level, so maybe not hard
> to remove this limitation ?
Yes, but as it's a test driver, I don't see any needs to complicate the code.

> > +
> > + kmdev = kzalloc(sizeof(*kmdev), GFP_KERNEL_ACCOUNT);
> > + if (!kmdev)
> > + return -ENOMEM;
> > +
> > + kmdev->compound = DEFAULT_COMPOUND;
> > + kmdev->bar_size = BAR_SIZE;
> > + kmdev->order = get_order(kmdev->bar_size);
> > + mutex_init(&kmdev->lock);
> > + filp->private_data = kmdev;
> > +
> > + opened = true;
> > + return 0;
> > +}
> > +
> > +static int kvm_mock_device_fops_release(struct inode *inode, struct file *filp)
> > +{
> > + struct kvm_mock_device *kmdev = filp->private_data;
> > +
> > + if (kmdev->prepared)
> > + __free_pages(kmdev->resource, kmdev->order);
> > + mutex_destroy(&kmdev->lock);
> > + kfree(kmdev->ref_array);
> > + kfree(kmdev);
> > + opened = false;
> > + return 0;
> > +}
> > +
> > +static long kvm_mock_device_fops_unlocked_ioctl(struct file *filp,
> > + unsigned int command,
> > + unsigned long arg)
> > +{
> > + struct kvm_mock_device *kmdev = filp->private_data;
> > + int r;
> > +
> > + switch (command) {
> > + case KVM_MOCK_DEVICE_GET_BAR_SIZE: {
> > + u64 bar_size;
> > +
> > + bar_size = kmdev->bar_size;
> > + r = put_user(bar_size, (u64 __user *)arg);
> > + break;
> > + }
> > + case KVM_MOCK_DEVICE_PREPARE_RESOURCE: {
> > + u32 compound;
> > +
> > + r = get_user(compound, (u32 __user *)arg);
> > + if (r)
> > + return r;
> > +
> > + kmdev->compound = compound;
> > + r = kvm_mock_device_prepare_resource(kmdev);
> > + break;
> > +
> > + }
> > + case KVM_MOCK_DEVICE_CHECK_BACKEND_REF: {
> > + int inequal;
> > +
> > + inequal = kvm_mock_device_check_resource_ref(kmdev);
> > +
> > + if (inequal < 0)
> > + return inequal;
> > +
> > + r = put_user(inequal, (u32 __user *)arg);
> > + break;
> > + }
> > + default:
> > + r = -EOPNOTSUPP;
> > + }
> > +
> > + return r;
> > +}
> > +
> > +
> > +static const struct file_operations kvm_mock_device_fops = {
> > + .open = kvm_mock_device_fops_open,
> > + .release = kvm_mock_device_fops_release,
> > + .mmap = kvm_mock_device_fops_mmap,
> > + .unlocked_ioctl = kvm_mock_device_fops_unlocked_ioctl,
> > + .llseek = default_llseek,
> > + .owner = THIS_MODULE,
> > +};
> > +
> > +
> > +static int __init kvm_mock_device_test_init(void)
> > +{
> > + int ret;
> > +
> > + ret = alloc_chrdev_region(&kvm_mock_dev.devt, 0, 1, "KVM-MOCK-DEVICE");
>
> How about misc_register() ? Like how KVM create /dev/kvm.
> I think that will be more simpler.
Ah, right. Will try to use it in next version.

Thanks!

> > + if (ret)
> > + goto out;
> > +
> > + cdev_init(&kvm_mock_dev.cdev, &kvm_mock_device_fops);
> > + kvm_mock_dev.cdev.owner = THIS_MODULE;
> > + device_initialize(&kvm_mock_dev.device);
> > + kvm_mock_dev.device.devt = MKDEV(MAJOR(kvm_mock_dev.devt), 0);
> > + ret = dev_set_name(&kvm_mock_dev.device, "kvm_mock_device");
> > + if (ret)
> > + goto out;
> > +
> > + ret = cdev_device_add(&kvm_mock_dev.cdev, &kvm_mock_dev.device);
> > + if (ret)
> > + goto out;
> > +
> > +out:
> > + return ret;
> > +}
> > +
> > +static void __exit kvm_mock_device_test_exit(void)
> > +{
> > + cdev_device_del(&kvm_mock_dev.cdev, &kvm_mock_dev.device);
> > + unregister_chrdev_region(kvm_mock_dev.devt, 1);
> > +}
> > +
> > +module_init(kvm_mock_device_test_init);
> > +module_exit(kvm_mock_device_test_exit);
> > +MODULE_LICENSE("GPL");
> > diff --git a/lib/test_kvm_mock_device_uapi.h b/lib/test_kvm_mock_device_uapi.h
> > new file mode 100644
> > index 000000000000..227d0bf1d430
> > --- /dev/null
> > +++ b/lib/test_kvm_mock_device_uapi.h
> > @@ -0,0 +1,16 @@
> > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > +/*
> > + * This is a module to help test KVM guest access of KVM mock device's BAR,
> > + * whose backend is mapped to pages.
> > + */
> > +#ifndef _LIB_TEST_KVM_MOCK_DEVICE_UAPI_H
> > +#define _LIB_TEST_KVM_MOCK_DEVICE_UAPI_H
> > +
> > +#include <linux/types.h>
> > +#include <linux/ioctl.h>
> > +
> > +#define KVM_MOCK_DEVICE_GET_BAR_SIZE _IOR('M', 0x00, u64)
> > +#define KVM_MOCK_DEVICE_PREPARE_RESOURCE _IOWR('M', 0x01, u32)
> > +#define KVM_MOCK_DEVICE_CHECK_BACKEND_REF _IOWR('M', 0x02, u32)
> > +
> > +#endif /* _LIB_TEST_KVM_MOCK_DEVICE_UAPI_H */
> > --
> > 2.17.1
> >
> >