Re: [PATCH v11 01/22] vfio: Mediated device Core driver

From: Kirti Wankhede
Date: Tue Nov 08 2016 - 16:11:27 EST




On 11/8/2016 2:55 PM, Dong Jia Shi wrote:
> * Kirti Wankhede <kwankhede@xxxxxxxxxx> [2016-11-05 02:40:35 +0530]:
>
> Hi Kirti,
>
> [...]
>> diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
>> index da6e2ce77495..23eced02aaf6 100644
>> --- a/drivers/vfio/Kconfig
>> +++ b/drivers/vfio/Kconfig
>> @@ -48,4 +48,5 @@ menuconfig VFIO_NOIOMMU
>>
>> source "drivers/vfio/pci/Kconfig"
>> source "drivers/vfio/platform/Kconfig"
>> +source "drivers/vfio/mdev/Kconfig"
>> source "virt/lib/Kconfig"
>> diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
>> index 7b8a31f63fea..4a23c13b6be4 100644
>> --- a/drivers/vfio/Makefile
>> +++ b/drivers/vfio/Makefile
>> @@ -7,3 +7,4 @@ obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
>> obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
>> obj-$(CONFIG_VFIO_PCI) += pci/
>> obj-$(CONFIG_VFIO_PLATFORM) += platform/
>> +obj-$(CONFIG_VFIO_MDEV) += mdev/
>> diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
>> new file mode 100644
>> index 000000000000..303c14ce2847
>> --- /dev/null
>> +++ b/drivers/vfio/mdev/Kconfig
>> @@ -0,0 +1,10 @@
>> +
>> +config VFIO_MDEV
>> + tristate "Mediated device driver framework"
>> + depends on VFIO
>> + default n
>> + help
>> + Provides a framework to virtualize devices.
>> + See Documentation/vfio-mdev/vfio-mediated-device.txt for more details.
> We don't have this doc at this point of time.
>

Yes, but I have this doc in this patch series.

>> +
>> + If you don't know what do here, say N.
>> diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile
>> new file mode 100644
>> index 000000000000..31bc04801d94
>> --- /dev/null
>> +++ b/drivers/vfio/mdev/Makefile
>> @@ -0,0 +1,4 @@
>> +
>> +mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o
>> +
>> +obj-$(CONFIG_VFIO_MDEV) += mdev.o
>> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
>> new file mode 100644
>> index 000000000000..54c59f325336
>> --- /dev/null
>> +++ b/drivers/vfio/mdev/mdev_core.c
> [...]
>
>> +
>> +/*
>> + * mdev_device_remove_ops gets called from sysfs's 'remove' and when parent
>> + * device is being unregistered from mdev device framework.
>> + * - 'force_remove' is set to 'false' when called from sysfs's 'remove' which
>> + * indicates that if the mdev device is active, used by VMM or userspace
>> + * application, vendor driver could return error then don't remove the device.
>> + * - 'force_remove' is set to 'true' when called from mdev_unregister_device()
>> + * which indicate that parent device is being removed from mdev device
>> + * framework so remove mdev device forcefully.
>> + */
>> +static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove)
> ?
> s/force_remove/force/
>
>> +{
>> + struct parent_device *parent = mdev->parent;
>> + int ret;
>> +
>> + /*
>> + * Vendor driver can return error if VMM or userspace application is
>> + * using this mdev device.
>> + */
>> + ret = parent->ops->remove(mdev);
>> + if (ret && !force_remove)
>> + return -EBUSY;
>> +
>> + sysfs_remove_groups(&mdev->dev.kobj, parent->ops->mdev_attr_groups);
>> + return 0;
>> +}
>> +
>> +static int mdev_device_remove_cb(struct device *dev, void *data)
>> +{
>> + if (!dev_is_mdev(dev))
>> + return 0;
>> +
>> + return mdev_device_remove(dev, data ? *(bool *)data : true);
>> +}
>> +
>> +/*
>> + * mdev_register_device : Register a device
>> + * @dev: device structure representing parent device.
>> + * @ops: Parent device operation structure to be registered.
>> + *
>> + * Add device to list of registered parent devices.
>> + * Returns a negative value on error, otherwise 0.
>> + */
>> +int mdev_register_device(struct device *dev, const struct parent_ops *ops)
>> +{
>> + int ret;
>> + struct parent_device *parent;
>> +
>> + /* check for mandatory ops */
>> + if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
>> + return -EINVAL;
>> +
>> + dev = get_device(dev);
>> + if (!dev)
>> + return -EINVAL;
>> +
>> + mutex_lock(&parent_list_lock);
>> +
>> + /* Check for duplicate */
>> + parent = __find_parent_device(dev);
>> + if (parent) {
>> + ret = -EEXIST;
>> + goto add_dev_err;
>> + }
>> +
>> + parent = kzalloc(sizeof(*parent), GFP_KERNEL);
>> + if (!parent) {
>> + ret = -ENOMEM;
>> + goto add_dev_err;
>> + }
>> +
>> + kref_init(&parent->ref);
>> + mutex_init(&parent->lock);
>> +
>> + parent->dev = dev;
>> + parent->ops = ops;
>> +
>> + if (!mdev_bus_compat_class) {
>> + mdev_bus_compat_class = class_compat_register("mdev_bus");
>> + if (!mdev_bus_compat_class) {
>> + ret = -ENOMEM;
>> + goto add_dev_err;
>> + }
>> + }
>> +
>> + ret = parent_create_sysfs_files(parent);
>> + if (ret)
>> + goto add_dev_err;
>> +
>> + ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
>> + if (ret)
>> + dev_warn(dev, "Failed to create compatibility class link\n");
>> +
>> + list_add(&parent->next, &parent_list);
>> + mutex_unlock(&parent_list_lock);
>> +
>> + dev_info(dev, "MDEV: Registered\n");
>> + return 0;
>> +
>> +add_dev_err:
>> + mutex_unlock(&parent_list_lock);
>> + if (parent)
>> + mdev_put_parent(parent);
> Why do this? I don't find the place that you call mdev_get_parent above.
>

kref_init(&parent->ref);
Above increments the ref_count, so mdev_put_parent() should be called if
anything fails.

>> + else
>> + put_device(dev);
> Shouldn't we always do this?
>

When mdev_put_parent() is called, its release function do this. So if
mdev_put_parent() is called, we don't need this.

>> + return ret;
>> +}
>> +EXPORT_SYMBOL(mdev_register_device);
>> +
>> +/*
>> + * mdev_unregister_device : Unregister a parent device
>> + * @dev: device structure representing parent device.
>> + *
>> + * Remove device from list of registered parent devices. Give a chance to free
>> + * existing mediated devices for given device.
>> + */
>> +
>> +void mdev_unregister_device(struct device *dev)
>> +{
>> + struct parent_device *parent;
>> + bool force_remove = true;
>> +
>> + mutex_lock(&parent_list_lock);
>> + parent = __find_parent_device(dev);
>> +
>> + if (!parent) {
>> + mutex_unlock(&parent_list_lock);
>> + return;
>> + }
>> + dev_info(dev, "MDEV: Unregistering\n");
>> +
>> + list_del(&parent->next);
>> + class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
>> +
>> + device_for_each_child(dev, (void *)&force_remove,
>> + mdev_device_remove_cb);
>> +
>> + parent_remove_sysfs_files(parent);
>> +
>> + mutex_unlock(&parent_list_lock);
>> + mdev_put_parent(parent);
> We also need to call put_device(dev) here since we have a get_device
> during registration, no?
> Or I must miss something...
>

As explained above.

>> +}
>> +EXPORT_SYMBOL(mdev_unregister_device);
>> +
> [...]
>
>> +static int __init mdev_init(void)
>> +{
>> + int ret;
>> +
>> + ret = mdev_bus_register();
>> + if (ret) {
>> + pr_err("Failed to register mdev bus\n");
>> + return ret;
>> + }
>> +
>> + /*
>> + * Attempt to load known vfio_mdev. This gives us a working environment
>> + * without the user needing to explicitly load vfio_mdev driver.
>> + */
>> + request_module_nowait("vfio_mdev");
> We don't have this module yet.
>

Yes, this module is added in 02/22 patch. I'll move only this call to
02/22 patch in my next update.

But ideally it should not fails anything if this vfio_mdev module is not
found.

>> +
>> + return ret;
>> +}
>> +
>> +static void __exit mdev_exit(void)
>> +{
>> + if (mdev_bus_compat_class)
>> + class_compat_unregister(mdev_bus_compat_class);
>> +
>> + mdev_bus_unregister();
>> +}
>> +
>> +module_init(mdev_init)
>> +module_exit(mdev_exit)
>> +
>> +MODULE_VERSION(DRIVER_VERSION);
>> +MODULE_LICENSE("GPL");
>> +MODULE_AUTHOR(DRIVER_AUTHOR);
>> +MODULE_DESCRIPTION(DRIVER_DESC);
>> diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
>> new file mode 100644
>> index 000000000000..0b3250044a80
>> --- /dev/null
>> +++ b/drivers/vfio/mdev/mdev_driver.c
>> @@ -0,0 +1,122 @@
>> +/*
>> + * MDEV driver
>> + *
>> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
>> + * Author: Neo Jia <cjia@xxxxxxxxxx>
>> + * Kirti Wankhede <kwankhede@xxxxxxxxxx>
> Don't know if you care much for this:
> There is a TAB before your name. :>
>

Oh ok, Thanks for pointing that, I'll remove TAB.
I'll also take care of all the nits you suggested below.

Thanks,
Kirti

>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/device.h>
>> +#include <linux/iommu.h>
>> +#include <linux/mdev.h>
>> +
>> +#include "mdev_private.h"
>> +
>> +static int mdev_attach_iommu(struct mdev_device *mdev)
>> +{
>> + int ret;
>> + struct iommu_group *group;
>> +
>> + group = iommu_group_alloc();
>> + if (IS_ERR(group))
>> + return PTR_ERR(group);
>> +
>> + ret = iommu_group_add_device(group, &mdev->dev);
>> + if (ret)
>> + goto attach_fail;
>> +
>> + dev_info(&mdev->dev, "MDEV: group_id = %d\n",
>> + iommu_group_id(group));
> nit: strange indentation.
> The above two lines could be safely merge into one line.
>
>> +attach_fail:
>> + iommu_group_put(group);
>> + return ret;
>> +}
>> +
>> +static void mdev_detach_iommu(struct mdev_device *mdev)
>> +{
>> + iommu_group_remove_device(&mdev->dev);
>> + dev_info(&mdev->dev, "MDEV: detaching iommu\n");
>> +}
>> +
>> +static int mdev_probe(struct device *dev)
>> +{
>> + struct mdev_driver *drv = to_mdev_driver(dev->driver);
>> + struct mdev_device *mdev = to_mdev_device(dev);
>> + int ret;
>> +
>> + ret = mdev_attach_iommu(mdev);
>> + if (ret)
>> + return ret;
>> +
>> + if (drv && drv->probe)
>> + ret = drv->probe(dev);
>> +
>> + if (ret)
>> + mdev_detach_iommu(mdev);
> ?
> if (drv && drv->probe) {
> ret = drv->probe(dev);
> if (ret)
> mdev_detach_iommu(mdev);
> }
>
>> +
>> + return ret;
>> +}
>> +
>> +static int mdev_remove(struct device *dev)
>> +{
>> + struct mdev_driver *drv = to_mdev_driver(dev->driver);
>> + struct mdev_device *mdev = to_mdev_device(dev);
>> +
>> + if (drv && drv->remove)
>> + drv->remove(dev);
>> +
>> + mdev_detach_iommu(mdev);
>> +
>> + return 0;
>> +}
>> +
>> +struct bus_type mdev_bus_type = {
>> + .name = "mdev",
>> + .probe = mdev_probe,
>> + .remove = mdev_remove,
>> +};
>> +EXPORT_SYMBOL_GPL(mdev_bus_type);
>> +
>> +/*
> Is this a kernel-doc comment?
> It should be started with:
> /**
>
>> + * mdev_register_driver - register a new MDEV driver
>> + * @drv: the driver to register
>> + * @owner: module owner of driver to be registered
>> + *
>> + * Returns a negative value on error, otherwise 0.
>> + */
>> +int mdev_register_driver(struct mdev_driver *drv, struct module *owner)
>> +{
>> + /* initialize common driver fields */
>> + drv->driver.name = drv->name;
>> + drv->driver.bus = &mdev_bus_type;
>> + drv->driver.owner = owner;
>> +
>> + /* register with core */
>> + return driver_register(&drv->driver);
>> +}
>> +EXPORT_SYMBOL(mdev_register_driver);
>> +
>> +/*
>> + * mdev_unregister_driver - unregister MDEV driver
>> + * @drv: the driver to unregister
>> + *
> Empty line.
>
>> + */
>> +void mdev_unregister_driver(struct mdev_driver *drv)
>> +{
>> + driver_unregister(&drv->driver);
>> +}
>> +EXPORT_SYMBOL(mdev_unregister_driver);
>> +
>> +int mdev_bus_register(void)
>> +{
>> + return bus_register(&mdev_bus_type);
>> +}
>> +
>> +void mdev_bus_unregister(void)
>> +{
>> + bus_unregister(&mdev_bus_type);
>> +}
> [...]
>
>> diff --git a/include/linux/mdev.h b/include/linux/mdev.h
>> +/* Mediated device */
>> +struct mdev_device {
>> + struct device dev;
>> + struct parent_device *parent;
>> + uuid_le uuid;
>> + void *driver_data;
>> +
>> + /* internal */
>> + struct kref ref;
>> + struct list_head next;
>> + struct kobject *type_kobj;
>> +};
>> +
>> +
> Empty line.
>
>> +/**
>> + * struct parent_ops - Structure to be registered for each parent device to
>> + * register the device to mdev module.
>> + *
> [...]
>
>> + * @mmap: mmap callback
> No need a piece of description for arguments of the mmap callback?
>
>> + * Parent device that support mediated device should be registered with mdev
>> + * module with parent_ops structure.
>> + **/
>> +
>> +struct parent_ops {
>> + struct module *owner;
>> + const struct attribute_group **dev_attr_groups;
>> + const struct attribute_group **mdev_attr_groups;
>> + struct attribute_group **supported_type_groups;
>> +
>> + int (*create)(struct kobject *kobj, struct mdev_device *mdev);
>> + int (*remove)(struct mdev_device *mdev);
>> + int (*open)(struct mdev_device *mdev);
>> + void (*release)(struct mdev_device *mdev);
>> + ssize_t (*read)(struct mdev_device *mdev, char __user *buf,
>> + size_t count, loff_t *ppos);
>> + ssize_t (*write)(struct mdev_device *mdev, const char __user *buf,
>> + size_t count, loff_t *ppos);
>> + ssize_t (*ioctl)(struct mdev_device *mdev, unsigned int cmd,
>> + unsigned long arg);
>> + int (*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma);
>> +};
>> +
> [...]
>