Re: [Xen-devel] [RFC PATCH] Xen PCI back - do slot and bus reset (v0).

From: Sander Eikelenboom
Date: Mon Dec 16 2013 - 10:46:16 EST



Monday, December 16, 2013, 4:36:12 PM, you wrote:

> On Mon, Dec 16, 2013 at 04:23:53PM +0100, Sander Eikelenboom wrote:
>>
>> Monday, December 16, 2013, 3:35:15 PM, you wrote:
>>
>> > On Mon, Dec 16, 2013 at 10:59:01AM +0000, David Vrabel wrote:
>> >> On 13/12/13 16:09, Konrad Rzeszutek Wilk wrote:
>> >> > Hey,
>> >> >
>> >> > While I was trying to narrow down the state of GPU passthrough
>> >> > (still not finished) and figuring what needs to be done I realized
>> >> > that Xen PCIback did not reset my GPU properly (when I crashed the
>> >> > Windows guest by mistake). It does an FLR reset or Power one - if
>> >> > the device supports it. But it seems that some of these GPUs
>> >> > are liars and actually don't do the power part properly.
>> >>
>> >> In my experience the devices do not lie. They correctly report that
>> >> they do not perform a reset in D3hot.
>> >>
>> >> Here's the patch I'm using to solve this. It does something similar.
>> >> i.e., a SBR if all devices on that bus are safe to be reset.
>> >>
>> >> I prefer it because it provides the standard 'reset' sysfs file that the
>> >> toolstack/userspace can use.
>>
>> > We can still add the 'reset' to SysFS
>> >>
>> >> It does have some limitations: a) It does not check whether a device is
>> >> in use (only if it is bound to pciback); and b) it hand rolls
>> >> pci_slot_reset() (because it didn't exist at the time).
>>
>> > .. which can have those limiations removed and be based on this patchset.
>> > Meaning it won't do a bus-reset or device reset if the rest of the devices
>> > are _not_ assigned to pciback.
>>
>> Perhaps there is something to learn from the steps vfio-pci takes to do this ?
>> (they sorted out quite some stuff around pci/vga passtrough)

> That is actually what I based it on :-)

OK was already suspecting it somehow :-)

Reminds me to see if the Radeon maintainer knows a way to hookup a sysfs reset entry for
Radeon cards, that completly cycles the card (including potential bios voodoo). Since AMD is supporting the development
of the opensource driver now, there should be chance and it would be welcome for both Xen and KVM
since those cards don't support FLR.

>>
>> >>
>> >> diff --git a/drivers/xen/xen-pciback/pci_stub.c
>> >> b/drivers/xen/xen-pciback/pci_stub.c
>> >> index 4e8ba38..5a03e63 100644
>> >> --- a/drivers/xen/xen-pciback/pci_stub.c
>> >> +++ b/drivers/xen/xen-pciback/pci_stub.c
>> >> @@ -14,6 +14,7 @@
>> >> #include <linux/wait.h>
>> >> #include <linux/sched.h>
>> >> #include <linux/atomic.h>
>> >> +#include <linux/delay.h>
>> >> #include <xen/events.h>
>> >> #include <asm/xen/pci.h>
>> >> #include <asm/xen/hypervisor.h>
>> >> @@ -43,6 +44,7 @@ struct pcistub_device {
>> >> struct kref kref;
>> >> struct list_head dev_list;
>> >> spinlock_t lock;
>> >> + bool created_reset_file;
>> >>
>> >> struct pci_dev *dev;
>> >> struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
>> >> @@ -60,6 +62,114 @@ static LIST_HEAD(pcistub_devices);
>> >> static int initialize_devices;
>> >> static LIST_HEAD(seized_devices);
>> >>
>> >> +/*
>> >> + * pci_reset_function() will only work if there is a mechanism to
>> >> + * reset that single function (e.g., FLR or a D-state transition).
>> >> + * For PCI hardware that has two or more functions but no per-function
>> >> + * reset, we can do a bus reset iff all the functions are co-assigned
>> >> + * to the same domain.
>> >> + *
>> >> + * If a function has no per-function reset mechanism the 'reset' sysfs
>> >> + * file that the toolstack uses to reset a function prior to assigning
>> >> + * the device will be missing. In this case, pciback adds its own
>> >> + * which will try a bus reset.
>> >> + *
>> >> + * Note: pciback does not check for co-assigment before doing a bus
>> >> + * reset, only that the devices are bound to pciback. The toolstack
>> >> + * is assumed to have done the right thing.
>> >> + */
>> >> +static int __pcistub_reset_function(struct pci_dev *dev)
>> >> +{
>> >> + struct pci_dev *pdev;
>> >> + u16 ctrl;
>> >> + int ret;
>> >> +
>> >> + ret = __pci_reset_function_locked(dev);
>> >> + if (ret == 0)
>> >> + return 0;
>> >> +
>> >> + if (pci_is_root_bus(dev->bus) || dev->subordinate || !dev->bus->self)
>> >> + return -ENOTTY;
>> >> +
>> >> + list_for_each_entry(pdev, &dev->bus->devices, bus_list) {
>> >> + if (pdev != dev && (!pdev->driver
>> >> + || strcmp(pdev->driver->name, "pciback")))
>> >> + return -ENOTTY;
>> >> + pci_save_state(pdev);
>> >> + }
>> >> +
>> >> + pci_read_config_word(dev->bus->self, PCI_BRIDGE_CONTROL, &ctrl);
>> >> + ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
>> >> + pci_write_config_word(dev->bus->self, PCI_BRIDGE_CONTROL, ctrl);
>> >> + msleep(200);
>> >> +
>> >> + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
>> >> + pci_write_config_word(dev->bus->self, PCI_BRIDGE_CONTROL, ctrl);
>> >> + msleep(200);
>> >> +
>> >> + list_for_each_entry(pdev, &dev->bus->devices, bus_list)
>> >> + pci_restore_state(pdev);
>> >> +
>> >> + return 0;
>> >> +}
>> >> +
>> >> +static int pcistub_reset_function(struct pci_dev *dev)
>> >> +{
>> >> + int ret;
>> >> +
>> >> + device_lock(&dev->dev);
>> >> + ret = __pcistub_reset_function(dev);
>> >> + device_unlock(&dev->dev);
>> >> +
>> >> + return ret;
>> >> +}
>> >> +
>> >> +static ssize_t pcistub_reset_store(struct device *dev,
>> >> + struct device_attribute *attr,
>> >> + const char *buf, size_t count)
>> >> +{
>> >> + struct pci_dev *pdev = to_pci_dev(dev);
>> >> + unsigned long val;
>> >> + ssize_t result = strict_strtoul(buf, 0, &val);
>> >> +
>> >> + if (result < 0)
>> >> + return result;
>> >> +
>> >> + if (val != 1)
>> >> + return -EINVAL;
>> >> +
>> >> + result = pcistub_reset_function(pdev);
>> >> + if (result < 0)
>> >> + return result;
>> >> + return count;
>> >> +}
>> >> +static DEVICE_ATTR(reset, 0200, NULL, pcistub_reset_store);
>> >> +
>> >> +static int pcistub_try_create_reset_file(struct pcistub_device *psdev)
>> >> +{
>> >> + struct device *dev = &psdev->dev->dev;
>> >> + struct sysfs_dirent *reset_dirent;
>> >> + int ret;
>> >> +
>> >> + reset_dirent = sysfs_get_dirent(dev->kobj.sd, NULL, "reset");
>> >> + if (reset_dirent) {
>> >> + sysfs_put(reset_dirent);
>> >> + return 0;
>> >> + }
>> >> +
>> >> + ret = device_create_file(dev, &dev_attr_reset);
>> >> + if (ret < 0)
>> >> + return ret;
>> >> + psdev->created_reset_file = true;
>> >> + return 0;
>> >> +}
>> >> +
>> >> +static void pcistub_remove_reset_file(struct pcistub_device *psdev)
>> >> +{
>> >> + if (psdev && psdev->created_reset_file)
>> >> + device_remove_file(&psdev->dev->dev, &dev_attr_reset);
>> >> +}
>> >> +
>> >> static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
>> >> {
>> >> struct pcistub_device *psdev;
>> >> @@ -95,12 +205,15 @@ static void pcistub_device_release(struct kref *kref)
>> >>
>> >> dev_dbg(&dev->dev, "pcistub_device_release\n");
>> >>
>> >> + pcistub_remove_reset_file(psdev);
>> >> +
>> >> xen_unregister_device_domain_owner(dev);
>> >>
>> >> /* Call the reset function which does not take lock as this
>> >> * is called from "unbind" which takes a device_lock mutex.
>> >> */
>> >> - __pci_reset_function_locked(dev);
>> >> + __pcistub_reset_function(psdev->dev);
>> >> +
>> >> if (pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state))
>> >> dev_dbg(&dev->dev, "Could not reload PCI state\n");
>> >> else
>> >> @@ -268,7 +381,7 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
>> >> /* This is OK - we are running from workqueue context
>> >> * and want to inhibit the user from fiddling with 'reset'
>> >> */
>> >> - pci_reset_function(dev);
>> >> + pcistub_reset_function(psdev->dev);
>> >> pci_restore_state(psdev->dev);
>> >>
>> >> /* This disables the device. */
>> >> @@ -392,7 +505,7 @@ static int pcistub_init_device(struct pci_dev *dev)
>> >> dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
>> >> else {
>> >> dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n");
>> >> - __pci_reset_function_locked(dev);
>> >> + __pcistub_reset_function(dev);
>> >> pci_restore_state(dev);
>> >> }
>> >> /* Now disable the device (this also ensures some private device
>> >> @@ -467,6 +580,10 @@ static int pcistub_seize(struct pci_dev *dev)
>> >> if (!psdev)
>> >> return -ENOMEM;
>> >>
>> >> + err = pcistub_try_create_reset_file(psdev);
>> >> + if (err < 0)
>> >> + goto out;
>> >> +
>> >> spin_lock_irqsave(&pcistub_devices_lock, flags);
>> >>
>> >> if (initialize_devices) {
>> >> @@ -485,10 +602,9 @@ static int pcistub_seize(struct pci_dev *dev)
>> >> }
>> >>
>> >> spin_unlock_irqrestore(&pcistub_devices_lock, flags);
>> >> -
>> >> +out:
>> >> if (err)
>> >> pcistub_device_put(psdev);
>> >> -
>> >> return err;
>> >> }
>> >>
>>
>>
>>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/