[PATCH] usb: core: hub: hub_port_init lock controller instead of bus

From: Chris Bainbridge
Date: Mon Feb 08 2016 - 08:50:27 EST


The XHCI controller presents two USB buses to the system - one for USB 2
and one for USB 3. When only one bus is locked there is a race condition
with two threads in hub_port_init:

[ 8.984500] Call Trace:
[ 8.985698] [<ffffffff81b9bab7>] schedule+0x37/0x90
[ 8.986918] [<ffffffff817da7cd>] usb_kill_urb+0x8d/0xd0
[ 8.988130] [<ffffffff8111e5e0>] ? wake_up_atomic_t+0x30/0x30
[ 8.989343] [<ffffffff817dafbe>] usb_start_wait_urb+0xbe/0x150
[ 8.990561] [<ffffffff817db10c>] usb_control_msg+0xbc/0xf0
[ 8.991766] [<ffffffff817d07de>] hub_port_init+0x51e/0xb70
[ 8.992964] [<ffffffff817d4697>] hub_event+0x817/0x1570
[ 8.994156] [<ffffffff810f3e6f>] process_one_work+0x1ff/0x620
[ 8.995342] [<ffffffff810f3dcf>] ? process_one_work+0x15f/0x620
[ 8.996528] [<ffffffff810f4684>] worker_thread+0x64/0x4b0
[ 8.997707] [<ffffffff810f4620>] ? rescuer_thread+0x390/0x390
[ 8.998883] [<ffffffff810fa7f5>] kthread+0x105/0x120
[ 9.000056] [<ffffffff810fa6f0>] ? kthread_create_on_node+0x200/0x200
[ 9.001241] [<ffffffff81ba183f>] ret_from_fork+0x3f/0x70
[ 9.002420] [<ffffffff810fa6f0>] ? kthread_create_on_node+0x200/0x200

[ 9.870837] Call Trace:
[ 9.875664] [<ffffffff817fd36d>] xhci_setup_device+0x53d/0xa40
[ 9.876871] [<ffffffff817fd87e>] xhci_address_device+0xe/0x10
[ 9.878068] [<ffffffff817d047f>] hub_port_init+0x1bf/0xb70
[ 9.879262] [<ffffffff811247ed>] ? trace_hardirqs_on+0xd/0x10
[ 9.880465] [<ffffffff817d4697>] hub_event+0x817/0x1570
[ 9.881668] [<ffffffff810f3e6f>] process_one_work+0x1ff/0x620
[ 9.882869] [<ffffffff810f3dcf>] ? process_one_work+0x15f/0x620
[ 9.884074] [<ffffffff810f4684>] worker_thread+0x64/0x4b0
[ 9.885268] [<ffffffff810f4620>] ? rescuer_thread+0x390/0x390
[ 9.886457] [<ffffffff810fa7f5>] kthread+0x105/0x120
[ 9.887634] [<ffffffff810fa6f0>] ? kthread_create_on_node+0x200/0x200
[ 9.888817] [<ffffffff81ba183f>] ret_from_fork+0x3f/0x70
[ 9.889995] [<ffffffff810fa6f0>] ? kthread_create_on_node+0x200/0x200

Which results from the two call chains:

hub_port_init
usb_get_device_descriptor
usb_get_descriptor
usb_control_msg
usb_internal_control_msg
usb_start_wait_urb
usb_submit_urb / wait_for_completion_timeout / usb_kill_urb

hub_port_init
hub_set_address
xhci_address_device
xhci_setup_device

The logged kernel errors are:

[ 8.034843] xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command
[ 13.183701] usb 3-3: device descriptor read/all, error -110

On a test system this failure occurred on 6% of all boots.

Hypothetically, it should perhaps be possible to set the address of the
hub on one bus while the hub on the other bus already has an outstanding
descriptor read request. But as this is not working reliably, fix it by
locking the controller in hub_port_init to prevent parallel
initialisation of both buses.

Fixes: 638139eb95d2 ("usb: hub: allow to process more usb hub events in parallel")
Signed-off-by: Chris Bainbridge <chris.bainbridge@xxxxxxxxx>
---
drivers/usb/core/hcd.c | 15 +++++++++++++--
drivers/usb/core/hub.c | 8 ++++----
include/linux/usb.h | 3 +--
include/linux/usb/hcd.h | 1 +
4 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index df0e3b92533a..5824e819176a 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -966,7 +966,7 @@ static void usb_bus_init (struct usb_bus *bus)
bus->bandwidth_allocated = 0;
bus->bandwidth_int_reqs = 0;
bus->bandwidth_isoc_reqs = 0;
- mutex_init(&bus->usb_address0_mutex);
+ mutex_init(&bus->devnum_next_mutex);

INIT_LIST_HEAD (&bus->bus_list);
}
@@ -2497,6 +2497,14 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver,
return NULL;
}
if (primary_hcd == NULL) {
+ hcd->address0_mutex = kmalloc(sizeof(*hcd->address0_mutex),
+ GFP_KERNEL);
+ if (!hcd->address0_mutex) {
+ kfree(hcd);
+ dev_dbg(dev, "hcd address0 mutex alloc failed\n");
+ return NULL;
+ }
+ mutex_init(hcd->address0_mutex);
hcd->bandwidth_mutex = kmalloc(sizeof(*hcd->bandwidth_mutex),
GFP_KERNEL);
if (!hcd->bandwidth_mutex) {
@@ -2508,6 +2516,7 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver,
dev_set_drvdata(dev, hcd);
} else {
mutex_lock(&usb_port_peer_mutex);
+ hcd->address0_mutex = primary_hcd->address0_mutex;
hcd->bandwidth_mutex = primary_hcd->bandwidth_mutex;
hcd->primary_hcd = primary_hcd;
primary_hcd->primary_hcd = primary_hcd;
@@ -2574,8 +2583,10 @@ static void hcd_release(struct kref *kref)
struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref);

mutex_lock(&usb_port_peer_mutex);
- if (usb_hcd_is_primary_hcd(hcd))
+ if (usb_hcd_is_primary_hcd(hcd)) {
+ kfree(hcd->address0_mutex);
kfree(hcd->bandwidth_mutex);
+ }
if (hcd->shared_hcd) {
struct usb_hcd *peer = hcd->shared_hcd;

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 350dcd9af5d8..72ee2338bde0 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2066,7 +2066,7 @@ static void choose_devnum(struct usb_device *udev)
struct usb_bus *bus = udev->bus;

/* be safe when more hub events are proceed in parallel */
- mutex_lock(&bus->usb_address0_mutex);
+ mutex_lock(&bus->devnum_next_mutex);
if (udev->wusb) {
devnum = udev->portnum + 1;
BUG_ON(test_bit(devnum, bus->devmap.devicemap));
@@ -2084,7 +2084,7 @@ static void choose_devnum(struct usb_device *udev)
set_bit(devnum, bus->devmap.devicemap);
udev->devnum = devnum;
}
- mutex_unlock(&bus->usb_address0_mutex);
+ mutex_unlock(&bus->devnum_next_mutex);
}

static void release_devnum(struct usb_device *udev)
@@ -4312,7 +4312,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
if (oldspeed == USB_SPEED_LOW)
delay = HUB_LONG_RESET_TIME;

- mutex_lock(&hdev->bus->usb_address0_mutex);
+ mutex_lock(hcd->address0_mutex);

/* Reset the device; full speed may morph to high speed */
/* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */
@@ -4588,7 +4588,7 @@ fail:
hub_port_disable(hub, port1, 0);
update_devnum(udev, devnum); /* for disconnect processing */
}
- mutex_unlock(&hdev->bus->usb_address0_mutex);
+ mutex_unlock(hcd->address0_mutex);
return retval;
}

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 89533ba38691..6b736c82b9d1 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -371,14 +371,13 @@ struct usb_bus {

int devnum_next; /* Next open device number in
* round-robin allocation */
+ struct mutex devnum_next_mutex; /* devnum_next mutex */

struct usb_devmap devmap; /* device address allocation map */
struct usb_device *root_hub; /* Root hub */
struct usb_bus *hs_companion; /* Companion EHCI bus, if any */
struct list_head bus_list; /* list of busses */

- struct mutex usb_address0_mutex; /* unaddressed device mutex */
-
int bandwidth_allocated; /* on this bus: how much of the time
* reserved for periodic (intr/iso)
* requests is used, on average?
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 4dcf8446dbcd..66c4303659d7 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -180,6 +180,7 @@ struct usb_hcd {
* bandwidth_mutex should be dropped after a successful control message
* to the device, or resetting the bandwidth after a failed attempt.
*/
+ struct mutex *address0_mutex;
struct mutex *bandwidth_mutex;
struct usb_hcd *shared_hcd;
struct usb_hcd *primary_hcd;
--
2.1.4