[PATCH v2 1/1] usb: xhci: handle endpoint error caused by TRB error

From: Lu Baolu
Date: Thu Nov 10 2016 - 21:01:42 EST


When an xHCI host sees a malformed TRB in a transfer ring,
it generates a transfer event with the completion code
set to COMP_TRB_ERR (5) and sets the endpoint state in
the output endpoint context to EP_STATE_ERROR. The
endpoint enters the Error state as a result.

The xHCI specification requires that a Set TR Dequeue
Pointer Command be used to transition the endpoint from
the Error state to the Stopped state. The current xHCI
driver doesn't clear this endpoint error, so successive
URB enqueue requests result in "WARN waiting for error on
ep to be cleared" error messages, and the corresponding
USB device stays unresponsive.

This patch improves the xHCI driver's handling of this case by
printing out the malformed TRB and clearing the endpoint Error state.

Tested-by: Wang Wendy <wendy.wang@xxxxxxxxx>
Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
---
Change log:
v1->v2:
- Correct the email of Tested-by.

drivers/usb/host/xhci-ring.c | 35 +++++++++++++++++++++++++++++++++++
drivers/usb/host/xhci.c | 22 ++++++++++++++++++++++
drivers/usb/host/xhci.h | 2 ++
3 files changed, 59 insertions(+)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 822f88a..f81c1be 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1773,6 +1773,20 @@ static void xhci_cleanup_halted_endpoint(struct xhci_hcd *xhci,
xhci_ring_cmd_db(xhci);
}

+/*
+ * Clean up an endpoint after a TRB error was reported for @td.
+ *
+ * @stream_id is published through the endpoint's stopped_stream field only
+ * for the duration of the xhci_cleanup_trb_error_ring() call and is reset
+ * to 0 afterwards. @event_trb is accepted but not used here.
+ */
+static void xhci_cleanup_trb_error_endpoint(struct xhci_hcd *xhci,
+		unsigned int slot_id, unsigned int ep_index,
+		unsigned int stream_id,
+		struct xhci_td *td, union xhci_trb *event_trb)
+{
+	struct xhci_virt_ep *virt_ep;
+
+	virt_ep = &xhci->devs[slot_id]->eps[ep_index];
+
+	/* The ring cleanup picks the stream up from the endpoint itself. */
+	virt_ep->stopped_stream = stream_id;
+	xhci_cleanup_trb_error_ring(xhci, ep_index, td);
+	virt_ep->stopped_stream = 0;
+
+	/* Called unconditionally, whether or not the cleanup queued anything. */
+	xhci_ring_cmd_db(xhci);
+}
+
+
/* Check if an error has halted the endpoint ring. The class driver will
* cleanup the halt for a non-default control endpoint if we indicate a stall.
* However, a babble and other errors also halt the endpoint ring, and the class
@@ -1860,6 +1874,13 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td,
*/
xhci_cleanup_halted_endpoint(xhci, slot_id, ep_index,
ep_ring->stream_id, td, ep_trb);
+ } else if (trb_comp_code == COMP_TRB_ERR) {
+ /* Clear the Endpoint error caused by a TRB error by issuing
+ * a set dequeue command to move the dequeue pointer past the
+ * last TD.
+ */
+ xhci_cleanup_trb_error_endpoint(xhci, slot_id, ep_index,
+ ep_ring->stream_id, td, ep_trb);
} else {
/* Update ring dequeue pointer */
while (ep_ring->dequeue != td->last_trb)
@@ -2474,6 +2495,20 @@ static int handle_tx_event(struct xhci_hcd *xhci,
goto cleanup;
}

+	/*
+	 * The event's completion code is TRB error: dump the DMA address and
+	 * the four 32-bit fields of the offending transfer TRB so that the
+	 * malformed TRB can be identified from the log.
+	 */
+	if (trb_comp_code == COMP_TRB_ERR) {
+		xhci_err(xhci, "Malformed transfer TRB detected:\n");
+		xhci_err(xhci, "@%016llx %08x %08x %08x %08x\n",
+			 (unsigned long long)ep_trb_dma,
+			 le32_to_cpu(ep_trb->generic.field[0]),
+			 le32_to_cpu(ep_trb->generic.field[1]),
+			 le32_to_cpu(ep_trb->generic.field[2]),
+			 le32_to_cpu(ep_trb->generic.field[3]));
+	}
+
+
/* update the urb's actual_length and give back to the core */
if (usb_endpoint_xfer_control(&td->urb->ep->desc))
process_ctrl_td(xhci, td, ep_trb, event, ep, &status);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index ba46c70..e10a490 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -2944,6 +2944,28 @@ void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
}
}

+/*
+ * Look up a new dequeue state for the endpoint ring that @td belongs to and,
+ * if one is found, queue it to the controller.
+ *
+ * @xhci:     host controller
+ * @ep_index: index into the device's eps[] array
+ * @td:       TD that triggered the cleanup; its URB identifies the device
+ *            and therefore the slot
+ *
+ * The stream is read from the endpoint's stopped_stream field, so the caller
+ * must set that before calling. If no new dequeue pointer/segment is
+ * reported, the function returns without queuing anything.
+ */
+void xhci_cleanup_trb_error_ring(struct xhci_hcd *xhci,
+		unsigned int ep_index, struct xhci_td *td)
+{
+	/*
+	 * Start from a zeroed state: the fields are tested below, so they
+	 * must hold defined values even if xhci_find_new_dequeue_state()
+	 * returns without filling them in.
+	 */
+	struct xhci_dequeue_state deq_state = {};
+	struct xhci_virt_ep *ep;
+	struct usb_device *udev = td->urb->dev;
+
+	xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep,
+			"Cleaning up trb error endpoint ring");
+	ep = &xhci->devs[udev->slot_id]->eps[ep_index];
+
+	xhci_find_new_dequeue_state(xhci, udev->slot_id,
+			ep_index, ep->stopped_stream, td, &deq_state);
+
+	/* No usable dequeue position was produced: leave the ring alone. */
+	if (!deq_state.new_deq_ptr || !deq_state.new_deq_seg)
+		return;
+
+	xhci_queue_new_dequeue_state(xhci, udev->slot_id,
+			ep_index, ep->stopped_stream,
+			&deq_state);
+}
+
+
/* Called when clearing halted device. The core should have sent the control
* message to clear the device halt condition. The host side of the halt should
* already be cleared with a reset endpoint command issued when the STALL tx
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 266e3a8..fb2b07f 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1935,6 +1935,8 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
struct xhci_dequeue_state *deq_state);
void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
unsigned int ep_index, struct xhci_td *td);
+void xhci_cleanup_trb_error_ring(struct xhci_hcd *xhci,
+ unsigned int ep_index, struct xhci_td *td);
void xhci_queue_config_ep_quirk(struct xhci_hcd *xhci,
unsigned int slot_id, unsigned int ep_index,
struct xhci_dequeue_state *deq_state);
--
2.1.4