[PATCH] xhci: Cope with VIA VL805 readahead

From: Robin Murphy
Date: Tue Oct 10 2017 - 14:10:08 EST


The VIA VL805 host controller is well-known for causing problems on
systems with IOMMUs enabled, ranging from triggering endless streams of
fault messages to locking itself up completely. It appears that the root
of the problem might be an over-aggressive prefetching of TRBs, wherein
consuming commands near the end of a queue segment causes it to read off
the end of the segment, even across a page boundary. This blows up when
DMA mapping ops are backed by an IOMMU, since there is no guarantee that
addresses outside the allocated segment are accessible at all.

Some trial-and-error investigation reveals that we can avoid such
cross-page reads by not using the last few TRBs in a segment; to that
end, factor out the implicit index of the end-of-segemnt link TRB, and
implement a quirk to move it slightly further forward when necessary.

Signed-off-by: Robin Murphy <robin.murphy@xxxxxxx>
---
drivers/usb/host/xhci-mem.c | 32 +++++++++++++++++++-------------
drivers/usb/host/xhci-pci.c | 5 +++++
drivers/usb/host/xhci-ring.c | 10 +++++++++-
drivers/usb/host/xhci.c | 10 +++++-----
drivers/usb/host/xhci.h | 2 ++
5 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 2a82c927ded2..bb62f100d028 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -23,6 +23,7 @@
#include <linux/usb.h>
#include <linux/pci.h>
#include <linux/slab.h>
+#include <linux/kernel.h>
#include <linux/dmapool.h>
#include <linux/dma-mapping.h>

@@ -108,17 +109,18 @@ static void xhci_free_segments_for_ring(struct xhci_hcd *xhci,
static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
struct xhci_segment *next, enum xhci_ring_type type)
{
+ unsigned int link_idx;
u32 val;

if (!prev || !next)
return;
prev->next = next;
if (type != TYPE_EVENT) {
- prev->trbs[TRBS_PER_SEGMENT-1].link.segment_ptr =
- cpu_to_le64(next->dma);
+ link_idx = xhci_segment_link_idx(xhci);
+ prev->trbs[link_idx].link.segment_ptr = cpu_to_le64(next->dma);

/* Set the last TRB in the segment to have a TRB type ID of Link TRB */
- val = le32_to_cpu(prev->trbs[TRBS_PER_SEGMENT-1].link.control);
+ val = le32_to_cpu(prev->trbs[link_idx].link.control);
val &= ~TRB_TYPE_BITMASK;
val |= TRB_TYPE(TRB_LINK);
/* Always set the chain bit with 0.95 hardware */
@@ -127,7 +129,7 @@ static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
(type == TYPE_ISOC &&
(xhci->quirks & XHCI_AMD_0x96_HOST)))
val |= TRB_CHAIN;
- prev->trbs[TRBS_PER_SEGMENT-1].link.control = cpu_to_le32(val);
+ prev->trbs[link_idx].link.control = cpu_to_le32(val);
}
}

@@ -140,20 +142,22 @@ static void xhci_link_rings(struct xhci_hcd *xhci, struct xhci_ring *ring,
unsigned int num_segs)
{
struct xhci_segment *next;
+ unsigned int link_idx;

if (!ring || !first || !last)
return;

next = ring->enq_seg->next;
+ link_idx = xhci_segment_link_idx(xhci);
xhci_link_segments(xhci, ring->enq_seg, first, ring->type);
xhci_link_segments(xhci, last, next, ring->type);
ring->num_segs += num_segs;
- ring->num_trbs_free += (TRBS_PER_SEGMENT - 1) * num_segs;
+ ring->num_trbs_free += link_idx * num_segs;

if (ring->type != TYPE_EVENT && ring->enq_seg == ring->last_seg) {
- ring->last_seg->trbs[TRBS_PER_SEGMENT-1].link.control
+ ring->last_seg->trbs[link_idx].link.control
&= ~cpu_to_le32(LINK_TOGGLE);
- last->trbs[TRBS_PER_SEGMENT-1].link.control
+ last->trbs[link_idx].link.control
|= cpu_to_le32(LINK_TOGGLE);
ring->last_seg = last;
}
@@ -300,7 +304,8 @@ void xhci_ring_free(struct xhci_hcd *xhci, struct xhci_ring *ring)
}

static void xhci_initialize_ring_info(struct xhci_ring *ring,
- unsigned int cycle_state)
+ unsigned int cycle_state,
+ unsigned int link_idx)
{
/* The ring is empty, so the enqueue pointer == dequeue pointer */
ring->enqueue = ring->first_seg->trbs;
@@ -320,7 +325,7 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring,
* Each segment has a link TRB, and leave an extra TRB for SW
* accounting purpose
*/
- ring->num_trbs_free = ring->num_segs * (TRBS_PER_SEGMENT - 1) - 1;
+ ring->num_trbs_free = ring->num_segs * link_idx - 1;
}

/* Allocate segments and link them for a ring */
@@ -373,6 +378,7 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
enum xhci_ring_type type, unsigned int max_packet, gfp_t flags)
{
struct xhci_ring *ring;
+ unsigned int link_idx;
int ret;

ring = kzalloc(sizeof *(ring), flags);
@@ -393,12 +399,13 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
goto fail;

/* Only event ring does not use link TRB */
+ link_idx = xhci_segment_link_idx(xhci);
if (type != TYPE_EVENT) {
/* See section 4.9.2.1 and 6.4.4.1 */
- ring->last_seg->trbs[TRBS_PER_SEGMENT - 1].link.control |=
+ ring->last_seg->trbs[link_idx].link.control |=
cpu_to_le32(LINK_TOGGLE);
}
- xhci_initialize_ring_info(ring, cycle_state);
+ xhci_initialize_ring_info(ring, cycle_state, link_idx);
trace_xhci_ring_alloc(ring);
return ring;

@@ -428,8 +435,7 @@ int xhci_ring_expansion(struct xhci_hcd *xhci, struct xhci_ring *ring,
unsigned int num_segs_needed;
int ret;

- num_segs_needed = (num_trbs + (TRBS_PER_SEGMENT - 1) - 1) /
- (TRBS_PER_SEGMENT - 1);
+ num_segs_needed = DIV_ROUND_UP(num_trbs, xhci_segment_link_idx(xhci));

/* Allocate number of segments we needed, or double the ring size */
num_segs = ring->num_segs > num_segs_needed ?
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 8071c8fdd15e..458404a22cf1 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -212,6 +212,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
pdev->device == 0x3432)
xhci->quirks |= XHCI_BROKEN_STREAMS;

+ /* VIA VL805 reads past the end of queue segments */
+ if (pdev->vendor == PCI_VENDOR_ID_VIA &&
+ pdev->device == 0x3483)
+ xhci->quirks |= XHCI_READAHEAD_QUIRK;
+
if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
pdev->device == 0x1042)
xhci->quirks |= XHCI_BROKEN_STREAMS;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index a9443651ce0f..42aee439ed8c 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -89,6 +89,14 @@ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg,
return seg->dma + (segment_offset * sizeof(*trb));
}

+unsigned int xhci_segment_link_idx(struct xhci_hcd *xhci)
+{
+ if (xhci->quirks & XHCI_READAHEAD_QUIRK)
+ return TRBS_PER_SEGMENT - 4;
+
+ return TRBS_PER_SEGMENT - 1;
+}
+
static bool trb_is_noop(union xhci_trb *trb)
{
return TRB_TYPE_NOOP_LE32(trb->generic.field[3]);
@@ -1780,7 +1788,7 @@ struct xhci_segment *trb_in_td(struct xhci_hcd *xhci,
return NULL;
/* We may get an event for a Link TRB in the middle of a TD */
end_seg_dma = xhci_trb_virt_to_dma(cur_seg,
- &cur_seg->trbs[TRBS_PER_SEGMENT - 1]);
+ &cur_seg->trbs[xhci_segment_link_idx(xhci)]);
/* If the end TRB isn't in this segment, this is set to 0 */
end_trb_dma = xhci_trb_virt_to_dma(cur_seg, end_trb);

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index b2ff1ff1a02f..74b4500641c2 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -788,14 +788,14 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci)
{
struct xhci_ring *ring;
struct xhci_segment *seg;
+ unsigned int link_idx;

ring = xhci->cmd_ring;
seg = ring->deq_seg;
+ link_idx = xhci_segment_link_idx(xhci);
do {
- memset(seg->trbs, 0,
- sizeof(union xhci_trb) * (TRBS_PER_SEGMENT - 1));
- seg->trbs[TRBS_PER_SEGMENT - 1].link.control &=
- cpu_to_le32(~TRB_CYCLE);
+ memset(seg->trbs, 0, sizeof(*seg->trbs) * link_idx);
+ seg->trbs[link_idx].link.control &= cpu_to_le32(~TRB_CYCLE);
seg = seg->next;
} while (seg != ring->deq_seg);

@@ -805,7 +805,7 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci)
ring->enq_seg = ring->deq_seg;
ring->enqueue = ring->dequeue;

- ring->num_trbs_free = ring->num_segs * (TRBS_PER_SEGMENT - 1) - 1;
+ ring->num_trbs_free = ring->num_segs * link_idx - 1;
/*
* Ring is now zeroed, so the HW should look for change of ownership
* when the cycle bit is set to 1.
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 2abaa4d6d39d..7ef69ea0b480 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1828,6 +1828,7 @@ struct xhci_hcd {
#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26)
#define XHCI_U2_DISABLE_WAKE (1 << 27)
#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28)
+#define XHCI_READAHEAD_QUIRK (1 << 29)

unsigned int num_active_eps;
unsigned int limit_active_eps;
@@ -2031,6 +2032,7 @@ struct xhci_segment *trb_in_td(struct xhci_hcd *xhci,
union xhci_trb *end_trb, dma_addr_t suspect_dma, bool debug);
int xhci_is_vendor_info_code(struct xhci_hcd *xhci, unsigned int trb_comp_code);
void xhci_ring_cmd_db(struct xhci_hcd *xhci);
+unsigned int xhci_segment_link_idx(struct xhci_hcd *xhci);
int xhci_queue_slot_control(struct xhci_hcd *xhci, struct xhci_command *cmd,
u32 trb_type, u32 slot_id);
int xhci_queue_address_device(struct xhci_hcd *xhci, struct xhci_command *cmd,
--
2.13.4.dirty