Re: [RFC PATCH v2 0/3] prerequisites for device reserved local mem rework

From: Fredrik Noring
Date: Fri May 17 2019 - 13:44:13 EST


Hi Laurentiu,

> > that I tracked down to the calls
> >
> > hub_port_init
> > -> usb_control_msg
> > -> usb_internal_control_msg
> > -> usb_start_wait_urb
> > -> usb_submit_urb
> > -> usb_hcd_submit_urb
> > -> hcd->driver->urb_enqueue
> > -> ohci_urb_enqueue
> > -> ed_get
> > -> ed_alloc

I found that the generic OHCI driver takes a wrong turn here, in ed_alloc,
and eventually also in td_alloc. Fortunately, your patch can be easily
extended to fix them as well. Please see attached patch below.

With that, the OHCI seems to work as expected with HCD_LOCAL_MEM. :)

Would you like me to submit gen_pool_dma_zalloc as a separate patch?

> That looks indeed worrisome but I'd expect that it's not related to
> these genalloc changes that I'm working on. I'll try to look into it but
> as I'm not familiar with the area maybe others will comment.

It seems appropriate to verify that the IRQ will not end up sleeping
somewhere in the pool.

> > Also, DMA_BUFFER_SIZE in ohci-ps2.c is only 256 KiB in total. Is the new
> > pool implementation at least as efficient as the previous one?
>
> I don't see any obvious reasons for genalloc to be less efficient.

OK, good.

> One thing I noticed between genalloc and the original implementation is
> that previously the device memory was mapped with memremap() while with
> genalloc I'm doing a devm_ioremap(). I can't think of a relevant
> difference except that memremap() maps with WC buth maybe there are
> other arch specific subtleties. I've attached a new version of the
> ohci-ps2 patch replacing the devm_ioremap() with devm_memremap() to
> better match the original implementation. Please test if you find some time.

You're right, it makes no difference.

Fredrik

diff --git a/drivers/usb/host/ohci-mem.c b/drivers/usb/host/ohci-mem.c
--- a/drivers/usb/host/ohci-mem.c
+++ b/drivers/usb/host/ohci-mem.c
@@ -82,10 +82,14 @@ dma_to_td (struct ohci_hcd *hc, dma_addr_t td_dma)
static struct td *
td_alloc (struct ohci_hcd *hc, gfp_t mem_flags)
{
+ struct usb_hcd *hcd = ohci_to_hcd(hc);
dma_addr_t dma;
struct td *td;

- td = dma_pool_zalloc (hc->td_cache, mem_flags, &dma);
+ if (hcd->driver->flags & HCD_LOCAL_MEM)
+ td = gen_pool_dma_zalloc (hcd->localmem_pool, sizeof(*td), &dma);
+ else
+ td = dma_pool_zalloc (hc->td_cache, mem_flags, &dma);
if (td) {
/* in case hc fetches it, make it look dead */
td->hwNextTD = cpu_to_hc32 (hc, dma);
@@ -98,6 +102,7 @@ td_alloc (struct ohci_hcd *hc, gfp_t mem_flags)
static void
td_free (struct ohci_hcd *hc, struct td *td)
{
+ struct usb_hcd *hcd = ohci_to_hcd(hc);
struct td **prev = &hc->td_hash [TD_HASH_FUNC (td->td_dma)];

while (*prev && *prev != td)
@@ -106,7 +111,11 @@ td_free (struct ohci_hcd *hc, struct td *td)
*prev = td->td_hash;
else if ((td->hwINFO & cpu_to_hc32(hc, TD_DONE)) != 0)
ohci_dbg (hc, "no hash for td %p\n", td);
- dma_pool_free (hc->td_cache, td, td->td_dma);
+
+ if (hcd->driver->flags & HCD_LOCAL_MEM)
+ gen_pool_free (hcd->localmem_pool, (unsigned long)td, sizeof(*td));
+ else
+ dma_pool_free (hc->td_cache, td, td->td_dma);
}

/*-------------------------------------------------------------------------*/
@@ -115,10 +124,14 @@ td_free (struct ohci_hcd *hc, struct td *td)
static struct ed *
ed_alloc (struct ohci_hcd *hc, gfp_t mem_flags)
{
+ struct usb_hcd *hcd = ohci_to_hcd(hc);
dma_addr_t dma;
struct ed *ed;

- ed = dma_pool_zalloc (hc->ed_cache, mem_flags, &dma);
+ if (hcd->driver->flags & HCD_LOCAL_MEM)
+ ed = gen_pool_dma_zalloc (hcd->localmem_pool, sizeof(*ed), &dma);
+ else
+ ed = dma_pool_zalloc (hc->ed_cache, mem_flags, &dma);
if (ed) {
INIT_LIST_HEAD (&ed->td_list);
ed->dma = dma;
@@ -129,6 +142,11 @@ ed_alloc (struct ohci_hcd *hc, gfp_t mem_flags)
static void
ed_free (struct ohci_hcd *hc, struct ed *ed)
{
- dma_pool_free (hc->ed_cache, ed, ed->dma);
+ struct usb_hcd *hcd = ohci_to_hcd(hc);
+
+ if (hcd->driver->flags & HCD_LOCAL_MEM)
+ gen_pool_free (hcd->localmem_pool, (unsigned long)ed, sizeof(*ed));
+ else
+ dma_pool_free (hc->ed_cache, ed, ed->dma);
}

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -121,6 +121,8 @@ extern unsigned long gen_pool_alloc_algo(struct gen_pool *, size_t,
genpool_algo_t algo, void *data);
extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size,
dma_addr_t *dma);
+extern void *gen_pool_dma_zalloc(struct gen_pool *pool, size_t size,
+ dma_addr_t *dma);
extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
extern void gen_pool_for_each_chunk(struct gen_pool *,
void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *);
diff --git a/lib/genalloc.c b/lib/genalloc.c
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -362,6 +362,17 @@ void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
}
EXPORT_SYMBOL(gen_pool_dma_alloc);

+void *gen_pool_dma_zalloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
+{
+ void *vaddr = gen_pool_dma_alloc(pool, size, dma);
+
+ if (vaddr)
+ memset(vaddr, 0, size);
+
+ return vaddr;
+}
+EXPORT_SYMBOL(gen_pool_dma_zalloc);
+
/**
* gen_pool_free - free allocated special memory back to the pool
* @pool: pool to free to