Re: usb: cdc-acm: BUG kmalloc-128 Poison overwritten

From: Oliver Neukum
Date: Thu Feb 25 2021 - 05:11:06 EST


Am Mittwoch, den 24.02.2021, 16:21 +0100 schrieb Bruno Thomsen:

Hi,

> No, this is not a regression from 5.10. It seems that many attempts to
> fix cdc-acm in the 5.x kernel series have failed to fix the root cause of
> these oops. I have not seen this on 4.14 and 4.19, but I have observed
> it on at least 5.3 and newer kernels in slight variations.
> I guess this is because cdc-acm is very common in the embedded
> ARM world and rarely used on servers or laptops. Combined with
> ARM devices still commonly use 4.x LTS kernels. Not sure if
> hardening options on the kernel has increased change of reproducing
> oops.

OK, so this is not an additional problem.
According to your logs, an URB that should have been killed wasn't.

> I am ready to test new patches and will continue to report oops

Could you test the attached patches?

Regards
Oliver

From 307097e80657ca44ac99da8efc8397070b1aff3f Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@xxxxxxxx>
Date: Thu, 18 Feb 2021 13:42:40 +0100
Subject: [PATCH 1/2] cdc-wdm: untangle a circular dependency between callback
and softint

We have a cycle of callbacks scheduling works which submit
URBs with thos callbacks. This needs to be blocked, stopped
and unblocked to untangle the circle..

Signed-off-by: Oliver Neukum <oneukum@xxxxxxxx>
---
drivers/usb/class/cdc-wdm.c | 30 ++++++++++++++++++++++--------
1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 508b1c3f8b73..d1e4a7379beb 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -321,12 +321,23 @@ static void wdm_int_callback(struct urb *urb)

}

-static void kill_urbs(struct wdm_device *desc)
+static void poison_urbs(struct wdm_device *desc)
{
/* the order here is essential */
- usb_kill_urb(desc->command);
- usb_kill_urb(desc->validity);
- usb_kill_urb(desc->response);
+ usb_poison_urb(desc->command);
+ usb_poison_urb(desc->validity);
+ usb_poison_urb(desc->response);
+}
+
+static void unpoison_urbs(struct wdm_device *desc)
+{
+ /*
+ * the order here is not essential
+ * it is symmetrical just to be nice
+ */
+ usb_unpoison_urb(desc->response);
+ usb_unpoison_urb(desc->validity);
+ usb_unpoison_urb(desc->command);
}

static void free_urbs(struct wdm_device *desc)
@@ -741,11 +752,12 @@ static int wdm_release(struct inode *inode, struct file *file)
if (!desc->count) {
if (!test_bit(WDM_DISCONNECTING, &desc->flags)) {
dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n");
- kill_urbs(desc);
+ poison_urbs(desc);
spin_lock_irq(&desc->iuspin);
desc->resp_count = 0;
spin_unlock_irq(&desc->iuspin);
desc->manage_power(desc->intf, 0);
+ unpoison_urbs(desc);
} else {
/* must avoid dev_printk here as desc->intf is invalid */
pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__);
@@ -1037,9 +1049,9 @@ static void wdm_disconnect(struct usb_interface *intf)
wake_up_all(&desc->wait);
mutex_lock(&desc->rlock);
mutex_lock(&desc->wlock);
+ poison_urbs(desc);
cancel_work_sync(&desc->rxwork);
cancel_work_sync(&desc->service_outs_intr);
- kill_urbs(desc);
mutex_unlock(&desc->wlock);
mutex_unlock(&desc->rlock);

@@ -1080,9 +1092,10 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message)
set_bit(WDM_SUSPENDING, &desc->flags);
spin_unlock_irq(&desc->iuspin);
/* callback submits work - order is essential */
- kill_urbs(desc);
+ poison_urbs(desc);
cancel_work_sync(&desc->rxwork);
cancel_work_sync(&desc->service_outs_intr);
+ unpoison_urbs(desc);
}
if (!PMSG_IS_AUTO(message)) {
mutex_unlock(&desc->wlock);
@@ -1140,7 +1153,7 @@ static int wdm_pre_reset(struct usb_interface *intf)
wake_up_all(&desc->wait);
mutex_lock(&desc->rlock);
mutex_lock(&desc->wlock);
- kill_urbs(desc);
+ poison_urbs(desc);
cancel_work_sync(&desc->rxwork);
cancel_work_sync(&desc->service_outs_intr);
return 0;
@@ -1151,6 +1164,7 @@ static int wdm_post_reset(struct usb_interface *intf)
struct wdm_device *desc = wdm_find_device(intf);
int rv;

+ unpoison_urbs(desc);
clear_bit(WDM_OVERFLOW, &desc->flags);
clear_bit(WDM_RESETTING, &desc->flags);
rv = recover_from_urb_loss(desc);
--
2.26.2

From 3eeb644af140174ebad6ddce5526bcaf42ccd9c9 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@xxxxxxxx>
Date: Thu, 18 Feb 2021 13:52:28 +0100
Subject: [PATCH 2/2] cdc-acm: untangle a circular dependency between callback
and softint

We have a cycle of callbacks scheduling works which submit
URBs with thos callbacks. This needs to be blocked, stopped
and unblocked to untangle the circle.

Signed-off-by: Oliver Neukum <oneukum@xxxxxxxx>
---
drivers/usb/class/cdc-acm.c | 41 +++++++++++++++++++++++++------------
1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 781905745812..235fd1f654a4 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -147,17 +147,29 @@ static inline int acm_set_control(struct acm *acm, int control)
#define acm_send_break(acm, ms) \
acm_ctrl_msg(acm, USB_CDC_REQ_SEND_BREAK, ms, NULL, 0)

-static void acm_kill_urbs(struct acm *acm)
+static void acm_poison_urbs(struct acm *acm)
{
int i;

- usb_kill_urb(acm->ctrlurb);
+ usb_poison_urb(acm->ctrlurb);
for (i = 0; i < ACM_NW; i++)
- usb_kill_urb(acm->wb[i].urb);
+ usb_poison_urb(acm->wb[i].urb);
for (i = 0; i < acm->rx_buflimit; i++)
- usb_kill_urb(acm->read_urbs[i]);
+ usb_poison_urb(acm->read_urbs[i]);
+}
+
+static void acm_unpoison_urbs(struct acm *acm)
+{
+ int i;
+
+ for (i = 0; i < acm->rx_buflimit; i++)
+ usb_unpoison_urb(acm->read_urbs[i]);
+ for (i = 0; i < ACM_NW; i++)
+ usb_unpoison_urb(acm->wb[i].urb);
+ usb_unpoison_urb(acm->ctrlurb);
}

+
/*
* Write buffer management.
* All of these assume proper locks taken by the caller.
@@ -480,11 +492,6 @@ static void acm_read_bulk_callback(struct urb *urb)
dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n",
rb->index, urb->actual_length, status);

- if (!acm->dev) {
- dev_dbg(&acm->data->dev, "%s - disconnected\n", __func__);
- return;
- }
-
switch (status) {
case 0:
usb_mark_last_busy(acm->dev);
@@ -731,6 +738,7 @@ static void acm_port_shutdown(struct tty_port *port)
* Need to grab write_lock to prevent race with resume, but no need to
* hold it due to the tty-port initialised flag.
*/
+ acm_poison_urbs(acm);
spin_lock_irq(&acm->write_lock);
spin_unlock_irq(&acm->write_lock);

@@ -747,7 +755,8 @@ static void acm_port_shutdown(struct tty_port *port)
usb_autopm_put_interface_async(acm->control);
}

- acm_kill_urbs(acm);
+ acm_unpoison_urbs(acm);
+
}

static void acm_tty_cleanup(struct tty_struct *tty)
@@ -1540,8 +1549,14 @@ static void acm_disconnect(struct usb_interface *intf)
if (!acm)
return;

- mutex_lock(&acm->mutex);
acm->disconnected = true;
+ /*
+ * there is a circular dependency. acm_softint() can resubmit
+ * the URBs in error handling so we need to block any
+ * submission right away
+ */
+ acm_poison_urbs(acm);
+ mutex_lock(&acm->mutex);
if (acm->country_codes) {
device_remove_file(&acm->control->dev,
&dev_attr_wCountryCodes);
@@ -1560,7 +1575,6 @@ static void acm_disconnect(struct usb_interface *intf)
tty_kref_put(tty);
}

- acm_kill_urbs(acm);
cancel_delayed_work_sync(&acm->dwork);

tty_unregister_device(acm_tty_driver, acm->minor);
@@ -1602,7 +1616,7 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message)
if (cnt)
return 0;

- acm_kill_urbs(acm);
+ acm_poison_urbs(acm);
cancel_delayed_work_sync(&acm->dwork);
acm->urbs_in_error_delay = 0;

@@ -1615,6 +1629,7 @@ static int acm_resume(struct usb_interface *intf)
struct urb *urb;
int rv = 0;

+ acm_unpoison_urbs(acm);
spin_lock_irq(&acm->write_lock);

if (--acm->susp_count)
--
2.26.2