[PATCH] devcoredump: Remove devcoredump device if failing device is gone

From: Rodrigo Vivi
Date: Wed Jan 17 2024 - 14:54:30 EST


Make dev_coredumpm a real device managed helper, that not only
frees the device after a scheduled delay (DEVCD_TIMEOUT), but
also when the failing/crashed device is gone.

The module remove for the drivers using devcoredump are currently
broken if attempted between the crash and the DEVCD_TIMEOUT, since
the symbolic sysfs link won't be deleted.

On top of that, for PCI devices, the unbind of the device will
call the pci .remove void function, that cannot fail. At that
time, our device is pretty much gone, but the read and free
functions are alive trough the devcoredump device and they
can get some NULL dereferences or use after free.

So, if the failing-device is gone, let's cancel the scheduled
work and remove devcoredump-device immediately.

Cc: Jose Souza <jose.souza@xxxxxxxxx>
Cc: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx>
Cc: Johannes Berg <johannes@xxxxxxxxxxxxxxxx>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
---
drivers/base/devcoredump.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index 7e2d1f0d903a..6db7a2fd9a02 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/devcoredump.h>
+#include <linux/devm-helpers.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/fs.h>
@@ -118,19 +119,24 @@ static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
}

-static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buffer, loff_t offset, size_t count)
+static void devcd_remove_now(struct devcd_entry *devcd)
{
- struct device *dev = kobj_to_dev(kobj);
- struct devcd_entry *devcd = dev_to_devcd(dev);
-
mutex_lock(&devcd->mutex);
if (!devcd->delete_work) {
devcd->delete_work = true;
mod_delayed_work(system_wq, &devcd->del_wk, 0);
}
mutex_unlock(&devcd->mutex);
+}
+
+static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buffer, loff_t offset, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct devcd_entry *devcd = dev_to_devcd(dev);
+
+ devcd_remove_now(devcd);

return count;
}
@@ -304,6 +310,12 @@ static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
offset);
}

+static void devcd_remove(void *data)
+{
+ struct devcd_entry *devcd = data;
+ devcd_remove_now(devcd);
+}
+
/**
* dev_coredumpm - create device coredump with read/free methods
* @dev: the struct device for the crashed device
@@ -379,7 +391,10 @@ void dev_coredumpm(struct device *dev, struct module *owner,

dev_set_uevent_suppress(&devcd->devcd_dev, false);
kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
- INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+ if (devm_add_action(dev, devcd_remove, devcd))
+ dev_warn(dev, "devcoredump managed auto-removal registration failed\n");
+ if (devm_delayed_work_autocancel(dev, &devcd->del_wk, devcd_del))
+ dev_warn(dev, "devcoredump managed autocancel work failed\n");
schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
mutex_unlock(&devcd->mutex);
return;
--
2.43.0