Re: [PATCH 2/2] devcoredump: Remove the mutex serialization

From: Souza, Jose
Date: Mon Jan 29 2024 - 10:51:18 EST


On Fri, 2024-01-26 at 10:11 -0500, Rodrigo Vivi wrote:
> The commit 01daccf74832 ("devcoredump : Serialize devcd_del work")
> introduced the mutex to protect the case where mod_delayed_work
> could be called before the delayed work even existed.
>
> Instead, we can simply initialize the delayed work before the device
> is added, so the race condition doesn't exist in the first place.
>
> The mutex_unlock is very problematic here. Although mod_delayed_work
> is async, we have no guarantee that the work has not finished before
> the mutex_unlock(&devcd->mutex), and if that happens, 'devcd' is used
> after being freed.
>

Reviewed-by: José Roberto de Souza <jose.souza@xxxxxxxxx>

> Cc: Mukesh Ojha <quic_mojha@xxxxxxxxxxx>
> Cc: Johannes Berg <johannes@xxxxxxxxxxxxxxxx>
> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
> Cc: Rafael J. Wysocki <rafael@xxxxxxxxxx>
> Cc: Jose Souza <jose.souza@xxxxxxxxx>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
> ---
> drivers/base/devcoredump.c | 97 +++-----------------------------------
> 1 file changed, 6 insertions(+), 91 deletions(-)
>
> diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
> index 678ecc2fa242..0e26b1273920 100644
> --- a/drivers/base/devcoredump.c
> +++ b/drivers/base/devcoredump.c
> @@ -25,47 +25,6 @@ struct devcd_entry {
> struct device devcd_dev;
> void *data;
> size_t datalen;
> - /*
> - * Here, mutex is required to serialize the calls to del_wk work between
> - * user/kernel space which happens when devcd is added with device_add()
> - * and that sends uevent to user space. User space reads the uevents,
> - * and calls to devcd_data_write() which try to modify the work which is
> - * not even initialized/queued from devcoredump.
> - *
> - *
> - *
> - * cpu0(X) cpu1(Y)
> - *
> - * dev_coredump() uevent sent to user space
> - * device_add() ======================> user space process Y reads the
> - * uevents writes to devcd fd
> - * which results into writes to
> - *
> - * devcd_data_write()
> - * mod_delayed_work()
> - * try_to_grab_pending()
> - * del_timer()
> - * debug_assert_init()
> - * INIT_DELAYED_WORK()
> - * schedule_delayed_work()
> - *
> - *
> - * Also, mutex alone would not be enough to avoid scheduling of
> - * del_wk work after it get flush from a call to devcd_free()
> - * mentioned as below.
> - *
> - * disabled_store()
> - * devcd_free()
> - * mutex_lock() devcd_data_write()
> - * flush_delayed_work()
> - * mutex_unlock()
> - * mutex_lock()
> - * mod_delayed_work()
> - * mutex_unlock()
> - * So, delete_work flag is required.
> - */
> - struct mutex mutex;
> - bool delete_work;
> struct module *owner;
> ssize_t (*read)(char *buffer, loff_t offset, size_t count,
> void *data, size_t datalen);
> @@ -125,13 +84,8 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
> struct device *dev = kobj_to_dev(kobj);
> struct devcd_entry *devcd = dev_to_devcd(dev);
>
> - mutex_lock(&devcd->mutex);
> - if (!devcd->delete_work) {
> - devcd->delete_work = true;
> - mod_delayed_work(system_wq, &devcd->del_wk, 0);
> - }
> - mutex_unlock(&devcd->mutex);
> -
> + /* This file needs to be closed before devcd can be deleted */
> + mod_delayed_work(system_wq, &devcd->del_wk, 0);
> return count;
> }
>
> @@ -158,12 +112,7 @@ static int devcd_free(struct device *dev, void *data)
> {
> struct devcd_entry *devcd = dev_to_devcd(dev);
>
> - mutex_lock(&devcd->mutex);
> - if (!devcd->delete_work)
> - devcd->delete_work = true;
> -
> flush_delayed_work(&devcd->del_wk);
> - mutex_unlock(&devcd->mutex);
> return 0;
> }
>
> @@ -173,30 +122,6 @@ static ssize_t disabled_show(const struct class *class, const struct class_attri
> return sysfs_emit(buf, "%d\n", devcd_disabled);
> }
>
> -/*
> - *
> - * disabled_store() worker()
> - * class_for_each_device(&devcd_class,
> - * NULL, NULL, devcd_free)
> - * ...
> - * ...
> - * while ((dev = class_dev_iter_next(&iter))
> - * devcd_del()
> - * device_del()
> - * put_device() <- last reference
> - * error = fn(dev, data) devcd_dev_release()
> - * devcd_free(dev, data) kfree(devcd)
> - * mutex_lock(&devcd->mutex);
> - *
> - *
> - * In the above diagram, It looks like disabled_store() would be racing with parallely
> - * running devcd_del() and result in memory abort while acquiring devcd->mutex which
> - * is called after kfree of devcd memory after dropping its last reference with
> - * put_device(). However, this will not happens as fn(dev, data) runs
> - * with its own reference to device via klist_node so it is not its last reference.
> - * so, above situation would not occur.
> - */
> -
> static ssize_t disabled_store(const struct class *class, const struct class_attribute *attr,
> const char *buf, size_t count)
> {
> @@ -308,13 +233,7 @@ static void devcd_remove(void *data)
> {
> struct devcd_entry *devcd = data;
>
> - mutex_lock(&devcd->mutex);
> - if (!devcd->delete_work) {
> - devcd->delete_work = true;
> - /* XXX: Cannot flush otherwise the mutex below will hit a UAF */
> - mod_delayed_work(system_wq, &devcd->del_wk, 0);
> - }
> - mutex_unlock(&devcd->mutex);
> + flush_delayed_work(&devcd->del_wk);
> }
>
> /**
> @@ -365,16 +284,15 @@ void dev_coredumpm(struct device *dev, struct module *owner,
> devcd->read = read;
> devcd->free = free;
> devcd->failing_dev = get_device(dev);
> - devcd->delete_work = false;
>
> - mutex_init(&devcd->mutex);
> device_initialize(&devcd->devcd_dev);
>
> dev_set_name(&devcd->devcd_dev, "devcd%d",
> atomic_inc_return(&devcd_count));
> devcd->devcd_dev.class = &devcd_class;
>
> - mutex_lock(&devcd->mutex);
> + INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
> + schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
> dev_set_uevent_suppress(&devcd->devcd_dev, true);
> if (device_add(&devcd->devcd_dev))
> goto put_device;
> @@ -392,15 +310,12 @@ void dev_coredumpm(struct device *dev, struct module *owner,
>
> dev_set_uevent_suppress(&devcd->devcd_dev, false);
> kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
> - INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
> - schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
> if (devm_add_action(dev, devcd_remove, devcd))
> dev_warn(dev, "devcoredump managed auto-removal registration failed\n");
> - mutex_unlock(&devcd->mutex);
> return;
> put_device:
> + cancel_delayed_work(&devcd->del_wk);
> put_device(&devcd->devcd_dev);
> - mutex_unlock(&devcd->mutex);
> put_module:
> module_put(owner);
> free: