[PATCH] blk-mq, elevator: pair up elevator_get and elevator_put to avoid refcnt problems

From: Jinlong Chen
Date: Sat Oct 15 2022 - 06:53:21 EST


Currently, the getting and putting of io scheduler module references are
not paired. The root cause is that elevator_alloc relies on its caller to
get the references to io scheduler modules instead of getting the
references by itself, while the corresponding elevator_release does put
the references to io scheduler modules back on its own.

This results in some weird code containing bugs:

1. Both elevator_switch_mq and elevator_init_mq call blk_mq_init_sched,
but elevator_init_mq calls elevator_put on failure while
elevator_switch_mq does not. These inconsistent behaviors may cause
negative refcnt or ghost refcnt due to the position where the failure
happens in blk_mq_init_sched.

2. blk_mq_elv_switch_none gets references to the io scheduler modules to
prevent them from being removed. But blk_mq_elv_switch_back does not
put the references back. This is confusing.

To address the problem, firstly, we make elevator_alloc to get its own
references to io scheduler modules. These references will be put back by
elevator_release later. Then, we can just pair each elevator_get with an
elevator_put.

The bugs and the patch can be validated with tools here:
https://github.com/nickyc975/linux_elv_refcnt_bug.git

Signed-off-by: Jinlong Chen <nickyc975@xxxxxxxxxx>
---
block/blk-mq.c | 6 ++++++
block/elevator.c | 29 ++++++++++++++++++++---------
2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8070b6c10e8d..2adfd52786dc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4595,6 +4595,12 @@ static void blk_mq_elv_switch_back(struct list_head *head,

mutex_lock(&q->sysfs_lock);
elevator_switch(q, t);
+ /**
+ * We got a reference of the io scheduler module in blk_mq_elv_switch_none
+ * to prevent the module from being removed. We need to put that reference
+ * back once we are done.
+ */
+ module_put(t->elevator_owner);
mutex_unlock(&q->sysfs_lock);
}

diff --git a/block/elevator.c b/block/elevator.c
index bd71f0fc4e4b..aaafd415f922 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -166,9 +166,12 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
{
struct elevator_queue *eq;

+ if (!try_module_get(e->elevator_owner))
+ goto err_out;
+
eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
if (unlikely(!eq))
- return NULL;
+ goto err_put_elevator;

eq->type = e;
kobject_init(&eq->kobj, &elv_ktype);
@@ -176,6 +179,11 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
hash_init(eq->hash);

return eq;
+
+err_put_elevator:
+ elevator_put(e);
+err_out:
+ return NULL;
}
EXPORT_SYMBOL(elevator_alloc);

@@ -713,8 +721,9 @@ void elevator_init_mq(struct request_queue *q)
if (err) {
pr_warn("\"%s\" elevator initialization failed, "
"falling back to \"none\"\n", e->elevator_name);
- elevator_put(e);
}
+
+ elevator_put(e);
}

/*
@@ -747,6 +756,7 @@ static int __elevator_change(struct request_queue *q, const char *name)
{
char elevator_name[ELV_NAME_MAX];
struct elevator_type *e;
+ int ret = 0;

/* Make sure queue is not in the middle of being removed */
if (!blk_queue_registered(q))
@@ -762,17 +772,18 @@ static int __elevator_change(struct request_queue *q, const char *name)
}

strlcpy(elevator_name, name, sizeof(elevator_name));
- e = elevator_get(q, strstrip(elevator_name), true);
- if (!e)
- return -EINVAL;
-
if (q->elevator &&
- elevator_match(q->elevator->type, elevator_name, 0)) {
- elevator_put(e);
+ elevator_match(q->elevator->type, strstrip(elevator_name), 0)) {
return 0;
}

- return elevator_switch(q, e);
+ e = elevator_get(q, strstrip(elevator_name), true);
+ if (!e)
+ return -EINVAL;
+
+ ret = elevator_switch(q, e);
+ elevator_put(e);
+ return ret;
}

ssize_t elv_iosched_store(struct request_queue *q, const char *name,
--
2.31.1