Re: [PATCH 06/14] block: store elevator state in request

From: Guillaume Tucker
Date: Tue Oct 19 2021 - 18:21:14 EST


Hi Jens,

On 17/10/2021 02:37, Jens Axboe wrote:
> Add an rq private RQF_ELV flag, which tells the block layer that this
> request was initialized on a queue that has an IO scheduler attached.
> This allows for faster checking in the fast path, rather than having to
> deference rq->q later on.
>
> Elevator switching does full quiesce of the queue before detaching an
> IO scheduler, so it's safe to cache this in the request itself.

A kernelci.org automated bisection found that this patch
introduced a regression in next-20211019 with a NULL pointer
dereference, which only seems to be affecting QEMU but across all
architectures.

More details about the regression can be found here:

https://linux.kernelci.org/test/job/next/branch/master/kernel/next-20211019/plan/baseline/
https://linux.kernelci.org/test/case/id/616ea20eb7104071c43358ea/

See also all the test jobs involved in the automated bisection:

https://lava.collabora.co.uk/scheduler/device_type/qemu?dt_search=bisection-287

If you do send a fix, please include this trailer:

Reported-by: "kernelci.org bot" <bot@xxxxxxxxxxxx>


Please let us know if this seems like a valid bisection result
and if you need any help to debug the issue or try a fix.

Best wishes,
Guillaume


GitHub: https://github.com/kernelci/kernelci-project/issues/68


> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
> block/blk-mq-sched.h | 27 ++++++++++++++++-----------
> block/blk-mq.c | 20 +++++++++++---------
> include/linux/blk-mq.h | 2 ++
> 3 files changed, 29 insertions(+), 20 deletions(-)
>
> diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
> index fe252278ed9a..98836106b25f 100644
> --- a/block/blk-mq-sched.h
> +++ b/block/blk-mq-sched.h
> @@ -56,29 +56,34 @@ static inline bool
> blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
> struct bio *bio)
> {
> - struct elevator_queue *e = q->elevator;
> -
> - if (e && e->type->ops.allow_merge)
> - return e->type->ops.allow_merge(q, rq, bio);
> + if (rq->rq_flags & RQF_ELV) {
> + struct elevator_queue *e = q->elevator;
>
> + if (e->type->ops.allow_merge)
> + return e->type->ops.allow_merge(q, rq, bio);
> + }
> return true;
> }
>
> static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
> {
> - struct elevator_queue *e = rq->q->elevator;
> + if (rq->rq_flags & RQF_ELV) {
> + struct elevator_queue *e = rq->q->elevator;
>
> - if (e && e->type->ops.completed_request)
> - e->type->ops.completed_request(rq, now);
> + if (e->type->ops.completed_request)
> + e->type->ops.completed_request(rq, now);
> + }
> }
>
> static inline void blk_mq_sched_requeue_request(struct request *rq)
> {
> - struct request_queue *q = rq->q;
> - struct elevator_queue *e = q->elevator;
> + if (rq->rq_flags & RQF_ELV) {
> + struct request_queue *q = rq->q;
> + struct elevator_queue *e = q->elevator;
>
> - if ((rq->rq_flags & RQF_ELVPRIV) && e && e->type->ops.requeue_request)
> - e->type->ops.requeue_request(rq);
> + if ((rq->rq_flags & RQF_ELVPRIV) && e->type->ops.requeue_request)
> + e->type->ops.requeue_request(rq);
> + }
> }
>
> static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index fa5b12200404..5d22c228f6df 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -299,7 +299,7 @@ void blk_mq_wake_waiters(struct request_queue *q)
> */
> static inline bool blk_mq_need_time_stamp(struct request *rq)
> {
> - return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator;
> + return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
> }
>
> static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
> @@ -309,9 +309,11 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
> struct request *rq = tags->static_rqs[tag];
>
> if (data->q->elevator) {
> + rq->rq_flags = RQF_ELV;
> rq->tag = BLK_MQ_NO_TAG;
> rq->internal_tag = tag;
> } else {
> + rq->rq_flags = 0;
> rq->tag = tag;
> rq->internal_tag = BLK_MQ_NO_TAG;
> }
> @@ -320,7 +322,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
> rq->q = data->q;
> rq->mq_ctx = data->ctx;
> rq->mq_hctx = data->hctx;
> - rq->rq_flags = 0;
> rq->cmd_flags = data->cmd_flags;
> if (data->flags & BLK_MQ_REQ_PM)
> rq->rq_flags |= RQF_PM;
> @@ -356,11 +357,11 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
> data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++;
> refcount_set(&rq->ref, 1);
>
> - if (!op_is_flush(data->cmd_flags)) {
> + if (!op_is_flush(data->cmd_flags) && (rq->rq_flags & RQF_ELV)) {
> struct elevator_queue *e = data->q->elevator;
>
> rq->elv.icq = NULL;
> - if (e && e->type->ops.prepare_request) {
> + if (e->type->ops.prepare_request) {
> if (e->type->icq_cache)
> blk_mq_sched_assign_ioc(rq);
>
> @@ -575,12 +576,13 @@ static void __blk_mq_free_request(struct request *rq)
> void blk_mq_free_request(struct request *rq)
> {
> struct request_queue *q = rq->q;
> - struct elevator_queue *e = q->elevator;
> struct blk_mq_ctx *ctx = rq->mq_ctx;
> struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
>
> - if (rq->rq_flags & RQF_ELVPRIV) {
> - if (e && e->type->ops.finish_request)
> + if (rq->rq_flags & (RQF_ELVPRIV | RQF_ELV)) {
> + struct elevator_queue *e = q->elevator;
> +
> + if (e->type->ops.finish_request)
> e->type->ops.finish_request(rq);
> if (rq->elv.icq) {
> put_io_context(rq->elv.icq->ioc);
> @@ -2239,7 +2241,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
> goto insert;
> }
>
> - if (q->elevator && !bypass_insert)
> + if ((rq->rq_flags & RQF_ELV) && !bypass_insert)
> goto insert;
>
> budget_token = blk_mq_get_dispatch_budget(q);
> @@ -2475,7 +2477,7 @@ void blk_mq_submit_bio(struct bio *bio)
> }
>
> blk_add_rq_to_plug(plug, rq);
> - } else if (q->elevator) {
> + } else if (rq->rq_flags & RQF_ELV) {
> /* Insert the request at the IO scheduler queue */
> blk_mq_sched_insert_request(rq, false, true, true);
> } else if (plug && !blk_queue_nomerges(q)) {
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index a9c1d0882550..3a399aa372b5 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -55,6 +55,8 @@ typedef __u32 __bitwise req_flags_t;
> #define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20))
> /* ->timeout has been called, don't expire again */
> #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
> +/* queue has elevator attached */
> +#define RQF_ELV ((__force req_flags_t)(1 << 22))
>
> /* flags that prevent us from merging requests: */
> #define RQF_NOMERGE_FLAGS \
>