Re: [PATCH] blk-mq: plug based timestamp caching

From: Chengming Zhou
Date: Thu Jul 27 2023 - 11:35:59 EST


Hello, Jens and Tejun, does this patch look fine to you?
Looking forward to your comments.

Thanks.

On 2023/7/17 16:16, chengming.zhou@xxxxxxxxx wrote:
> From: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx>
>
> This idea is from Tejun [1] that don't like manually optimized timestamp
> reads, so come up the plug based timestamp caching infrastructure, which
> is more generic and has better performance. It works since we don't care
> about nanosec accuracy.
>
> Have the plug init start with the timestamp invalid, and use blk_get_time()
> helper that return the time for no plug, and set it in the plug if not set.
> Flushing the plug would mark it invalid again at the end.
>
> We replaces all "alloc_time_ns", "start_time_ns" and "io_start_time_ns"
> settings to use the blk_get_time() helper.
>
> The only direct caller of ktime_get_ns() left in blk-mq is in request end,
> which don't use cached timestamp for better accuracy of completion time.
>
> [1] https://lore.kernel.org/lkml/ZLA7QAfSojxu_FMW@xxxxxxxxxxxxxxx/
>
> Suggested-by: Tejun Heo <tj@xxxxxxxxxx>
> Suggested-by: Jens Axboe <axboe@xxxxxxxxx>
> Signed-off-by: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx>
> ---
> block/blk-core.c | 3 +++
> block/blk-mq.c | 22 +++++++++++++++++-----
> include/linux/blkdev.h | 2 ++
> 3 files changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 90de50082146..a63d33af7287 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1054,6 +1054,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
> return;
>
> plug->mq_list = NULL;
> + plug->cached_time_ns = 0;
> plug->cached_rq = NULL;
> plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
> plug->rq_count = 0;
> @@ -1153,6 +1154,8 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
> */
> if (unlikely(!rq_list_empty(plug->cached_rq)))
> blk_mq_free_plug_rqs(plug);
> +
> + plug->cached_time_ns = 0;
> }
>
> /**
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index b04ff6f56926..54648bfaab9c 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -311,6 +311,18 @@ void blk_mq_wake_waiters(struct request_queue *q)
> blk_mq_tag_wakeup_all(hctx->tags, true);
> }
>
> +static inline u64 blk_get_time(void)
> +{
> + struct blk_plug *plug = current->plug;
> +
> + if (!plug)
> + return ktime_get_ns();
> +
> + if (!plug->cached_time_ns)
> + plug->cached_time_ns = ktime_get_ns();
> + return plug->cached_time_ns;
> +}
> +
> void blk_rq_init(struct request_queue *q, struct request *rq)
> {
> memset(rq, 0, sizeof(*rq));
> @@ -322,7 +334,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
> RB_CLEAR_NODE(&rq->rb_node);
> rq->tag = BLK_MQ_NO_TAG;
> rq->internal_tag = BLK_MQ_NO_TAG;
> - rq->start_time_ns = ktime_get_ns();
> + rq->start_time_ns = blk_get_time();
> rq->part = NULL;
> blk_crypto_rq_set_defaults(rq);
> }
> @@ -332,7 +344,7 @@ EXPORT_SYMBOL(blk_rq_init);
> static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
> {
> if (blk_mq_need_time_stamp(rq))
> - rq->start_time_ns = ktime_get_ns();
> + rq->start_time_ns = blk_get_time();
> else
> rq->start_time_ns = 0;
>
> @@ -441,7 +453,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>
> /* alloc_time includes depth and tag waits */
> if (blk_queue_rq_alloc_time(q))
> - alloc_time_ns = ktime_get_ns();
> + alloc_time_ns = blk_get_time();
>
> if (data->cmd_flags & REQ_NOWAIT)
> data->flags |= BLK_MQ_REQ_NOWAIT;
> @@ -624,7 +636,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
>
> /* alloc_time includes depth and tag waits */
> if (blk_queue_rq_alloc_time(q))
> - alloc_time_ns = ktime_get_ns();
> + alloc_time_ns = blk_get_time();
>
> /*
> * If the tag allocator sleeps we could get an allocation for a
> @@ -1235,7 +1247,7 @@ void blk_mq_start_request(struct request *rq)
> trace_block_rq_issue(rq);
>
> if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
> - rq->io_start_time_ns = ktime_get_ns();
> + rq->io_start_time_ns = blk_get_time();
> rq->stats_sectors = blk_rq_sectors(rq);
> rq->rq_flags |= RQF_STATS;
> rq_qos_issue(q, rq);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ed44a997f629..21a3d4d7ab2b 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -961,6 +961,8 @@ void blk_mark_disk_dead(struct gendisk *disk);
> struct blk_plug {
> struct request *mq_list; /* blk-mq requests */
>
> + u64 cached_time_ns;
> +
> /* if ios_left is > 1, we can batch tag/rq allocations */
> struct request *cached_rq;
> unsigned short nr_ios;