[PATCH RFC v2 6/8] blk-mq: add new helpers blk_mq_driver_tag_busy/idle()

From: Yu Kuai
Date: Sat Oct 21 2023 - 03:52:49 EST


From: Yu Kuai <yukuai3@xxxxxxxxxx>

Refer to the implementation of blk_mq_tag_busy/idle():

- blk_mq_driver_tag_busy() is called the first time getting a driver
tag fails;
- blk_mq_driver_tag_idle() is called when the driver tags are no longer
exhausted;
- A new counter 'busy_queues' is added to indicate how many shared
queues/hctxs are busy (i.e. their driver tags are exhausted).

Based on these new helpers, tag sharing will be delayed until getting a
driver tag fails.

Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
---
block/blk-mq-debugfs.c | 2 ++
block/blk-mq-tag.c | 53 +++++++++++++++++++++++++++++++++++++++++-
block/blk-mq.c | 9 +++++--
block/blk-mq.h | 25 ++++++++++++++++----
include/linux/blk-mq.h | 7 ++++--
include/linux/blkdev.h | 1 +
6 files changed, 88 insertions(+), 9 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 1d460119f5b3..170bc2236e81 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -417,6 +417,8 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m,
seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
seq_printf(m, "active_queues=%d\n",
READ_ONCE(tags->ctl.active_queues));
+ seq_printf(m, "busy_queues=%d\n",
+ READ_ONCE(tags->ctl.busy_queues));

seq_puts(m, "\nbitmap_tags:\n");
sbitmap_queue_show(&tags->bitmap_tags, m);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 261769251282..cd13d8e512f7 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -165,6 +165,51 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
blk_mq_tag_wakeup_all(tags, false);
}

+/*
+ * Mark @hctx as having run out of driver tags and account for it in
+ * tags->ctl.busy_queues.
+ *
+ * With shared tags the mark is QUEUE_FLAG_HCTX_BUSY in the owning
+ * request_queue's queue_flags; otherwise it is BLK_MQ_S_DTAG_BUSY in
+ * hctx->state.  The counter is bumped only by the caller that flips the
+ * bit from clear to set.
+ */
+void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+
+	/*
+	 * test_bit() is checked first so that the atomic test_and_set_bit()
+	 * is skipped entirely when the bit is already set.
+	 */
+	if (blk_mq_is_shared_tags(hctx->flags)) {
+		if (test_bit(QUEUE_FLAG_HCTX_BUSY, &hctx->queue->queue_flags))
+			return;
+		if (test_and_set_bit(QUEUE_FLAG_HCTX_BUSY,
+				     &hctx->queue->queue_flags))
+			return;
+	} else {
+		if (test_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state))
+			return;
+		if (test_and_set_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state))
+			return;
+	}
+
+	/* busy_queues is updated under tags->lock; WRITE_ONCE publishes it. */
+	spin_lock_irq(&tags->lock);
+	WRITE_ONCE(tags->ctl.busy_queues, tags->ctl.busy_queues + 1);
+	spin_unlock_irq(&tags->lock);
+}
+
+/*
+ * Clear the "driver tags exhausted" mark for @hctx and drop it from
+ * tags->ctl.busy_queues.
+ *
+ * With shared tags the mark is QUEUE_FLAG_HCTX_BUSY in the owning
+ * request_queue's queue_flags; otherwise it is BLK_MQ_S_DTAG_BUSY in
+ * hctx->state.  Only the caller that observes the bit set (and clears it)
+ * goes on to decrement the counter.
+ *
+ * tags->lock is taken with irqsave/irqrestore rather than the _irq
+ * variants: blk_mq_driver_tag_idle() is invoked from
+ * __blk_mq_sub_active_requests() when the active tag count reaches zero,
+ * and spin_unlock_irq() would unconditionally re-enable interrupts if that
+ * path is entered with them disabled.
+ * NOTE(review): presumably reachable from request completion in IRQ
+ * context - confirm against the callers.
+ */
+void __blk_mq_driver_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+	unsigned long flags;
+	unsigned int users;
+
+	if (blk_mq_is_shared_tags(hctx->flags)) {
+		struct request_queue *q = hctx->queue;
+
+		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_BUSY,
+					&q->queue_flags))
+			return;
+	} else {
+		if (!test_and_clear_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state))
+			return;
+	}
+
+	/* Preserve the caller's interrupt state across the counter update. */
+	spin_lock_irqsave(&tags->lock, flags);
+	users = tags->ctl.busy_queues - 1;
+	WRITE_ONCE(tags->ctl.busy_queues, users);
+	spin_unlock_irqrestore(&tags->lock, flags);
+}
+
static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
struct sbitmap_queue *bt)
{
@@ -218,8 +263,11 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (tag != BLK_MQ_NO_TAG)
goto found_tag;

- if (data->flags & BLK_MQ_REQ_NOWAIT)
+ if (data->flags & BLK_MQ_REQ_NOWAIT) {
+ if (!(data->rq_flags & RQF_SCHED_TAGS))
+ blk_mq_driver_tag_busy(data->hctx);
return BLK_MQ_NO_TAG;
+ }

ws = bt_wait_ptr(bt, data->hctx);
do {
@@ -246,6 +294,9 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (tag != BLK_MQ_NO_TAG)
break;

+ if (!(data->rq_flags & RQF_SCHED_TAGS))
+ blk_mq_driver_tag_busy(data->hctx);
+
bt_prev = bt;
io_schedule();

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8775616bc85c..a106533f063f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1668,8 +1668,10 @@ static void blk_mq_timeout_work(struct work_struct *work)
*/
queue_for_each_hw_ctx(q, hctx, i) {
/* the hctx may be unmapped, so check it here */
- if (blk_mq_hw_queue_mapped(hctx))
+ if (blk_mq_hw_queue_mapped(hctx)) {
blk_mq_tag_idle(hctx);
+ blk_mq_driver_tag_idle(hctx);
+ }
}
}
blk_queue_exit(q);
@@ -3594,8 +3596,10 @@ static void blk_mq_exit_hctx(struct request_queue *q,
{
struct request *flush_rq = hctx->fq->flush_rq;

- if (blk_mq_hw_queue_mapped(hctx))
+ if (blk_mq_hw_queue_mapped(hctx)) {
blk_mq_tag_idle(hctx);
+ blk_mq_driver_tag_idle(hctx);
+ }

if (blk_queue_init_done(q))
blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
@@ -3931,6 +3935,7 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
} else {
blk_mq_tag_idle(hctx);
+ blk_mq_driver_tag_idle(hctx);
hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
}
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5c0d19562848..3e555af1de49 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -195,8 +195,10 @@ static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
return sbq_wait_ptr(bt, &hctx->wait_index);
}

-void __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
-void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
+void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx);
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx);
+void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx);
+void __blk_mq_driver_tag_idle(struct blk_mq_hw_ctx *hctx);

static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
@@ -210,6 +212,18 @@ static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
__blk_mq_tag_idle(hctx);
}

+/*
+ * Note that @hctx failed to get a driver tag.  Does nothing unless the
+ * hctx has BLK_MQ_F_TAG_QUEUE_SHARED set.
+ */
+static inline void blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
+		return;
+
+	__blk_mq_driver_tag_busy(hctx);
+}
+
+/*
+ * Drop the driver-tag-busy mark for @hctx, if any.  Does nothing unless
+ * the hctx has BLK_MQ_F_TAG_QUEUE_SHARED set.
+ */
+static inline void blk_mq_driver_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
+		return;
+
+	__blk_mq_driver_tag_idle(hctx);
+}
+
static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
unsigned int tag)
{
@@ -293,7 +307,8 @@ static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx,
struct shared_tag_info *info = blk_mq_is_shared_tags(hctx->flags) ?
&hctx->queue->shared_tag_info : &hctx->shared_tag_info;

- atomic_sub(val, &info->active_tags);
+ if (!atomic_sub_return(val, &info->active_tags))
+ blk_mq_driver_tag_idle(hctx);
}

static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
@@ -354,8 +369,10 @@ bool __blk_mq_alloc_driver_tag(struct request *rq);

static inline bool blk_mq_get_driver_tag(struct request *rq)
{
- if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq))
+ if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq)) {
+ blk_mq_driver_tag_busy(rq->mq_hctx);
return false;
+ }

return true;
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c93955f5f28f..9182ceca8c7a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -666,10 +666,11 @@ enum {

BLK_MQ_S_STOPPED = 0,
BLK_MQ_S_TAG_ACTIVE = 1,
- BLK_MQ_S_SCHED_RESTART = 2,
+ BLK_MQ_S_DTAG_BUSY = 2,
+ BLK_MQ_S_SCHED_RESTART = 3,

/* hw queue is inactive after all its CPUs become offline */
- BLK_MQ_S_INACTIVE = 3,
+ BLK_MQ_S_INACTIVE = 4,

BLK_MQ_MAX_DEPTH = 10240,

@@ -728,6 +729,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,

struct tag_sharing_ctl {
unsigned int active_queues;
+ /* The number of shared queues/hctxs with exhausted driver tags. */
+ unsigned int busy_queues;
/*
* If driver tags is shared for multiple queue/hctx, this is the head of
* a list with request_queue/hctx->shared_tag_info.node entries.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b364d65fe4e5..8fd6a0a92233 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -552,6 +552,7 @@ struct request_queue {
#define QUEUE_FLAG_DAX 19 /* device supports DAX */
#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */
#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */
+#define QUEUE_FLAG_HCTX_BUSY 23 /* driver tag is exhausted for at least one blk-mq hctx */
#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */
#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */
#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */
--
2.39.2