[PATCH -next 6/8] md/md-bitmap: support to unplug bitmap asynchronously

From: Yu Kuai
Date: Thu Apr 20 2023 - 07:32:14 EST


From: Yu Kuai <yukuai3@xxxxxxxxxx>

If a bitmap is enabled, the bitmap must be updated before write io is
submitted. This is why the unplug callback must move these io to
'conf->pending_io_list' if 'current->bio_list' is not empty, which causes
performance degradation.

This patch adds a new helper md_bitmap_unplug_async() to submit bitmap io
in a kworker, so that submitting bitmap io in raid10_unplug() doesn't
require that 'current->bio_list' is empty.

This patch prepares to limit the number of plugged bios.

Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
---
drivers/md/md-bitmap.c | 59 +++++++++++++++++++++++++++++++++++++++---
drivers/md/md-bitmap.h | 3 +++
drivers/md/raid1.c | 3 ++-
drivers/md/raid10.c | 2 +-
4 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 4bd980b272ef..da8ad2e95e88 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -1000,10 +1000,18 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
return set;
}

-/* this gets called when the md device is ready to unplug its underlying
+/*
+ * Request handed to the bitmap unplug workqueue. md_bitmap_unplug_fn()
+ * recovers it from the embedded work item via container_of().
+ */
+struct bitmap_unplug_work {
+ /* work item queued on bitmap->unplug_wq */
+ struct work_struct work;
+ /* bitmap that the worker unplugs */
+ struct bitmap *bitmap;
+ /* completed by the worker once the unplug has returned */
+ struct completion *done;
+};
+
+/*
+ * This gets called when the md device is ready to unplug its underlying
* (slave) device queues -- before we let any writes go down, we need to
- * sync the dirty pages of the bitmap file to disk */
-void md_bitmap_unplug(struct bitmap *bitmap)
+ * sync the dirty pages of the bitmap file to disk.
+ */
+static void md_do_bitmap_unplug(struct bitmap *bitmap)
{
unsigned long i;
int dirty, need_write;
@@ -1035,9 +1043,45 @@ void md_bitmap_unplug(struct bitmap *bitmap)

if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
md_bitmap_file_kick(bitmap);
+
+}
+/*
+ * Workqueue callback: unplug the bitmap named in the request, then signal
+ * the request's completion so the submitter can continue.
+ */
+static void md_bitmap_unplug_fn(struct work_struct *work)
+{
+	struct bitmap_unplug_work *req;
+
+	req = container_of(work, struct bitmap_unplug_work, work);
+	md_do_bitmap_unplug(req->bitmap);
+	complete(req->done);
+}
+
+/*
+ * Unplug @bitmap either directly in the caller's context (@async == false)
+ * or from a worker on bitmap->unplug_wq (@async == true). In both cases the
+ * function only returns after the unplug has finished; "async" merely moves
+ * the bitmap io submission out of the caller's context.
+ */
+static void __md_bitmap_unplug(struct bitmap *bitmap, bool async)
+{
+	DECLARE_COMPLETION_ONSTACK(done);
+	struct bitmap_unplug_work unplug_work;
+
+	if (!async) {
+		md_do_bitmap_unplug(bitmap);
+		return;
+	}
+
+	/*
+	 * bitmap->unplug_wq is dereferenced below, before md_do_bitmap_unplug()
+	 * gets a chance to look at the pointer, so a NULL bitmap must be
+	 * rejected here.
+	 * NOTE(review): assumes there is nothing to unplug when no bitmap is
+	 * configured, as in the synchronous path -- confirm.
+	 */
+	if (!bitmap)
+		return;
+
+	/* The work item lives on this stack frame, so use the _ONSTACK init. */
+	INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
+	unplug_work.bitmap = bitmap;
+	unplug_work.done = &done;
+
+	queue_work(bitmap->unplug_wq, &unplug_work.work);
+	wait_for_completion(&done);
+
+	/* Pairs with INIT_WORK_ONSTACK() before the frame goes away. */
+	destroy_work_on_stack(&unplug_work.work);
+}
+
+/*
+ * md_bitmap_unplug - unplug @bitmap directly in the caller's context.
+ * @bitmap: bitmap to unplug.
+ */
+void md_bitmap_unplug(struct bitmap *bitmap)
+{
+	/* void function: call the helper rather than "return"-ing its result */
+	__md_bitmap_unplug(bitmap, false);
}
EXPORT_SYMBOL(md_bitmap_unplug);

+/*
+ * md_bitmap_unplug_async - unplug @bitmap from a workqueue worker.
+ * @bitmap: bitmap to unplug.
+ *
+ * Despite the name, this waits until the worker has finished; only the
+ * unplug itself runs outside the caller's context.
+ */
+void md_bitmap_unplug_async(struct bitmap *bitmap)
+{
+	/* void function: call the helper rather than "return"-ing its result */
+	__md_bitmap_unplug(bitmap, true);
+}
+EXPORT_SYMBOL(md_bitmap_unplug_async);
+
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
* the in-memory bitmap from the on-disk bitmap -- also, sets up the
@@ -1753,6 +1797,9 @@ void md_bitmap_free(struct bitmap *bitmap)
if (!bitmap) /* there was no bitmap */
return;

+ if (bitmap->unplug_wq)
+ destroy_workqueue(bitmap->unplug_wq);
+
if (bitmap->sysfs_can_clear)
sysfs_put(bitmap->sysfs_can_clear);

@@ -1843,6 +1890,12 @@ struct bitmap *md_bitmap_create(struct mddev *mddev, int slot)
if (!bitmap)
return ERR_PTR(-ENOMEM);

+ bitmap->unplug_wq = create_workqueue("md_bitmap");
+ if (!bitmap->unplug_wq) {
+ err = -ENOMEM;
+ goto error;
+ }
+
spin_lock_init(&bitmap->counts.lock);
atomic_set(&bitmap->pending_writes, 0);
init_waitqueue_head(&bitmap->write_wait);
diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h
index 3a4750952b3a..55531669db24 100644
--- a/drivers/md/md-bitmap.h
+++ b/drivers/md/md-bitmap.h
@@ -231,6 +231,8 @@ struct bitmap {

struct kernfs_node *sysfs_can_clear;
int cluster_slot; /* Slot offset for clustered env */
+
+ struct workqueue_struct *unplug_wq;
};

/* the bitmap API */
@@ -264,6 +266,7 @@ void md_bitmap_sync_with_cluster(struct mddev *mddev,
sector_t new_lo, sector_t new_hi);

void md_bitmap_unplug(struct bitmap *bitmap);
+void md_bitmap_unplug_async(struct bitmap *bitmap);
void md_bitmap_daemon_work(struct mddev *mddev);

int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c068ed3e6c96..7389e599f34e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -792,7 +792,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
static void flush_bio_list(struct r1conf *conf, struct bio *bio)
{
/* flush any pending bitmap writes to disk before proceeding w/ I/O */
- md_bitmap_unplug(conf->mddev->bitmap);
wake_up(&conf->wait_barrier);

while (bio) { /* submit pending writes */
@@ -829,6 +828,7 @@ static void flush_pending_writes(struct r1conf *conf)
*/
__set_current_state(TASK_RUNNING);
blk_start_plug(&plug);
+ md_bitmap_unplug(conf->mddev->bitmap);
flush_bio_list(conf, bio);
blk_finish_plug(&plug);
} else
@@ -1176,6 +1176,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)

/* we aren't scheduling, so we can do the write-out directly. */
bio = bio_list_get(&plug->pending);
+ md_bitmap_unplug_async(conf->mddev->bitmap);
flush_bio_list(conf, bio);
kfree(plug);
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index fd625026c97b..9f307ff5d4f6 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1113,7 +1113,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)

/* we aren't scheduling, so we can do the write-out directly. */
bio = bio_list_get(&plug->pending);
- md_bitmap_unplug(mddev->bitmap);
+ md_bitmap_unplug_async(mddev->bitmap);
wake_up(&conf->wait_barrier);

while (bio) { /* submit pending writes */
--
2.39.2