Re: [PATCH -next 2/8] md: also clone new io if io accounting is disabled

From: Xiao Ni
Date: Tue Jun 20 2023 - 04:49:31 EST


On Mon, Jun 19, 2023 at 8:49 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote:
>
> From: Yu Kuai <yukuai3@xxxxxxxxxx>
>
> Currently, 'active_io' is grabed before make_reqeust() is called, and
> it's dropped immediately make_reqeust() returns. Hence 'active_io'
> actually means io is dispatching, not io is inflight.

Hi Kuai

There are three typo errors in the commit message: s/grabed/grabbed/g

>
> For raid0 and raid456 that io accounting is enabled, 'active_io' will
> also be grabed when bio is cloned for io accounting, and this 'active_io'
> is dropped until io is done.
>
> Always clone new bio so that 'active_io' will mean that io is inflight,
> raid1 and raid10 will switch to use this method in later patches. Once
> these are implemented, it can be cleaned up that 'active_io' is grabed
> twice for one io.
>
> Now that bio will be cloned even if io accounting is disabled, also
> rename related structure from '*_acct_*' to '*_clone_*'.
>
> Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
> ---
> drivers/md/md.c | 61 ++++++++++++++++++++++++----------------------
> drivers/md/md.h | 4 +--
> drivers/md/raid5.c | 18 +++++++-------
> 3 files changed, 43 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 42347289195a..5ad8e7f3aebd 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -2314,7 +2314,7 @@ int md_integrity_register(struct mddev *mddev)
> pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
> if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
> (mddev->level != 1 && mddev->level != 10 &&
> - bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
> + bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
> /*
> * No need to handle the failure of bioset_integrity_create,
> * because the function is called by md_run() -> pers->run(),
> @@ -5886,9 +5886,9 @@ int md_run(struct mddev *mddev)
> goto exit_bio_set;
> }
>
> - if (!bioset_initialized(&mddev->io_acct_set)) {
> - err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
> - offsetof(struct md_io_acct, bio_clone), 0);
> + if (!bioset_initialized(&mddev->io_clone_set)) {
> + err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
> + offsetof(struct md_io_clone, bio_clone), 0);
> if (err)
> goto exit_sync_set;
> }
> @@ -6070,7 +6070,7 @@ int md_run(struct mddev *mddev)
> module_put(pers->owner);
> md_bitmap_destroy(mddev);
> abort:
> - bioset_exit(&mddev->io_acct_set);
> + bioset_exit(&mddev->io_clone_set);
> exit_sync_set:
> bioset_exit(&mddev->sync_set);
> exit_bio_set:
> @@ -6295,7 +6295,7 @@ static void __md_stop(struct mddev *mddev)
> percpu_ref_exit(&mddev->active_io);
> bioset_exit(&mddev->bio_set);
> bioset_exit(&mddev->sync_set);
> - bioset_exit(&mddev->io_acct_set);
> + bioset_exit(&mddev->io_clone_set);
> }
>
> void md_stop(struct mddev *mddev)
> @@ -8661,45 +8661,48 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
> }
> EXPORT_SYMBOL_GPL(md_submit_discard_bio);
>
> -static void md_end_io_acct(struct bio *bio)
> +static void md_end_clone_io(struct bio *bio)
> {
> - struct md_io_acct *md_io_acct = bio->bi_private;
> - struct bio *orig_bio = md_io_acct->orig_bio;
> - struct mddev *mddev = md_io_acct->mddev;
> + struct md_io_clone *md_io_clone = bio->bi_private;
> + struct bio *orig_bio = md_io_clone->orig_bio;
> + struct mddev *mddev = md_io_clone->mddev;
>
> orig_bio->bi_status = bio->bi_status;
>
> - bio_end_io_acct(orig_bio, md_io_acct->start_time);
> + if (md_io_clone->start_time)
> + bio_end_io_acct(orig_bio, md_io_clone->start_time);
> +
> bio_put(bio);
> bio_endio(orig_bio);
> -
> percpu_ref_put(&mddev->active_io);
> }
>
> +static void md_clone_bio(struct mddev *mddev, struct bio **bio)
> +{
> + struct block_device *bdev = (*bio)->bi_bdev;
> + struct md_io_clone *md_io_clone;
> + struct bio *clone =
> + bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set);
> +
> + md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
> + md_io_clone->orig_bio = *bio;
> + md_io_clone->mddev = mddev;
> + if (blk_queue_io_stat(bdev->bd_disk->queue))
> + md_io_clone->start_time = bio_start_io_acct(*bio);
> +
> + clone->bi_end_io = md_end_clone_io;
> + clone->bi_private = md_io_clone;
> + *bio = clone;
> +}
> +
> /*
> * Used by personalities that don't already clone the bio and thus can't
> * easily add the timestamp to their extended bio structure.
> */
> void md_account_bio(struct mddev *mddev, struct bio **bio)
> {
> - struct block_device *bdev = (*bio)->bi_bdev;
> - struct md_io_acct *md_io_acct;
> - struct bio *clone;
> -
> - if (!blk_queue_io_stat(bdev->bd_disk->queue))
> - return;
> -
> percpu_ref_get(&mddev->active_io);
> -
> - clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
> - md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
> - md_io_acct->orig_bio = *bio;
> - md_io_acct->start_time = bio_start_io_acct(*bio);
> - md_io_acct->mddev = mddev;
> -
> - clone->bi_end_io = md_end_io_acct;
> - clone->bi_private = md_io_acct;
> - *bio = clone;
> + md_clone_bio(mddev, bio);
> }
> EXPORT_SYMBOL_GPL(md_account_bio);
>
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 11299d94b239..892a598a5029 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -510,7 +510,7 @@ struct mddev {
> struct bio_set sync_set; /* for sync operations like
> * metadata and bitmap writes
> */
> - struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */
> + struct bio_set io_clone_set;
>
> /* Generic flush handling.
> * The last to finish preflush schedules a worker to submit
> @@ -738,7 +738,7 @@ struct md_thread {
> void *private;
> };
>
> -struct md_io_acct {
> +struct md_io_clone {
> struct mddev *mddev;
> struct bio *orig_bio;
> unsigned long start_time;
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 29cf5455d7a5..cef0b400b2ee 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -5468,13 +5468,13 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
> */
> static void raid5_align_endio(struct bio *bi)
> {
> - struct md_io_acct *md_io_acct = bi->bi_private;
> - struct bio *raid_bi = md_io_acct->orig_bio;
> + struct md_io_clone *md_io_clone = bi->bi_private;
> + struct bio *raid_bi = md_io_clone->orig_bio;
> struct mddev *mddev;
> struct r5conf *conf;
> struct md_rdev *rdev;
> blk_status_t error = bi->bi_status;
> - unsigned long start_time = md_io_acct->start_time;
> + unsigned long start_time = md_io_clone->start_time;
>
> bio_put(bi);
>
> @@ -5506,7 +5506,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
> struct md_rdev *rdev;
> sector_t sector, end_sector, first_bad;
> int bad_sectors, dd_idx;
> - struct md_io_acct *md_io_acct;
> + struct md_io_clone *md_io_clone;
> bool did_inc;
>
> if (!in_chunk_boundary(mddev, raid_bio)) {
> @@ -5544,15 +5544,15 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
> }
>
> align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
> - &mddev->io_acct_set);
> - md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
> + &mddev->io_clone_set);
> + md_io_clone = container_of(align_bio, struct md_io_clone, bio_clone);
> raid_bio->bi_next = (void *)rdev;
> if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
> - md_io_acct->start_time = bio_start_io_acct(raid_bio);
> - md_io_acct->orig_bio = raid_bio;
> + md_io_clone->start_time = bio_start_io_acct(raid_bio);
> + md_io_clone->orig_bio = raid_bio;
>
> align_bio->bi_end_io = raid5_align_endio;
> - align_bio->bi_private = md_io_acct;
> + align_bio->bi_private = md_io_clone;
> align_bio->bi_iter.bi_sector = sector;
>
> /* No reshape active, so we can trust rdev->data_offset */
> --
> 2.39.2
>

Reviewed-by: Xiao Ni <xni@xxxxxxxxxx>