Re: [PATCHv3 1/1] block: introduce content activity based ioprio

From: Zhaoyang Huang
Date: Fri Jan 26 2024 - 05:20:10 EST


On Fri, Jan 26, 2024 at 5:36 PM Matthew Wilcox <willy@xxxxxxxxxxxxx> wrote:
>
> On Fri, Jan 26, 2024 at 05:28:58PM +0800, Zhaoyang Huang wrote:
> > On Fri, Jan 26, 2024 at 4:55 PM Matthew Wilcox <willy@infradeadorg> wrote:
> > >
> > > On Fri, Jan 26, 2024 at 03:59:48PM +0800, Zhaoyang Huang wrote:
> > > > loop more mm and fs guys for more comments
> > >
> > > I agree with everything Damien said. But also ...
> > ok, I will find a way to solve this problem.
> > >
> > > > > +bool BIO_ADD_FOLIO(struct bio *bio, struct folio *folio, size_t len,
> > > > > + size_t off)
> > >
> > > You don't add any users of these functions. It's hard to assess whether
> > > this is the right API when there are no example users.
> > Actually, the code has been tested on ext4 and f2fs by patchv2 on a
> > v6.6 6GB android system where I get the test result posted on the
> > commit message. These APIs is to keep block layer clean and wrap
> > things up for fs.
>
> well, where's patch v2? i don't see it in my inbox. i'm not going
> to go hunting around the email lists for it. this is not good enough.
>
> > > why are BIO_ADD_PAGE and BIO_ADD_FOLIO so very different from each
> > > other?
> > These two API just repeat the same thing that bio_add_page and
> > bio_add_folio do.
>
> what?
>
> here's the patch you sent. these two functions do wildly different
> things:
>
> +bool BIO_ADD_FOLIO(struct bio *bio, struct folio *folio, size_t len,
> + size_t off)
> +{
> + int class, level, hint, activity;
> +
> + if (len > UINT_MAX || off > UINT_MAX)
> + return false;
> +
> + class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
> + level = IOPRIO_PRIO_LEVEL(bio->bi_ioprio);
> + hint = IOPRIO_PRIO_HINT(bio->bi_ioprio);
> + activity = IOPRIO_PRIO_ACTIVITY(bio->bi_ioprio);
> +
> + activity += (bio->bi_vcnt + 1 <= IOPRIO_NR_ACTIVITY &&
> + PageWorkingset(&folio->page)) ? 1 : 0;
> + if (activity >= bio->bi_vcnt / 2)
> + class = IOPRIO_CLASS_RT;
> + else if (activity >= bio->bi_vcnt / 4)
> + class = max(IOPRIO_PRIO_CLASS(get_current_ioprio()), IOPRIO_CLASS_BE);
> +
> + bio->bi_ioprio = IOPRIO_PRIO_VALUE_ACTIVITY(class, level, hint, activity);
> +
> + return bio_add_page(bio, &folio->page, len, off) > 0;
> +}
> +
> +int BIO_ADD_PAGE(struct bio *bio, struct page *page,
> + unsigned int len, unsigned int offset)
> +{
> + int class, level, hint, activity;
> +
> + if (bio_add_page(bio, page, len, offset) > 0) {
> + class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
> + level = IOPRIO_PRIO_LEVEL(bio->bi_ioprio);
> + hint = IOPRIO_PRIO_HINT(bio->bi_ioprio);
> + activity = IOPRIO_PRIO_ACTIVITY(bio->bi_ioprio);
> + activity += (bio->bi_vcnt <= IOPRIO_NR_ACTIVITY && PageWorkingset(page)) ? 1 : 0;
> + bio->bi_ioprio = IOPRIO_PRIO_VALUE_ACTIVITY(class, level, hint, activity);
> + }
> +
> + return len;
> +}
>
> did you change one and forget to change the other?
Sorry for missing you in the list. Please find below patchv2 where all
activity calculation is located within _bio_add_page which aims at
avoiding iterating the bio->bvec before submit_bio. This is rejected
by Jens as it introduces page operation in the block layer.

block/Kconfig | 8 ++++++++
block/bio.c | 10 ++++++++++
block/blk-mq.c | 21 +++++++++++++++++++++
fs/buffer.c | 6 ++++++
include/linux/buffer_head.h | 1 +
include/uapi/linux/ioprio.h | 20 +++++++++++++++-----
6 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/block/Kconfig b/block/Kconfig
index f1364d1c0d93..8d6075575eae 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -228,6 +228,14 @@ config BLOCK_HOLDER_DEPRECATED
config BLK_MQ_STACKING
bool

+config CONTENT_ACT_BASED_IOPRIO
+ bool "Enable content activity based ioprio"
+ depends on LRU_GEN
+ default y
+ help
+ This item enable the feature of adjust bio's priority by
+ calculating its content's activity.
+
source "block/Kconfig.iosched"

endif # BLOCK
diff --git a/block/bio.c b/block/bio.c
index 816d412c06e9..1228e2a4940f 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -24,6 +24,7 @@
#include "blk.h"
#include "blk-rq-qos.h"
#include "blk-cgroup.h"
+#include "blk-ioprio.h"

#define ALLOC_CACHE_THRESHOLD 16
#define ALLOC_CACHE_MAX 256
@@ -1069,12 +1070,21 @@ EXPORT_SYMBOL_GPL(bio_add_zone_append_page);
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off)
{
+ int class, level, hint, activity;
+
+ class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
+ level = IOPRIO_PRIO_LEVEL(bio->bi_ioprio);
+ hint = IOPRIO_PRIO_HINT(bio->bi_ioprio);
+ activity = IOPRIO_PRIO_ACTIVITY(bio->bi_ioprio);
+
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
WARN_ON_ONCE(bio_full(bio, len));

bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, off);
bio->bi_iter.bi_size += len;
bio->bi_vcnt++;
+ activity += bio_page_if_active(bio, page, IOPRIO_NR_ACTIVITY);
+ bio->bi_ioprio = IOPRIO_PRIO_VALUE_ACTIVITY(class, level,
hint, activity);
}
EXPORT_SYMBOL_GPL(__bio_add_page);

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1fafd54dce3c..05cdd3adde94 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2939,6 +2939,26 @@ static inline struct request
*blk_mq_get_cached_request(struct request_queue *q,
return rq;
}

+#ifdef CONFIG_CONTENT_ACT_BASED_IOPRIO
+static void bio_set_ioprio(struct bio *bio)
+{
+ int class, level, hint, activity;
+
+ class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
+ level = IOPRIO_PRIO_LEVEL(bio->bi_ioprio);
+ hint = IOPRIO_PRIO_HINT(bio->bi_ioprio);
+ activity = IOPRIO_PRIO_ACTIVITY(bio->bi_ioprio);
+
+ if (activity >= bio->bi_vcnt / 2)
+ class = IOPRIO_CLASS_RT;
+ else if (activity >= bio->bi_vcnt / 4)
+ class = max(IOPRIO_PRIO_CLASS(get_current_ioprio()),
IOPRIO_CLASS_BE);
+
+ bio->bi_ioprio = IOPRIO_PRIO_VALUE_ACTIVITY(class, level,
hint, activity);
+
+ blkcg_set_ioprio(bio);
+}
+#else
static void bio_set_ioprio(struct bio *bio)
{
/* Nobody set ioprio so far? Initialize it based on task's nice value */
@@ -2946,6 +2966,7 @@ static void bio_set_ioprio(struct bio *bio)
bio->bi_ioprio = get_current_ioprio();
blkcg_set_ioprio(bio);
}
+#endif

/**
* blk_mq_submit_bio - Create and send a request to block device.
diff --git a/fs/buffer.c b/fs/buffer.c
index 12e9a71c693d..b15bff481706 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2832,6 +2832,12 @@ void submit_bh(blk_opf_t opf, struct buffer_head *bh)
}
EXPORT_SYMBOL(submit_bh);

+int bio_page_if_active(struct bio *bio, struct page *page, unsigned
short limit)
+{
+ return (bio->bi_vcnt <= limit && PageWorkingset(page)) ? 1 : 0;
+}
+EXPORT_SYMBOL(bio_page_if_active);
+
void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
{
lock_buffer(bh);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 44e9de51eedf..9a374f5965ec 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -248,6 +248,7 @@ int bh_uptodate_or_lock(struct buffer_head *bh);
int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
void __bh_read_batch(int nr, struct buffer_head *bhs[],
blk_opf_t op_flags, bool force_lock);
+int bio_page_if_active(struct bio *bio, struct page *page, unsigned
short limit);

/*
* Generic address_space_operations implementations for buffer_head-backed
diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h
index bee2bdb0eedb..d1c6081e796b 100644
--- a/include/uapi/linux/ioprio.h
+++ b/include/uapi/linux/ioprio.h
@@ -71,12 +71,18 @@ enum {
* class and level.
*/
#define IOPRIO_HINT_SHIFT IOPRIO_LEVEL_NR_BITS
-#define IOPRIO_HINT_NR_BITS 10
+#define IOPRIO_HINT_NR_BITS 3
#define IOPRIO_NR_HINTS (1 << IOPRIO_HINT_NR_BITS)
#define IOPRIO_HINT_MASK (IOPRIO_NR_HINTS - 1)
#define IOPRIO_PRIO_HINT(ioprio) \
(((ioprio) >> IOPRIO_HINT_SHIFT) & IOPRIO_HINT_MASK)

+#define IOPRIO_ACTIVITY_SHIFT (IOPRIO_HINT_NR_BITS +
IOPRIO_LEVEL_NR_BITS)
+#define IOPRIO_ACTIVITY_NR_BITS 7
+#define IOPRIO_NR_ACTIVITY (1 << IOPRIO_ACTIVITY_NR_BITS)
+#define IOPRIO_ACTIVITY_MASK (IOPRIO_NR_ACTIVITY - 1)
+#define IOPRIO_PRIO_ACTIVITY(ioprio) \
+ (((ioprio) >> IOPRIO_ACTIVITY_SHIFT) & IOPRIO_ACTIVITY_MASK)
/*
* I/O hints.
*/
@@ -108,20 +114,24 @@ enum {
* Return an I/O priority value based on a class, a level and a hint.
*/
static __always_inline __u16 ioprio_value(int prioclass, int priolevel,
- int priohint)
+ int priohint, int activity)
{
if (IOPRIO_BAD_VALUE(prioclass, IOPRIO_NR_CLASSES) ||
IOPRIO_BAD_VALUE(priolevel, IOPRIO_NR_LEVELS) ||
- IOPRIO_BAD_VALUE(priohint, IOPRIO_NR_HINTS))
+ IOPRIO_BAD_VALUE(priohint, IOPRIO_NR_HINTS) ||
+ IOPRIO_BAD_VALUE(activity, IOPRIO_NR_ACTIVITY))
return IOPRIO_CLASS_INVALID << IOPRIO_CLASS_SHIFT;

return (prioclass << IOPRIO_CLASS_SHIFT) |
+ (activity << IOPRIO_ACTIVITY_SHIFT) |
(priohint << IOPRIO_HINT_SHIFT) | priolevel;
}

#define IOPRIO_PRIO_VALUE(prioclass, priolevel) \
- ioprio_value(prioclass, priolevel, IOPRIO_HINT_NONE)
+ ioprio_value(prioclass, priolevel, IOPRIO_HINT_NONE, 0)
#define IOPRIO_PRIO_VALUE_HINT(prioclass, priolevel, priohint) \
- ioprio_value(prioclass, priolevel, priohint)
+ ioprio_value(prioclass, priolevel, priohint, 0)
+#define IOPRIO_PRIO_VALUE_ACTIVITY(prioclass, priolevel, priohint,
activity) \
+ ioprio_value(prioclass, priolevel, priohint, activity)

#endif /* _UAPI_LINUX_IOPRIO_H */


>
> > These white spaces are trimmed by vim, I will change them back in next version.
>
> vim doesn't do that by default.
>