Re: [PATCH v2] block: simplify I/O stat accounting

From: Jens Axboe
Date: Fri Apr 17 2009 - 07:38:14 EST


On Fri, Apr 17 2009, Jerome Marchand wrote:
>
> This simplifies the I/O stat accounting switching code and separates it
> completely from the I/O scheduler switching code.
>
> Requests are now accounted according to the state of their request queue
> at the time of allocation. There is no longer any need to flush the
> request queue when switching the I/O accounting state.
>
>
> Signed-off-by: Jerome Marchand <jmarchan@xxxxxxxxxx>
> ---
> block/blk-core.c | 10 ++++++----
> block/blk-merge.c | 6 +++---
> block/blk-sysfs.c | 4 ----
> block/blk.h | 7 +------
> include/linux/blkdev.h | 3 +++
> 5 files changed, 13 insertions(+), 17 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 07ab754..42a646f 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
> }
>
> static struct request *
> -blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
> +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
> {
> struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
>
> @@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
>
> blk_rq_init(q, rq);
>
> - rq->cmd_flags = rw | REQ_ALLOCED;
> + rq->cmd_flags = flags | REQ_ALLOCED;
>
> if (priv) {
> if (unlikely(elv_set_request(q, rq, gfp_mask))) {
> @@ -744,7 +744,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
> struct request_list *rl = &q->rq;
> struct io_context *ioc = NULL;
> const bool is_sync = rw_is_sync(rw_flags) != 0;
> - int may_queue, priv;
> + int may_queue, priv, iostat = 0;
>
> may_queue = elv_may_queue(q, rw_flags);
> if (may_queue == ELV_MQUEUE_NO)
> @@ -792,9 +792,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
> if (priv)
> rl->elvpriv++;
>
> + if (blk_queue_io_stat(q))
> + iostat = REQ_IO_STAT;

On second thought, I'm not sure why you add a separate 'iostat' variable for
this. It would be fine to just do

	if (blk_queue_io_stat(q))
		rw_flags |= REQ_IO_STAT;

since rw_flags is only used for the allocation call and for the trace call
(which masks it with & 1 anyway).
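
IOW, an untested sketch of what that part of get_request() could look like
(the spin_unlock_irq() placement is just from the existing code; the rest is
your patch with the variable folded away):

	if (priv)
		rl->elvpriv++;

	if (blk_queue_io_stat(q))
		rw_flags |= REQ_IO_STAT;

	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);

With that, the blk_alloc_request() rename from 'rw' to 'flags' becomes purely
cosmetic, though it is the more accurate name either way.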

> spin_unlock_irq(q->queue_lock);
>
> - rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
> + rq = blk_alloc_request(q, rw_flags | iostat, priv, gfp_mask);
> if (unlikely(!rq)) {
> /*
> * Allocation failed presumably due to memory. Undo anything
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index 63760ca..6a05270 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -338,9 +338,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
> return 1;
> }
>
> -static void blk_account_io_merge(struct request *req)
> +static void blk_account_io_merge(struct request *req, struct request *next)
> {
> - if (blk_do_io_stat(req)) {
> + if (req->rq_disk && blk_rq_io_stat(next)) {

This at least needs a comment; it's not at all clear at first glance why we
check 'next' for io stat but ->rq_disk on 'req'. Since it's only called from
that one spot, it would be cleaner to do:

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	if (blk_rq_io_stat(next))
		blk_account_io_merge(req->rq_disk, req->sector);

and have blk_account_io_merge() look more like (same part accounting as now,
just keyed off disk + sector):

	static void blk_account_io_merge(struct gendisk *disk, sector_t sector)
	{
		struct hd_struct *part;
		int cpu;

		cpu = part_stat_lock();
		part = disk_map_sector_rcu(disk, sector);

		part_round_stats(cpu, part);
		part_dec_in_flight(part);

		part_stat_unlock();
	}
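
That keeps the io stat decision at the call site, right next to the comment
explaining that 'next' is going away, and leaves the helper as just the
mechanical part lookup and accounting.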

> struct hd_struct *part;
> int cpu;
>
> @@ -402,7 +402,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
>
> elv_merge_requests(q, req, next);
>
> - blk_account_io_merge(req);
> + blk_account_io_merge(req, next);
>
> req->ioprio = ioprio_best(req->ioprio, next->ioprio);
> if (blk_rq_cpu_valid(next))
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index cac4e9f..3ff9bba 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
> ssize_t ret = queue_var_store(&stats, page, count);
>
> spin_lock_irq(q->queue_lock);
> - elv_quiesce_start(q);
> -
> if (stats)
> queue_flag_set(QUEUE_FLAG_IO_STAT, q);
> else
> queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
> -
> - elv_quiesce_end(q);
> spin_unlock_irq(q->queue_lock);
>
> return ret;
> diff --git a/block/blk.h b/block/blk.h
> index 5dfc412..79c85f7 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu)
>
> static inline int blk_do_io_stat(struct request *rq)
> {
> - struct gendisk *disk = rq->rq_disk;
> -
> - if (!disk || !disk->queue)
> - return 0;
> -
> - return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
> + return rq->rq_disk && blk_rq_io_stat(rq);
> }
>
> #endif
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ba54c83..2755d5c 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -118,6 +118,7 @@ enum rq_flag_bits {
> __REQ_COPY_USER, /* contains copies of user pages */
> __REQ_INTEGRITY, /* integrity metadata has been remapped */
> __REQ_NOIDLE, /* Don't anticipate more IO after this one */
> + __REQ_IO_STAT, /* account I/O stat */
> __REQ_NR_BITS, /* stops here */
> };
>
> @@ -145,6 +146,7 @@ enum rq_flag_bits {
> #define REQ_COPY_USER (1 << __REQ_COPY_USER)
> #define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
> #define REQ_NOIDLE (1 << __REQ_NOIDLE)
> +#define REQ_IO_STAT (1 << __REQ_IO_STAT)
>
> #define BLK_MAX_CDB 16
>
> @@ -598,6 +600,7 @@ enum {
> blk_failfast_transport(rq) || \
> blk_failfast_driver(rq))
> #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
> +#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT)
>
> #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))
>

--
Jens Axboe
