Re: [BUG] rbtree bug with mmotm 2009-04-14-17-24

From: Jens Axboe
Date: Wed Apr 22 2009 - 14:34:26 EST


On Wed, Apr 22 2009, Steve Wise wrote:
> Jens Axboe wrote:
>>>
>>> Don't bother, I see what the bug is now. It's caused by commit
>>> a36e71f996e25d6213f57951f7ae1874086ec57e and it's due to ->ioprio being
>>> changed without the prio rb root being adjusted.
>>>
>>> I'll provide a fix shortly.
>>>
>>
>> Quick'n dirty, I think this should fix the issue.
>>
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index 7e13f04..79ebb4c 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -594,7 +594,7 @@ cfq_prio_tree_lookup(struct cfq_data *cfqd, int ioprio, sector_t sector,
>> static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue
>> *cfqq)
>> {
>> - struct rb_root *root = &cfqd->prio_trees[cfqq->ioprio];
>> + struct rb_root *root = &cfqd->prio_trees[cfqq->org_ioprio];
>> struct rb_node **p, *parent;
>> struct cfq_queue *__cfqq;
>> @@ -606,8 +606,8 @@ static void cfq_prio_tree_add(struct cfq_data
>> *cfqd, struct cfq_queue *cfqq)
>> if (!cfqq->next_rq)
>> return;
>> - __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->ioprio,
>> cfqq->next_rq->sector,
>> - &parent, &p);
>> + __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->org_ioprio,
>> + cfqq->next_rq->sector, &parent, &p);
>> BUG_ON(__cfqq);
>> rb_link_node(&cfqq->p_node, parent, p);
>> @@ -656,8 +656,10 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
>> if (!RB_EMPTY_NODE(&cfqq->rb_node))
>> cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
>> - if (!RB_EMPTY_NODE(&cfqq->p_node))
>> - rb_erase_init(&cfqq->p_node, &cfqd->prio_trees[cfqq->ioprio]);
>> + if (!RB_EMPTY_NODE(&cfqq->p_node)) {
>> + rb_erase_init(&cfqq->p_node,
>> + &cfqd->prio_trees[cfqq->org_ioprio]);
>> + }
>> BUG_ON(!cfqd->busy_queues);
>> cfqd->busy_queues--;
>> @@ -976,7 +978,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
>> * First, if we find a request starting at the end of the last
>> * request, choose it.
>> */
>> - __cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->ioprio,
>> + __cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->org_ioprio,
>> sector, &parent, NULL);
>> if (__cfqq)
>> return __cfqq;
>> @@ -1559,8 +1561,9 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
>> static void cfq_init_prio_data(struct cfq_queue *cfqq, struct
>> io_context *ioc)
>> {
>> + struct cfq_data *cfqd = cfqq->cfqd;
>> struct task_struct *tsk = current;
>> - int ioprio_class;
>> + int ioprio_class, prio_readd = 0;
>> if (!cfq_cfqq_prio_changed(cfqq))
>> return;
>> @@ -1592,12 +1595,24 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
>> }
>> /*
>> + * Remove us from the prio_tree if we are present, since we index
>> + * by ->org_ioprio
>> + */
>> + if (!RB_EMPTY_NODE(&cfqq->p_node)) {
>> + rb_erase(&cfqq->p_node, &cfqd->prio_trees[cfqq->org_ioprio]);
>> + prio_readd = 1;
>> + }
>> +
>> + /*
>> * keep track of original prio settings in case we have to temporarily
>> * elevate the priority of this queue
>> */
>> cfqq->org_ioprio = cfqq->ioprio;
>> cfqq->org_ioprio_class = cfqq->ioprio_class;
>> cfq_clear_cfqq_prio_changed(cfqq);
>> +
>> + if (prio_readd)
>> + cfq_prio_tree_add(cfqd, cfqq);
>> }
>> static void changed_ioprio(struct io_context *ioc, struct
>> cfq_io_context *cic)
>>
>>
>
> Still crashes:

[snip]

Odd... Can you try this variant?

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 7e13f04..3a97c18 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -154,6 +154,7 @@ struct cfq_queue {
unsigned long rb_key;
/* prio tree member */
struct rb_node p_node;
+ struct rb_root *p_root;
/* sorted list of pending requests */
struct rb_root sort_list;
/* if fifo isn't expired, next request to serve */
@@ -594,22 +595,25 @@ cfq_prio_tree_lookup(struct cfq_data *cfqd, int ioprio, sector_t sector,

static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- struct rb_root *root = &cfqd->prio_trees[cfqq->ioprio];
+ struct rb_root *root = &cfqd->prio_trees[cfqq->org_ioprio];
struct rb_node **p, *parent;
struct cfq_queue *__cfqq;

- if (!RB_EMPTY_NODE(&cfqq->p_node))
- rb_erase_init(&cfqq->p_node, root);
+ if (cfqq->p_root) {
+ rb_erase_init(&cfqq->p_node, cfqq->p_root);
+ cfqq->p_root = NULL;
+ }

if (cfq_class_idle(cfqq))
return;
if (!cfqq->next_rq)
return;

- __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->ioprio, cfqq->next_rq->sector,
- &parent, &p);
+ __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->org_ioprio,
+ cfqq->next_rq->sector, &parent, &p);
BUG_ON(__cfqq);

+ cfqq->p_root = root;
rb_link_node(&cfqq->p_node, parent, p);
rb_insert_color(&cfqq->p_node, root);
}
@@ -656,8 +660,10 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)

if (!RB_EMPTY_NODE(&cfqq->rb_node))
cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
- if (!RB_EMPTY_NODE(&cfqq->p_node))
- rb_erase_init(&cfqq->p_node, &cfqd->prio_trees[cfqq->ioprio]);
+ if (cfqq->p_root) {
+ rb_erase_init(&cfqq->p_node, cfqq->p_root);
+ cfqq->p_root = NULL;
+ }

BUG_ON(!cfqd->busy_queues);
cfqd->busy_queues--;
@@ -976,7 +982,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
* First, if we find a request starting at the end of the last
* request, choose it.
*/
- __cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->ioprio,
+ __cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->org_ioprio,
sector, &parent, NULL);
if (__cfqq)
return __cfqq;

--
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/