. . . Isn't there something more deterministic than just waiting a second and hoping things clear up that you can use here? How about not clearing the queue unless lo->sock is NULL, using whatever lock it is that's now protecting lo->sock? That way the queue-clearing race can be eliminated too.
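
Roughly like this, perhaps (a sketch only; it assumes lo->sock is covered by lo->queue_lock, which may not be the lock the driver actually uses):

	/* Sketch: drain the queue only once the socket is gone, under the
	 * lock assumed to protect lo->sock, so nothing is cleared while a
	 * transmitter might still be using the request. */
	void nbd_clear_que(struct nbd_device *lo)
	{
		struct request *req;

		do {
			req = NULL;
			spin_lock(&lo->queue_lock);
			if (lo->sock) {
				/* socket still up: leave the queue alone */
				spin_unlock(&lo->queue_lock);
				return;
			}
			if (!list_empty(&lo->queue_head)) {
				req = list_entry(lo->queue_head.next,
						 struct request, queuelist);
				list_del_init(&req->queuelist);
			}
			spin_unlock(&lo->queue_lock);
			if (req) {
				req->errors++;	/* fail it back to the caller */
				nbd_end_request(req);
			}
		} while (req);
	}
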
Here's the patch to fix up several race conditions in nbd. It requires
reverting the already included (but admittedly incomplete)
nbd-race-fix.patch that's in -mm5.
Andrew, please apply.
Thanks,
Paul
------------------------------------------------------------------------
--- linux-2.6.0-test2-mm4-PRISTINE/drivers/block/nbd.c Sun Jul 27 12:58:51 2003
+++ linux-2.6.0-test2-mm4/drivers/block/nbd.c Thu Aug 7 18:02:23 2003
@@ -416,11 +416,19 @@ void nbd_clear_que(struct nbd_device *lo
BUG_ON(lo->magic != LO_MAGIC);
#endif
+retry:
do {
req = NULL;
spin_lock(&lo->queue_lock);
if (!list_empty(&lo->queue_head)) {
req = list_entry(lo->queue_head.next, struct request, queuelist);
+ if (req->ref_count > 1) { /* still in xmit */
+ spin_unlock(&lo->queue_lock);
+ printk(KERN_DEBUG "%s: request %p: still in use (%d), waiting...\n",
+ lo->disk->disk_name, req, req->ref_count);
+ schedule_timeout(HZ); /* wait a second */
+ goto retry;

Since ref_count isn't atomic, shouldn't ref_count only be changed while the queue_lock is held???

+ }
list_del_init(&req->queuelist);
}
spin_unlock(&lo->queue_lock);
@@ -490,6 +498,7 @@ static void do_nbd_request(request_queue
}
list_add(&req->queuelist, &lo->queue_head);
+ req->ref_count++; /* make sure req does not get freed */
spin_unlock(&lo->queue_lock);
nbd_send_req(lo, req);
@@ -499,12 +508,14 @@ static void do_nbd_request(request_queue
lo->disk->disk_name);
spin_lock(&lo->queue_lock);
list_del_init(&req->queuelist);
+ req->ref_count--;
spin_unlock(&lo->queue_lock);
nbd_end_request(req);
spin_lock_irq(q->queue_lock);
continue;
}
+ req->ref_count--;
spin_lock_irq(q->queue_lock);
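
On the ref_count question above: one option would be to only touch ref_count while lo->queue_lock is held, e.g. at the tail of do_nbd_request (a sketch only, assuming the rest of the function stays as in the patch):

	spin_lock(&lo->queue_lock);
	req->ref_count--;	/* ref_count only ever changed under queue_lock */
	spin_unlock(&lo->queue_lock);
	spin_lock_irq(q->queue_lock);

Alternatively, ref_count could be made an atomic_t and manipulated with atomic_inc()/atomic_dec(), which would make the unlocked decrement safe.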