[PATCH 10/13] block: add function for waiting for a specific free tag

From: Jens Axboe
Date: Mon May 25 2009 - 03:38:54 EST


We need this in libata to ensure that internal command tag usage does
not race with the block layer's use of the same tags.
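
As a rough usage sketch (not part of the patch itself): a driver that
wants to reuse a tag the block layer also hands out would reserve and
return it along these lines. The function name and surrounding context
are invented for illustration; the queue_lock handling matches what
blk_queue_acquire_tag() expects, since it drops and retakes
q->queue_lock while it sleeps.

/*
 * Hypothetical illustration only, not part of this patch.
 * Needs <linux/blkdev.h> and <scsi/scsi_device.h>.
 */
static void example_issue_internal_cmd(struct scsi_device *sdev, int tag)
{
	struct request_queue *q = sdev->request_queue;

	spin_lock_irq(q->queue_lock);
	/* May drop queue_lock and sleep until the block layer frees 'tag' */
	blk_queue_acquire_tag(q, tag);
	spin_unlock_irq(q->queue_lock);

	/* ... build and issue the internal command using 'tag' ... */

	spin_lock_irq(q->queue_lock);
	/* Make 'tag' available to blk_queue_start_tag() again */
	blk_queue_release_tag(q, tag);
	spin_unlock_irq(q->queue_lock);
}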

Signed-off-by: Jens Axboe <jens.axboe@xxxxxxxxxx>
---
 block/blk-tag.c           |   99 ++++++++++++++++++++++++++++++++-------------
 drivers/ata/libata-core.c |   29 +++++++++----
 include/linux/blkdev.h    |    3 +
 3 files changed, 95 insertions(+), 36 deletions(-)

diff --git a/block/blk-tag.c b/block/blk-tag.c
index e9a7501..208468b 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -149,6 +149,7 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
 		goto fail;
 
 	atomic_set(&tags->refcnt, 1);
+	init_waitqueue_head(&tags->waitq);
 	return tags;
 fail:
 	kfree(tags);
@@ -264,6 +265,65 @@ int blk_queue_resize_tags(struct request_queue *q, int new_depth)
 }
 EXPORT_SYMBOL(blk_queue_resize_tags);
 
+void blk_queue_acquire_tag(struct request_queue *q, int tag)
+{
+	struct blk_queue_tag *bqt;
+
+	if (!blk_queue_tagged(q) || !q->queue_tags)
+		return;
+
+	bqt = q->queue_tags;
+	do {
+		DEFINE_WAIT(wait);
+
+		if (!test_and_set_bit_lock(tag, bqt->tag_map))
+			break;
+
+		prepare_to_wait(&bqt->waitq, &wait, TASK_UNINTERRUPTIBLE);
+		if (test_bit(tag, bqt->tag_map)) {
+			spin_unlock_irq(q->queue_lock);
+			schedule();
+			spin_lock_irq(q->queue_lock);
+		}
+		finish_wait(&bqt->waitq, &wait);
+	} while (1);
+}
+EXPORT_SYMBOL(blk_queue_acquire_tag);
+
+void blk_queue_release_tag(struct request_queue *q, int tag)
+{
+	struct blk_queue_tag *bqt = q->queue_tags;
+
+	if (!blk_queue_tagged(q))
+		return;
+
+	/*
+	 * Normally we store a request pointer in the tag index, but for
+	 * blk_queue_acquire_tag() usage, we may not have something specific
+	 * assigned to the tag slot. In any case, be safe and clear it.
+	 */
+	bqt->tag_index[tag] = NULL;
+
+	if (unlikely(!test_bit(tag, bqt->tag_map))) {
+		printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
+		       __func__, tag);
+		return;
+	}
+	/*
+	 * The tag_map bit acts as a lock for tag_index[bit], so we need
+	 * unlock memory barrier semantics.
+	 */
+	clear_bit_unlock(tag, bqt->tag_map);
+
+	/*
+	 * We don't need a memory barrier here, since we have the bit lock
+	 * ordering above. Otherwise it would need an smp_mb();
+	 */
+	if (waitqueue_active(&bqt->waitq))
+		wake_up(&bqt->waitq);
+}
+EXPORT_SYMBOL(blk_queue_release_tag);
+
 /**
  * blk_queue_end_tag - end tag operations for a request
  * @q:  the request queue for the device
@@ -285,33 +345,17 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
 
 	BUG_ON(tag == -1);
 
-	if (unlikely(tag >= bqt->real_max_depth))
-		/*
-		 * This can happen after tag depth has been reduced.
-		 * FIXME: how about a warning or info message here?
-		 */
-		return;
-
-	list_del_init(&rq->queuelist);
-	rq->cmd_flags &= ~REQ_QUEUED;
-	rq->tag = -1;
-
-	if (unlikely(bqt->tag_index[tag] == NULL))
-		printk(KERN_ERR "%s: tag %d is missing\n",
-		       __func__, tag);
-
-	bqt->tag_index[tag] = NULL;
-
-	if (unlikely(!test_bit(tag, bqt->tag_map))) {
-		printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
-		       __func__, tag);
-		return;
-	}
 	/*
-	 * The tag_map bit acts as a lock for tag_index[bit], so we need
-	 * unlock memory barrier semantics.
+	 * When the tag depth is being reduced, we don't wait for higher tags
+	 * to finish. So we could see this triggering without it being an error.
 	 */
-	clear_bit_unlock(tag, bqt->tag_map);
+	if (tag < bqt->real_max_depth) {
+		list_del_init(&rq->queuelist);
+		rq->cmd_flags &= ~REQ_QUEUED;
+		rq->tag = -1;
+
+		blk_queue_release_tag(q, tag);
+	}
 }
 EXPORT_SYMBOL(blk_queue_end_tag);

@@ -336,8 +380,7 @@ EXPORT_SYMBOL(blk_queue_end_tag);
 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
-	unsigned max_depth;
-	int tag;
+	int max_depth, tag;
 
 	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
 		printk(KERN_ERR
@@ -371,7 +414,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	} while (test_and_set_bit_lock(tag, bqt->tag_map));
 	/*
 	 * We need lock ordering semantics given by test_and_set_bit_lock.
-	 * See blk_queue_end_tag for details.
+	 * See blk_queue_release_tag() for details.
 	 */
 
 	rq->cmd_flags |= REQ_QUEUED;
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 74c9055..5e43f2b 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -61,6 +61,7 @@
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
 #include <linux/libata.h>
 #include <asm/byteorder.h>
 #include <linux/cdrom.h>
@@ -1765,15 +1766,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	u32 preempted_sactive, preempted_qc_active;
 	int preempted_nr_active_links;
 	DECLARE_COMPLETION_ONSTACK(wait);
-	unsigned long flags;
 	unsigned int err_mask;
 	int rc;
 
-	spin_lock_irqsave(ap->lock, flags);
+	spin_lock_irq(ap->lock);
 
 	/* no internal command while frozen */
 	if (ap->pflags & ATA_PFLAG_FROZEN) {
-		spin_unlock_irqrestore(ap->lock, flags);
+		spin_unlock_irq(ap->lock);
 		return AC_ERR_SYSTEM;
 	}

@@ -1789,6 +1789,16 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	else
 		tag = 0;
 
+	/*
+	 * We could be racing with the tag freeing in the block layer, so
+	 * we need to ensure that our tag is free.
+	 */
+	if (dev->sdev && dev->sdev->request_queue)
+		blk_queue_acquire_tag(dev->sdev->request_queue, tag);
+
+	/*
+	 * The tag is now ours
+	 */
 	qc = __ata_qc_from_tag(ap, tag);
 
 	qc->tag = tag;
@@ -1828,7 +1838,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 
 	ata_qc_issue(qc);
 
-	spin_unlock_irqrestore(ap->lock, flags);
+	spin_unlock_irq(ap->lock);
 
 	if (!timeout) {
 		if (ata_probe_timeout)
@@ -1844,7 +1854,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	ata_port_flush_task(ap);
 
 	if (!rc) {
-		spin_lock_irqsave(ap->lock, flags);
+		spin_lock_irq(ap->lock);
 
 		/* We're racing with irq here. If we lose, the
 		 * following test prevents us from completing the qc
@@ -1864,7 +1874,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
					       "qc timeout (cmd 0x%x)\n", command);
 		}
 
-		spin_unlock_irqrestore(ap->lock, flags);
+		spin_unlock_irq(ap->lock);
 	}
 
 	/* do post_internal_cmd */
@@ -1884,11 +1894,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	}
 
 	/* finish up */
-	spin_lock_irqsave(ap->lock, flags);
+	spin_lock_irq(ap->lock);
 
 	*tf = qc->result_tf;
 	err_mask = qc->err_mask;
 
+	if (dev->sdev && dev->sdev->request_queue)
+		blk_queue_release_tag(dev->sdev->request_queue, tag);
+
 	ata_qc_free(qc);
 	link->active_tag = preempted_tag;
 	link->sactive = preempted_sactive;
@@ -1911,7 +1924,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 		ata_port_probe(ap);
 	}
 
-	spin_unlock_irqrestore(ap->lock, flags);
+	spin_unlock_irq(ap->lock);
 
 	if ((err_mask & AC_ERR_TIMEOUT) && auto_timeout)
 		ata_internal_cmd_timed_out(dev, command);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ca322da..f2b6b92 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -307,6 +307,7 @@ struct blk_queue_tag {
 	int max_depth;			/* what we will send to device */
 	int real_max_depth;		/* what the array can hold */
 	atomic_t refcnt;		/* map can be shared */
+	wait_queue_head_t waitq;	/* for waiting on tags */
 };
 
 #define BLK_SCSI_MAX_CMDS	(256)
@@ -929,6 +930,8 @@ extern void blk_put_queue(struct request_queue *);
  * tag stuff
  */
 #define blk_rq_tagged(rq)		((rq)->cmd_flags & REQ_QUEUED)
+extern void blk_queue_acquire_tag(struct request_queue *, int);
+extern void blk_queue_release_tag(struct request_queue *, int);
 extern int blk_queue_start_tag(struct request_queue *, struct request *);
 extern struct request *blk_queue_find_tag(struct request_queue *, int);
 extern void blk_queue_end_tag(struct request_queue *, struct request *);
--
1.6.3.rc0.1.gf800
