[PATCH] virtio-scsi: use chained sg_lists

From: Paolo Bonzini
Date: Thu Jul 26 2012 - 03:58:14 EST


Using chained sg_lists simplifies everything a lot.
The scatterlists we pass to virtio are always of bounded size
and can be allocated on the stack. This means we do not need
to take tgt_lock, and struct virtio_scsi_target_state no longer
needs a flexible array member at the end, so we avoid a pointer
access.
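
To make this work, virtio_ring gains a new entry point,
virtqueue_add_buf_sg(), which takes separate readable and writable
scatterlists and walks both with sg_next(), so chained lists just
work; virtqueue_add_buf() becomes a wrapper around it. Because the
entries are now walked with sg_next(), virtio_blk must clear the
scatterlist termination marker that blk_rq_map_sg() sets, since it
tacks additional entries on after the data.

For reviewers unfamiliar with the idiom, this is roughly what the
driver now does per command (a simplified sketch, not the exact
code from the patch below):

    struct scatterlist sg_out[2];

    sg_init_table(sg_out, 2);
    /* Entry 0: the request header embedded in the command. */
    sg_set_buf(&sg_out[0], &cmd->req, req_size);
    /* Entry 1 becomes a chain link into the sg table that the
     * block layer already built for the data-out payload. */
    sg_chain(sg_out, 2, scsi_out(sc)->table.sgl);

The two-entry array is small enough to live on the stack, which is
what lets us drop the per-target sg array and the lock that
protected it.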
---
drivers/block/virtio_blk.c | 3 +
drivers/scsi/virtio_scsi.c | 93 ++++++++++++++++---------------------------
drivers/virtio/virtio_ring.c | 93 +++++++++++++++++++++++++++++++------------
include/linux/virtio.h | 8 +++
4 files changed, 114 insertions(+), 83 deletions(-)
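
For context, a minimal caller of the new virtqueue_add_buf_sg()
entry point would look like this (sketch only; req, resp and tok
stand in for a device-specific request, response and opaque token):

    struct scatterlist hdr, status;
    int err;

    sg_init_one(&hdr, &req, sizeof(req));        /* readable by device */
    sg_init_one(&status, &resp, sizeof(resp));   /* writable by device */
    err = virtqueue_add_buf_sg(vq, &hdr, 1, &status, 1, tok, GFP_ATOMIC);
    if (err >= 0)
        virtqueue_kick(vq);

Passing sg_in == NULL preserves the old virtqueue_add_buf()
semantics, where the writable entries follow the readable ones in a
single array.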

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 13f7ccb..ef65ea1 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -66,9 +66,6 @@ struct virtio_scsi_target_state {
struct virtio_scsi_vq *req_vq;

atomic_t reqs;
-
- /* For sglist construction when adding commands to the virtqueue. */
- struct scatterlist sg[];
};

/* Driver instance state */
@@ -362,20 +359,6 @@ static void virtscsi_event_done(struct virtqueue *vq)
spin_unlock_irqrestore(&vscsi->event_vq.vq_lock, flags);
};

-static void virtscsi_map_sgl(struct scatterlist *sg, unsigned int *p_idx,
- struct scsi_data_buffer *sdb)
-{
- struct sg_table *table = &sdb->table;
- struct scatterlist *sg_elem;
- unsigned int idx = *p_idx;
- int i;
-
- for_each_sg(table->sgl, sg_elem, table->nents, i)
- sg_set_buf(&sg[idx++], sg_virt(sg_elem), sg_elem->length);
-
- *p_idx = idx;
-}
-
/**
* virtscsi_map_cmd - map a scsi_cmd to a virtqueue scatterlist
* @vscsi : virtio_scsi state
@@ -384,52 +367,57 @@ static void virtscsi_map_sgl(struct scatterlist *sg, unsigned int *p_idx,
* @in_num : number of write-only elements
* @req_size : size of the request buffer
* @resp_size : size of the response buffer
- *
- * Called with tgt_lock held.
*/
-static void virtscsi_map_cmd(struct virtio_scsi_target_state *tgt,
- struct virtio_scsi_cmd *cmd,
- unsigned *out_num, unsigned *in_num,
+static void virtscsi_map_cmd(struct virtio_scsi_cmd *cmd,
+ struct scatterlist *sg_out,
+ unsigned *out_num,
+ struct scatterlist *sg_in,
+ unsigned *in_num,
size_t req_size, size_t resp_size)
{
struct scsi_cmnd *sc = cmd->sc;
- struct scatterlist *sg = tgt->sg;
- unsigned int idx = 0;
+
+ sg_init_table(sg_out, 2);
+ sg_init_table(sg_in, 2);

/* Request header. */
- sg_set_buf(&sg[idx++], &cmd->req, req_size);
+ sg_set_buf(&sg_out[0], &cmd->req, req_size);
+ *out_num = 1;

/* Data-out buffer. */
- if (sc && sc->sc_data_direction != DMA_FROM_DEVICE)
- virtscsi_map_sgl(sg, &idx, scsi_out(sc));
-
- *out_num = idx;
+ if (sc && sc->sc_data_direction != DMA_FROM_DEVICE) {
+ struct sg_table *table = &scsi_out(sc)->table;
+ sg_chain(sg_out, 2, table->sgl);
+ *out_num += table->nents;
+ }

/* Response header. */
- sg_set_buf(&sg[idx++], &cmd->resp, resp_size);
+ sg_set_buf(&sg_in[0], &cmd->resp, resp_size);
+ *in_num = 1;

/* Data-in buffer */
- if (sc && sc->sc_data_direction != DMA_TO_DEVICE)
- virtscsi_map_sgl(sg, &idx, scsi_in(sc));
-
- *in_num = idx - *out_num;
+ if (sc && sc->sc_data_direction != DMA_TO_DEVICE) {
+ struct sg_table *table = &scsi_in(sc)->table;
+ sg_chain(sg_in, 2, table->sgl);
+ *in_num += table->nents;
+ }
}

-static int virtscsi_kick_cmd(struct virtio_scsi_target_state *tgt,
- struct virtio_scsi_vq *vq,
+static int virtscsi_kick_cmd(struct virtio_scsi_vq *vq,
struct virtio_scsi_cmd *cmd,
size_t req_size, size_t resp_size, gfp_t gfp)
{
+ struct scatterlist sg_out[2], sg_in[2];
unsigned int out_num, in_num;
unsigned long flags;
int ret;

- spin_lock_irqsave(&tgt->tgt_lock, flags);
- virtscsi_map_cmd(tgt, cmd, &out_num, &in_num, req_size, resp_size);
+ virtscsi_map_cmd(cmd, sg_out, &out_num, sg_in, &in_num,
+ req_size, resp_size);

- spin_lock(&vq->vq_lock);
- ret = virtqueue_add_buf(vq->vq, tgt->sg, out_num, in_num, cmd, gfp);
- spin_unlock(&tgt->tgt_lock);
+ spin_lock_irqsave(&vq->vq_lock, flags);
+ ret = virtqueue_add_buf_sg(vq->vq, sg_out, out_num, sg_in, in_num,
+ cmd, gfp);
if (ret >= 0)
ret = virtqueue_kick_prepare(vq->vq);

@@ -447,9 +435,6 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
struct virtio_scsi_cmd *cmd;
int ret;

- struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
- BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize);
-
/* TODO: check feature bit and fail if unsupported? */
BUG_ON(sc->sc_data_direction == DMA_BIDIRECTIONAL);

@@ -477,7 +462,7 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);

- if (virtscsi_kick_cmd(tgt, tgt->req_vq, cmd,
+ if (virtscsi_kick_cmd(tgt->req_vq, cmd,
sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
GFP_ATOMIC) >= 0)
ret = 0;
@@ -519,11 +504,10 @@ static int virtscsi_queuecommand_multi(struct Scsi_Host *sh,
static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
{
DECLARE_COMPLETION_ONSTACK(comp);
- struct virtio_scsi_target_state *tgt = vscsi->tgt[cmd->sc->device->id];
int ret = FAILED;

cmd->comp = &comp;
- if (virtscsi_kick_cmd(tgt, &vscsi->ctrl_vq, cmd,
+ if (virtscsi_kick_cmd(&vscsi->ctrl_vq, cmd,
sizeof cmd->req.tmf, sizeof cmd->resp.tmf,
GFP_NOIO) < 0)
goto out;
@@ -642,20 +626,16 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
}

static struct virtio_scsi_target_state *virtscsi_alloc_tgt(
- struct virtio_scsi *vscsi, u32 sg_elems)
+ struct virtio_scsi *vscsi)
{
struct virtio_scsi_target_state *tgt;
gfp_t gfp_mask = GFP_KERNEL;

- /* We need extra sg elements at head and tail. */
- tgt = kmalloc(sizeof(*tgt) + sizeof(tgt->sg[0]) * (sg_elems + 2),
- gfp_mask);
-
+ tgt = kmalloc(sizeof(*tgt), gfp_mask);
if (!tgt)
return NULL;

spin_lock_init(&tgt->tgt_lock);
- sg_init_table(tgt->sg, sg_elems + 2);
atomic_set(&tgt->reqs, 0);

/*
@@ -698,7 +678,7 @@ static int virtscsi_init(struct virtio_device *vdev,
struct virtio_scsi *vscsi, int num_targets)
{
int err;
- u32 i, sg_elems;
+ u32 i;
u32 num_vqs;
vq_callback_t **callbacks;
const char **names;
@@ -740,9 +720,6 @@ static int virtscsi_init(struct virtio_device *vdev,
if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
virtscsi_kick_event_all(vscsi);

- /* We need to know how many segments before we allocate. */
- sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
-
vscsi->tgt = kmalloc(num_targets *
sizeof(struct virtio_scsi_target_state *), GFP_KERNEL);
if (!vscsi->tgt) {
@@ -750,7 +727,7 @@ static int virtscsi_init(struct virtio_device *vdev,
goto out;
}
for (i = 0; i < num_targets; i++) {
- vscsi->tgt[i] = virtscsi_alloc_tgt(vscsi, sg_elems);
+ vscsi->tgt[i] = virtscsi_alloc_tgt(vscsi);
if (!vscsi->tgt[i]) {
err = -ENOMEM;
goto out;

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 693187d..d359e35 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -155,6 +155,9 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,

num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);

+ /* Remove the scatterlist terminator; we will tack on more entries below. */
+ vblk->sg[num + out - 1].page_link &= ~0x2;
+
if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 9c5aeea..fda723c 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -126,12 +126,14 @@ struct vring_virtqueue

/* Set up an indirect table of descriptors and add it to the queue. */
static int vring_add_indirect(struct vring_virtqueue *vq,
- struct scatterlist sg[],
+ struct scatterlist *sg_out,
unsigned int out,
+ struct scatterlist *sg_in,
unsigned int in,
gfp_t gfp)
{
struct vring_desc *desc;
+ struct scatterlist *sg_last = NULL;
unsigned head;
int i;

@@ -139,20 +141,23 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
if (!desc)
return -ENOMEM;

- /* Transfer entries from the sg list into the indirect page */
+ /* Transfer entries from the sg_out list into the indirect page */
for (i = 0; i < out; i++) {
desc[i].flags = VRING_DESC_F_NEXT;
- desc[i].addr = sg_phys(sg);
- desc[i].len = sg->length;
+ desc[i].addr = sg_phys(sg_out);
+ desc[i].len = sg_out->length;
desc[i].next = i+1;
- sg++;
+ sg_last = sg_out;
+ sg_out = sg_next(sg_out);
}
+ if (!sg_in)
+ sg_in = sg_out ? sg_out : ++sg_last;
for (; i < (out + in); i++) {
desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
- desc[i].addr = sg_phys(sg);
- desc[i].len = sg->length;
+ desc[i].addr = sg_phys(sg_in);
+ desc[i].len = sg_in->length;
desc[i].next = i+1;
- sg++;
+ sg_in = sg_next(sg_in);
}

/* Last one doesn't continue. */
@@ -189,36 +194,44 @@ int virtqueue_get_queue_index(struct virtqueue *_vq)
EXPORT_SYMBOL_GPL(virtqueue_get_queue_index);

/**
- * virtqueue_add_buf - expose buffer to other end
+ * virtqueue_add_buf_sg - expose buffer to other end
* @vq: the struct virtqueue we're talking about.
- * @sg: the description of the buffer(s).
+ * @sg_out: the description of the output buffer(s).
* @out_num: the number of sg readable by other side
- * @in_num: the number of sg which are writable (after readable ones)
+ * @sg_in: the description of the input buffer(s).
+ * @in_num: the number of sg which are writable
* @data: the token identifying the buffer.
* @gfp: how to do memory allocations (if necessary).
*
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
*
+ * If sg_in is NULL, the writable entries follow the readable ones
+ * in sg_out, after the first out_num entries.
+ *
* Returns remaining capacity of queue or a negative error
* (ie. ENOSPC). Note that it only really makes sense to treat all
* positive return values as "available": indirect buffers mean that
* we can put an entire sg[] array inside a single queue entry.
*/
-int virtqueue_add_buf(struct virtqueue *_vq,
- struct scatterlist sg[],
- unsigned int out,
- unsigned int in,
- void *data,
- gfp_t gfp)
+int virtqueue_add_buf_sg(struct virtqueue *_vq,
+ struct scatterlist *sg_out,
+ unsigned int out,
+ struct scatterlist *sg_in,
+ unsigned int in,
+ void *data,
+ gfp_t gfp)
{
struct vring_virtqueue *vq = to_vvq(_vq);
+ struct scatterlist *sg_last = NULL;
unsigned int i, avail, uninitialized_var(prev);
int head;

START_USE(vq);

BUG_ON(data == NULL);
+ BUG_ON(!sg_out && !sg_in);
+ BUG_ON(out + in == 0);

#ifdef DEBUG
{
@@ -236,13 +249,12 @@ int virtqueue_add_buf(struct virtqueue *_vq,
/* If the host supports indirect descriptor tables, and we have multiple
* buffers, then go indirect. FIXME: tune this threshold */
if (vq->indirect && (out + in) > 1 && vq->num_free) {
- head = vring_add_indirect(vq, sg, out, in, gfp);
+ head = vring_add_indirect(vq, sg_out, out, sg_in, in, gfp);
if (likely(head >= 0))
goto add_head;
}

BUG_ON(out + in > vq->vring.num);
- BUG_ON(out + in == 0);

if (vq->num_free < out + in) {
pr_debug("Can't add buf len %i - avail = %i\n",
@@ -262,17 +274,20 @@ int virtqueue_add_buf(struct virtqueue *_vq,
head = vq->free_head;
for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
- vq->vring.desc[i].addr = sg_phys(sg);
- vq->vring.desc[i].len = sg->length;
+ vq->vring.desc[i].addr = sg_phys(sg_out);
+ vq->vring.desc[i].len = sg_out->length;
prev = i;
- sg++;
+ sg_last = sg_out;
+ sg_out = sg_next(sg_out);
}
+ if (!sg_in)
+ sg_in = sg_out ? sg_out : ++sg_last;
for (; in; i = vq->vring.desc[i].next, in--) {
vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
- vq->vring.desc[i].addr = sg_phys(sg);
- vq->vring.desc[i].len = sg->length;
+ vq->vring.desc[i].addr = sg_phys(sg_in);
+ vq->vring.desc[i].len = sg_in->length;
prev = i;
- sg++;
+ sg_in = sg_next(sg_in);
}
/* Last one doesn't continue. */
vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
@@ -305,6 +320,34 @@ add_head:

return vq->num_free;
}
+EXPORT_SYMBOL_GPL(virtqueue_add_buf_sg);
+
+/**
+ * virtqueue_add_buf - expose buffer to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: the description of the buffer(s).
+ * @out_num: the number of sg readable by other side
+ * @in_num: the number of sg which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns remaining capacity of queue or a negative error
+ * (ie. ENOSPC). Note that it only really makes sense to treat all
+ * positive return values as "available": indirect buffers mean that
+ * we can put an entire sg[] array inside a single queue entry.
+ */
+int virtqueue_add_buf(struct virtqueue *_vq,
+ struct scatterlist sg[],
+ unsigned int out,
+ unsigned int in,
+ void *data,
+ gfp_t gfp)
+{
+ return virtqueue_add_buf_sg(_vq, sg, out, NULL, in, data, gfp);
+}
EXPORT_SYMBOL_GPL(virtqueue_add_buf);

/**
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 0ac3d3c..f0fe367 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -25,6 +25,14 @@ struct virtqueue {
void *priv;
};

+int virtqueue_add_buf_sg(struct virtqueue *vq,
+ struct scatterlist *sg_out,
+ unsigned int out_num,
+ struct scatterlist *sg_in,
+ unsigned int in_num,
+ void *data,
+ gfp_t gfp);
+
int virtqueue_add_buf(struct virtqueue *vq,
struct scatterlist sg[],
unsigned int out_num,

--
1.7.1