[PATCH v1 18/37] [CIFS] SMBD: Implement API for upper layer to send data

From: Long Li
Date: Wed Aug 02 2017 - 16:22:03 EST


From: Long Li <longli@xxxxxxxxxxxxx>

Implement cifs_rdma_write for sending upper layer data. The upper layer uses this function to do an RDMA send. This function is also used to pass SMB packets for doing an RDMA read/write via memory registration.
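
As an illustration only (not part of this patch), an upper layer might call the new API as in the hypothetical sketch below; header_buf, header_len and info are placeholders, and the first 4 bytes of iov[0] hold the length field used only by the TCP transport, which cifs_rdma_write strips before sending:

	/* hypothetical usage sketch; identifiers are placeholders */
	struct kvec iov[1];
	struct smb_rqst rqst = { 0 };
	int rc;

	iov[0].iov_base = header_buf;	/* begins with the 4-byte length field */
	iov[0].iov_len = header_len;
	rqst.rq_iov = iov;
	rqst.rq_nvec = 1;

	rc = cifs_rdma_write(info, &rqst);
	if (rc)
		/* transport disconnected or post send failed */
		return rc;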

Signed-off-by: Long Li <longli@xxxxxxxxxxxxx>
---
fs/cifs/cifsrdma.c | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/cifs/cifsrdma.h | 5 ++
2 files changed, 182 insertions(+)

diff --git a/fs/cifs/cifsrdma.c b/fs/cifs/cifsrdma.c
index ef21f1c..eb48651 100644
--- a/fs/cifs/cifsrdma.c
+++ b/fs/cifs/cifsrdma.c
@@ -229,6 +229,10 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
request->sge[i].length,
DMA_TO_DEVICE);

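+ /* this send completed; if it was the last one pending, wake the writer waiting in cifs_rdma_write() */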
+ if (atomic_dec_and_test(&request->info->send_pending))
+ wake_up(&request->info->wait_send_pending);
+
kfree(request->sge);
mempool_free(request, request->info->request_mempool);
}
@@ -551,12 +555,14 @@ static int cifs_rdma_post_send_negotiate_req(struct cifs_rdma_info *info)
request->sge[0].addr,
request->sge[0].length, request->sge[0].lkey);

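+ /* count this send; send_done() decrements send_pending when it completes */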
+ atomic_inc(&info->send_pending);
rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
if (!rc)
return 0;

// if we reach here, post send failed
log_rdma_send("ib_post_send failed rc=%d\n", rc);
+ atomic_dec(&info->send_pending);
ib_dma_unmap_single(info->id->device, request->sge[0].addr,
request->sge[0].length, DMA_TO_DEVICE);

@@ -662,12 +668,14 @@ static int cifs_rdma_post_send_page(struct cifs_rdma_info *info, struct page *pa
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;

+ atomic_inc(&info->send_pending);
rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
if (!rc)
return 0;

// post send failed
log_rdma_send("ib_post_send failed rc=%d\n", rc);
+ atomic_dec(&info->send_pending);

dma_mapping_failed:
for (i=0; i<2; i++)
@@ -768,11 +776,13 @@ static int cifs_rdma_post_send_empty(struct cifs_rdma_info *info)
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;

+ atomic_inc(&info->send_pending);
rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
if (!rc)
return 0;

log_rdma_send("ib_post_send failed rc=%d\n", rc);
+ atomic_dec(&info->send_pending);
ib_dma_unmap_single(info->id->device, request->sge[0].addr,
request->sge[0].length, DMA_TO_DEVICE);

@@ -885,12 +895,14 @@ static int cifs_rdma_post_send_data(
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;

+ atomic_inc(&info->send_pending);
rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
if (!rc)
return 0;

// post send failed
log_rdma_send("ib_post_send failed rc=%d\n", rc);
+ atomic_dec(&info->send_pending);

dma_mapping_failure:
for (i=0; i<n_vec+1; i++)
@@ -1185,6 +1197,9 @@ struct cifs_rdma_info* cifs_create_rdma_session(
allocate_receive_buffers(info, info->receive_credit_max);
init_waitqueue_head(&info->wait_send_queue);

+ init_waitqueue_head(&info->wait_send_pending);
+ atomic_set(&info->send_pending, 0);
+
init_waitqueue_head(&info->wait_recv_pending);
atomic_set(&info->recv_pending, 0);

@@ -1202,3 +1217,165 @@ struct cifs_rdma_info* cifs_create_rdma_session(
kfree(info);
return NULL;
}
+
+/*
+ * Write data to transport
+ * Each rqst is transported as an SMBDirect payload
+ * rqst: the data to write
+ * return value: 0 if successfully written, otherwise error code
+ */
+int cifs_rdma_write(struct cifs_rdma_info *info, struct smb_rqst *rqst)
+{
+ struct kvec vec;
+ int nvecs;
+ int size;
+ int buflen = 0, remaining_data_length;
+ int start, i, j;
+ int max_iov_size = info->max_send_size - sizeof(struct smbd_data_transfer);
+ struct kvec *iov;
+ int rc;
+
+ if (info->transport_status != CIFS_RDMA_CONNECTED) {
+ log_cifs_write("disconnected returning -EIO\n");
+ return -EIO;
+ }
+
+ iov = kzalloc(sizeof(struct kvec) * rqst->rq_nvec, GFP_KERNEL);
+ if (!iov) {
+ log_cifs_write("failed to allocate iov returing -ENOMEM\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * Strip the first 4 bytes (MS-SMB2 section 2.1):
+ * they are used only for the TCP transport
+ */
+ iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4;
+ iov[0].iov_len = rqst->rq_iov[0].iov_len - 4;
+ buflen += iov[0].iov_len;
+
+ /* total up iov array first */
+ for (i = 1; i < rqst->rq_nvec; i++) {
+ iov[i].iov_base = rqst->rq_iov[i].iov_base;
+ iov[i].iov_len = rqst->rq_iov[i].iov_len;
+ buflen += iov[i].iov_len;
+ }
+
+ /* add in the page array if there is one */
+ if (rqst->rq_npages) {
+ buflen += rqst->rq_pagesz * (rqst->rq_npages - 1);
+ buflen += rqst->rq_tailsz;
+ }
+
+ if (buflen + sizeof(struct smbd_data_transfer) >
+ info->max_fragmented_send_size) {
+ log_cifs_write("payload size %d > max size %d\n",
+ buflen, info->max_fragmented_send_size);
+ rc = -EINVAL;
+ goto done;
+ }
+
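+ /*
+ * remaining_data_length is carried in each packet to tell the peer
+ * how many payload bytes are still to follow.
+ */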
+ remaining_data_length = buflen;
+
+ log_cifs_write("rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
+ "rq_tailsz=%d buflen=%d\n",
+ rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
+ rqst->rq_tailsz, buflen);
+
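+ /*
+ * Send the iov array, coalescing consecutive vecs into sends of at
+ * most max_iov_size bytes; a vec that is itself larger than
+ * max_iov_size is split into multiple sends. Skip iov[0] if
+ * stripping the 4-byte length field left it empty.
+ */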
+ start = i = iov[0].iov_len ? 0 : 1;
+ buflen = 0;
+ while (true) {
+ buflen += iov[i].iov_len;
+ if (buflen > max_iov_size) {
+ if (i > start) {
+ remaining_data_length -=
+ (buflen - iov[i].iov_len);
+ log_cifs_write("sending iov[] from start=%d "
+ "i=%d nvecs=%d "
+ "remaining_data_length=%d\n",
+ start, i, i-start,
+ remaining_data_length);
+ rc = cifs_rdma_post_send_data(
+ info, &iov[start], i-start,
+ remaining_data_length);
+ if (rc)
+ goto done;
+ } else {
+ /* iov[start] is too big: break it into nvecs pieces */
+ nvecs = (buflen + max_iov_size - 1) / max_iov_size;
+ log_cifs_write("iov[%d] iov_base=%p buflen=%d"
+ " break to %d vectors\n",
+ start, iov[start].iov_base,
+ buflen, nvecs);
+ for (j = 0; j < nvecs; j++) {
+ vec.iov_base = (char *)iov[start].iov_base +
+ j * max_iov_size;
+ vec.iov_len = max_iov_size;
+ if (j == nvecs - 1)
+ vec.iov_len = buflen -
+ max_iov_size * (nvecs - 1);
+ remaining_data_length -= vec.iov_len;
+ log_cifs_write(
+ "sending vec j=%d iov_base=%p"
+ " iov_len=%lu "
+ "remaining_data_length=%d\n",
+ j, vec.iov_base, vec.iov_len,
+ remaining_data_length);
+ rc = cifs_rdma_post_send_data(
+ info, &vec, 1,
+ remaining_data_length);
+ if (rc)
+ goto done;
+ }
+ i++;
+ }
+ start = i;
+ buflen = 0;
+ } else {
+ i++;
+ if (i == rqst->rq_nvec) {
+ /* send out all remaining vecs and we are done */
+ remaining_data_length -= buflen;
+ log_cifs_write(
+ "sending iov[] from start=%d i=%d "
+ "nvecs=%d remaining_data_length=%d\n",
+ start, i, i-start,
+ remaining_data_length);
+ rc = cifs_rdma_post_send_data(info, &iov[start],
+ i-start, remaining_data_length);
+ if (rc)
+ goto done;
+ break;
+ }
+ }
+ log_cifs_write("looping i=%d buflen=%d\n", i, buflen);
+ }
+
+ /* now send the page payload, breaking each page into max_iov_size chunks as needed */
+ for (i = 0; i < rqst->rq_npages; i++) {
+ buflen = (i == rqst->rq_npages - 1) ?
+ rqst->rq_tailsz : rqst->rq_pagesz;
+ nvecs = (buflen + max_iov_size - 1) / max_iov_size;
+ log_cifs_write("sending pages buflen=%d nvecs=%d\n",
+ buflen, nvecs);
+ for (j = 0; j < nvecs; j++) {
+ size = max_iov_size;
+ if (j == nvecs - 1)
+ size = buflen - j * max_iov_size;
+ remaining_data_length -= size;
+ log_cifs_write("sending pages i=%d offset=%d size=%d"
+ " remaining_data_length=%d\n",
+ i, j * max_iov_size, size, remaining_data_length);
+ rc = cifs_rdma_post_send_page(
+ info, rqst->rq_pages[i], j * max_iov_size,
+ size, remaining_data_length);
+ if (rc)
+ goto done;
+ }
+ }
+
+done:
+ kfree(iov);
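+ /* wait for all outstanding sends to complete before returning */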
+ wait_event(info->wait_send_pending, atomic_read(&info->send_pending) == 0);
+ return rc;
+}
diff --git a/fs/cifs/cifsrdma.h b/fs/cifs/cifsrdma.h
index 9618e0b..90746a4 100644
--- a/fs/cifs/cifsrdma.h
+++ b/fs/cifs/cifsrdma.h
@@ -73,6 +73,9 @@ struct cifs_rdma_info {
atomic_t receive_credits;
atomic_t receive_credit_target;

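+ /* outstanding sends; send_done() wakes wait_send_pending when the count drops to zero */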
+ atomic_t send_pending;
+ wait_queue_head_t wait_send_pending;
+
atomic_t recv_pending;
wait_queue_head_t wait_recv_pending;

@@ -195,4 +198,6 @@ struct cifs_rdma_response {
// Create a SMBDirect session
struct cifs_rdma_info* cifs_create_rdma_session(
struct TCP_Server_Info *server, struct sockaddr *dstaddr);
+
+int cifs_rdma_write(struct cifs_rdma_info *info, struct smb_rqst *rqst);
#endif
--
2.7.4