[PATCH v2 7/8] thunderbolt: Networking transmit and receive

From: Amir Levy
Date: Wed Jun 29 2016 - 04:38:22 EST


Handle transmission to the second peer and reception from it.
This includes communication with the upper layer (the network stack)
and configuration of the Thunderbolt(TM) HW.

Signed-off-by: Amir Levy <amir.jer.levy@xxxxxxxxx>
---
drivers/thunderbolt/icm_nhi.c | 15 +
drivers/thunderbolt/net.c | 1475 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 1490 insertions(+)

diff --git a/drivers/thunderbolt/icm_nhi.c b/drivers/thunderbolt/icm_nhi.c
index 060bb38..f8b0527 100644
--- a/drivers/thunderbolt/icm_nhi.c
+++ b/drivers/thunderbolt/icm_nhi.c
@@ -1045,6 +1045,7 @@ static irqreturn_t nhi_msi(int __always_unused irq, void *data)
{
struct tbt_nhi_ctxt *nhi_ctxt = data;
u32 isr0, isr1, imr0, imr1;
+ int i;

/* clear on read */
isr0 = ioread32(nhi_ctxt->iobase + REG_RING_NOTIFY_BASE);
@@ -1067,6 +1068,20 @@ static irqreturn_t nhi_msi(int __always_unused irq, void *data)

spin_unlock(&nhi_ctxt->lock);

+ for (i = 0; i < nhi_ctxt->num_ports; ++i) {
+ struct net_device *net_dev =
+ nhi_ctxt->net_devices[i].net_dev;
+ if (net_dev) {
+ u8 path = PATH_FROM_PORT(nhi_ctxt->num_paths, i);
+
+ if (isr0 & REG_RING_INT_RX_PROCESSED(
+ path, nhi_ctxt->num_paths))
+ tbt_net_rx_msi(net_dev);
+ if (isr0 & REG_RING_INT_TX_PROCESSED(path))
+ tbt_net_tx_msi(net_dev);
+ }
+ }
+
if (isr0 & REG_RING_INT_RX_PROCESSED(TBT_ICM_RING_NUM,
nhi_ctxt->num_paths))
schedule_work(&nhi_ctxt->icm_msgs_work);
diff --git a/drivers/thunderbolt/net.c b/drivers/thunderbolt/net.c
index e983dfb..108de28 100644
--- a/drivers/thunderbolt/net.c
+++ b/drivers/thunderbolt/net.c
@@ -135,6 +135,17 @@ struct approve_inter_domain_connection_cmd {

};

+/* header preceding the data of each frame in an RX/TX buffer */
+struct tbt_frame_header {
+ /* size of the data in the frame, not counting this header */
+ __le32 frame_size;
+ /* running index of the frame within its packet, first frame is 0 */
+ __le16 frame_index;
+ /* ID of the frame, used to match frames to a specific packet */
+ __le16 frame_id;
+ /* how many frames make up the full packet */
+ __le32 frame_count;
+};
+
enum neg_event {
RECEIVE_LOGOUT = NUM_MEDIUM_STATUSES,
RECEIVE_LOGIN_RESPONSE,
@@ -142,15 +153,81 @@ enum neg_event {
NUM_NEG_EVENTS
};

+/* result of validating one RX descriptor in tbt_net_check_frame() */
+enum frame_status {
+ /* valid continuation frame of the packet being assembled */
+ GOOD_FRAME,
+ /* valid frame that starts a new packet */
+ GOOD_AS_FIRST_FRAME,
+ /* valid first frame addressed to a non-broadcast multicast MAC */
+ GOOD_AS_FIRST_MULTICAST_FRAME,
+ /* the descriptor done bit is not set yet */
+ FRAME_NOT_READY,
+ /* frame failed validation or was filtered out and is skipped */
+ FRAME_ERROR,
+};
+
+/* bit numbers, applied with BIT(), of the flags in tbt_port.packet_filters */
+enum packet_filter {
+ /* all multicast MAC addresses */
+ PACKET_TYPE_ALL_MULTICAST,
+ /* all types of MAC addresses: multicast, unicast and broadcast */
+ PACKET_TYPE_PROMISCUOUS,
+ /* all unicast MAC addresses */
+ PACKET_TYPE_UNICAST_PROMISCUOUS,
+};
+
enum disconnect_path_stage {
STAGE_1 = BIT(0),
STAGE_2 = BIT(1)
};

+/* traffic and error counters kept per port (see tbt_port.stats) */
+struct tbt_net_stats {
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_errors;
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 rx_length_errors;
+ u64 rx_over_errors;
+ u64 rx_crc_errors;
+ u64 rx_missed_errors;
+ u64 multicast;
+};
+
+/*
+ * names of the counters, listed in the same order as the fields of
+ * struct tbt_net_stats; presumably the ethtool statistics strings -
+ * TODO confirm against the ethtool ops
+ */
+static const char tbt_net_gstrings_stats[][ETH_GSTRING_LEN] = {
+ "tx_packets",
+ "tx_bytes",
+ "tx_errors",
+ "rx_packets",
+ "rx_bytes",
+ "rx_length_errors",
+ "rx_over_errors",
+ "rx_crc_errors",
+ "rx_missed_errors",
+ "multicast",
+};
+
+/* bookkeeping for one buffer of a TX or RX ring */
+struct tbt_buffer {
+ /* DMA address of the frame buffer (TX) or of the mapped page (RX) */
+ dma_addr_t dma;
+ union {
+ /* TX: CPU address of the frame, which starts with its header */
+ struct tbt_frame_header *hdr;
+ /* RX: page that receives the frame */
+ struct page *page;
+ };
+ /* offset of the frame from the start of the page/allocation */
+ u32 page_offset;
+};
+
+/* state of one descriptor ring (TX or RX) of a port */
+struct tbt_desc_ring {
+ /* pointer to the descriptor ring memory */
+ struct tbt_buf_desc *desc;
+ /* DMA address of the descriptor ring */
+ dma_addr_t dma;
+ /* array of buffer structs, one per descriptor */
+ struct tbt_buffer *buffers;
+ /* last descriptor that was associated with a buffer */
+ u16 last_allocated;
+ /* next descriptor to check for DD status bit */
+ u16 next_to_clean;
+};
+
/**
* struct tbt_port - the basic tbt_port structure
* @tbt_nhi_ctxt: context of the nhi controller.
* @net_dev: networking device object.
+* @napi: NAPI context used to poll the receive ring.
* @login_retry_work: work queue for sending login requests.
* @login_response_work: work queue for sending login responses.
* @work_struct logout_work: work queue for sending logout requests.
@@ -166,6 +243,11 @@ enum disconnect_path_stage {
* @login_retry_count: counts number of login retries sent.
* @local_depth: depth of the remote peer in the chain.
* @transmit_path: routing parameter for the icm.
+* @tx_ring: transmit ring from where the packets are sent.
+* @rx_ring: receive ring where the packets are received.
+* @stats: network statistics of the rx/tx packets.
+* @packet_filters: defines filters for the received packets.
+* @multicast_hash_table: hash table of multicast addresses.
* @frame_id: counting ID of frames.
* @num: port number.
* @local_path: routing parameter for the icm.
@@ -175,6 +257,7 @@ enum disconnect_path_stage {
struct tbt_port {
struct tbt_nhi_ctxt *nhi_ctxt;
struct net_device *net_dev;
+ struct napi_struct napi;
struct delayed_work login_retry_work;
struct work_struct login_response_work;
struct work_struct logout_work;
@@ -190,6 +273,17 @@ struct tbt_port {
u8 login_retry_count;
u8 local_depth;
u8 transmit_path;
+ struct tbt_desc_ring tx_ring ____cacheline_aligned_in_smp;
+ struct tbt_desc_ring rx_ring;
+ struct tbt_net_stats stats;
+ u32 packet_filters;
+ /*
+ * hash table of 1024 boolean entries with hashing of
+ * the multicast address
+ */
+ u32 multicast_hash_table[DIV_ROUND_UP(
+ TBT_NET_MULTICAST_HASH_TABLE_SIZE,
+ BITS_PER_U32)];
u16 frame_id;
u8 num;
u8 local_path;
@@ -236,6 +330,8 @@ static void tbt_net_tear_down(struct net_device *net_dev, bool send_logout)
(port->local_path * REG_OPTS_STEP);
u32 rx_reg_val = ioread32(rx_reg) & ~REG_OPTS_E2E_EN;

+ napi_disable(&port->napi);
+
tx_reg = iobase + REG_TX_OPTIONS_BASE +
(port->local_path * REG_OPTS_STEP);
tx_reg_val = ioread32(tx_reg) & ~REG_OPTS_E2E_EN;
@@ -277,8 +373,1340 @@ static void tbt_net_tear_down(struct net_device *net_dev, bool send_logout)
port->nhi_ctxt->num_paths);
spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
}
+
+ port->rx_ring.next_to_clean = 0;
+ port->rx_ring.last_allocated = TBT_NET_NUM_RX_BUFS - 1;
+
+}
+
+/*
+ * TX notification for a port when the shared MSI handler is in use.
+ * Wakes the queue once the number of buffers between producer and
+ * consumer reaches TX_WAKE_THRESHOLD, otherwise enables the TX
+ * interrupt again so that a later completion is reported.
+ */
+void tbt_net_tx_msi(struct net_device *net_dev)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+	void __iomem *iobase = port->nhi_ctxt->iobase;
+	u32 ring_state, prod, cons;
+
+	ring_state = ioread32(TBT_RING_CONS_PROD_REG(iobase, REG_TX_RING_BASE,
+						      port->local_path));
+	prod = TBT_REG_RING_PROD_EXTRACT(ring_state);
+	cons = TBT_REG_RING_CONS_EXTRACT(ring_state);
+
+	/* ignore register contents that are not valid ring indexes */
+	if (!(prod < TBT_NET_NUM_TX_BUFS && cons < TBT_NET_NUM_TX_BUFS))
+		return;
+
+	if (TBT_NUM_BUFS_BETWEEN(prod, cons, TBT_NET_NUM_TX_BUFS) >=
+	    TX_WAKE_THRESHOLD) {
+		netif_wake_queue(port->net_dev);
+		return;
+	}
+
+	/* still below the threshold: enable TX interrupt */
+	spin_lock(&port->nhi_ctxt->lock);
+	RING_INT_ENABLE_TX(iobase, port->local_path);
+	spin_unlock(&port->nhi_ctxt->lock);
+}
+
+/*
+ * MSI-X handler of the TX ring of a port.
+ * Once the number of buffers between producer and consumer reaches
+ * TX_WAKE_THRESHOLD the TX interrupt is disabled and the queue is woken.
+ */
+static irqreturn_t tbt_net_tx_msix(int __always_unused irq, void *data)
+{
+	struct tbt_port *port = data;
+	void __iomem *iobase = port->nhi_ctxt->iobase;
+	u32 ring_state, prod, cons;
+
+	ring_state = ioread32(TBT_RING_CONS_PROD_REG(iobase,
+						      REG_TX_RING_BASE,
+						      port->local_path));
+	prod = TBT_REG_RING_PROD_EXTRACT(ring_state);
+	cons = TBT_REG_RING_CONS_EXTRACT(ring_state);
+
+	/* nothing to do for invalid ring indexes */
+	if (prod >= TBT_NET_NUM_TX_BUFS || cons >= TBT_NET_NUM_TX_BUFS)
+		return IRQ_HANDLED;
+
+	/* keep the queue stopped while below the wake threshold */
+	if (TBT_NUM_BUFS_BETWEEN(prod, cons, TBT_NET_NUM_TX_BUFS) <
+	    TX_WAKE_THRESHOLD)
+		return IRQ_HANDLED;
+
+	spin_lock(&port->nhi_ctxt->lock);
+	/* disable TX interrupt */
+	RING_INT_DISABLE_TX(iobase, port->local_path);
+	spin_unlock(&port->nhi_ctxt->lock);
+
+	netif_wake_queue(port->net_dev);
+
+	return IRQ_HANDLED;
+}
+
+/* RX notification when the shared MSI handler is in use: schedule NAPI */
+void tbt_net_rx_msi(struct net_device *net_dev)
+{
+	struct tbt_port *rx_port = netdev_priv(net_dev);
+	struct napi_struct *napi = &rx_port->napi;
+
+	napi_schedule_irqoff(napi);
+}
+
+/*
+ * MSI-X handler of the RX ring of a port.
+ * If NAPI can be scheduled, the RX interrupt is disabled and polling
+ * is scheduled; tbt_net_poll() enables the interrupt again when done.
+ */
+static irqreturn_t tbt_net_rx_msix(int __always_unused irq, void *data)
+{
+	struct tbt_port *port = data;
+	struct tbt_nhi_ctxt *nhi_ctx;
+
+	/* polling is already scheduled or NAPI is being disabled */
+	if (unlikely(!napi_schedule_prep(&port->napi)))
+		return IRQ_HANDLED;
+
+	nhi_ctx = port->nhi_ctxt;
+
+	spin_lock(&nhi_ctx->lock);
+	/* disable RX interrupt */
+	RING_INT_DISABLE_RX(nhi_ctx->iobase, port->local_path,
+			    nhi_ctx->num_paths);
+	spin_unlock(&nhi_ctx->lock);
+
+	__napi_schedule_irqoff(&port->napi);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Copy the protocol headers found at the start of the first fragment
+ * into the linear area of the skb and shrink the fragment accordingly.
+ */
+static void tbt_net_pull_tail(struct sk_buff *skb)
+{
+	skb_frag_t *first_frag = &skb_shinfo(skb)->frags[0];
+	/*
+	 * the fragment address is used directly, without kmap, since the
+	 * pages are expected to come from the lowmem pool
+	 */
+	unsigned char *frag_va = skb_frag_address(first_frag);
+	unsigned int hdr_len = eth_get_headlen(frag_va, TBT_NET_RX_HDR_SIZE);
+
+	/* the copy length is rounded up to sizeof(long) to optimize memcpy */
+	skb_copy_to_linear_data(skb, frag_va, ALIGN(hdr_len, sizeof(long)));
+
+	/* account for the bytes that moved from the fragment to the head */
+	skb->tail += hdr_len;
+	skb->data_len -= hdr_len;
+	first_frag->page_offset += hdr_len;
+	skb_frag_size_sub(first_frag, hdr_len);
+}
+
+/*
+ * Make sure an RX buffer has a page mapped for DMA from the device.
+ * A buffer that still owns a page is left untouched.
+ * Returns false if the page allocation or the DMA mapping failed.
+ */
+static inline bool tbt_net_alloc_mapped_page(struct device *dev,
+					     struct tbt_buffer *buf, gfp_t gfp)
+{
+	struct page *new_page;
+
+	/* the buffer already has a page */
+	if (buf->page)
+		return true;
+
+	new_page = alloc_page(gfp | __GFP_COLD);
+	if (unlikely(!new_page))
+		return false;
+
+	buf->dma = dma_map_page(dev, new_page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, buf->dma)) {
+		__free_page(new_page);
+		return false;
+	}
+
+	buf->page = new_page;
+	buf->page_offset = 0;
+
+	return true;
+}
+
+/*
+ * Hand up to cleaned_count RX buffers back to the hardware, starting
+ * right after the last allocated descriptor, and write the index of the
+ * last prepared descriptor to the ring register reg.
+ * Stops at the first buffer that cannot get a mapped page.
+ * Returns true if at least one descriptor was prepared.
+ */
+static bool tbt_net_alloc_rx_buffers(struct device *dev,
+				     struct tbt_desc_ring *rx_ring,
+				     u16 cleaned_count, void __iomem *reg,
+				     gfp_t gfp)
+{
+	u16 idx = (rx_ring->last_allocated + 1) & (TBT_NET_NUM_RX_BUFS - 1);
+	bool refilled = false;
+	u16 n;
+
+	for (n = 0; n < cleaned_count; n++) {
+		struct tbt_buf_desc *desc = rx_ring->desc + idx;
+		struct tbt_buffer *buf = rx_ring->buffers + idx;
+
+		/* reset the status so next_to_clean won't see an old buffer */
+		desc->attributes = cpu_to_le32(DESC_ATTR_REQ_STS |
+					       DESC_ATTR_INT_EN);
+		if (!tbt_net_alloc_mapped_page(dev, buf, gfp))
+			break;
+
+		desc->phys = cpu_to_le64(buf->dma + buf->page_offset);
+		rx_ring->last_allocated = idx;
+		idx = (idx + 1) & (TBT_NET_NUM_RX_BUFS - 1);
+		refilled = true;
+	}
+
+	if (!refilled)
+		return false;
+
+	iowrite32((rx_ring->last_allocated << REG_RING_CONS_SHIFT) &
+		  REG_RING_CONS_MASK, reg);
+
+	return true;
+}
+
+/*
+ * Tell whether the bit that the hash of ether_addr selects is set in
+ * the multicast hash table.
+ */
+static inline bool tbt_net_multicast_mac_set(const u32 *multicast_hash_table,
+					      const u8 *ether_addr)
+{
+	u16 hash = TBT_NET_ETHER_ADDR_HASH(ether_addr);
+	u32 word = multicast_hash_table[hash / BITS_PER_U32];
+
+	return (word & BIT(hash % BITS_PER_U32)) != 0;
+}
+
+/*
+ * Validate the frame held by RX descriptor frame_num.
+ *
+ * count, id and size describe the packet being assembled: when *count is 0
+ * no packet is in progress and the frame is checked as the first frame of
+ * a packet, in which case the three values are filled in from its header.
+ * Otherwise the frame must continue the packet: its frame count must equal
+ * *count, its index must equal index and its ID must equal *id; a frame
+ * that does not match is re-checked as the first frame of a new packet.
+ *
+ * Returns the status of the frame. On FRAME_ERROR next_to_clean is moved
+ * just past frame_num; whenever the frame is checked as a first frame
+ * next_to_clean is moved to frame_num.
+ */
+static enum frame_status tbt_net_check_frame(struct tbt_port *port,
+ u16 frame_num, u32 *count,
+ u16 index, u16 *id, u32 *size)
+{
+ struct tbt_desc_ring *rx_ring = &port->rx_ring;
+ __le32 desc_attr = rx_ring->desc[frame_num].attributes;
+ enum frame_status res = GOOD_AS_FIRST_FRAME;
+ u32 len, frame_count, frame_size;
+ struct tbt_frame_header *hdr;
+
+ if (!(desc_attr & cpu_to_le32(DESC_ATTR_DESC_DONE)))
+ return FRAME_NOT_READY;
+
+ rmb(); /* read other fields from desc after checking DD */
+
+ if (unlikely(desc_attr & cpu_to_le32(DESC_ATTR_RX_CRC_ERR))) {
+ ++port->stats.rx_crc_errors;
+ goto err;
+ } else if (unlikely(desc_attr &
+ cpu_to_le32(DESC_ATTR_RX_BUF_OVRN_ERR))) {
+ ++port->stats.rx_over_errors;
+ goto err;
+ }
+
+ len = (le32_to_cpu(desc_attr) & DESC_ATTR_LEN_MASK)
+ >> DESC_ATTR_LEN_SHIFT;
+ /* a length field of 0 stands for the maximum frame size */
+ if (len == 0)
+ len = TBT_RING_MAX_FRAME_SIZE;
+ /* should be greater than just header i.e. contains data */
+ if (unlikely(len <= sizeof(struct tbt_frame_header))) {
+ ++port->stats.rx_length_errors;
+ goto err;
+ }
+
+ prefetchw(rx_ring->buffers[frame_num].page);
+ hdr = page_address(rx_ring->buffers[frame_num].page) +
+ rx_ring->buffers[frame_num].page_offset;
+ /* prefetch first cache line of first page */
+ prefetch(hdr);
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(&port->nhi_ctxt->pdev->dev,
+ rx_ring->buffers[frame_num].dma,
+ rx_ring->buffers[frame_num].page_offset,
+ TBT_RING_MAX_FRAME_SIZE,
+ DMA_FROM_DEVICE);
+
+ frame_count = le32_to_cpu(hdr->frame_count);
+ frame_size = le32_to_cpu(hdr->frame_size);
+
+ /* the data size must be non-zero and fit in the received length */
+ if (unlikely((frame_size > len - sizeof(struct tbt_frame_header)) ||
+ (frame_size == 0))) {
+ ++port->stats.rx_length_errors;
+ goto err;
+ }
+ /*
+ * In case we're in the middle of packet, validate the frame header
+ * based on first fragment of the packet
+ */
+ if (*count) {
+ /* check the frame count fits the count field */
+ if (frame_count != *count) {
+ ++port->stats.rx_length_errors;
+ goto check_as_first;
+ }
+
+ /*
+ * check the frame identifiers are incremented correctly,
+ * and id is matching
+ */
+ if ((le16_to_cpu(hdr->frame_index) != index) ||
+ (le16_to_cpu(hdr->frame_id) != *id)) {
+ ++port->stats.rx_missed_errors;
+ goto check_as_first;
+ }
+
+ *size += frame_size;
+ if (*size > TBT_NET_MTU) {
+ ++port->stats.rx_length_errors;
+ goto err;
+ }
+ res = GOOD_FRAME;
+ } else { /* start of packet, validate the frame header */
+ const u8 *addr;
+
+check_as_first:
+ rx_ring->next_to_clean = frame_num;
+
+ /* validate the first packet has a valid frame count */
+ if (unlikely(frame_count == 0 ||
+ frame_count > (TBT_NET_NUM_RX_BUFS / 4))) {
+ ++port->stats.rx_length_errors;
+ goto err;
+ }
+
+ /* validate the first packet has a valid frame index */
+ if (hdr->frame_index != 0) {
+ ++port->stats.rx_missed_errors;
+ goto err;
+ }
+
+ BUILD_BUG_ON(TBT_NET_RX_HDR_SIZE > TBT_RING_MAX_FRM_DATA_SZ);
+ if ((frame_count > 1) && (frame_size < TBT_NET_RX_HDR_SIZE)) {
+ ++port->stats.rx_length_errors;
+ goto err;
+ }
+
+ /* the packet data, starting with the destination MAC, follows the header */
+ addr = (u8 *)(hdr + 1);
+
+ /* check the packet can go through the filter */
+ if (is_multicast_ether_addr(addr)) {
+ if (!is_broadcast_ether_addr(addr)) {
+ if ((port->packet_filters &
+ (BIT(PACKET_TYPE_PROMISCUOUS) |
+ BIT(PACKET_TYPE_ALL_MULTICAST))) ||
+ tbt_net_multicast_mac_set(
+ port->multicast_hash_table, addr))
+ res = GOOD_AS_FIRST_MULTICAST_FRAME;
+ else
+ goto err;
+ }
+ } else if (!(port->packet_filters &
+ (BIT(PACKET_TYPE_PROMISCUOUS) |
+ BIT(PACKET_TYPE_UNICAST_PROMISCUOUS))) &&
+ !ether_addr_equal(port->net_dev->dev_addr, addr)) {
+ goto err;
+ }
+
+ *size = frame_size;
+ *count = frame_count;
+ *id = le16_to_cpu(hdr->frame_id);
+ }
+
+#if (PREFETCH_STRIDE < 128)
+ prefetch((u8 *)hdr + PREFETCH_STRIDE);
+#endif
+
+ return res;
+
+err:
+ rx_ring->next_to_clean = (frame_num + 1) & (TBT_NET_NUM_RX_BUFS - 1);
+ return FRAME_ERROR;
+}
+
+/*
+ * Size accounted for the data of a received frame (used as truesize).
+ * With several frames per page: frame_size plus the frame header, rounded
+ * up to L1_CACHE_BYTES, minus the header. Otherwise the maximum frame
+ * data size, regardless of frame_size.
+ */
+static inline unsigned int tbt_net_max_frm_data_size(
+ __maybe_unused u32 frame_size)
+{
+#if (TBT_NUM_FRAMES_PER_PAGE > 1)
+ return ALIGN(frame_size + sizeof(struct tbt_frame_header),
+ L1_CACHE_BYTES) -
+ sizeof(struct tbt_frame_header);
+#else
+ return TBT_RING_MAX_FRM_DATA_SZ;
+#endif
+}
+
+/*
+ * NAPI poll callback of a port.
+ *
+ * Validates the received frames, assembles the frames of each packet into
+ * an skb and passes it to the network stack, and gives the processed
+ * buffers back to the hardware.
+ *
+ * Returns budget when the budget was used up, so that polling continues.
+ * Otherwise completes NAPI, enables the RX interrupt and returns 0.
+ */
+static int tbt_net_poll(struct napi_struct *napi, int budget)
+{
+	struct tbt_port *port = container_of(napi, struct tbt_port, napi);
+	void __iomem *reg = TBT_RING_CONS_PROD_REG(port->nhi_ctxt->iobase,
+						   REG_RX_RING_BASE,
+						   port->local_path);
+	struct tbt_desc_ring *rx_ring = &port->rx_ring;
+	u16 cleaned_count = TBT_NUM_BUFS_BETWEEN(rx_ring->last_allocated,
+						 rx_ring->next_to_clean,
+						 TBT_NET_NUM_RX_BUFS);
+	unsigned long flags;
+	int rx_packets = 0;
+
+loop:
+	while (likely(rx_packets < budget)) {
+		struct sk_buff *skb;
+		enum frame_status status;
+		bool multicast = false;
+		u32 frame_count = 0, size;
+		u16 j, frame_id;
+		int i;
+
+		/*
+		 * return some buffers to hardware, one at a time is too slow
+		 * so allocate TBT_NET_RX_BUFFER_WRITE buffers at the same time
+		 */
+		if (cleaned_count >= TBT_NET_RX_BUFFER_WRITE) {
+			tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev,
+						 rx_ring, cleaned_count, reg,
+						 GFP_ATOMIC);
+			cleaned_count = 0;
+		}
+
+		/* frame_count is 0, so this checks the first frame of a packet */
+		status = tbt_net_check_frame(port, rx_ring->next_to_clean,
+					     &frame_count, 0, &frame_id,
+					     &size);
+		if (status == FRAME_NOT_READY)
+			break;
+
+		if (status == FRAME_ERROR) {
+			++cleaned_count;
+			continue;
+		}
+
+		multicast = (status == GOOD_AS_FIRST_MULTICAST_FRAME);
+
+		/*
+		 * i is incremented up to the frame_count frames received,
+		 * j cyclically goes over the location from the next frame
+		 * to clean in the ring
+		 */
+		j = (rx_ring->next_to_clean + 1);
+		j &= (TBT_NET_NUM_RX_BUFS - 1);
+		for (i = 1; i < frame_count; ++i) {
+			status = tbt_net_check_frame(port, j, &frame_count, i,
+						     &frame_id, &size);
+			/* wait for the rest of the packet to arrive */
+			if (status == FRAME_NOT_READY)
+				goto out;
+
+			j = (j + 1) & (TBT_NET_NUM_RX_BUFS - 1);
+
+			/* if a new frame is found, start over */
+			if (status == GOOD_AS_FIRST_FRAME ||
+			    status == GOOD_AS_FIRST_MULTICAST_FRAME) {
+				multicast = (status ==
+					     GOOD_AS_FIRST_MULTICAST_FRAME);
+				cleaned_count += i;
+				i = 0;
+				continue;
+			}
+
+			if (status == FRAME_ERROR) {
+				cleaned_count += (i + 1);
+				goto loop;
+			}
+		}
+
+		/* allocate a skb to store the frags */
+		skb = netdev_alloc_skb_ip_align(port->net_dev,
+						TBT_NET_RX_HDR_SIZE);
+		if (unlikely(!skb))
+			break;
+
+		/*
+		 * we will be copying header into skb->data in
+		 * tbt_net_pull_tail so it is in our interest to prefetch
+		 * it now to avoid a possible cache miss
+		 */
+		prefetchw(skb->data);
+
+		/*
+		 * if overall size of packet smaller than TBT_NET_RX_HDR_SIZE
+		 * which is a small buffer size we decided to allocate
+		 * as the base to RX
+		 */
+		if (size <= TBT_NET_RX_HDR_SIZE) {
+			struct tbt_buffer *buf =
+				&(rx_ring->buffers[rx_ring->next_to_clean]);
+			u8 *va = page_address(buf->page) + buf->page_offset +
+				 sizeof(struct tbt_frame_header);
+
+			memcpy(__skb_put(skb, size), va,
+			       ALIGN(size, sizeof(long)));
+
+			/*
+			 * Reuse buffer as-is,
+			 * just make sure it is local
+			 * Access to local memory is faster than non-local
+			 * memory so let's reuse.
+			 * If not local, let's free it and reallocate later.
+			 */
+			if (likely(page_to_nid(buf->page) == numa_node_id())) {
+				/* sync the buffer for use by the device */
+				dma_sync_single_range_for_device(
+						&port->nhi_ctxt->pdev->dev,
+						buf->dma, buf->page_offset,
+						TBT_RING_MAX_FRAME_SIZE,
+						DMA_FROM_DEVICE);
+			} else {
+				/*
+				 * this page cannot be reused so discard it;
+				 * the mapping is torn down before the page
+				 * reference is dropped, since unmapping may
+				 * still write to the page
+				 */
+				dma_unmap_page(&port->nhi_ctxt->pdev->dev,
+					       buf->dma, PAGE_SIZE,
+					       DMA_FROM_DEVICE);
+				put_page(buf->page);
+				buf->page = NULL;
+			}
+			rx_ring->next_to_clean = (rx_ring->next_to_clean + 1) &
+						 (TBT_NET_NUM_RX_BUFS - 1);
+		} else {
+			for (i = 0; i < frame_count; ++i) {
+				struct tbt_buffer *buf = &(rx_ring->buffers[
+						rx_ring->next_to_clean]);
+				struct tbt_frame_header *hdr =
+						page_address(buf->page) +
+						buf->page_offset;
+				u32 frm_size = le32_to_cpu(hdr->frame_size);
+
+				unsigned int truesize =
+					tbt_net_max_frm_data_size(frm_size);
+
+				/* add frame to skb struct */
+				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+						buf->page,
+						sizeof(struct tbt_frame_header)
+							+ buf->page_offset,
+						frm_size, truesize);
+
+#if (TBT_NUM_FRAMES_PER_PAGE > 1)
+				/* move offset up to the next cache line */
+				buf->page_offset += (truesize +
+					sizeof(struct tbt_frame_header));
+
+				/*
+				 * we can reuse buffer if there is space
+				 * available and it is local
+				 */
+				if (page_to_nid(buf->page) == numa_node_id()
+				    && buf->page_offset <=
+					PAGE_SIZE - TBT_RING_MAX_FRAME_SIZE) {
+					/*
+					 * bump ref count on page before
+					 * it is given to the stack
+					 */
+					get_page(buf->page);
+					/*
+					 * sync the buffer for use by the
+					 * device
+					 */
+					dma_sync_single_range_for_device(
+						&port->nhi_ctxt->pdev->dev,
+						buf->dma, buf->page_offset,
+						TBT_RING_MAX_FRAME_SIZE,
+						DMA_FROM_DEVICE);
+				} else
+#endif
+				{
+					/* the skb now owns the page reference */
+					buf->page = NULL;
+					dma_unmap_page(
+						&port->nhi_ctxt->pdev->dev,
+						buf->dma, PAGE_SIZE,
+						DMA_FROM_DEVICE);
+				}
+
+				rx_ring->next_to_clean =
+						(rx_ring->next_to_clean + 1) &
+						(TBT_NET_NUM_RX_BUFS - 1);
+			}
+			/*
+			 * place header from the first
+			 * fragment in linear portion of buffer
+			 */
+			tbt_net_pull_tail(skb);
+		}
+
+		/* pad short packets */
+		if (unlikely(skb->len < ETH_ZLEN)) {
+			int pad_len = ETH_ZLEN - skb->len;
+
+			/* The skb is freed on error */
+			if (unlikely(skb_pad(skb, pad_len))) {
+				cleaned_count += frame_count;
+				continue;
+			}
+			__skb_put(skb, pad_len);
+		}
+
+		skb->protocol = eth_type_trans(skb, port->net_dev);
+		napi_gro_receive(&port->napi, skb);
+
+		++rx_packets;
+		port->stats.rx_bytes += size;
+		if (multicast)
+			++port->stats.multicast;
+		cleaned_count += frame_count;
+	}
+
+out:
+	port->stats.rx_packets += rx_packets;
+
+	if (cleaned_count)
+		tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev,
+					 rx_ring, cleaned_count, reg,
+					 GFP_ATOMIC);
+
+	/* If all work not completed, return budget and keep polling */
+	if (rx_packets >= budget)
+		return budget;
+
+	/* Work is done so exit the polling mode and re-enable the interrupt */
+	napi_complete(napi);
+
+	spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
+	/* enable RX interrupt */
+	RING_INT_ENABLE_RX(port->nhi_ctxt->iobase, port->local_path,
+			   port->nhi_ctxt->num_paths);
+
+	spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
+
+	return 0;
+}
+
+/*
+ * ndo_open handler: request the per-port MSI-X interrupts (when MSI-X is
+ * in use), allocate the TX/RX buffer arrays, the descriptor rings and the
+ * TX buffers, and start the login negotiation if the medium is ready.
+ *
+ * Returns 0 on success, the error of devm_request_irq() if an interrupt
+ * could not be requested, or -ENOMEM if an allocation failed. Everything
+ * acquired here is released again on failure.
+ */
+static int tbt_net_open(struct net_device *net_dev)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+	int res = 0;
+	int i, j;
+
+	/* change link state to off until path establishment finishes */
+	netif_carrier_off(net_dev);
+
+	/*
+	 * if we previously succeeded to allocate msix entries,
+	 * now request IRQ for them:
+	 *  2=tx data port 0,
+	 *  3=rx data port 0,
+	 *  4=tx data port 1,
+	 *  5=rx data port 1,
+	 *  ...
+	 * if not, if msi is used, nhi_msi will handle icm & data paths
+	 */
+	if (port->nhi_ctxt->msix_entries) {
+		struct device *dev = &port->nhi_ctxt->pdev->dev;
+		const char *name;
+
+		/*
+		 * The IRQ core keeps the name pointer for as long as the
+		 * handler is registered, so the name must not live on the
+		 * stack. It is allocated as device-managed memory, which is
+		 * released when the device is unbound.
+		 */
+		name = devm_kasprintf(dev, GFP_KERNEL, "tbt-net-rx-%02u",
+				      port->num);
+		if (!name) {
+			res = -ENOMEM;
+			goto out;
+		}
+		res = devm_request_irq(dev,
+			port->nhi_ctxt->msix_entries[3+(port->num*2)].vector,
+			tbt_net_rx_msix, 0, name, port);
+		if (res) {
+			netif_err(port, ifup, net_dev, "request_irq %s failed %d\n",
+				  name, res);
+			goto out;
+		}
+
+		name = devm_kasprintf(dev, GFP_KERNEL, "tbt-net-tx-%02u",
+				      port->num);
+		if (!name) {
+			res = -ENOMEM;
+			goto request_irq_failure;
+		}
+		res = devm_request_irq(dev,
+			port->nhi_ctxt->msix_entries[2+(port->num*2)].vector,
+			tbt_net_tx_msix, 0, name, port);
+		if (res) {
+			netif_err(port, ifup, net_dev, "request_irq %s failed %d\n",
+				  name, res);
+			goto request_irq_failure;
+		}
+	}
+	/*
+	 * Verifying that all buffer sizes are well defined.
+	 * Starting with frame(s) will not tip over the
+	 * page boundary
+	 */
+	BUILD_BUG_ON(TBT_NUM_FRAMES_PER_PAGE < 1);
+	/*
+	 * Just to make sure we have enough place for containing
+	 * 3 max MTU packets for TX
+	 */
+	BUILD_BUG_ON((TBT_NET_NUM_TX_BUFS * TBT_RING_MAX_FRAME_SIZE) <
+		     (TBT_NET_MTU * 3));
+	/* make sure the number of TX Buffers is power of 2 */
+	BUILD_BUG_ON_NOT_POWER_OF_2(TBT_NET_NUM_TX_BUFS);
+	/*
+	 * Just to make sure we have enough place for containing
+	 * 3 max MTU packets for RX
+	 */
+	BUILD_BUG_ON((TBT_NET_NUM_RX_BUFS * TBT_RING_MAX_FRAME_SIZE) <
+		     (TBT_NET_MTU * 3));
+	/* make sure the number of RX Buffers is power of 2 */
+	BUILD_BUG_ON_NOT_POWER_OF_2(TBT_NET_NUM_RX_BUFS);
+
+	port->rx_ring.last_allocated = TBT_NET_NUM_RX_BUFS - 1;
+
+	port->tx_ring.buffers = vzalloc(TBT_NET_NUM_TX_BUFS *
+					sizeof(struct tbt_buffer));
+	if (!port->tx_ring.buffers)
+		goto ring_alloc_failure;
+	port->rx_ring.buffers = vzalloc(TBT_NET_NUM_RX_BUFS *
+					sizeof(struct tbt_buffer));
+	if (!port->rx_ring.buffers)
+		goto ring_alloc_failure;
+
+	/*
+	 * Allocate TX and RX descriptors
+	 * if the total size is less than a page, do a central allocation
+	 * Otherwise, split TX and RX
+	 */
+	if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) {
+		port->tx_ring.desc = dmam_alloc_coherent(
+				&port->nhi_ctxt->pdev->dev,
+				TBT_NET_SIZE_TOTAL_DESCS,
+				&port->tx_ring.dma,
+				GFP_KERNEL | __GFP_ZERO);
+		if (!port->tx_ring.desc)
+			goto ring_alloc_failure;
+		/* RX starts where TX finishes */
+		port->rx_ring.desc = &port->tx_ring.desc[TBT_NET_NUM_TX_BUFS];
+		port->rx_ring.dma = port->tx_ring.dma +
+			(TBT_NET_NUM_TX_BUFS * sizeof(struct tbt_buf_desc));
+	} else {
+		port->tx_ring.desc = dmam_alloc_coherent(
+				&port->nhi_ctxt->pdev->dev,
+				TBT_NET_NUM_TX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				&port->tx_ring.dma,
+				GFP_KERNEL | __GFP_ZERO);
+		if (!port->tx_ring.desc)
+			goto ring_alloc_failure;
+		port->rx_ring.desc = dmam_alloc_coherent(
+				&port->nhi_ctxt->pdev->dev,
+				TBT_NET_NUM_RX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				&port->rx_ring.dma,
+				GFP_KERNEL | __GFP_ZERO);
+		if (!port->rx_ring.desc)
+			goto rx_desc_alloc_failure;
+	}
+
+	/* allocate TX buffers and configure the descriptors */
+	for (i = 0; i < TBT_NET_NUM_TX_BUFS; i++) {
+		port->tx_ring.buffers[i].hdr = dma_alloc_coherent(
+			&port->nhi_ctxt->pdev->dev,
+			TBT_NUM_FRAMES_PER_PAGE * TBT_RING_MAX_FRAME_SIZE,
+			&port->tx_ring.buffers[i].dma,
+			GFP_KERNEL);
+		if (!port->tx_ring.buffers[i].hdr)
+			goto buffers_alloc_failure;
+
+		port->tx_ring.desc[i].phys =
+				cpu_to_le64(port->tx_ring.buffers[i].dma);
+		port->tx_ring.desc[i].attributes =
+				cpu_to_le32(DESC_ATTR_REQ_STS |
+					    TBT_NET_DESC_ATTR_SOF_EOF);
+
+		/*
+		 * In case the page is bigger than the frame size,
+		 * make the next buffer descriptor points
+		 * on the next frame memory address within the page
+		 */
+		for (i++, j = 1; (i < TBT_NET_NUM_TX_BUFS) &&
+				(j < TBT_NUM_FRAMES_PER_PAGE); i++, j++) {
+			port->tx_ring.buffers[i].dma =
+				port->tx_ring.buffers[i - 1].dma +
+				TBT_RING_MAX_FRAME_SIZE;
+			port->tx_ring.buffers[i].hdr =
+				(void *)(port->tx_ring.buffers[i - 1].hdr) +
+				TBT_RING_MAX_FRAME_SIZE;
+			/* move the next offset i.e. TBT_RING_MAX_FRAME_SIZE */
+			port->tx_ring.buffers[i].page_offset =
+				port->tx_ring.buffers[i - 1].page_offset +
+				TBT_RING_MAX_FRAME_SIZE;
+			port->tx_ring.desc[i].phys =
+				cpu_to_le64(port->tx_ring.buffers[i].dma);
+			port->tx_ring.desc[i].attributes =
+				cpu_to_le32(DESC_ATTR_REQ_STS |
+					    TBT_NET_DESC_ATTR_SOF_EOF);
+		}
+		/* the outer loop increments i again */
+		i--;
+	}
+
+	port->negotiation_status =
+			BIT(port->nhi_ctxt->net_devices[port->num].medium_sts);
+	if (port->negotiation_status == BIT(MEDIUM_READY_FOR_CONNECTION)) {
+		port->login_retry_count = 0;
+		queue_delayed_work(port->nhi_ctxt->net_workqueue,
+				   &port->login_retry_work, 0);
+	}
+
+	netif_info(port, ifup, net_dev, "Thunderbolt(TM) Networking port %u - ready for ThunderboltIP negotiation\n",
+		   port->num);
+	return 0;
+
+buffers_alloc_failure:
+	/*
+	 * Rollback the Tx buffers that were already allocated
+	 * until the failure
+	 */
+	for (i--; i >= 0; i--) {
+		/* free only for first buffer allocation */
+		if (port->tx_ring.buffers[i].page_offset == 0)
+			dma_free_coherent(&port->nhi_ctxt->pdev->dev,
+					  TBT_NUM_FRAMES_PER_PAGE *
+						TBT_RING_MAX_FRAME_SIZE,
+					  port->tx_ring.buffers[i].hdr,
+					  port->tx_ring.buffers[i].dma);
+		port->tx_ring.buffers[i].hdr = NULL;
+	}
+	/*
+	 * For central allocation, free all
+	 * otherwise free RX and then TX separately
+	 */
+	if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) {
+		dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+				   TBT_NET_SIZE_TOTAL_DESCS,
+				   port->tx_ring.desc,
+				   port->tx_ring.dma);
+		port->rx_ring.desc = NULL;
+	} else {
+		dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+				   TBT_NET_NUM_RX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				   port->rx_ring.desc,
+				   port->rx_ring.dma);
+		port->rx_ring.desc = NULL;
+rx_desc_alloc_failure:
+		dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+				   TBT_NET_NUM_TX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				   port->tx_ring.desc,
+				   port->tx_ring.dma);
+	}
+	port->tx_ring.desc = NULL;
+ring_alloc_failure:
+	vfree(port->tx_ring.buffers);
+	port->tx_ring.buffers = NULL;
+	vfree(port->rx_ring.buffers);
+	port->rx_ring.buffers = NULL;
+	res = -ENOMEM;
+	netif_err(port, ifup, net_dev, "Thunderbolt(TM) Networking port %u - unable to allocate memory\n",
+		  port->num);
+
+	/* no per-port interrupts were requested when MSI is in use */
+	if (!port->nhi_ctxt->msix_entries)
+		goto out;
+
+	devm_free_irq(&port->nhi_ctxt->pdev->dev,
+		      port->nhi_ctxt->msix_entries[2 + (port->num * 2)].vector,
+		      port);
+request_irq_failure:
+	devm_free_irq(&port->nhi_ctxt->pdev->dev,
+		      port->nhi_ctxt->msix_entries[3 + (port->num * 2)].vector,
+		      port);
+out:
+	return res;
+}
+
+/*
+ * ndo_stop handler: tear down the connection, cancel the pending works
+ * and release what tbt_net_open() acquired: TX buffers, RX pages,
+ * descriptor rings, buffer arrays and, when MSI-X is in use, the
+ * per-port interrupts.
+ *
+ * Always returns 0.
+ */
+static int tbt_net_close(struct net_device *net_dev)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+	struct device *dev = &port->nhi_ctxt->pdev->dev;
+	int i;
+
+	/*
+	 * Close connection, disable rings, flow controls
+	 * and interrupts
+	 */
+	tbt_net_tear_down(net_dev, !(port->negotiation_status &
+				     BIT(RECEIVE_LOGOUT)));
+
+	cancel_work_sync(&port->login_response_work);
+	cancel_work_sync(&port->logout_work);
+	cancel_work_sync(&port->status_reply_work);
+	cancel_work_sync(&port->approve_inter_domain_work);
+
+	/* Rollback the Tx buffers that were allocated */
+	for (i = 0; i < TBT_NET_NUM_TX_BUFS; i++) {
+		/* only the first buffer of each allocation is freed */
+		if (port->tx_ring.buffers[i].page_offset == 0)
+			dma_free_coherent(dev,
+					  TBT_NUM_FRAMES_PER_PAGE *
+						TBT_RING_MAX_FRAME_SIZE,
+					  port->tx_ring.buffers[i].hdr,
+					  port->tx_ring.buffers[i].dma);
+		port->tx_ring.buffers[i].hdr = NULL;
+	}
+	/* Unmap the Rx buffers that were allocated */
+	for (i = 0; i < TBT_NET_NUM_RX_BUFS; i++) {
+		struct tbt_buffer *buf = &port->rx_ring.buffers[i];
+
+		if (!buf->page)
+			continue;
+
+		/*
+		 * tear down the mapping before dropping the page reference,
+		 * since unmapping may still write to the page
+		 */
+		dma_unmap_page(dev, buf->dma, PAGE_SIZE, DMA_FROM_DEVICE);
+		put_page(buf->page);
+		buf->page = NULL;
+	}
+
+	/*
+	 * For central allocation, free all
+	 * otherwise free RX and then TX separately
+	 */
+	if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) {
+		dmam_free_coherent(dev,
+				   TBT_NET_SIZE_TOTAL_DESCS,
+				   port->tx_ring.desc,
+				   port->tx_ring.dma);
+		port->rx_ring.desc = NULL;
+	} else {
+		dmam_free_coherent(dev,
+				   TBT_NET_NUM_RX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				   port->rx_ring.desc,
+				   port->rx_ring.dma);
+		port->rx_ring.desc = NULL;
+		dmam_free_coherent(dev,
+				   TBT_NET_NUM_TX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				   port->tx_ring.desc,
+				   port->tx_ring.dma);
+	}
+	port->tx_ring.desc = NULL;
+
+	vfree(port->tx_ring.buffers);
+	port->tx_ring.buffers = NULL;
+	vfree(port->rx_ring.buffers);
+	port->rx_ring.buffers = NULL;
+
+	/*
+	 * The per-port interrupts are requested by tbt_net_open() only when
+	 * MSI-X entries exist; with MSI there is nothing to free and
+	 * msix_entries must not be dereferenced.
+	 */
+	if (port->nhi_ctxt->msix_entries) {
+		devm_free_irq(dev,
+			port->nhi_ctxt->msix_entries[3 + (port->num * 2)].vector,
+			port);
+		devm_free_irq(dev,
+			port->nhi_ctxt->msix_entries[2 + (port->num * 2)].vector,
+			port);
+	}
+
+	netif_info(port, ifdown, net_dev, "Thunderbolt(TM) Networking port %u - is down\n",
+		   port->num);
+
+	return 0;
+}
+
+/*
+ * Write the frame count into the header of every TX frame from first up
+ * to, but not including, last. When skb->ip_summed is CHECKSUM_PARTIAL,
+ * also compute the IP/TCP/UDP checksums in software over the data in the
+ * TX buffers.
+ *
+ * Returns false when the checksum cannot be computed for the packet (the
+ * VLAN header is not available or the protocol is not handled); in that
+ * case the frame counts are not written. Returns true otherwise.
+ */
+static bool tbt_net_xmit_csum(struct sk_buff *skb,
+ struct tbt_desc_ring *tx_ring, u32 first,
+ u32 last, u32 frame_count)
+{
+
+ struct tbt_frame_header *hdr = tx_ring->buffers[first].hdr;
+ __wsum wsum = (__force __wsum)htonl(skb->len -
+ skb_transport_offset(skb));
+ int offset = skb_transport_offset(skb);
+ __sum16 *tucso; /* TCP UDP Checksum Segment Offset */
+ __be16 protocol = skb->protocol;
+ u8 *dest = (u8 *)(hdr + 1);
+ int len;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ for (; first != last;
+ first = (first + 1) & (TBT_NET_NUM_TX_BUFS - 1)) {
+ hdr = tx_ring->buffers[first].hdr;
+ hdr->frame_count = cpu_to_le32(frame_count);
+ }
+ return true;
+ }
+
+ if (protocol == htons(ETH_P_8021Q)) {
+ struct vlan_hdr *vhdr, vh;
+
+ vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(vh), &vh);
+ if (!vhdr)
+ return false;
+
+ protocol = vhdr->h_vlan_encapsulated_proto;
+ }
+
+ /*
+ * dest points to the beginning of the packet in the TX buffer.
+ * The checksum pointers are derived from the absolute offset of
+ * the checksum field within the packet.
+ * ipcso will update IP checksum.
+ * tucso will update TCP/UDP checksum.
+ */
+ if (protocol == htons(ETH_P_IP)) {
+ __sum16 *ipcso = (__sum16 *)(dest +
+ ((u8 *)&(ip_hdr(skb)->check) - skb->data));
+
+ *ipcso = 0;
+ *ipcso = ip_fast_csum(dest + skb_network_offset(skb),
+ ip_hdr(skb)->ihl);
+ if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+ tucso = (__sum16 *)(dest +
+ ((u8 *)&(tcp_hdr(skb)->check) - skb->data));
+ else if (ip_hdr(skb)->protocol == IPPROTO_UDP)
+ tucso = (__sum16 *)(dest +
+ ((u8 *)&(udp_hdr(skb)->check) - skb->data));
+ else
+ return false;
+
+ *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, 0,
+ ip_hdr(skb)->protocol, 0);
+ } else if (skb_is_gso(skb)) {
+ if (skb_is_gso_v6(skb)) {
+ tucso = (__sum16 *)(dest +
+ ((u8 *)&(tcp_hdr(skb)->check) - skb->data));
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ 0, IPPROTO_TCP, 0);
+ } else if ((protocol == htons(ETH_P_IPV6)) &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) {
+ tucso = (__sum16 *)(dest +
+ ((u8 *)&(udp_hdr(skb)->check) - skb->data));
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ 0, IPPROTO_UDP, 0);
+ } else {
+ return false;
+ }
+ } else if (protocol == htons(ETH_P_IPV6)) {
+ tucso = (__sum16 *)(dest + skb_checksum_start_offset(skb) +
+ skb->csum_offset);
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ 0, ipv6_hdr(skb)->nexthdr, 0);
+ } else {
+ return false;
+ }
+
+ /* First frame was headers, rest of the frames is data */
+ for (; first != last; first = (first + 1) & (TBT_NET_NUM_TX_BUFS - 1),
+ offset = 0) {
+ hdr = tx_ring->buffers[first].hdr;
+ dest = (u8 *)(hdr + 1) + offset;
+ len = le32_to_cpu(hdr->frame_size) - offset;
+ wsum = csum_partial(dest, len, wsum);
+ hdr->frame_count = cpu_to_le32(frame_count);
+ }
+ *tucso = csum_fold(wsum);
+
+ return true;
+}
+
+/*
+ * tbt_net_xmit_frame - transmit handler (installed as .ndo_start_xmit)
+ *
+ * Copies the skb - its linear part followed by its page fragments - into
+ * consecutive TX ring buffers. Each buffer starts with a tbt_frame_header
+ * and carries at most TBT_RING_MAX_FRM_DATA_SZ bytes of packet data.
+ * Once all frames are filled, tbt_net_xmit_csum() is called on them and
+ * the new producer index is written to the ring register.
+ *
+ * Returns NETDEV_TX_BUSY, after stopping the queue and enabling the TX
+ * interrupt, when the packet does not fit in the available buffers.
+ * Returns NETDEV_TX_OK otherwise; this includes the error paths, where
+ * the skb is freed and tx_errors is incremented.
+ */
+static netdev_tx_t tbt_net_xmit_frame(struct sk_buff *skb,
+ struct net_device *net_dev)
+{
+ struct tbt_port *port = netdev_priv(net_dev);
+ void __iomem *iobase = port->nhi_ctxt->iobase;
+ void __iomem *reg = TBT_RING_CONS_PROD_REG(iobase,
+ REG_TX_RING_BASE,
+ port->local_path);
+ struct tbt_desc_ring *tx_ring = &port->tx_ring;
+ struct tbt_frame_header *hdr;
+ u32 prod_cons, prod, cons, first;
+ /* len equivalent to the fragment length */
+ unsigned int len = skb_headlen(skb);
+ /* data_len is overall packet length */
+ unsigned int data_len = skb->len;
+ u32 frm_idx, frag_num = 0;
+ const u8 *src = skb->data;
+ bool unmap = false;
+ __le32 *attr;
+ u8 *dest;
+
+ /* reject empty packets and packets longer than TBT_NET_MTU */
+ if (unlikely(data_len == 0 || data_len > TBT_NET_MTU))
+ goto invalid_packet;
+
+ /* one register read yields both the producer and the consumer index */
+ prod_cons = ioread32(reg);
+ prod = TBT_REG_RING_PROD_EXTRACT(prod_cons);
+ cons = TBT_REG_RING_CONS_EXTRACT(prod_cons);
+ /* an index outside the ring is counted as an error; the skb is dropped */
+ if (prod >= TBT_NET_NUM_TX_BUFS || cons >= TBT_NET_NUM_TX_BUFS)
+ goto tx_error;
+
+ /*
+ * The whole packet must fit in the buffers that are available now.
+ * NOTE(review): TBT_NUM_BUFS_BETWEEN() presumably yields the number of
+ * free TX buffers - confirm against its definition.
+ */
+ if (data_len > (TBT_NUM_BUFS_BETWEEN(prod, cons, TBT_NET_NUM_TX_BUFS) *
+ TBT_RING_MAX_FRM_DATA_SZ)) {
+ unsigned long flags;
+
+ netif_stop_queue(net_dev);
+
+ spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
+ /*
+ * Enable TX interrupt to be notified about available buffers
+ * and restart transmission upon this.
+ */
+ RING_INT_ENABLE_TX(iobase, port->local_path);
+ spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
+
+ return NETDEV_TX_BUSY;
+ }
+
+ /* remember the first ring entry of this packet for tbt_net_xmit_csum() */
+ first = prod;
+ attr = &tx_ring->desc[prod].attributes;
+ hdr = tx_ring->buffers[prod].hdr;
+ dest = (u8 *)(hdr + 1);
+ /* if overall packet is bigger than the frame data size */
+ for (frm_idx = 0; data_len > TBT_RING_MAX_FRM_DATA_SZ; ++frm_idx) {
+ u32 size_left = TBT_RING_MAX_FRM_DATA_SZ;
+
+ /*
+ * Full-size frame: clear the length, interrupt-enable and done
+ * bits of the descriptor.
+ * NOTE(review): the length field is left at zero here; presumably
+ * that denotes a maximum-size frame - confirm against the HW spec.
+ */
+ *attr &= cpu_to_le32(~(DESC_ATTR_LEN_MASK |
+ DESC_ATTR_INT_EN |
+ DESC_ATTR_DESC_DONE));
+ hdr->frame_size = cpu_to_le32(TBT_RING_MAX_FRM_DATA_SZ);
+ hdr->frame_index = cpu_to_le16(frm_idx);
+ hdr->frame_id = cpu_to_le16(port->frame_id);
+
+ /* fill the frame, pulling in further skb fragments as needed */
+ do {
+ if (len > size_left) {
+ /*
+ * Copy data onto tx buffer data with full
+ * frame size then break
+ * and go to next frame
+ */
+ memcpy(dest, src, size_left);
+ len -= size_left;
+ dest += size_left;
+ src += size_left;
+ break;
+ }
+
+ memcpy(dest, src, len);
+ size_left -= len;
+ dest += len;
+
+ /*
+ * Current chunk fully copied; drop the fragment mapping.
+ * NOTE(review): src may have been advanced by an earlier
+ * partial copy, so it need not be the address returned by
+ * kmap_atomic() - confirm kunmap_atomic() accepts that.
+ */
+ if (unmap) {
+ kunmap_atomic((void *)src);
+ unmap = false;
+ }
+ /*
+ * Ensure all fragments have been processed
+ */
+ if (frag_num < skb_shinfo(skb)->nr_frags) {
+ const skb_frag_t *frag =
+ &(skb_shinfo(skb)->frags[frag_num]);
+ len = skb_frag_size(frag);
+ /* map and then unmap quickly */
+ src = kmap_atomic(skb_frag_page(frag)) +
+ frag->page_offset;
+ unmap = true;
+ ++frag_num;
+ } else if (unlikely(size_left > 0)) {
+ /* skb ran out of data before the frame was filled */
+ goto invalid_packet;
+ }
+ } while (size_left > 0);
+
+ /* account for the full frame and advance to the next ring entry */
+ data_len -= TBT_RING_MAX_FRM_DATA_SZ;
+ prod = (prod + 1) & (TBT_NET_NUM_TX_BUFS - 1);
+ attr = &tx_ring->desc[prod].attributes;
+ hdr = tx_ring->buffers[prod].hdr;
+ dest = (u8 *)(hdr + 1);
+ }
+
+ /* last (possibly only) frame: it carries the remaining data_len bytes */
+ *attr &= cpu_to_le32(~(DESC_ATTR_LEN_MASK | DESC_ATTR_DESC_DONE));
+ /* Enable the interrupts, for resuming from stop queue later (if so) */
+ *attr |= cpu_to_le32(DESC_ATTR_INT_EN |
+ (((sizeof(struct tbt_frame_header) + data_len) <<
+ DESC_ATTR_LEN_SHIFT) & DESC_ATTR_LEN_MASK));
+ hdr->frame_size = cpu_to_le32(data_len);
+ hdr->frame_index = cpu_to_le16(frm_idx);
+ hdr->frame_id = cpu_to_le16(port->frame_id);
+
+ /* In case the remaining data_len is smaller than a frame */
+ while (len < data_len) {
+ memcpy(dest, src, len);
+ data_len -= len;
+ dest += len;
+
+ if (unmap) {
+ kunmap_atomic((void *)src);
+ unmap = false;
+ }
+
+ if (frag_num < skb_shinfo(skb)->nr_frags) {
+ const skb_frag_t *frag =
+ &(skb_shinfo(skb)->frags[frag_num]);
+ len = skb_frag_size(frag);
+ src = kmap_atomic(skb_frag_page(frag)) +
+ frag->page_offset;
+ unmap = true;
+ ++frag_num;
+ } else if (unlikely(data_len > 0)) {
+ /* skb ran out of data before the packet was complete */
+ goto invalid_packet;
+ }
+ }
+ /* copy what is left of the packet from the current chunk */
+ memcpy(dest, src, data_len);
+ if (unmap) {
+ kunmap_atomic((void *)src);
+ unmap = false;
+ }
+
+ /* frm_idx becomes the frame count; prod the entry after the last frame */
+ ++frm_idx;
+ prod = (prod + 1) & (TBT_NET_NUM_TX_BUFS - 1);
+
+ if (!tbt_net_xmit_csum(skb, tx_ring, first, prod, frm_idx))
+ goto invalid_packet;
+
+ /* frame_id only advances when match_frame_id is set */
+ if (port->match_frame_id)
+ ++port->frame_id;
+
+ /* replace only the producer field of the value read earlier */
+ prod_cons &= ~REG_RING_PROD_MASK;
+ prod_cons |= (prod << REG_RING_PROD_SHIFT) & REG_RING_PROD_MASK;
+ wmb(); /* make sure producer update is done after buffers are ready */
+ iowrite32(prod_cons, reg);
+
+ ++port->stats.tx_packets;
+ port->stats.tx_bytes += skb->len;
+
+ dev_consume_skb_any(skb);
+ return NETDEV_TX_OK;
+
+ /* drop paths: the producer index is not written, the skb is freed */
+invalid_packet:
+ netif_err(port, tx_err, net_dev, "port %u invalid transmit packet\n",
+ port->num);
+tx_error:
+ ++port->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
+
+/*
+ * Rebuild port->packet_filters and port->multicast_hash_table from the
+ * net device flags and its unicast/multicast address lists.
+ */
+static void tbt_net_set_rx_mode(struct net_device *net_dev)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+	int uc_addrs = netdev_uc_count(net_dev);
+	struct netdev_hw_addr *ha;
+
+	if (!(net_dev->flags & IFF_PROMISC))
+		port->packet_filters &= ~BIT(PACKET_TYPE_PROMISCUOUS);
+	else
+		port->packet_filters |= BIT(PACKET_TYPE_PROMISCUOUS);
+
+	if (!(net_dev->flags & IFF_ALLMULTI))
+		port->packet_filters &= ~BIT(PACKET_TYPE_ALL_MULTICAST);
+	else
+		port->packet_filters |= BIT(PACKET_TYPE_ALL_MULTICAST);
+
+	if (uc_addrs < 1) {
+		/* no unicast addresses listed */
+		port->packet_filters &= ~BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+	} else if (uc_addrs > 1) {
+		/* more than a single MAC address */
+		port->packet_filters |= BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+	} else {
+		/*
+		 * Exactly one MAC address: unicast promiscuous is needed
+		 * only when it differs from the device's own address.
+		 */
+		netdev_for_each_uc_addr(ha, net_dev) {
+			if (ether_addr_equal(ha->addr, net_dev->dev_addr))
+				port->packet_filters &=
+					~BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+			else
+				port->packet_filters |=
+					BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+		}
+	}
+
+	/* Start from an empty hash table and set one bit per multicast MAC */
+	memset(port->multicast_hash_table, 0,
+	       sizeof(port->multicast_hash_table));
+	netdev_for_each_mc_addr(ha, net_dev) {
+		u16 mc_hash = TBT_NET_ETHER_ADDR_HASH(ha->addr);
+
+		port->multicast_hash_table[mc_hash / BITS_PER_U32] |=
+			BIT(mc_hash % BITS_PER_U32);
+	}
+}
+
+/*
+ * Fill @stats from the counters kept in the port's private data.
+ * Members that are not listed are reported as zero; rx_errors is the sum
+ * of the individual receive error counters. Returns @stats.
+ */
+static struct rtnl_link_stats64 *tbt_net_get_stats64(
+					struct net_device *net_dev,
+					struct rtnl_link_stats64 *stats)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+
+	/* members not named in the initializer are zero-initialized */
+	*stats = (struct rtnl_link_stats64) {
+		.rx_packets = port->stats.rx_packets,
+		.rx_bytes = port->stats.rx_bytes,
+		.rx_length_errors = port->stats.rx_length_errors,
+		.rx_over_errors = port->stats.rx_over_errors,
+		.rx_crc_errors = port->stats.rx_crc_errors,
+		.rx_missed_errors = port->stats.rx_missed_errors,
+		.multicast = port->stats.multicast,
+		.tx_packets = port->stats.tx_packets,
+		.tx_bytes = port->stats.tx_bytes,
+		.tx_errors = port->stats.tx_errors,
+	};
+	stats->rx_errors = stats->rx_length_errors + stats->rx_over_errors +
+			   stats->rx_crc_errors + stats->rx_missed_errors;
+
+	return stats;
}

+/*
+ * Set the device MAC address from the struct sockaddr passed in @addr.
+ * Returns -EADDRNOTAVAIL if the address is not a valid Ethernet address,
+ * 0 otherwise.
+ */
+static int tbt_net_set_mac_address(struct net_device *net_dev, void *addr)
+{
+	const struct sockaddr *sock_addr = addr;
+
+	if (!is_valid_ether_addr(sock_addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(net_dev->dev_addr, sock_addr->sa_data, net_dev->addr_len);
+	return 0;
+}
+
+/*
+ * Change the device MTU. Accepts values from 68 up to
+ * TBT_NET_MTU - ETH_HLEN inclusive; anything else yields -EINVAL.
+ */
+static int tbt_net_change_mtu(struct net_device *net_dev, int new_mtu)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+	/* MTU < 68 is an error and causes problems on some kernels */
+	bool mtu_ok = new_mtu >= 68 && new_mtu <= (TBT_NET_MTU - ETH_HLEN);
+
+	if (!mtu_ok)
+		return -EINVAL;
+
+	netif_info(port, probe, net_dev, "Thunderbolt(TM) Networking port %u - changing MTU from %u to %d\n",
+		   port->num, net_dev->mtu, new_mtu);
+	net_dev->mtu = new_mtu;
+
+	return 0;
+}
+
+/* net_device callbacks implemented by this driver */
+static const struct net_device_ops tbt_netdev_ops = {
+	/* interface brought up / taken down */
+	.ndo_open		= tbt_net_open,
+	.ndo_stop		= tbt_net_close,
+	/* data path */
+	.ndo_start_xmit		= tbt_net_xmit_frame,
+	.ndo_get_stats64	= tbt_net_get_stats64,
+	/* configuration */
+	.ndo_set_rx_mode	= tbt_net_set_rx_mode,
+	.ndo_change_mtu		= tbt_net_change_mtu,
+	.ndo_set_mac_address	= tbt_net_set_mac_address,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+/*
+ * ethtool get_settings: report a fixed link description - 20000 Mb/s,
+ * full duplex, fibre port, internal transceiver, autonegotiation disabled.
+ * Always returns 0.
+ */
+static int tbt_net_get_settings(__maybe_unused struct net_device *net_dev,
+				struct ethtool_cmd *ecmd)
+{
+	ecmd->supported |= SUPPORTED_20000baseKR2_Full | SUPPORTED_FIBRE;
+	ecmd->advertising |= ADVERTISED_20000baseKR2_Full | ADVERTISED_FIBRE;
+
+	ecmd->port = PORT_FIBRE;
+	ecmd->transceiver = XCVR_INTERNAL;
+	ecmd->autoneg = AUTONEG_DISABLE;
+
+	ecmd->duplex = DUPLEX_FULL;
+	ethtool_cmd_speed_set(ecmd, SPEED_20000);
+
+	return 0;
+}
+
+
+/* ethtool get_msglevel: return the port's current msg_enable value */
+static u32 tbt_net_get_msglevel(struct net_device *net_dev)
+{
+	const struct tbt_port *priv = netdev_priv(net_dev);
+
+	return priv->msg_enable;
+}
+
+/* ethtool set_msglevel: store @data as the port's msg_enable value */
+static void tbt_net_set_msglevel(struct net_device *net_dev, u32 data)
+{
+	struct tbt_port *priv = netdev_priv(net_dev);
+
+	priv->msg_enable = data;
+}
+
+/*
+ * ethtool get_strings: copy the statistics name table into @data.
+ * String sets other than ETH_SS_STATS leave @data untouched.
+ */
+static void tbt_net_get_strings(__maybe_unused struct net_device *net_dev,
+				u32 stringset, u8 *data)
+{
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	memcpy(data, tbt_net_gstrings_stats, sizeof(tbt_net_gstrings_stats));
+}
+
+/*
+ * ethtool get_ethtool_stats: copy the port's statistics block into @data.
+ * NOTE(review): this is a raw copy, so it presumably relies on port->stats
+ * being laid out as consecutive u64 counters in the same order as
+ * tbt_net_gstrings_stats - confirm against the structure definition.
+ */
+static void tbt_net_get_ethtool_stats(struct net_device *net_dev,
+				      __maybe_unused struct ethtool_stats *sts,
+				      u64 *data)
+{
+	struct tbt_port *priv = netdev_priv(net_dev);
+
+	memcpy(data, &priv->stats, sizeof(priv->stats));
+}
+
+/*
+ * ethtool get_sset_count: number of entries in the statistics name table
+ * for ETH_SS_STATS, -EOPNOTSUPP for any other string set.
+ */
+static int tbt_net_get_sset_count(__maybe_unused struct net_device *net_dev,
+				  int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return sizeof(tbt_net_gstrings_stats) / ETH_GSTRING_LEN;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/*
+ * ethtool get_drvinfo: fill in the driver name, driver version, the PCI
+ * name of the underlying NHI device and the number of statistics.
+ */
+static void tbt_net_get_drvinfo(struct net_device *net_dev,
+				struct ethtool_drvinfo *drvinfo)
+{
+	struct tbt_port *priv = netdev_priv(net_dev);
+	const char *pci_dev_name = pci_name(priv->nhi_ctxt->pdev);
+
+	drvinfo->n_stats = tbt_net_get_sset_count(net_dev, ETH_SS_STATS);
+
+	strlcpy(drvinfo->bus_info, pci_dev_name, sizeof(drvinfo->bus_info));
+	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
+	strlcpy(drvinfo->driver, "Thunderbolt(TM) Networking",
+		sizeof(drvinfo->driver));
+}
+
+/* ethtool callbacks implemented by this driver */
+static const struct ethtool_ops tbt_net_ethtool_ops = {
+	/* link and driver information */
+	.get_drvinfo		= tbt_net_get_drvinfo,
+	.get_settings		= tbt_net_get_settings,
+	.get_link		= ethtool_op_get_link,
+	/* message level */
+	.get_msglevel		= tbt_net_get_msglevel,
+	.set_msglevel		= tbt_net_set_msglevel,
+	/* statistics */
+	.get_sset_count		= tbt_net_get_sset_count,
+	.get_strings		= tbt_net_get_strings,
+	.get_ethtool_stats	= tbt_net_get_ethtool_stats,
+};
+
static inline int send_message(struct tbt_port *port, const char *func,
enum pdf_value pdf, u32 msg_len, const u8 *msg)
{
@@ -515,6 +1943,10 @@ void negotiation_events(struct net_device *net_dev,
/* configure TX ring */
reg = iobase + REG_TX_RING_BASE +
(port->local_path * REG_RING_STEP);
+ iowrite32(lower_32_bits(port->tx_ring.dma),
+ reg + REG_RING_PHYS_LO_OFFSET);
+ iowrite32(upper_32_bits(port->tx_ring.dma),
+ reg + REG_RING_PHYS_HI_OFFSET);

tx_ring_conf = (TBT_NET_NUM_TX_BUFS << REG_RING_SIZE_SHIFT) &
REG_RING_SIZE_MASK;
@@ -557,6 +1989,10 @@ void negotiation_events(struct net_device *net_dev,
*/
reg = iobase + REG_RX_RING_BASE +
(port->local_path * REG_RING_STEP);
+ iowrite32(lower_32_bits(port->rx_ring.dma),
+ reg + REG_RING_PHYS_LO_OFFSET);
+ iowrite32(upper_32_bits(port->rx_ring.dma),
+ reg + REG_RING_PHYS_HI_OFFSET);

rx_ring_conf = (TBT_NET_NUM_RX_BUFS << REG_RING_SIZE_SHIFT) &
REG_RING_SIZE_MASK;
@@ -566,6 +2002,17 @@ void negotiation_events(struct net_device *net_dev,
REG_RING_BUF_SIZE_MASK;

iowrite32(rx_ring_conf, reg + REG_RING_SIZE_OFFSET);
+ /* allocate RX buffers and configure the descriptors */
+ if (!tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev,
+ &port->rx_ring,
+ TBT_NET_NUM_RX_BUFS,
+ reg + REG_RING_CONS_PROD_OFFSET,
+ GFP_KERNEL)) {
+ netif_err(port, link, net_dev, "Thunderbolt(TM) Networking port %u - no memory for receive buffers\n",
+ port->num);
+ tbt_net_tear_down(net_dev, true);
+ break;
+ }

spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
/* enable RX interrupt */
@@ -578,6 +2025,7 @@ void negotiation_events(struct net_device *net_dev,
netif_info(port, link, net_dev, "Thunderbolt(TM) Networking port %u - ready\n",
port->num);

+ napi_enable(&port->napi);
netif_carrier_on(net_dev);
netif_start_queue(net_dev);
break;
@@ -788,15 +2236,42 @@ struct net_device *nhi_alloc_etherdev(struct tbt_nhi_ctxt *nhi_ctxt,
scnprintf(net_dev->name, sizeof(net_dev->name), "tbtnet%%dp%hhu",
port_num);

+ net_dev->netdev_ops = &tbt_netdev_ops;
+
+ netif_napi_add(net_dev, &port->napi, tbt_net_poll, NAPI_POLL_WEIGHT);
+
+ net_dev->hw_features = NETIF_F_SG |
+ NETIF_F_ALL_TSO |
+ NETIF_F_UFO |
+ NETIF_F_GRO |
+ NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM;
+ net_dev->features = net_dev->hw_features;
+ if (nhi_ctxt->pci_using_dac)
+ net_dev->features |= NETIF_F_HIGHDMA;
+
INIT_DELAYED_WORK(&port->login_retry_work, login_retry);
INIT_WORK(&port->login_response_work, login_response);
INIT_WORK(&port->logout_work, logout);
INIT_WORK(&port->status_reply_work, status_reply);
INIT_WORK(&port->approve_inter_domain_work, approve_inter_domain);

+ net_dev->ethtool_ops = &tbt_net_ethtool_ops;
+
+ tbt_net_change_mtu(net_dev, TBT_NET_MTU - ETH_HLEN);
+
+ if (register_netdev(net_dev))
+ goto err_register;
+
+ netif_carrier_off(net_dev);
+
netif_info(port, probe, net_dev,
"Thunderbolt(TM) Networking port %u - MAC Address: %pM\n",
port_num, net_dev->dev_addr);

return net_dev;
+
+err_register:
+ free_netdev(net_dev);
+ return NULL;
}
--
2.7.4