+ len = 0;
+ len += sizeof(*cpl);
+
+ /* Coalescing skipped and we send through normal path */
+ if (!(m->ol_flags & PKT_TX_TCP_SEG)) {
+ wr->op_immdlen = htonl(V_FW_WR_OP(is_pf4(adap) ?
+ FW_ETH_TX_PKT_WR :
+ FW_ETH_TX_PKT_VM_WR) |
+ V_FW_WR_IMMDLEN(len));
+ if (is_pf4(adap))
+ cpl = (void *)(wr + 1);
+ else
+ cpl = (void *)(vmwr + 1);
+ if (m->ol_flags & PKT_TX_IP_CKSUM) {
+ cntrl = hwcsum(adap->params.chip, m) |
+ F_TXPKT_IPCSUM_DIS;
+ txq->stats.tx_cso++;
+ }
+ } else {
+ if (is_pf4(adap))
+ lso = (void *)(wr + 1);
+ else
+ lso = (void *)(vmwr + 1);
+ v6 = (m->ol_flags & PKT_TX_IPV6) != 0;
+ l3hdr_len = m->l3_len;
+ l4hdr_len = m->l4_len;
+ eth_xtra_len = m->l2_len - RTE_ETHER_HDR_LEN;
+ len += sizeof(*lso);
+ wr->op_immdlen = htonl(V_FW_WR_OP(is_pf4(adap) ?
+ FW_ETH_TX_PKT_WR :
+ FW_ETH_TX_PKT_VM_WR) |
+ V_FW_WR_IMMDLEN(len));
+ lso->lso_ctrl = htonl(V_LSO_OPCODE(CPL_TX_PKT_LSO) |
+ F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |
+ V_LSO_IPV6(v6) |
+ V_LSO_ETHHDR_LEN(eth_xtra_len / 4) |
+ V_LSO_IPHDR_LEN(l3hdr_len / 4) |
+ V_LSO_TCPHDR_LEN(l4hdr_len / 4));
+ lso->ipid_ofst = htons(0);
+ lso->mss = htons(m->tso_segsz);
+ lso->seqno_offset = htonl(0);
+ if (is_t4(adap->params.chip))
+ lso->len = htonl(m->pkt_len);
+ else
+ lso->len = htonl(V_LSO_T5_XFER_SIZE(m->pkt_len));
+ cpl = (void *)(lso + 1);
+
+ if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
+ cntrl = V_TXPKT_ETHHDR_LEN(eth_xtra_len);
+ else
+ cntrl = V_T6_TXPKT_ETHHDR_LEN(eth_xtra_len);
+
+ cntrl |= V_TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 :
+ TX_CSUM_TCPIP) |
+ V_TXPKT_IPHDR_LEN(l3hdr_len);
+ txq->stats.tso++;
+ txq->stats.tx_cso += m->tso_segsz;
+ }
+
+ if (m->ol_flags & PKT_TX_VLAN_PKT) {
+ txq->stats.vlan_ins++;
+ cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->vlan_tci);
+ }
+
+ cpl->ctrl0 = htonl(V_TXPKT_OPCODE(CPL_TX_PKT_XT));
+ if (is_pf4(adap))
+ cpl->ctrl0 |= htonl(V_TXPKT_INTF(pi->tx_chan) |
+ V_TXPKT_PF(adap->pf));
+ else
+ cpl->ctrl0 |= htonl(V_TXPKT_INTF(pi->port_id) |
+ V_TXPKT_PF(0));
+
+ cpl->pack = htons(0);
+ cpl->len = htons(m->pkt_len);
+ cpl->ctrl1 = cpu_to_be64(cntrl);
+
+ txq->stats.pkts++;
+ txq->stats.tx_bytes += m->pkt_len;
+ last_desc = txq->q.pidx + ndesc - 1;
+ if (last_desc >= (int)txq->q.size)
+ last_desc -= txq->q.size;
+
+ d = &txq->q.sdesc[last_desc];
+ if (d->coalesce.idx) {
+ int i;
+
+ for (i = 0; i < d->coalesce.idx; i++) {
+ rte_pktmbuf_free(d->coalesce.mbuf[i]);
+ d->coalesce.mbuf[i] = NULL;
+ }
+ d->coalesce.idx = 0;
+ }
+ write_sgl(m, &txq->q, (struct ulptx_sgl *)(cpl + 1), end, 0,
+ addr);
+ txq->q.sdesc[last_desc].mbuf = m;
+ txq->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)(cpl + 1);
+ txq_advance(&txq->q, ndesc);
+ ring_tx_db(adap, &txq->q);
+ return 0;
+}
+
+/**
+ * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
+ * @q: the SGE control Tx queue
+ *
+ * This is a variant of reclaim_completed_tx() that is used for Tx queues
+ * that send only immediate data (presently just the control queues) and
+ * thus do not have any mbufs to release.
+ */
+static inline void reclaim_completed_tx_imm(struct sge_txq *q)
+{
+ int hw_cidx = ntohs(q->stat->cidx);
+ int reclaim = hw_cidx - q->cidx;
+
+ if (reclaim < 0)
+ reclaim += q->size;
+
+ q->in_use -= reclaim;
+ q->cidx = hw_cidx;
+}
+
+/**
+ * is_imm - check whether a packet can be sent as immediate data
+ * @mbuf: the packet
+ *
+ * Returns true if a packet can be sent as a WR with immediate data.
+ */
+static inline int is_imm(const struct rte_mbuf *mbuf)
+{
+ return mbuf->pkt_len <= MAX_CTRL_WR_LEN;
+}
+
+/**
+ * inline_tx_mbuf: inline a packet's data into TX descriptors
+ * @q: the TX queue where the packet will be inlined
+ * @from: pointer to data portion of packet
+ * @to: pointer after cpl where data has to be inlined
+ * @len: length of data to inline
+ *
+ * Inline a packet's contents directly to TX descriptors, starting at
+ * the given position within the TX DMA ring.
+ * Most of the complexity of this operation is dealing with wrap arounds
+ * in the middle of the packet we want to inline.
+ */
+static void inline_tx_mbuf(const struct sge_txq *q, caddr_t from, caddr_t *to,
+ int len)
+{
+ int left = RTE_PTR_DIFF(q->stat, *to);
+
+ if (likely((uintptr_t)*to + len <= (uintptr_t)q->stat)) {
+ rte_memcpy(*to, from, len);
+ *to = RTE_PTR_ADD(*to, len);
+ } else {
+ rte_memcpy(*to, from, left);
+ from = RTE_PTR_ADD(from, left);
+ left = len - left;
+ rte_memcpy((void *)q->desc, from, left);
+ *to = RTE_PTR_ADD((void *)q->desc, left);
+ }
+}
+
+/**
+ * ctrl_xmit - send a packet through an SGE control Tx queue
+ * @q: the control queue
+ * @mbuf: the packet
+ *
+ * Send a packet through an SGE control Tx queue. Packets sent through
+ * a control queue must fit entirely as immediate data.
+ */
+static int ctrl_xmit(struct sge_ctrl_txq *q, struct rte_mbuf *mbuf)
+{
+ unsigned int ndesc;
+ struct fw_wr_hdr *wr;
+ caddr_t dst;
+
+ if (unlikely(!is_imm(mbuf))) {
+ WARN_ON(1);
+ rte_pktmbuf_free(mbuf);
+ return -1;
+ }
+
+ reclaim_completed_tx_imm(&q->q);
+ ndesc = DIV_ROUND_UP(mbuf->pkt_len, sizeof(struct tx_desc));
+ t4_os_lock(&q->ctrlq_lock);
+
+ q->full = txq_avail(&q->q) < ndesc ? 1 : 0;
+ if (unlikely(q->full)) {
+ t4_os_unlock(&q->ctrlq_lock);
+ return -1;
+ }
+
+ wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
+ dst = (void *)wr;
+ inline_tx_mbuf(&q->q, rte_pktmbuf_mtod(mbuf, caddr_t),
+ &dst, mbuf->data_len);
+
+ txq_advance(&q->q, ndesc);
+ if (unlikely(txq_avail(&q->q) < 64))
+ wr->lo |= htonl(F_FW_WR_EQUEQ);
+
+ q->txp++;
+
+ ring_tx_db(q->adapter, &q->q);
+ t4_os_unlock(&q->ctrlq_lock);
+
+ rte_pktmbuf_free(mbuf);
+ return 0;
+}
+
+/**
+ * t4_mgmt_tx - send a management message
+ * @q: the control queue
+ * @mbuf: the packet containing the management message
+ *
+ * Send a management message through control queue.
+ */
+int t4_mgmt_tx(struct sge_ctrl_txq *q, struct rte_mbuf *mbuf)
+{
+ return ctrl_xmit(q, mbuf);
+}
+
+/**
+ * alloc_ring - allocate resources for an SGE descriptor ring
+ * @dev: the port associated with the queue
+ * @z_name: memzone's name
+ * @queue_id: queue index
+ * @socket_id: preferred socket id for memory allocations
+ * @nelem: the number of descriptors
+ * @elem_size: the size of each descriptor
+ * @stat_size: extra space in HW ring for status information
+ * @sw_size: the size of the SW state associated with each ring element
+ * @phys: the physical address of the allocated ring
+ * @metadata: address of the array holding the SW state for the ring
+ *
+ * Allocates resources for an SGE descriptor ring, such as Tx queues,
+ * free buffer lists, or response queues. Each SGE ring requires
+ * space for its HW descriptors plus, optionally, space for the SW state
+ * associated with each HW entry (the metadata). The function returns
+ * three values: the virtual address for the HW ring (the return value
+ * of the function), the bus address of the HW ring, and the address
+ * of the SW ring.
+ */
+static void *alloc_ring(struct rte_eth_dev *dev, const char *z_name,
+ uint16_t queue_id, int socket_id, size_t nelem,
+ size_t elem_size, size_t stat_size, size_t sw_size,
+ dma_addr_t *phys, void *metadata)
+{
+ size_t len = CXGBE_MAX_RING_DESC_SIZE * elem_size + stat_size;
+ char z_name_sw[RTE_MEMZONE_NAMESIZE];
+ const struct rte_memzone *tz;
+ void *s = NULL;
+
+ snprintf(z_name_sw, sizeof(z_name_sw), "eth_p%d_q%d_%s_sw_ring",
+ dev->data->port_id, queue_id, z_name);
+
+ dev_debug(adapter, "%s: nelem = %zu; elem_size = %zu; sw_size = %zu; "
+ "stat_size = %zu; queue_id = %u; socket_id = %d; z_name = %s;"
+ " z_name_sw = %s\n", __func__, nelem, elem_size, sw_size,
+ stat_size, queue_id, socket_id, z_name, z_name_sw);
+
+ /*
+ * Allocate TX/RX ring hardware descriptors. A memzone large enough to
+ * handle the maximum ring size is allocated in order to allow for
+ * resizing in later calls to the queue setup function.
+ */
+ tz = rte_eth_dma_zone_reserve(dev, z_name, queue_id, len, 4096,
+ socket_id);
+ if (!tz)
+ return NULL;