+/**
+ * Fill the packet's l2, l3 and l4 headers to the WQE.
+ *
+ * This will be used as the header for each TSO segment that is transmitted.
+ *
+ * @param buf
+ * Pointer to the first packet mbuf.
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param tinfo
+ * Pointer to TSO info to use.
+ * @param ctrl
+ * Pointer to the control segment in the TSO WQE.
+ *
+ * @return
+ * 0 on success, negative value upon error.
+ */
+static inline volatile struct mlx4_wqe_data_seg *
+mlx4_tx_burst_fill_tso_hdr(struct rte_mbuf *buf,
+ struct txq *txq,
+ struct tso_info *tinfo,
+ volatile struct mlx4_wqe_ctrl_seg *ctrl)
+{
+ volatile struct mlx4_wqe_lso_seg *tseg =
+ (volatile struct mlx4_wqe_lso_seg *)(ctrl + 1);
+ struct mlx4_sq *sq = &txq->msq;
+ struct pv *pv = tinfo->pv;
+ int *pv_counter = &tinfo->pv_counter;
+ int remain_size = tinfo->tso_header_size;
+ char *from = rte_pktmbuf_mtod(buf, char *);
+ uint16_t txbb_avail_space;
+ /* Union to overcome volatile constraints when copying TSO header. */
+ union {
+ volatile uint8_t *vto;
+ uint8_t *to;
+ } thdr = { .vto = (volatile uint8_t *)tseg->header, };
+
+ /*
+ * TSO data always starts at offset 20 from the beginning of the TXBB
+ * (16 byte ctrl + 4byte TSO desc). Since each TXBB is 64Byte aligned
+ * we can write the first 44 TSO header bytes without worry for TxQ
+ * wrapping or overwriting the first TXBB 32bit word.
+ */
+ txbb_avail_space = MLX4_TXBB_SIZE -
+ (sizeof(struct mlx4_wqe_ctrl_seg) +
+ sizeof(struct mlx4_wqe_lso_seg));
+ while (remain_size >= (int)(txbb_avail_space + sizeof(uint32_t))) {
+ /* Copy to end of txbb. */
+ rte_memcpy(thdr.to, from, txbb_avail_space);
+ from += txbb_avail_space;
+ thdr.to += txbb_avail_space;
+ /* New TXBB, Check for TxQ wrap. */
+ if (thdr.to >= sq->eob)
+ thdr.vto = sq->buf;
+ /* New TXBB, stash the first 32bits for later use. */
+ pv[*pv_counter].dst = (volatile uint32_t *)thdr.to;
+ pv[(*pv_counter)++].val = *(uint32_t *)from,
+ from += sizeof(uint32_t);
+ thdr.to += sizeof(uint32_t);
+ remain_size -= txbb_avail_space + sizeof(uint32_t);
+ /* Avail space in new TXBB is TXBB size - 4 */
+ txbb_avail_space = MLX4_TXBB_SIZE - sizeof(uint32_t);
+ }
+ if (remain_size > txbb_avail_space) {
+ rte_memcpy(thdr.to, from, txbb_avail_space);
+ from += txbb_avail_space;
+ thdr.to += txbb_avail_space;
+ remain_size -= txbb_avail_space;
+ /* New TXBB, Check for TxQ wrap. */
+ if (thdr.to >= sq->eob)
+ thdr.vto = sq->buf;
+ pv[*pv_counter].dst = (volatile uint32_t *)thdr.to;
+ rte_memcpy(&pv[*pv_counter].val, from, remain_size);
+ (*pv_counter)++;
+ } else if (remain_size) {
+ rte_memcpy(thdr.to, from, remain_size);
+ }
+ tseg->mss_hdr_size = rte_cpu_to_be_32((buf->tso_segsz << 16) |
+ tinfo->tso_header_size);
+ /* Calculate data segment location */
+ return (volatile struct mlx4_wqe_data_seg *)
+ ((uintptr_t)tseg + tinfo->wqe_tso_seg_size);
+}
+
+/**
+ * Write data segments and header for TSO uni/multi segment packet.
+ *
+ * @param buf
+ * Pointer to the first packet mbuf.
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param ctrl
+ * Pointer to the WQE control segment.
+ *
+ * @return
+ * Pointer to the next WQE control segment on success, NULL otherwise.
+ */
+static volatile struct mlx4_wqe_ctrl_seg *
+mlx4_tx_burst_tso(struct rte_mbuf *buf, struct txq *txq,
+ volatile struct mlx4_wqe_ctrl_seg *ctrl)
+{
+ volatile struct mlx4_wqe_data_seg *dseg;
+ volatile struct mlx4_wqe_ctrl_seg *ctrl_next;
+ struct mlx4_sq *sq = &txq->msq;
+ struct tso_info tinfo;
+ struct pv *pv;
+ int pv_counter;
+ int ret;
+
+ ret = mlx4_tx_burst_tso_get_params(buf, txq, &tinfo);
+ if (unlikely(ret))
+ goto error;
+ dseg = mlx4_tx_burst_fill_tso_hdr(buf, txq, &tinfo, ctrl);
+ if (unlikely(dseg == NULL))
+ goto error;
+ if ((uintptr_t)dseg >= (uintptr_t)sq->eob)
+ dseg = (volatile struct mlx4_wqe_data_seg *)
+ ((uintptr_t)dseg - sq->size);
+ ctrl_next = mlx4_tx_burst_fill_tso_dsegs(buf, txq, &tinfo, dseg, ctrl);
+ if (unlikely(ctrl_next == NULL))
+ goto error;
+ /* Write the first DWORD of each TXBB save earlier. */
+ if (likely(tinfo.pv_counter)) {
+ pv = tinfo.pv;
+ pv_counter = tinfo.pv_counter;
+ /* Need a barrier here before writing the first TXBB word. */
+ rte_io_wmb();
+ do {
+ --pv_counter;
+ *pv[pv_counter].dst = pv[pv_counter].val;
+ } while (pv_counter > 0);
+ }
+ ctrl->fence_size = tinfo.fence_size;
+ sq->remain_size -= tinfo.wqe_size;
+ return ctrl_next;
+error:
+ txq->stats.odropped++;
+ return NULL;
+}
+
+/**
+ * Write data segments of multi-segment packet.
+ *
+ * @param buf
+ * Pointer to the first packet mbuf.
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param ctrl
+ * Pointer to the WQE control segment.
+ *
+ * @return
+ * Pointer to the next WQE control segment on success, NULL otherwise.
+ */
+static volatile struct mlx4_wqe_ctrl_seg *