+ struct txq *txq = (struct txq *)dpdk_txq;
+ uint16_t elts_head = txq->elts_head;
+ const unsigned int elts_n = 1 << txq->elts_n;
+ unsigned int i = 0;
+ unsigned int j = 0;
+ unsigned int max;
+ unsigned int comp;
+ volatile struct mlx5_wqe_v *wqe = NULL;
+ unsigned int segs_n = 0;
+ struct rte_mbuf *buf = NULL;
+ uint8_t *raw;
+
+ if (unlikely(!pkts_n))
+ return 0;
+ /* Prefetch first packet cacheline. */
+ rte_prefetch0(*pkts);
+ /* Start processing. */
+ txq_complete(txq);
+ max = (elts_n - (elts_head - txq->elts_tail));
+ if (max > elts_n)
+ max -= elts_n;
+ do {
+ volatile rte_v128u32_t *dseg = NULL;
+ uint32_t length;
+ unsigned int ds = 0;
+ uintptr_t addr;
+ uint64_t naddr;
+ uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
+ uint16_t ehdr;
+ uint8_t cs_flags = 0;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t total_length = 0;
+#endif
+
+ /* first_seg */
+ buf = *(pkts++);
+ segs_n = buf->nb_segs;
+ /*
+ * Make sure there is enough room to store this packet and
+ * that one ring entry remains unused.
+ */
+ assert(segs_n);
+ if (max < segs_n + 1)
+ break;
+ max -= segs_n;
+ --segs_n;
+ if (!segs_n)
+ --pkts_n;
+ wqe = (volatile struct mlx5_wqe_v *)
+ tx_mlx5_wqe(txq, txq->wqe_ci);
+ rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
+ if (pkts_n > 1)
+ rte_prefetch0(*pkts);
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ length = DATA_LEN(buf);
+ ehdr = (((uint8_t *)addr)[1] << 8) |
+ ((uint8_t *)addr)[0];
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ total_length = length;
+#endif
+ assert(length >= MLX5_WQE_DWORD_SIZE);
+ /* Update element. */
+ (*txq->elts)[elts_head] = buf;
+ elts_head = (elts_head + 1) & (elts_n - 1);
+ /* Prefetch next buffer data. */
+ if (pkts_n > 1) {
+ volatile void *pkt_addr;
+
+ pkt_addr = rte_pktmbuf_mtod(*pkts, volatile void *);
+ rte_prefetch0(pkt_addr);
+ }
+ /* Should we enable HW CKSUM offload */
+ if (buf->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
+ cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+ }
+ raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
+ /*
+ * Start by copying the Ethernet header minus the first two
+ * bytes which will be appended at the end of the Ethernet
+ * segment.
+ */
+ memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2, 16);
+ length -= MLX5_WQE_DWORD_SIZE;
+ addr += MLX5_WQE_DWORD_SIZE;
+ /* Replace the Ethernet type by the VLAN if necessary. */
+ if (buf->ol_flags & PKT_TX_VLAN_PKT) {
+ uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);
+
+ memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - 2 -
+ sizeof(vlan)),
+ &vlan, sizeof(vlan));
+ addr -= sizeof(vlan);
+ length += sizeof(vlan);
+ }
+ /* Inline if enough room. */
+ if (txq->max_inline != 0) {
+ uintptr_t end = (uintptr_t)
+ (((uintptr_t)txq->wqes) +
+ (1 << txq->wqe_n) * MLX5_WQE_SIZE);
+ uint16_t max_inline =
+ txq->max_inline * RTE_CACHE_LINE_SIZE;
+ uint16_t room;
+
+ /*
+ * raw starts two bytes before the boundary to
+ * continue the above copy of packet data.
+ */
+ raw += MLX5_WQE_DWORD_SIZE - 2;
+ room = end - (uintptr_t)raw;
+ if (room > max_inline) {
+ uintptr_t addr_end = (addr + max_inline) &
+ ~(RTE_CACHE_LINE_SIZE - 1);
+ uint16_t copy_b = ((addr_end - addr) > length) ?
+ length :
+ (addr_end - addr);
+
+ rte_memcpy((void *)raw, (void *)addr, copy_b);
+ addr += copy_b;
+ length -= copy_b;
+ pkt_inline_sz += copy_b;
+ /* Sanity check. */
+ assert(addr <= addr_end);
+ }
+ /*
+ * 2 DWORDs consumed by the WQE header + ETH segment +
+ * the size of the inline part of the packet.
+ */
+ ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
+ if (length > 0) {
+ dseg = (volatile rte_v128u32_t *)
+ ((uintptr_t)wqe +
+ (ds * MLX5_WQE_DWORD_SIZE));
+ if ((uintptr_t)dseg >= end)
+ dseg = (volatile rte_v128u32_t *)
+ txq->wqes;
+ goto use_dseg;
+ } else if (!segs_n) {
+ goto next_pkt;
+ } else {
+ /* dseg will be advance as part of next_seg */
+ dseg = (volatile rte_v128u32_t *)
+ ((uintptr_t)wqe +
+ ((ds - 1) * MLX5_WQE_DWORD_SIZE));
+ goto next_seg;
+ }
+ } else {
+ /*
+ * No inline has been done in the packet, only the
+ * Ethernet Header as been stored.
+ */
+ dseg = (volatile rte_v128u32_t *)
+ ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
+ ds = 3;
+use_dseg:
+ /* Add the remaining packet as a simple ds. */
+ naddr = htonll(addr);
+ *dseg = (rte_v128u32_t){
+ htonl(length),
+ txq_mp2mr(txq, txq_mb2mp(buf)),
+ naddr,
+ naddr >> 32,
+ };
+ ++ds;
+ if (!segs_n)
+ goto next_pkt;
+ }
+next_seg:
+ assert(buf);
+ assert(ds);
+ assert(wqe);
+ /*
+ * Spill on next WQE when the current one does not have
+ * enough room left. Size of WQE must a be a multiple
+ * of data segment size.
+ */
+ assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
+ if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
+ unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) &
+ ((1 << txq->wqe_n) - 1);
+
+ dseg = (volatile rte_v128u32_t *)
+ tx_mlx5_wqe(txq, n);
+ rte_prefetch0(tx_mlx5_wqe(txq, n + 1));
+ } else {
+ ++dseg;
+ }
+ ++ds;
+ buf = buf->next;
+ assert(buf);
+ length = DATA_LEN(buf);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ total_length += length;
+#endif
+ /* Store segment information. */
+ naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
+ *dseg = (rte_v128u32_t){
+ htonl(length),
+ txq_mp2mr(txq, txq_mb2mp(buf)),
+ naddr,
+ naddr >> 32,
+ };
+ (*txq->elts)[elts_head] = buf;
+ elts_head = (elts_head + 1) & (elts_n - 1);
+ ++j;
+ --segs_n;
+ if (segs_n)
+ goto next_seg;
+ else
+ --pkts_n;
+next_pkt:
+ ++i;
+ /* Initialize known and common part of the WQE structure. */
+ wqe->ctrl = (rte_v128u32_t){
+ htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
+ htonl(txq->qp_num_8s | ds),
+ 0,
+ 0,
+ };
+ wqe->eseg = (rte_v128u32_t){
+ 0,
+ cs_flags,
+ 0,
+ (ehdr << 16) | htons(pkt_inline_sz),
+ };
+ txq->wqe_ci += (ds + 3) / 4;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment sent bytes counter. */
+ txq->stats.obytes += total_length;
+#endif
+ } while (pkts_n);
+ /* Take a shortcut if nothing must be sent. */
+ if (unlikely(i == 0))
+ return 0;
+ /* Check whether completion threshold has been reached. */
+ comp = txq->elts_comp + i + j;
+ if (comp >= MLX5_TX_COMP_THRESH) {
+ volatile struct mlx5_wqe_ctrl *w =
+ (volatile struct mlx5_wqe_ctrl *)wqe;
+
+ /* Request completion on last WQE. */
+ w->ctrl2 = htonl(8);
+ /* Save elts_head in unused "immediate" field of WQE. */
+ w->ctrl3 = elts_head;
+ txq->elts_comp = 0;
+ } else {
+ txq->elts_comp = comp;
+ }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment sent packets counter. */
+ txq->stats.opackets += i;
+#endif
+ /* Ring QP doorbell. */
+ mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)wqe);
+ txq->elts_head = elts_head;
+ return i;