*
* @param txq
* Pointer to TX queue structure.
- *
- * @return
- * 0 on success, -1 on failure.
*/
-static int
+static void
txq_complete(struct txq *txq)
{
- unsigned int elts_comp = txq->elts_comp;
- unsigned int elts_tail = txq->elts_tail;
- unsigned int elts_free = txq->elts_tail;
const unsigned int elts_n = txq->elts_n;
- int wcs_n;
-
- if (unlikely(elts_comp == 0))
- return 0;
-#ifdef DEBUG_SEND
- DEBUG("%p: processing %u work requests completions",
- (void *)txq, elts_comp);
-#endif
- wcs_n = txq->poll_cnt(txq->cq, elts_comp);
- if (unlikely(wcs_n == 0))
- return 0;
- if (unlikely(wcs_n < 0)) {
- DEBUG("%p: ibv_poll_cq() failed (wcs_n=%d)",
- (void *)txq, wcs_n);
- return -1;
+ const unsigned int cqe_n = txq->cqe_n;
+ uint16_t elts_free = txq->elts_tail;
+ uint16_t elts_tail;
+ uint16_t cq_ci = txq->cq_ci;
+ unsigned int wqe_ci = (unsigned int)-1;
+ int ret = 0;
+
+ while (ret == 0) {
+ volatile struct mlx5_cqe64 *cqe;
+
+ cqe = get_cqe64(*txq->cqes, cqe_n, &cq_ci);
+ if (cqe == NULL)
+ break;
+ wqe_ci = ntohs(cqe->wqe_counter);
}
- elts_comp -= wcs_n;
- assert(elts_comp <= txq->elts_comp);
- /*
- * Assume WC status is successful as nothing can be done about it
- * anyway.
- */
- elts_tail += wcs_n * txq->elts_comp_cd_init;
- if (elts_tail >= elts_n)
- elts_tail -= elts_n;
-
- while (elts_free != elts_tail) {
- struct txq_elt *elt = &(*txq->elts)[elts_free];
+ if (unlikely(wqe_ci == (unsigned int)-1))
+ return;
+ /* Free buffers. */
+ elts_tail = (wqe_ci + 1) & (elts_n - 1);
+ do {
+ struct rte_mbuf *elt = (*txq->elts)[elts_free];
unsigned int elts_free_next =
- (((elts_free + 1) == elts_n) ? 0 : elts_free + 1);
- struct rte_mbuf *tmp = elt->buf;
- struct txq_elt *elt_next = &(*txq->elts)[elts_free_next];
+ (elts_free + 1) & (elts_n - 1);
+ struct rte_mbuf *elt_next = (*txq->elts)[elts_free_next];
#ifndef NDEBUG
/* Poisoning. */
- memset(elt, 0x66, sizeof(*elt));
+ memset(&(*txq->elts)[elts_free],
+ 0x66,
+ sizeof((*txq->elts)[elts_free]));
#endif
- RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
- /* Faster than rte_pktmbuf_free(). */
- do {
- struct rte_mbuf *next = NEXT(tmp);
-
- rte_pktmbuf_free_seg(tmp);
- tmp = next;
- } while (tmp != NULL);
+ RTE_MBUF_PREFETCH_TO_FREE(elt_next);
+ /* Only one segment needs to be freed. */
+ rte_pktmbuf_free_seg(elt);
elts_free = elts_free_next;
- }
-
+ } while (elts_free != elts_tail);
+ txq->cq_ci = cq_ci;
txq->elts_tail = elts_tail;
- txq->elts_comp = elts_comp;
- return 0;
+ /* Update the consumer index. */
+ rte_wmb();
+ *txq->cq_db = htonl(cq_ci);
}
/**
}
if (txq->mp2mr[i].mp == mp) {
assert(txq->mp2mr[i].lkey != (uint32_t)-1);
- assert(txq->mp2mr[i].mr->lkey == txq->mp2mr[i].lkey);
+ assert(htonl(txq->mp2mr[i].mr->lkey) ==
+ txq->mp2mr[i].lkey);
lkey = txq->mp2mr[i].lkey;
break;
}
}
/**
- * Insert VLAN using mbuf headroom space.
- *
- * @param buf
- * Buffer for VLAN insertion.
+ * Write a regular WQE.
*
- * @return
- * 0 on success, errno value on failure.
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param wqe
+ * Pointer to the WQE to fill.
+ * @param addr
+ * Buffer data address.
+ * @param length
+ * Packet length.
+ * @param lkey
+ * Memory region lkey.
*/
-static inline int
-insert_vlan_sw(struct rte_mbuf *buf)
+static inline void
+mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
+ uintptr_t addr, uint32_t length, uint32_t lkey)
{
- uintptr_t addr;
- uint32_t vlan;
- uint16_t head_room_len = rte_pktmbuf_headroom(buf);
+ wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
+ wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
+ wqe->wqe.ctrl.data[3] = 0;
+ wqe->inl.eseg.rsvd0 = 0;
+ wqe->inl.eseg.rsvd1 = 0;
+ wqe->inl.eseg.mss = 0;
+ wqe->inl.eseg.rsvd2 = 0;
+ wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE);
+ /* Copy the first 16 bytes into inline header. */
+ rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start,
+ (uint8_t *)(uintptr_t)addr,
+ MLX5_ETH_INLINE_HEADER_SIZE);
+ addr += MLX5_ETH_INLINE_HEADER_SIZE;
+ length -= MLX5_ETH_INLINE_HEADER_SIZE;
+ /* Store remaining data in data segment. */
+ wqe->wqe.dseg.byte_count = htonl(length);
+ wqe->wqe.dseg.lkey = lkey;
+ wqe->wqe.dseg.addr = htonll(addr);
+ /* Increment consumer index. */
+ ++txq->wqe_ci;
+}
- if (head_room_len < 4)
- return EINVAL;
+/**
+ * Write a regular WQE with VLAN.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param wqe
+ * Pointer to the WQE to fill.
+ * @param addr
+ * Buffer data address.
+ * @param length
+ * Packet length.
+ * @param lkey
+ * Memory region lkey.
+ * @param vlan_tci
+ * VLAN field to insert in packet.
+ */
+static inline void
+mlx5_wqe_write_vlan(struct txq *txq, volatile union mlx5_wqe *wqe,
+ uintptr_t addr, uint32_t length, uint32_t lkey,
+ uint16_t vlan_tci)
+{
+ uint32_t vlan = htonl(0x81000000 | vlan_tci);
+
+ wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
+ wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
+ wqe->wqe.ctrl.data[3] = 0;
+ wqe->inl.eseg.rsvd0 = 0;
+ wqe->inl.eseg.rsvd1 = 0;
+ wqe->inl.eseg.mss = 0;
+ wqe->inl.eseg.rsvd2 = 0;
+ wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE);
+ /*
+ * Copy 12 bytes of source & destination MAC address.
+ * Copy 4 bytes of VLAN.
+ * Copy 2 bytes of Ether type.
+ */
+ rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start,
+ (uint8_t *)(uintptr_t)addr, 12);
+ rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 12),
+ &vlan, sizeof(vlan));
+ rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 16),
+ (uint8_t *)((uintptr_t)addr + 12), 2);
+ addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
+ length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
+ /* Store remaining data in data segment. */
+ wqe->wqe.dseg.byte_count = htonl(length);
+ wqe->wqe.dseg.lkey = lkey;
+ wqe->wqe.dseg.addr = htonll(addr);
+ /* Increment consumer index. */
+ ++txq->wqe_ci;
+}
- addr = rte_pktmbuf_mtod(buf, uintptr_t);
- vlan = htonl(0x81000000 | buf->vlan_tci);
- memmove((void *)(addr - 4), (void *)addr, 12);
- memcpy((void *)(addr + 8), &vlan, sizeof(vlan));
+/**
+ * Ring TX queue doorbell.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ */
+static inline void
+mlx5_tx_dbrec(struct txq *txq)
+{
+ uint8_t *dst = (uint8_t *)((uintptr_t)txq->bf_reg + txq->bf_offset);
+ uint32_t data[4] = {
+ htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
+ htonl(txq->qp_num_8s),
+ 0,
+ 0,
+ };
+ rte_wmb();
+ *txq->qp_db = htonl(txq->wqe_ci);
+ /* Ensure ordering between DB record and BF copy. */
+ rte_wmb();
+ rte_mov16(dst, (uint8_t *)data);
+ txq->bf_offset ^= txq->bf_buf_size;
+}
- SET_DATA_OFF(buf, head_room_len - 4);
- DATA_LEN(buf) += 4;
+/**
+ * Prefetch a CQE.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param cqe_ci
+ * CQE consumer index.
+ */
+static inline void
+tx_prefetch_cqe(struct txq *txq, uint16_t ci)
+{
+ volatile struct mlx5_cqe64 *cqe;
- return 0;
+ cqe = &(*txq->cqes)[ci & (txq->cqe_n - 1)].cqe64;
+ rte_prefetch0(cqe);
}
/**
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct txq *txq = (struct txq *)dpdk_txq;
- unsigned int elts_head = txq->elts_head;
+ uint16_t elts_head = txq->elts_head;
const unsigned int elts_n = txq->elts_n;
- unsigned int elts_comp_cd = txq->elts_comp_cd;
- unsigned int elts_comp = 0;
unsigned int i;
unsigned int max;
- int err;
- struct rte_mbuf *buf = pkts[0];
+ volatile union mlx5_wqe *wqe;
+ struct rte_mbuf *buf;
- assert(elts_comp_cd != 0);
+ if (unlikely(!pkts_n))
+ return 0;
+ buf = pkts[0];
/* Prefetch first packet cacheline. */
+ tx_prefetch_cqe(txq, txq->cq_ci);
+ tx_prefetch_cqe(txq, txq->cq_ci + 1);
rte_prefetch0(buf);
+ /* Start processing. */
txq_complete(txq);
max = (elts_n - (elts_head - txq->elts_tail));
if (max > elts_n)
if (max > pkts_n)
max = pkts_n;
for (i = 0; (i != max); ++i) {
- struct rte_mbuf *buf_next = pkts[i + 1];
- unsigned int elts_head_next =
- (((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
- struct txq_elt *elt = &(*txq->elts)[elts_head];
- uint32_t send_flags = 0;
-#ifdef HAVE_VERBS_VLAN_INSERTION
- int insert_vlan = 0;
-#endif /* HAVE_VERBS_VLAN_INSERTION */
+ unsigned int elts_head_next = (elts_head + 1) & (elts_n - 1);
uintptr_t addr;
uint32_t length;
uint32_t lkey;
- uintptr_t buf_next_addr;
+ wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
+ rte_prefetch0(wqe);
if (i + 1 < max)
- rte_prefetch0(buf_next);
- /* Request TX completion. */
- if (unlikely(--elts_comp_cd == 0)) {
- elts_comp_cd = txq->elts_comp_cd_init;
- ++elts_comp;
- send_flags |= IBV_EXP_QP_BURST_SIGNALED;
- }
- /* Should we enable HW CKSUM offload */
- if (buf->ol_flags &
- (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
- send_flags |= IBV_EXP_QP_BURST_IP_CSUM;
- /* HW does not support checksum offloads at arbitrary
- * offsets but automatically recognizes the packet
- * type. For inner L3/L4 checksums, only VXLAN (UDP)
- * tunnels are currently supported. */
- if (RTE_ETH_IS_TUNNEL_PKT(buf->packet_type))
- send_flags |= IBV_EXP_QP_BURST_TUNNEL;
- }
- if (buf->ol_flags & PKT_TX_VLAN_PKT) {
-#ifdef HAVE_VERBS_VLAN_INSERTION
- if (!txq->priv->mps)
- insert_vlan = 1;
- else
-#endif /* HAVE_VERBS_VLAN_INSERTION */
- {
- err = insert_vlan_sw(buf);
- if (unlikely(err))
- goto stop;
- }
- }
+ rte_prefetch0(pkts[i + 1]);
/* Retrieve buffer information. */
addr = rte_pktmbuf_mtod(buf, uintptr_t);
length = DATA_LEN(buf);
/* Update element. */
- elt->buf = buf;
- if (txq->priv->sriov)
- rte_prefetch0((volatile void *)
- (uintptr_t)addr);
+ (*txq->elts)[elts_head] = buf;
/* Prefetch next buffer data. */
- if (i + 1 < max) {
- buf_next_addr =
- rte_pktmbuf_mtod(buf_next, uintptr_t);
- rte_prefetch0((volatile void *)
- (uintptr_t)buf_next_addr);
- }
+ if (i + 1 < max)
+ rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 1],
+ volatile void *));
/* Retrieve Memory Region key for this memory pool. */
lkey = txq_mp2mr(txq, txq_mb2mp(buf));
- if (unlikely(lkey == (uint32_t)-1)) {
- /* MR does not exist. */
- DEBUG("%p: unable to get MP <-> MR"
- " association", (void *)txq);
- /* Clean up TX element. */
- elt->buf = NULL;
- goto stop;
- }
-#ifdef HAVE_VERBS_VLAN_INSERTION
- if (insert_vlan)
- err = txq->send_pending_vlan
- (txq->qp,
- addr,
- length,
- lkey,
- send_flags,
- &buf->vlan_tci);
+ if (buf->ol_flags & PKT_TX_VLAN_PKT)
+ mlx5_wqe_write_vlan(txq, wqe, addr, length, lkey,
+ buf->vlan_tci);
else
-#endif /* HAVE_VERBS_VLAN_INSERTION */
- err = txq->send_pending
- (txq->qp,
- addr,
- length,
- lkey,
- send_flags);
- if (unlikely(err))
- goto stop;
+ mlx5_wqe_write(txq, wqe, addr, length, lkey);
+ /* Request completion if needed. */
+ if (unlikely(--txq->elts_comp == 0)) {
+ wqe->wqe.ctrl.data[2] = htonl(8);
+ txq->elts_comp = txq->elts_comp_cd_init;
+ } else {
+ wqe->wqe.ctrl.data[2] = 0;
+ }
+ /* Should we enable HW CKSUM offload */
+ if (buf->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
+ wqe->wqe.eseg.cs_flags =
+ MLX5_ETH_WQE_L3_CSUM |
+ MLX5_ETH_WQE_L4_CSUM;
+ } else {
+ wqe->wqe.eseg.cs_flags = 0;
+ }
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment sent bytes counter. */
txq->stats.obytes += length;
#endif
-stop:
elts_head = elts_head_next;
- buf = buf_next;
+ buf = pkts[i + 1];
}
/* Take a shortcut if nothing must be sent. */
if (unlikely(i == 0))
txq->stats.opackets += i;
#endif
/* Ring QP doorbell. */
- err = txq->send_flush(txq->qp);
- if (unlikely(err)) {
- /* A nonzero value is not supposed to be returned.
- * Nothing can be done about it. */
- DEBUG("%p: send_flush() failed with error %d",
- (void *)txq, err);
- }
+ mlx5_tx_dbrec(txq);
txq->elts_head = elts_head;
- txq->elts_comp += elts_comp;
- txq->elts_comp_cd = elts_comp_cd;
return i;
}