/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5_hw.h>
#include <infiniband/arch.h>
#ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
#endif
/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
#endif
#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
-static inline volatile struct mlx5_cqe64 *
-get_cqe64(volatile struct mlx5_cqe cqes[],
- unsigned int cqes_n, uint16_t *ci)
- __attribute__((always_inline));
+#ifndef NDEBUG
+/**
+ * Verify or set magic value in CQE.
+ *
+ * @param cqe
+ * Pointer to CQE.
+ *
+ * @return
+ *   0 the first time the CQE is seen (the magic value is then set),
+ *   1 otherwise.
+ */
static inline int
-rx_poll_len(struct rxq *rxq) __attribute__((always_inline));
-
-static volatile struct mlx5_cqe64 *
-get_cqe64(volatile struct mlx5_cqe cqes[],
- unsigned int cqes_n, uint16_t *ci)
+check_cqe_seen(volatile struct mlx5_cqe *cqe)
{
- volatile struct mlx5_cqe64 *cqe;
- uint16_t idx = *ci;
- uint8_t op_own;
-
- cqe = &cqes[idx & (cqes_n - 1)].cqe64;
- op_own = cqe->op_own;
- if (unlikely((op_own & MLX5_CQE_OWNER_MASK) == !(idx & cqes_n))) {
- return NULL;
- } else if (unlikely(op_own & 0x80)) {
- switch (op_own >> 4) {
- case MLX5_CQE_INVALID:
- return NULL; /* No CQE */
- case MLX5_CQE_REQ_ERR:
- return cqe;
- case MLX5_CQE_RESP_ERR:
- ++(*ci);
- return NULL;
- default:
- return NULL;
+ static const uint8_t magic[] = "seen";
+ volatile uint8_t (*buf)[sizeof(cqe->rsvd3)] = &cqe->rsvd3;
+ int ret = 1;
+ unsigned int i;
+
+ for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i)
+ if (!ret || (*buf)[i] != magic[i]) {
+ ret = 0;
+ (*buf)[i] = magic[i];
}
+ return ret;
+}
+
+#endif /* NDEBUG */
+
+static inline int
+check_cqe(volatile struct mlx5_cqe *cqe,
+ unsigned int cqes_n, const uint16_t ci)
+ __attribute__((always_inline));
+
+/**
+ * Check whether CQE is valid.
+ *
+ * @param cqe
+ * Pointer to CQE.
+ * @param cqes_n
+ * Size of completion queue.
+ * @param ci
+ * Consumer index.
+ *
+ * @return
+ * 0 on success, 1 on failure.
+ */
+static inline int
+check_cqe(volatile struct mlx5_cqe *cqe,
+ unsigned int cqes_n, const uint16_t ci)
+{
+ uint16_t idx = ci & cqes_n;
+ uint8_t op_own = cqe->op_own;
+ uint8_t op_owner = MLX5_CQE_OWNER(op_own);
+ uint8_t op_code = MLX5_CQE_OPCODE(op_own);
+
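+	/*
+	 * The owner bit in op_own toggles each time the consumer index wraps
+	 * around the ring; the CQE belongs to software only when that bit
+	 * matches the wrap parity derived from the consumer index.
+	 */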
+ if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
+ return 1; /* No CQE. */
+#ifndef NDEBUG
+ if ((op_code == MLX5_CQE_RESP_ERR) ||
+ (op_code == MLX5_CQE_REQ_ERR)) {
+ volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe;
+ uint8_t syndrome = err_cqe->syndrome;
+
+ if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
+ (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
+ return 0;
+ if (!check_cqe_seen(cqe))
+ ERROR("unexpected CQE error %u (0x%02x)"
+ " syndrome 0x%02x",
+ op_code, op_code, syndrome);
+ return 1;
+ } else if ((op_code != MLX5_CQE_RESP_SEND) &&
+ (op_code != MLX5_CQE_REQ)) {
+ if (!check_cqe_seen(cqe))
+ ERROR("unexpected CQE opcode %u (0x%02x)",
+ op_code, op_code);
+ return 1;
}
- if (cqe) {
- *ci = idx + 1;
- return cqe;
- }
- return NULL;
+#endif /* NDEBUG */
+ return 0;
}
+static inline void
+txq_complete(struct txq *txq) __attribute__((always_inline));
+
/**
* Manage TX completions.
*
* When sending a burst, mlx5_tx_burst() posts several WRs.
- * To improve performance, a completion event is only required once every
- * MLX5_PMD_TX_PER_COMP_REQ sends. Doing so discards completion information
- * for other WRs, but this information would not be used anyway.
*
* @param txq
* Pointer to TX queue structure.
- *
- * @return
- * 0 on success, -1 on failure.
*/
-static int
+static inline void
txq_complete(struct txq *txq)
{
- unsigned int elts_comp = txq->elts_comp;
- unsigned int elts_tail = txq->elts_tail;
- unsigned int elts_free = txq->elts_tail;
- const unsigned int elts_n = txq->elts_n;
- int wcs_n;
+ const unsigned int elts_n = 1 << txq->elts_n;
+ const unsigned int cqe_n = 1 << txq->cqe_n;
+ const unsigned int cqe_cnt = cqe_n - 1;
+ uint16_t elts_free = txq->elts_tail;
+ uint16_t elts_tail;
+ uint16_t cq_ci = txq->cq_ci;
+ volatile struct mlx5_cqe *cqe = NULL;
+ volatile struct mlx5_wqe *wqe;
- if (unlikely(elts_comp == 0))
- return 0;
-#ifdef DEBUG_SEND
- DEBUG("%p: processing %u work requests completions",
- (void *)txq, elts_comp);
-#endif
- wcs_n = txq->poll_cnt(txq->cq, elts_comp);
- if (unlikely(wcs_n == 0))
- return 0;
- if (unlikely(wcs_n < 0)) {
- DEBUG("%p: ibv_poll_cq() failed (wcs_n=%d)",
- (void *)txq, wcs_n);
- return -1;
- }
- elts_comp -= wcs_n;
- assert(elts_comp <= txq->elts_comp);
- /*
- * Assume WC status is successful as nothing can be done about it
- * anyway.
- */
- elts_tail += wcs_n * txq->elts_comp_cd_init;
- if (elts_tail >= elts_n)
- elts_tail -= elts_n;
+ do {
+ volatile struct mlx5_cqe *tmp;
+ tmp = &(*txq->cqes)[cq_ci & cqe_cnt];
+ if (check_cqe(tmp, cqe_n, cq_ci))
+ break;
+ cqe = tmp;
+#ifndef NDEBUG
+ if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
+ if (!check_cqe_seen(cqe))
+ ERROR("unexpected compressed CQE, TX stopped");
+ return;
+ }
+ if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
+ (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
+ if (!check_cqe_seen(cqe))
+ ERROR("unexpected error CQE, TX stopped");
+ return;
+ }
+#endif /* NDEBUG */
+ ++cq_ci;
+ } while (1);
+ if (unlikely(cqe == NULL))
+ return;
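+	/*
+	 * The completed CQE points back to the WQE that requested it; that
+	 * WQE's ctrl[3] holds the elts_head value saved when the completion
+	 * was requested, i.e. how far the element ring can be freed.
+	 */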
+ wqe = &(*txq->wqes)[htons(cqe->wqe_counter) &
+ ((1 << txq->wqe_n) - 1)].hdr;
+ elts_tail = wqe->ctrl[3];
+ assert(elts_tail < (1 << txq->wqe_n));
+ /* Free buffers. */
while (elts_free != elts_tail) {
- struct txq_elt *elt = &(*txq->elts)[elts_free];
+ struct rte_mbuf *elt = (*txq->elts)[elts_free];
unsigned int elts_free_next =
- (((elts_free + 1) == elts_n) ? 0 : elts_free + 1);
- struct rte_mbuf *tmp = elt->buf;
- struct txq_elt *elt_next = &(*txq->elts)[elts_free_next];
+ (elts_free + 1) & (elts_n - 1);
+ struct rte_mbuf *elt_next = (*txq->elts)[elts_free_next];
#ifndef NDEBUG
/* Poisoning. */
- memset(elt, 0x66, sizeof(*elt));
+ memset(&(*txq->elts)[elts_free],
+ 0x66,
+ sizeof((*txq->elts)[elts_free]));
#endif
- RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
- /* Faster than rte_pktmbuf_free(). */
- do {
- struct rte_mbuf *next = NEXT(tmp);
-
- rte_pktmbuf_free_seg(tmp);
- tmp = next;
- } while (tmp != NULL);
+ RTE_MBUF_PREFETCH_TO_FREE(elt_next);
+ /* Only one segment needs to be freed. */
+ rte_pktmbuf_free_seg(elt);
elts_free = elts_free_next;
}
-
+ txq->cq_ci = cq_ci;
txq->elts_tail = elts_tail;
- txq->elts_comp = elts_comp;
- return 0;
+ /* Update the consumer index. */
+ rte_wmb();
+ *txq->cq_db = htonl(cq_ci);
}
/**
}
if (txq->mp2mr[i].mp == mp) {
assert(txq->mp2mr[i].lkey != (uint32_t)-1);
- assert(txq->mp2mr[i].mr->lkey == txq->mp2mr[i].lkey);
+ assert(htonl(txq->mp2mr[i].mr->lkey) ==
+ txq->mp2mr[i].lkey);
lkey = txq->mp2mr[i].lkey;
break;
}
}
/**
- * Insert VLAN using mbuf headroom space.
+ * Ring TX queue doorbell.
*
- * @param buf
- * Buffer for VLAN insertion.
- *
- * @return
- * 0 on success, errno value on failure.
+ * @param txq
+ * Pointer to TX queue structure.
*/
-static inline int
-insert_vlan_sw(struct rte_mbuf *buf)
+static inline void
+mlx5_tx_dbrec(struct txq *txq)
{
- uintptr_t addr;
- uint32_t vlan;
- uint16_t head_room_len = rte_pktmbuf_headroom(buf);
+ uint8_t *dst = (uint8_t *)((uintptr_t)txq->bf_reg + txq->bf_offset);
+ uint32_t data[4] = {
+ htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
+ htonl(txq->qp_num_8s),
+ 0,
+ 0,
+ };
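+	/*
+	 * data[] holds the beginning of a control segment (opcode, WQE index
+	 * and QP number); writing it to the BlueFlame register notifies the
+	 * device of the newly posted WQEs.
+	 */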
+ rte_wmb();
+ *txq->qp_db = htonl(txq->wqe_ci);
+ /* Ensure ordering between DB record and BF copy. */
+ rte_wmb();
+ memcpy(dst, (uint8_t *)data, 16);
+ txq->bf_offset ^= (1 << txq->bf_buf_size);
+}
- if (head_room_len < 4)
- return EINVAL;
+/**
+ * Prefetch a CQE.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param cqe_ci
+ * CQE consumer index.
+ */
+static inline void
+tx_prefetch_cqe(struct txq *txq, uint16_t ci)
+{
+ volatile struct mlx5_cqe *cqe;
- addr = rte_pktmbuf_mtod(buf, uintptr_t);
- vlan = htonl(0x81000000 | buf->vlan_tci);
- memmove((void *)(addr - 4), (void *)addr, 12);
- memcpy((void *)(addr + 8), &vlan, sizeof(vlan));
+ cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)];
+ rte_prefetch0(cqe);
+}
- SET_DATA_OFF(buf, head_room_len - 4);
- DATA_LEN(buf) += 4;
+/**
+ * Prefetch a WQE.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param wqe_ci
+ * WQE consumer index.
+ */
+static inline void
+tx_prefetch_wqe(struct txq *txq, uint16_t ci)
+{
+ volatile struct mlx5_wqe64 *wqe;
- return 0;
+ wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)];
+ rte_prefetch0(wqe);
}
/**
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct txq *txq = (struct txq *)dpdk_txq;
- unsigned int elts_head = txq->elts_head;
- const unsigned int elts_n = txq->elts_n;
- unsigned int elts_comp_cd = txq->elts_comp_cd;
- unsigned int elts_comp = 0;
- unsigned int i;
+ uint16_t elts_head = txq->elts_head;
+ const unsigned int elts_n = 1 << txq->elts_n;
+ unsigned int i = 0;
+ unsigned int j = 0;
unsigned int max;
- int err;
- struct rte_mbuf *buf = pkts[0];
+ unsigned int comp;
+ volatile struct mlx5_wqe *wqe = NULL;
+ unsigned int segs_n = 0;
+ struct rte_mbuf *buf = NULL;
+ uint8_t *raw;
- assert(elts_comp_cd != 0);
+ if (unlikely(!pkts_n))
+ return 0;
/* Prefetch first packet cacheline. */
- rte_prefetch0(buf);
+ tx_prefetch_cqe(txq, txq->cq_ci);
+ tx_prefetch_cqe(txq, txq->cq_ci + 1);
+ rte_prefetch0(*pkts);
+ /* Start processing. */
txq_complete(txq);
max = (elts_n - (elts_head - txq->elts_tail));
if (max > elts_n)
max -= elts_n;
- assert(max >= 1);
- assert(max <= elts_n);
- /* Always leave one free entry in the ring. */
- --max;
- if (max == 0)
- return 0;
- if (max > pkts_n)
- max = pkts_n;
- for (i = 0; (i != max); ++i) {
- struct rte_mbuf *buf_next = pkts[i + 1];
- unsigned int elts_head_next =
- (((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
- struct txq_elt *elt = &(*txq->elts)[elts_head];
- uint32_t send_flags = 0;
-#ifdef HAVE_VERBS_VLAN_INSERTION
- int insert_vlan = 0;
-#endif /* HAVE_VERBS_VLAN_INSERTION */
- uintptr_t addr;
+ do {
+ volatile struct mlx5_wqe_data_seg *dseg = NULL;
uint32_t length;
- uint32_t lkey;
- uintptr_t buf_next_addr;
-
- if (i + 1 < max)
- rte_prefetch0(buf_next);
- /* Request TX completion. */
- if (unlikely(--elts_comp_cd == 0)) {
- elts_comp_cd = txq->elts_comp_cd_init;
- ++elts_comp;
- send_flags |= IBV_EXP_QP_BURST_SIGNALED;
+ unsigned int ds = 0;
+ uintptr_t addr;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t total_length = 0;
+#endif
+
+ /* first_seg */
+ buf = *(pkts++);
+ segs_n = buf->nb_segs;
+ /*
+ * Make sure there is enough room to store this packet and
+ * that one ring entry remains unused.
+ */
+ assert(segs_n);
+ if (max < segs_n + 1)
+ break;
+ max -= segs_n;
+ --segs_n;
+ if (!segs_n)
+ --pkts_n;
+ wqe = &(*txq->wqes)[txq->wqe_ci &
+ ((1 << txq->wqe_n) - 1)].hdr;
+ tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+ if (pkts_n > 1)
+ rte_prefetch0(*pkts);
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ length = DATA_LEN(buf);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ total_length = length;
+#endif
+ assert(length >= MLX5_WQE_DWORD_SIZE);
+ /* Update element. */
+ (*txq->elts)[elts_head] = buf;
+ elts_head = (elts_head + 1) & (elts_n - 1);
+ /* Prefetch next buffer data. */
+ if (pkts_n > 1) {
+ volatile void *pkt_addr;
+
+ pkt_addr = rte_pktmbuf_mtod(*pkts, volatile void *);
+ rte_prefetch0(pkt_addr);
}
/* Should we enable HW CKSUM offload */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
- send_flags |= IBV_EXP_QP_BURST_IP_CSUM;
- /* HW does not support checksum offloads at arbitrary
- * offsets but automatically recognizes the packet
- * type. For inner L3/L4 checksums, only VXLAN (UDP)
- * tunnels are currently supported. */
- if (RTE_ETH_IS_TUNNEL_PKT(buf->packet_type))
- send_flags |= IBV_EXP_QP_BURST_TUNNEL;
+ wqe->eseg.cs_flags =
+ MLX5_ETH_WQE_L3_CSUM |
+ MLX5_ETH_WQE_L4_CSUM;
+ } else {
+ wqe->eseg.cs_flags = 0;
}
+ raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
+		/* Start the known and common part of the WQE structure. */
+ wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
+ wqe->ctrl[2] = 0;
+ wqe->ctrl[3] = 0;
+ wqe->eseg.rsvd0 = 0;
+ wqe->eseg.rsvd1 = 0;
+ wqe->eseg.mss = 0;
+ wqe->eseg.rsvd2 = 0;
+ /* Start by copying the Ethernet Header. */
+ memcpy((uint8_t *)raw, ((uint8_t *)addr), 16);
+ length -= MLX5_WQE_DWORD_SIZE;
+ addr += MLX5_WQE_DWORD_SIZE;
+ /* Replace the Ethernet type by the VLAN if necessary. */
if (buf->ol_flags & PKT_TX_VLAN_PKT) {
-#ifdef HAVE_VERBS_VLAN_INSERTION
- if (!txq->priv->mps)
- insert_vlan = 1;
- else
-#endif /* HAVE_VERBS_VLAN_INSERTION */
- {
- err = insert_vlan_sw(buf);
- if (unlikely(err))
- goto stop;
- }
+ uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);
+
+ memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE -
+ sizeof(vlan)),
+ &vlan, sizeof(vlan));
+ addr -= sizeof(vlan);
+ length += sizeof(vlan);
}
- /* Retrieve buffer information. */
- addr = rte_pktmbuf_mtod(buf, uintptr_t);
- length = DATA_LEN(buf);
- /* Update element. */
- elt->buf = buf;
- if (txq->priv->sriov)
- rte_prefetch0((volatile void *)
- (uintptr_t)addr);
- /* Prefetch next buffer data. */
- if (i + 1 < max) {
- buf_next_addr =
- rte_pktmbuf_mtod(buf_next, uintptr_t);
- rte_prefetch0((volatile void *)
- (uintptr_t)buf_next_addr);
+ /* Inline if enough room. */
+ if (txq->max_inline != 0) {
+ uintptr_t end =
+ (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
+ uint16_t max_inline =
+ txq->max_inline * RTE_CACHE_LINE_SIZE;
+ uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
+ uint16_t room;
+
+ raw += MLX5_WQE_DWORD_SIZE;
+ room = end - (uintptr_t)raw;
+ if (room > max_inline) {
+ uintptr_t addr_end = (addr + max_inline) &
+ ~(RTE_CACHE_LINE_SIZE - 1);
+ uint16_t copy_b = ((addr_end - addr) > length) ?
+ length :
+ (addr_end - addr);
+
+ rte_memcpy((void *)raw, (void *)addr, copy_b);
+ addr += copy_b;
+ length -= copy_b;
+ pkt_inline_sz += copy_b;
+ /* Sanity check. */
+ assert(addr <= addr_end);
+ }
+ /* Store the inlined packet size in the WQE. */
+ wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
+ /*
+ * 2 DWORDs consumed by the WQE header + 1 DSEG +
+ * the size of the inline part of the packet.
+ */
+ ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
+ if (length > 0) {
+ dseg = (struct mlx5_wqe_data_seg *)
+ ((uintptr_t)wqe +
+ (ds * MLX5_WQE_DWORD_SIZE));
+ if ((uintptr_t)dseg >= end)
+ dseg = (struct mlx5_wqe_data_seg *)
+ ((uintptr_t)&(*txq->wqes)[0]);
+ goto use_dseg;
+ } else if (!segs_n) {
+ goto next_pkt;
+ } else {
+ goto next_seg;
+ }
+ } else {
+ /*
+ * No inline has been done in the packet, only the
+			 * Ethernet header has been stored.
+ */
+ wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
+ dseg = (struct mlx5_wqe_data_seg *)
+ ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
+ ds = 3;
+use_dseg:
+ /* Add the remaining packet as a simple ds. */
+ *dseg = (struct mlx5_wqe_data_seg) {
+ .addr = htonll(addr),
+ .byte_count = htonl(length),
+ .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+ };
+ ++ds;
+ if (!segs_n)
+ goto next_pkt;
}
- /* Retrieve Memory Region key for this memory pool. */
- lkey = txq_mp2mr(txq, txq_mb2mp(buf));
- if (unlikely(lkey == (uint32_t)-1)) {
- /* MR does not exist. */
- DEBUG("%p: unable to get MP <-> MR"
- " association", (void *)txq);
- /* Clean up TX element. */
- elt->buf = NULL;
- goto stop;
+next_seg:
+ assert(buf);
+ assert(ds);
+ assert(wqe);
+ /*
+ * Spill on next WQE when the current one does not have
+		 * enough room left. The size of a WQE must be a multiple
+ * of data segment size.
+ */
+ assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
+ if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
+ unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) &
+ ((1 << txq->wqe_n) - 1);
+
+ dseg = (struct mlx5_wqe_data_seg *)
+ ((uintptr_t)&(*txq->wqes)[n]);
+ tx_prefetch_wqe(txq, n + 1);
+ } else {
+ ++dseg;
}
-#ifdef HAVE_VERBS_VLAN_INSERTION
- if (insert_vlan)
- err = txq->send_pending_vlan
- (txq->qp,
- addr,
- length,
- lkey,
- send_flags,
- &buf->vlan_tci);
+ ++ds;
+ buf = buf->next;
+ assert(buf);
+ length = DATA_LEN(buf);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ total_length += length;
+#endif
+ /* Store segment information. */
+ *dseg = (struct mlx5_wqe_data_seg) {
+ .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
+ .byte_count = htonl(length),
+ .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+ };
+ (*txq->elts)[elts_head] = buf;
+ elts_head = (elts_head + 1) & (elts_n - 1);
+ ++j;
+ --segs_n;
+ if (segs_n)
+ goto next_seg;
else
-#endif /* HAVE_VERBS_VLAN_INSERTION */
- err = txq->send_pending
- (txq->qp,
- addr,
- length,
- lkey,
- send_flags);
- if (unlikely(err))
- goto stop;
+ --pkts_n;
+next_pkt:
+ ++i;
+ wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
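+		/* A 64 B WQE holds four 16 B segments; round up. */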
+ txq->wqe_ci += (ds + 3) / 4;
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment sent bytes counter. */
- txq->stats.obytes += length;
+ txq->stats.obytes += total_length;
#endif
-stop:
- elts_head = elts_head_next;
- buf = buf_next;
+ } while (pkts_n);
+ /* Take a shortcut if nothing must be sent. */
+ if (unlikely(i == 0))
+ return 0;
+ /* Check whether completion threshold has been reached. */
+ comp = txq->elts_comp + i + j;
+ if (comp >= MLX5_TX_COMP_THRESH) {
+ /* Request completion on last WQE. */
+ wqe->ctrl[2] = htonl(8);
+ /* Save elts_head in unused "immediate" field of WQE. */
+ wqe->ctrl[3] = elts_head;
+ txq->elts_comp = 0;
+ } else {
+ txq->elts_comp = comp;
}
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment sent packets counter. */
+ txq->stats.opackets += i;
+#endif
+ /* Ring QP doorbell. */
+ mlx5_tx_dbrec(txq);
+ txq->elts_head = elts_head;
+ return i;
+}
+
+/**
+ * Open a MPW session.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param mpw
+ * Pointer to MPW session structure.
+ * @param length
+ * Packet length.
+ */
+static inline void
+mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+{
+ uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
+ volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
+ (volatile struct mlx5_wqe_data_seg (*)[])
+ (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)];
+
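+	/*
+	 * An MPW session spans two consecutive WQEs: the first holds the
+	 * control and Ethernet segments plus two data segments, the second
+	 * (pointed to by dseg above) provides room for three more.
+	 */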
+ mpw->state = MLX5_MPW_STATE_OPENED;
+ mpw->pkts_n = 0;
+ mpw->len = length;
+ mpw->total_len = 0;
+ mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+ mpw->wqe->eseg.mss = htons(length);
+ mpw->wqe->eseg.inline_hdr_sz = 0;
+ mpw->wqe->eseg.rsvd0 = 0;
+ mpw->wqe->eseg.rsvd1 = 0;
+ mpw->wqe->eseg.rsvd2 = 0;
+ mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
+ (txq->wqe_ci << 8) | MLX5_OPCODE_TSO);
+ mpw->wqe->ctrl[2] = 0;
+ mpw->wqe->ctrl[3] = 0;
+ mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
+ (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
+ mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)
+ (((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));
+ mpw->data.dseg[2] = &(*dseg)[0];
+ mpw->data.dseg[3] = &(*dseg)[1];
+ mpw->data.dseg[4] = &(*dseg)[2];
+}
+
+/**
+ * Close a MPW session.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param mpw
+ * Pointer to MPW session structure.
+ */
+static inline void
+mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
+{
+ unsigned int num = mpw->pkts_n;
+
+ /*
+	 * Store the size in units of 16 bytes. Control and Ethernet segments
+ * count as 2.
+ */
+ mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | (2 + num));
+ mpw->state = MLX5_MPW_STATE_CLOSED;
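+	/*
+	 * Up to two packets fit in the first WQE; more than that spills
+	 * data segments into the second one.
+	 */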
+ if (num < 3)
+ ++txq->wqe_ci;
+ else
+ txq->wqe_ci += 2;
+ tx_prefetch_wqe(txq, txq->wqe_ci);
+ tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+}
+
+/**
+ * DPDK callback for TX with MPW support.
+ *
+ * @param dpdk_txq
+ * Generic pointer to TX queue structure.
+ * @param[in] pkts
+ * Packets to transmit.
+ * @param pkts_n
+ * Number of packets in array.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+uint16_t
+mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct txq *txq = (struct txq *)dpdk_txq;
+ uint16_t elts_head = txq->elts_head;
+ const unsigned int elts_n = 1 << txq->elts_n;
+ unsigned int i = 0;
+ unsigned int j = 0;
+ unsigned int max;
+ unsigned int comp;
+ struct mlx5_mpw mpw = {
+ .state = MLX5_MPW_STATE_CLOSED,
+ };
+
+ if (unlikely(!pkts_n))
+ return 0;
+ /* Prefetch first packet cacheline. */
+ tx_prefetch_cqe(txq, txq->cq_ci);
+ tx_prefetch_wqe(txq, txq->wqe_ci);
+ tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+ /* Start processing. */
+ txq_complete(txq);
+ max = (elts_n - (elts_head - txq->elts_tail));
+ if (max > elts_n)
+ max -= elts_n;
+ do {
+ struct rte_mbuf *buf = *(pkts++);
+ unsigned int elts_head_next;
+ uint32_t length;
+ unsigned int segs_n = buf->nb_segs;
+ uint32_t cs_flags = 0;
+
+ /*
+ * Make sure there is enough room to store this packet and
+ * that one ring entry remains unused.
+ */
+ assert(segs_n);
+ if (max < segs_n + 1)
+ break;
+ /* Do not bother with large packets MPW cannot handle. */
+ if (segs_n > MLX5_MPW_DSEG_MAX)
+ break;
+ max -= segs_n;
+ --pkts_n;
+ /* Should we enable HW CKSUM offload */
+ if (buf->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
+ cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+ /* Retrieve packet information. */
+ length = PKT_LEN(buf);
+ assert(length);
+ /* Start new session if packet differs. */
+ if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
+ ((mpw.len != length) ||
+ (segs_n != 1) ||
+ (mpw.wqe->eseg.cs_flags != cs_flags)))
+ mlx5_mpw_close(txq, &mpw);
+ if (mpw.state == MLX5_MPW_STATE_CLOSED) {
+ mlx5_mpw_new(txq, &mpw, length);
+ mpw.wqe->eseg.cs_flags = cs_flags;
+ }
+ /* Multi-segment packets must be alone in their MPW. */
+ assert((segs_n == 1) || (mpw.pkts_n == 0));
+#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
+ length = 0;
+#endif
+ do {
+ volatile struct mlx5_wqe_data_seg *dseg;
+ uintptr_t addr;
+
+ elts_head_next = (elts_head + 1) & (elts_n - 1);
+ assert(buf);
+ (*txq->elts)[elts_head] = buf;
+ dseg = mpw.data.dseg[mpw.pkts_n];
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ *dseg = (struct mlx5_wqe_data_seg){
+ .byte_count = htonl(DATA_LEN(buf)),
+ .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+ .addr = htonll(addr),
+ };
+ elts_head = elts_head_next;
+#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
+ length += DATA_LEN(buf);
+#endif
+ buf = buf->next;
+ ++mpw.pkts_n;
+ ++j;
+ } while (--segs_n);
+ assert(length == mpw.len);
+ if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
+ mlx5_mpw_close(txq, &mpw);
+ elts_head = elts_head_next;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment sent bytes counter. */
+ txq->stats.obytes += length;
+#endif
+ ++i;
+ } while (pkts_n);
/* Take a shortcut if nothing must be sent. */
if (unlikely(i == 0))
return 0;
+ /* Check whether completion threshold has been reached. */
+ /* "j" includes both packets and segments. */
+ comp = txq->elts_comp + j;
+ if (comp >= MLX5_TX_COMP_THRESH) {
+ volatile struct mlx5_wqe *wqe = mpw.wqe;
+
+ /* Request completion on last WQE. */
+ wqe->ctrl[2] = htonl(8);
+ /* Save elts_head in unused "immediate" field of WQE. */
+ wqe->ctrl[3] = elts_head;
+ txq->elts_comp = 0;
+ } else {
+ txq->elts_comp = comp;
+ }
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment sent packets counter. */
txq->stats.opackets += i;
#endif
/* Ring QP doorbell. */
- err = txq->send_flush(txq->qp);
- if (unlikely(err)) {
- /* A nonzero value is not supposed to be returned.
- * Nothing can be done about it. */
- DEBUG("%p: send_flush() failed with error %d",
- (void *)txq, err);
+ if (mpw.state == MLX5_MPW_STATE_OPENED)
+ mlx5_mpw_close(txq, &mpw);
+ mlx5_tx_dbrec(txq);
+ txq->elts_head = elts_head;
+ return i;
+}
+
+/**
+ * Open a MPW inline session.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param mpw
+ * Pointer to MPW session structure.
+ * @param length
+ * Packet length.
+ */
+static inline void
+mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+{
+ uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
+ struct mlx5_wqe_inl_small *inl;
+
+ mpw->state = MLX5_MPW_INL_STATE_OPENED;
+ mpw->pkts_n = 0;
+ mpw->len = length;
+ mpw->total_len = 0;
+ mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+ mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
+ (txq->wqe_ci << 8) |
+ MLX5_OPCODE_TSO);
+ mpw->wqe->ctrl[2] = 0;
+ mpw->wqe->ctrl[3] = 0;
+ mpw->wqe->eseg.mss = htons(length);
+ mpw->wqe->eseg.inline_hdr_sz = 0;
+ mpw->wqe->eseg.cs_flags = 0;
+ mpw->wqe->eseg.rsvd0 = 0;
+ mpw->wqe->eseg.rsvd1 = 0;
+ mpw->wqe->eseg.rsvd2 = 0;
+ inl = (struct mlx5_wqe_inl_small *)
+ (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
+ mpw->data.raw = (uint8_t *)&inl->raw;
+}
+
+/**
+ * Close a MPW inline session.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param mpw
+ * Pointer to MPW session structure.
+ */
+static inline void
+mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
+{
+ unsigned int size;
+ struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
+ (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
+
+ size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;
+ /*
+	 * Store the size in units of 16 bytes. Control and Ethernet segments
+ * count as 2.
+ */
+ mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(size));
+ mpw->state = MLX5_MPW_STATE_CLOSED;
+ inl->byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
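+	/* Advance by the number of 64 B WQEs consumed, rounding up. */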
+ txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
+}
+
+/**
+ * DPDK callback for TX with MPW inline support.
+ *
+ * @param dpdk_txq
+ * Generic pointer to TX queue structure.
+ * @param[in] pkts
+ * Packets to transmit.
+ * @param pkts_n
+ * Number of packets in array.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+uint16_t
+mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n)
+{
+ struct txq *txq = (struct txq *)dpdk_txq;
+ uint16_t elts_head = txq->elts_head;
+ const unsigned int elts_n = 1 << txq->elts_n;
+ unsigned int i = 0;
+ unsigned int j = 0;
+ unsigned int max;
+ unsigned int comp;
+ unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
+ struct mlx5_mpw mpw = {
+ .state = MLX5_MPW_STATE_CLOSED,
+ };
+
+ if (unlikely(!pkts_n))
+ return 0;
+ /* Prefetch first packet cacheline. */
+ tx_prefetch_cqe(txq, txq->cq_ci);
+ tx_prefetch_wqe(txq, txq->wqe_ci);
+ tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+ /* Start processing. */
+ txq_complete(txq);
+ max = (elts_n - (elts_head - txq->elts_tail));
+ if (max > elts_n)
+ max -= elts_n;
+ do {
+ struct rte_mbuf *buf = *(pkts++);
+ unsigned int elts_head_next;
+ uintptr_t addr;
+ uint32_t length;
+ unsigned int segs_n = buf->nb_segs;
+ uint32_t cs_flags = 0;
+
+ /*
+ * Make sure there is enough room to store this packet and
+ * that one ring entry remains unused.
+ */
+ assert(segs_n);
+ if (max < segs_n + 1)
+ break;
+ /* Do not bother with large packets MPW cannot handle. */
+ if (segs_n > MLX5_MPW_DSEG_MAX)
+ break;
+ max -= segs_n;
+ --pkts_n;
+ /* Should we enable HW CKSUM offload */
+ if (buf->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
+ cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+ /* Retrieve packet information. */
+ length = PKT_LEN(buf);
+ /* Start new session if packet differs. */
+ if (mpw.state == MLX5_MPW_STATE_OPENED) {
+ if ((mpw.len != length) ||
+ (segs_n != 1) ||
+ (mpw.wqe->eseg.cs_flags != cs_flags))
+ mlx5_mpw_close(txq, &mpw);
+ } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
+ if ((mpw.len != length) ||
+ (segs_n != 1) ||
+ (length > inline_room) ||
+ (mpw.wqe->eseg.cs_flags != cs_flags)) {
+ mlx5_mpw_inline_close(txq, &mpw);
+ inline_room =
+ txq->max_inline * RTE_CACHE_LINE_SIZE;
+ }
+ }
+ if (mpw.state == MLX5_MPW_STATE_CLOSED) {
+ if ((segs_n != 1) ||
+ (length > inline_room)) {
+ mlx5_mpw_new(txq, &mpw, length);
+ mpw.wqe->eseg.cs_flags = cs_flags;
+ } else {
+ mlx5_mpw_inline_new(txq, &mpw, length);
+ mpw.wqe->eseg.cs_flags = cs_flags;
+ }
+ }
+ /* Multi-segment packets must be alone in their MPW. */
+ assert((segs_n == 1) || (mpw.pkts_n == 0));
+ if (mpw.state == MLX5_MPW_STATE_OPENED) {
+ assert(inline_room ==
+ txq->max_inline * RTE_CACHE_LINE_SIZE);
+#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
+ length = 0;
+#endif
+ do {
+ volatile struct mlx5_wqe_data_seg *dseg;
+
+ elts_head_next =
+ (elts_head + 1) & (elts_n - 1);
+ assert(buf);
+ (*txq->elts)[elts_head] = buf;
+ dseg = mpw.data.dseg[mpw.pkts_n];
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ *dseg = (struct mlx5_wqe_data_seg){
+ .byte_count = htonl(DATA_LEN(buf)),
+ .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+ .addr = htonll(addr),
+ };
+ elts_head = elts_head_next;
+#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
+ length += DATA_LEN(buf);
+#endif
+ buf = buf->next;
+ ++mpw.pkts_n;
+ ++j;
+ } while (--segs_n);
+ assert(length == mpw.len);
+ if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
+ mlx5_mpw_close(txq, &mpw);
+ } else {
+ unsigned int max;
+
+ assert(mpw.state == MLX5_MPW_INL_STATE_OPENED);
+ assert(length <= inline_room);
+ assert(length == DATA_LEN(buf));
+ elts_head_next = (elts_head + 1) & (elts_n - 1);
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ (*txq->elts)[elts_head] = buf;
+ /* Maximum number of bytes before wrapping. */
+ max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] -
+ (uintptr_t)mpw.data.raw);
+ if (length > max) {
+ rte_memcpy((void *)(uintptr_t)mpw.data.raw,
+ (void *)addr,
+ max);
+ mpw.data.raw =
+ (volatile void *)&(*txq->wqes)[0];
+ rte_memcpy((void *)(uintptr_t)mpw.data.raw,
+ (void *)(addr + max),
+ length - max);
+ mpw.data.raw += length - max;
+ } else {
+ rte_memcpy((void *)(uintptr_t)mpw.data.raw,
+ (void *)addr,
+ length);
+ mpw.data.raw += length;
+ }
+ if ((uintptr_t)mpw.data.raw ==
+ (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n])
+ mpw.data.raw =
+ (volatile void *)&(*txq->wqes)[0];
+ ++mpw.pkts_n;
+ ++j;
+ if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
+ mlx5_mpw_inline_close(txq, &mpw);
+ inline_room =
+ txq->max_inline * RTE_CACHE_LINE_SIZE;
+ } else {
+ inline_room -= length;
+ }
+ }
+ mpw.total_len += length;
+ elts_head = elts_head_next;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment sent bytes counter. */
+ txq->stats.obytes += length;
+#endif
+ ++i;
+ } while (pkts_n);
+ /* Take a shortcut if nothing must be sent. */
+ if (unlikely(i == 0))
+ return 0;
+ /* Check whether completion threshold has been reached. */
+ /* "j" includes both packets and segments. */
+ comp = txq->elts_comp + j;
+ if (comp >= MLX5_TX_COMP_THRESH) {
+ volatile struct mlx5_wqe *wqe = mpw.wqe;
+
+ /* Request completion on last WQE. */
+ wqe->ctrl[2] = htonl(8);
+ /* Save elts_head in unused "immediate" field of WQE. */
+ wqe->ctrl[3] = elts_head;
+ txq->elts_comp = 0;
+ } else {
+ txq->elts_comp = comp;
}
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment sent packets counter. */
+ txq->stats.opackets += i;
+#endif
+ /* Ring QP doorbell. */
+ if (mpw.state == MLX5_MPW_INL_STATE_OPENED)
+ mlx5_mpw_inline_close(txq, &mpw);
+ else if (mpw.state == MLX5_MPW_STATE_OPENED)
+ mlx5_mpw_close(txq, &mpw);
+ mlx5_tx_dbrec(txq);
txq->elts_head = elts_head;
- txq->elts_comp += elts_comp;
- txq->elts_comp_cd = elts_comp_cd;
return i;
}
* Packet type for struct rte_mbuf.
*/
static inline uint32_t
-rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe)
+rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
{
uint32_t pkt_type;
uint8_t flags = cqe->l4_hdr_type_etc;
- uint8_t info = cqe->rsvd0[0];
- if (info & IBV_EXP_CQ_RX_TUNNEL_PACKET)
+ if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET)
pkt_type =
TRANSPOSE(flags,
- IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
+ MLX5_CQE_RX_OUTER_IPV4_PACKET,
RTE_PTYPE_L3_IPV4) |
TRANSPOSE(flags,
- IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
+ MLX5_CQE_RX_OUTER_IPV6_PACKET,
RTE_PTYPE_L3_IPV6) |
TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IPV4_PACKET,
+ MLX5_CQE_RX_IPV4_PACKET,
RTE_PTYPE_INNER_L3_IPV4) |
TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IPV6_PACKET,
+ MLX5_CQE_RX_IPV6_PACKET,
RTE_PTYPE_INNER_L3_IPV6);
else
pkt_type =
return pkt_type;
}
+/**
+ * Get size of the next packet for a given CQE. For compressed CQEs, the
+ * consumer index is updated only once all packets of the current
+ * compressed session have been processed.
+ *
+ * @param rxq
+ * Pointer to RX queue.
+ * @param cqe
+ * CQE to process.
+ * @param[out] rss_hash
+ * Packet RSS Hash result.
+ *
+ * @return
+ * Packet size in bytes (0 if there is none), -1 in case of completion
+ * with error.
+ */
+static inline int
+mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+ uint16_t cqe_cnt, uint32_t *rss_hash)
+{
+ struct rxq_zip *zip = &rxq->zip;
+ uint16_t cqe_n = cqe_cnt + 1;
+ int len = 0;
+
+ /* Process compressed data in the CQE and mini arrays. */
+ if (zip->ai) {
+ volatile struct mlx5_mini_cqe8 (*mc)[8] =
+ (volatile struct mlx5_mini_cqe8 (*)[8])
+ (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt]);
+
+ len = ntohl((*mc)[zip->ai & 7].byte_cnt);
+ *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result);
+ if ((++zip->ai & 7) == 0) {
+ /*
+ * Increment consumer index to skip the number of
+ * CQEs consumed. Hardware leaves holes in the CQ
+ * ring for software use.
+ */
+ zip->ca = zip->na;
+ zip->na += 8;
+ }
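+		/*
+		 * Once the whole compressed session has been consumed,
+		 * invalidate the CQEs it covered so they cannot be mistaken
+		 * for valid ones and move the consumer index past them.
+		 */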
+ if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
+ uint16_t idx = rxq->cq_ci;
+ uint16_t end = zip->cq_ci;
+
+ while (idx != end) {
+ (*rxq->cqes)[idx & cqe_cnt].op_own =
+ MLX5_CQE_INVALIDATE;
+ ++idx;
+ }
+ rxq->cq_ci = zip->cq_ci;
+ zip->ai = 0;
+ }
+ /* No compressed data, get next CQE and verify if it is compressed. */
+ } else {
+ int ret;
+ int8_t op_own;
+
+ ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
+ if (unlikely(ret == 1))
+ return 0;
+ ++rxq->cq_ci;
+ op_own = cqe->op_own;
+ if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
+ volatile struct mlx5_mini_cqe8 (*mc)[8] =
+ (volatile struct mlx5_mini_cqe8 (*)[8])
+ (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci &
+ cqe_cnt]);
+
+ /* Fix endianness. */
+ zip->cqe_cnt = ntohl(cqe->byte_cnt);
+ /*
+ * Current mini array position is the one returned by
+			 * check_cqe().
+ *
+ * If completion comprises several mini arrays, as a
+ * special case the second one is located 7 CQEs after
+ * the initial CQE instead of 8 for subsequent ones.
+ */
+ zip->ca = rxq->cq_ci & cqe_cnt;
+ zip->na = zip->ca + 7;
+			/* Compute the next non-compressed CQE. */
+ --rxq->cq_ci;
+ zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
+ /* Get packet size to return. */
+ len = ntohl((*mc)[0].byte_cnt);
+ *rss_hash = ntohl((*mc)[0].rx_hash_result);
+ zip->ai = 1;
+ } else {
+ len = ntohl(cqe->byte_cnt);
+ *rss_hash = ntohl(cqe->rx_hash_res);
+ }
+ /* Error while receiving packet. */
+ if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
+ return -1;
+ }
+ return len;
+}
+
/**
* Translate RX completion flags to offload flags.
*
* Offload flags (ol_flags) for struct rte_mbuf.
*/
static inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe)
+rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
{
uint32_t ol_flags = 0;
uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK;
uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK;
- uint8_t info = cqe->rsvd0[0];
if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) ||
(l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6))
* of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
* (its value is 0).
*/
- if ((info & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
+ if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
ol_flags |=
TRANSPOSE(~cqe->l4_hdr_type_etc,
- IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
+ MLX5_CQE_RX_OUTER_IP_CSUM_OK,
PKT_RX_IP_CKSUM_BAD) |
TRANSPOSE(~cqe->l4_hdr_type_etc,
- IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
+ MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK,
PKT_RX_L4_CKSUM_BAD);
return ol_flags;
}
-/**
- * Get size of the next packet.
- *
- * @param rxq
- * RX queue to fetch packet from.
- *
- * @return
- * Packet size in bytes.
- */
-static inline int __attribute__((always_inline))
-rx_poll_len(struct rxq *rxq)
-{
- volatile struct mlx5_cqe64 *cqe;
-
- cqe = get_cqe64(*rxq->cqes, rxq->elts_n, &rxq->cq_ci);
- if (cqe)
- return ntohl(cqe->byte_cnt);
- return 0;
-}
-
/**
* DPDK callback for RX.
*
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct rxq *rxq = dpdk_rxq;
- unsigned int pkts_ret = 0;
- unsigned int i;
- unsigned int rq_ci = rxq->rq_ci;
- const unsigned int elts_n = rxq->elts_n;
- const unsigned int wqe_cnt = elts_n - 1;
+ const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
+ const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
+ const unsigned int sges_n = rxq->sges_n;
+ struct rte_mbuf *pkt = NULL;
+ struct rte_mbuf *seg = NULL;
+ volatile struct mlx5_cqe *cqe =
+ &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
+ unsigned int i = 0;
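+	/* rq_ci counts scatter entries; each packet stride is 2^sges_n. */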
+ unsigned int rq_ci = rxq->rq_ci << sges_n;
+ int len; /* keep its value across iterations. */
- for (i = 0; (i != pkts_n); ++i) {
+ while (pkts_n) {
unsigned int idx = rq_ci & wqe_cnt;
- struct rte_mbuf *rep;
- struct rte_mbuf *pkt;
- unsigned int len;
volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
- volatile struct mlx5_cqe64 *cqe =
- &(*rxq->cqes)[rxq->cq_ci & wqe_cnt].cqe64;
+ struct rte_mbuf *rep = (*rxq->elts)[idx];
+ uint32_t rss_hash_res = 0;
- pkt = (*rxq->elts)[idx];
+ if (pkt)
+ NEXT(seg) = rep;
+ seg = rep;
+ rte_prefetch0(seg);
rte_prefetch0(cqe);
+ rte_prefetch0(wqe);
rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
++rxq->stats.rx_nombuf;
+ if (!pkt) {
+ /*
+ * no buffers before we even started,
+ * bail out silently.
+ */
+ break;
+ }
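+			/*
+			 * Release the segments already chained for the
+			 * partially assembled packet before bailing out.
+			 */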
+ while (pkt != seg) {
+ assert(pkt != (*rxq->elts)[idx]);
+ seg = NEXT(pkt);
+ rte_mbuf_refcnt_set(pkt, 0);
+ __rte_mbuf_raw_free(pkt);
+ pkt = seg;
+ }
break;
}
- SET_DATA_OFF(rep, RTE_PKTMBUF_HEADROOM);
- NB_SEGS(rep) = 1;
- PORT(rep) = rxq->port_id;
- NEXT(rep) = NULL;
- len = rx_poll_len(rxq);
- if (unlikely(len == 0)) {
- rte_mbuf_refcnt_set(rep, 0);
- __rte_mbuf_raw_free(rep);
- break;
+ if (!pkt) {
+ cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
+ len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt,
+ &rss_hash_res);
+ if (!len) {
+ rte_mbuf_refcnt_set(rep, 0);
+ __rte_mbuf_raw_free(rep);
+ break;
+ }
+ if (unlikely(len == -1)) {
+ /* RX error, packet is likely too large. */
+ rte_mbuf_refcnt_set(rep, 0);
+ __rte_mbuf_raw_free(rep);
+ ++rxq->stats.idropped;
+ goto skip;
+ }
+ pkt = seg;
+ assert(len >= (rxq->crc_present << 2));
+ /* Update packet information. */
+ pkt->packet_type = 0;
+ pkt->ol_flags = 0;
+ if (rxq->rss_hash) {
+ pkt->hash.rss = rss_hash_res;
+ pkt->ol_flags = PKT_RX_RSS_HASH;
+ }
+ if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
+ rxq->crc_present) {
+ if (rxq->csum) {
+ pkt->packet_type =
+ rxq_cq_to_pkt_type(cqe);
+ pkt->ol_flags |=
+ rxq_cq_to_ol_flags(rxq, cqe);
+ }
+ if (cqe->l4_hdr_type_etc &
+ MLX5_CQE_VLAN_STRIPPED) {
+ pkt->ol_flags |= PKT_RX_VLAN_PKT |
+ PKT_RX_VLAN_STRIPPED;
+ pkt->vlan_tci = ntohs(cqe->vlan_info);
+ }
+ if (rxq->crc_present)
+ len -= ETHER_CRC_LEN;
+ }
+ PKT_LEN(pkt) = len;
}
+ DATA_LEN(rep) = DATA_LEN(seg);
+ PKT_LEN(rep) = PKT_LEN(seg);
+ SET_DATA_OFF(rep, DATA_OFF(seg));
+ NB_SEGS(rep) = NB_SEGS(seg);
+ PORT(rep) = PORT(seg);
+ NEXT(rep) = NULL;
+ (*rxq->elts)[idx] = rep;
/*
* Fill NIC descriptor with the new buffer. The lkey and size
* of the buffers are already known, only the buffer address
* changes.
*/
- wqe->addr = htonll((uintptr_t)rep->buf_addr +
- RTE_PKTMBUF_HEADROOM);
- (*rxq->elts)[idx] = rep;
- /* Update pkt information. */
- if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
- rxq->crc_present) {
- if (rxq->csum) {
- pkt->packet_type = rxq_cq_to_pkt_type(cqe);
- pkt->ol_flags = rxq_cq_to_ol_flags(rxq, cqe);
- }
- if (cqe->l4_hdr_type_etc & MLX5_CQE_VLAN_STRIPPED) {
- pkt->ol_flags |= PKT_RX_VLAN_PKT |
- PKT_RX_VLAN_STRIPPED;
- pkt->vlan_tci = ntohs(cqe->vlan_info);
- }
- if (rxq->crc_present)
- len -= ETHER_CRC_LEN;
+ wqe->addr = htonll(rte_pktmbuf_mtod(rep, uintptr_t));
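+		/*
+		 * More data is expected for this packet: keep the whole
+		 * segment and move on to the next scatter entry.
+		 */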
+ if (len > DATA_LEN(seg)) {
+ len -= DATA_LEN(seg);
+ ++NB_SEGS(pkt);
+ ++rq_ci;
+ continue;
}
- PKT_LEN(pkt) = len;
- DATA_LEN(pkt) = len;
+ DATA_LEN(seg) = len;
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment bytes counter. */
- rxq->stats.ibytes += len;
+ rxq->stats.ibytes += PKT_LEN(pkt);
#endif
/* Return packet. */
*(pkts++) = pkt;
- ++pkts_ret;
+ pkt = NULL;
+ --pkts_n;
+ ++i;
+skip:
+ /* Align consumer index to the next stride. */
+ rq_ci >>= sges_n;
++rq_ci;
+ rq_ci <<= sges_n;
}
- if (unlikely((i == 0) && (rq_ci == rxq->rq_ci)))
+ if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
return 0;
- /* Repost WRs. */
-#ifdef DEBUG_RECV
- DEBUG("%p: reposting %u WRs", (void *)rxq, i);
-#endif
/* Update the consumer index. */
- rxq->rq_ci = rq_ci;
+ rxq->rq_ci = rq_ci >> sges_n;
rte_wmb();
*rxq->cq_db = htonl(rxq->cq_ci);
rte_wmb();
*rxq->rq_db = htonl(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment packets counter. */
- rxq->stats.ipackets += pkts_ret;
+ rxq->stats.ipackets += i;
#endif
- return pkts_ret;
+ return i;
}
/**