static inline void
txq_complete(struct txq *txq)
{
- const unsigned int elts_n = txq->elts_n;
- const unsigned int cqe_n = txq->cqe_n;
+ const unsigned int elts_n = 1 << txq->elts_n;
+ const unsigned int cqe_n = 1 << txq->cqe_n;
const unsigned int cqe_cnt = cqe_n - 1;
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
} while (1);
if (unlikely(cqe == NULL))
return;
- wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
+ wqe = &(*txq->wqes)[htons(cqe->wqe_counter) &
+ ((1 << txq->wqe_n) - 1)].hdr;
elts_tail = wqe->ctrl[3];
- assert(elts_tail < txq->wqe_n);
+ assert(elts_tail < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
struct rte_mbuf *elt = (*txq->elts)[elts_free];
return lkey;
}
-/**
- * Write a regular WQE.
- *
- * @param txq
- * Pointer to TX queue structure.
- * @param wqe
- * Pointer to the WQE to fill.
- * @param buf
- * Buffer.
- * @param length
- * Packet length.
- *
- * @return ds
- * Number of DS elements consumed.
- */
-static inline unsigned int
-mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
- struct rte_mbuf *buf, uint32_t length)
-{
- uint8_t *raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
- uint16_t ds;
- uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
- uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
- struct mlx5_wqe_data_seg *dseg = NULL;
-
- assert(length >= MLX5_WQE_DWORD_SIZE);
- /* Start the know and common part of the WQE structure. */
- wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
- wqe->ctrl[2] = 0;
- wqe->ctrl[3] = 0;
- wqe->eseg.rsvd0 = 0;
- wqe->eseg.rsvd1 = 0;
- wqe->eseg.mss = 0;
- wqe->eseg.rsvd2 = 0;
- /* Start by copying the Ethernet Header. */
- rte_mov16((uint8_t *)raw, (uint8_t *)addr);
- length -= MLX5_WQE_DWORD_SIZE;
- addr += MLX5_WQE_DWORD_SIZE;
- /* Replace the Ethernet type by the VLAN if necessary. */
- if (buf->ol_flags & PKT_TX_VLAN_PKT) {
- uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);
-
- memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - sizeof(vlan)),
- &vlan, sizeof(vlan));
- addr -= sizeof(vlan);
- length += sizeof(vlan);
- }
- /* Inline if enough room. */
- if (txq->max_inline != 0) {
- uintptr_t end = (uintptr_t)&(*txq->wqes)[txq->wqe_n];
- uint16_t max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
- uint16_t room;
-
- raw += MLX5_WQE_DWORD_SIZE;
- room = end - (uintptr_t)raw;
- if (room > max_inline) {
- uintptr_t addr_end = (addr + max_inline) &
- ~(RTE_CACHE_LINE_SIZE - 1);
- uint16_t copy_b = ((addr_end - addr) > length) ?
- length :
- (addr_end - addr);
-
- rte_memcpy((void *)raw, (void *)addr, copy_b);
- addr += copy_b;
- length -= copy_b;
- pkt_inline_sz += copy_b;
- /* Sanity check. */
- assert(addr <= addr_end);
- }
- /* Store the inlined packet size in the WQE. */
- wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
- /*
- * 2 DWORDs consumed by the WQE header + 1 DSEG +
- * the size of the inline part of the packet.
- */
- ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
- if (length > 0) {
- dseg = (struct mlx5_wqe_data_seg *)
- ((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
- if ((uintptr_t)dseg >= end)
- dseg = (struct mlx5_wqe_data_seg *)
- ((uintptr_t)&(*txq->wqes)[0]);
- goto use_dseg;
- }
- } else {
- /* Add the remaining packet as a simple ds. */
- ds = 3;
- /*
- * No inline has been done in the packet, only the Ethernet
- * Header as been stored.
- */
- wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
- dseg = (struct mlx5_wqe_data_seg *)
- ((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
-use_dseg:
- *dseg = (struct mlx5_wqe_data_seg) {
- .addr = htonll(addr),
- .byte_count = htonl(length),
- .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
- };
- ++ds;
- }
- wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
- return ds;
-}
-
/**
* Ring TX queue doorbell.
*
*txq->qp_db = htonl(txq->wqe_ci);
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
- rte_mov16(dst, (uint8_t *)data);
- txq->bf_offset ^= txq->bf_buf_size;
+ memcpy(dst, (uint8_t *)data, 16);
+ txq->bf_offset ^= (1 << txq->bf_buf_size);
}
/**
static inline void
tx_prefetch_cqe(struct txq *txq, uint16_t ci)
{
- volatile struct mlx5_cqe64 *cqe;
+ volatile struct mlx5_cqe *cqe;
- cqe = &(*txq->cqes)[ci & (txq->cqe_n - 1)].cqe64;
+ cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)];
rte_prefetch0(cqe);
}
{
volatile struct mlx5_wqe64 *wqe;
- wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)];
+ wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)];
rte_prefetch0(wqe);
}
{
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
- const unsigned int elts_n = txq->elts_n;
+ const unsigned int elts_n = 1 << txq->elts_n;
unsigned int i = 0;
unsigned int j = 0;
unsigned int max;
unsigned int comp;
volatile struct mlx5_wqe *wqe = NULL;
+ unsigned int segs_n = 0;
+ struct rte_mbuf *buf = NULL;
+ uint8_t *raw;
if (unlikely(!pkts_n))
return 0;
if (max > elts_n)
max -= elts_n;
do {
- struct rte_mbuf *buf = *(pkts++);
- unsigned int elts_head_next;
+ volatile struct mlx5_wqe_data_seg *dseg = NULL;
uint32_t length;
- unsigned int segs_n = buf->nb_segs;
- volatile struct mlx5_wqe_data_seg *dseg;
unsigned int ds = 0;
+ uintptr_t addr;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t total_length = 0;
+#endif
+ /* first_seg */
+ buf = *(pkts++);
+ segs_n = buf->nb_segs;
/*
* Make sure there is enough room to store this packet and
* that one ring entry remains unused.
if (max < segs_n + 1)
break;
max -= segs_n;
- --pkts_n;
- elts_head_next = (elts_head + 1) & (elts_n - 1);
- wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)].hdr;
- tx_prefetch_wqe(txq, txq->wqe_ci);
+ --segs_n;
+ if (!segs_n)
+ --pkts_n;
+ wqe = &(*txq->wqes)[txq->wqe_ci &
+ ((1 << txq->wqe_n) - 1)].hdr;
tx_prefetch_wqe(txq, txq->wqe_ci + 1);
- if (pkts_n)
+ if (pkts_n > 1)
rte_prefetch0(*pkts);
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
length = DATA_LEN(buf);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ total_length = length;
+#endif
+ assert(length >= MLX5_WQE_DWORD_SIZE);
/* Update element. */
(*txq->elts)[elts_head] = buf;
+ elts_head = (elts_head + 1) & (elts_n - 1);
/* Prefetch next buffer data. */
- if (pkts_n)
- rte_prefetch0(rte_pktmbuf_mtod(*pkts,
- volatile void *));
+ if (pkts_n > 1) {
+ volatile void *pkt_addr;
+
+ pkt_addr = rte_pktmbuf_mtod(*pkts, volatile void *);
+ rte_prefetch0(pkt_addr);
+ }
/* Should we enable HW CKSUM offload */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
} else {
wqe->eseg.cs_flags = 0;
}
- ds = mlx5_wqe_write(txq, wqe, buf, length);
- if (segs_n == 1)
- goto skip_segs;
- dseg = (volatile struct mlx5_wqe_data_seg *)
- (((uintptr_t)wqe) + ds * MLX5_WQE_DWORD_SIZE);
- while (--segs_n) {
+ raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
+ /* Start the know and common part of the WQE structure. */
+ wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
+ wqe->ctrl[2] = 0;
+ wqe->ctrl[3] = 0;
+ wqe->eseg.rsvd0 = 0;
+ wqe->eseg.rsvd1 = 0;
+ wqe->eseg.mss = 0;
+ wqe->eseg.rsvd2 = 0;
+ /* Start by copying the Ethernet Header. */
+ memcpy((uint8_t *)raw, ((uint8_t *)addr), 16);
+ length -= MLX5_WQE_DWORD_SIZE;
+ addr += MLX5_WQE_DWORD_SIZE;
+ /* Replace the Ethernet type by the VLAN if necessary. */
+ if (buf->ol_flags & PKT_TX_VLAN_PKT) {
+ uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);
+
+ memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE -
+ sizeof(vlan)),
+ &vlan, sizeof(vlan));
+ addr -= sizeof(vlan);
+ length += sizeof(vlan);
+ }
+ /* Inline if enough room. */
+ if (txq->max_inline != 0) {
+ uintptr_t end =
+ (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
+ uint16_t max_inline =
+ txq->max_inline * RTE_CACHE_LINE_SIZE;
+ uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
+ uint16_t room;
+
+ raw += MLX5_WQE_DWORD_SIZE;
+ room = end - (uintptr_t)raw;
+ if (room > max_inline) {
+ uintptr_t addr_end = (addr + max_inline) &
+ ~(RTE_CACHE_LINE_SIZE - 1);
+ uint16_t copy_b = ((addr_end - addr) > length) ?
+ length :
+ (addr_end - addr);
+
+ rte_memcpy((void *)raw, (void *)addr, copy_b);
+ addr += copy_b;
+ length -= copy_b;
+ pkt_inline_sz += copy_b;
+ /* Sanity check. */
+ assert(addr <= addr_end);
+ }
+ /* Store the inlined packet size in the WQE. */
+ wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
+ /*
+ * 2 DWORDs consumed by the WQE header + 1 DSEG +
+ * the size of the inline part of the packet.
+ */
+ ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
+ if (length > 0) {
+ dseg = (struct mlx5_wqe_data_seg *)
+ ((uintptr_t)wqe +
+ (ds * MLX5_WQE_DWORD_SIZE));
+ if ((uintptr_t)dseg >= end)
+ dseg = (struct mlx5_wqe_data_seg *)
+ ((uintptr_t)&(*txq->wqes)[0]);
+ goto use_dseg;
+ } else if (!segs_n) {
+ goto next_pkt;
+ } else {
+ goto next_seg;
+ }
+ } else {
/*
- * Spill on next WQE when the current one does not have
- * enough room left. Size of WQE must a be a multiple
- * of data segment size.
+ * No inline has been done in the packet, only the
+ * Ethernet Header as been stored.
*/
- assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
- if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE)))
- dseg = (volatile void *)
- &(*txq->wqes)[txq->wqe_ci++ &
- (txq->wqe_n - 1)];
- else
- ++dseg;
+ wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
+ dseg = (struct mlx5_wqe_data_seg *)
+ ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
+ ds = 3;
+use_dseg:
+ /* Add the remaining packet as a simple ds. */
+ *dseg = (struct mlx5_wqe_data_seg) {
+ .addr = htonll(addr),
+ .byte_count = htonl(length),
+ .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+ };
++ds;
- buf = buf->next;
- assert(buf);
- /* Store segment information. */
- dseg->byte_count = htonl(DATA_LEN(buf));
- dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf));
- dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
- (*txq->elts)[elts_head_next] = buf;
- elts_head_next = (elts_head_next + 1) & (elts_n - 1);
+ if (!segs_n)
+ goto next_pkt;
+ }
+next_seg:
+ assert(buf);
+ assert(ds);
+ assert(wqe);
+ /*
+ * Spill on next WQE when the current one does not have
+ * enough room left. Size of WQE must a be a multiple
+ * of data segment size.
+ */
+ assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
+ if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
+ unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) &
+ ((1 << txq->wqe_n) - 1);
+
+ dseg = (struct mlx5_wqe_data_seg *)
+ ((uintptr_t)&(*txq->wqes)[n]);
+ tx_prefetch_wqe(txq, n + 1);
+ } else {
+ ++dseg;
+ }
+ ++ds;
+ buf = buf->next;
+ assert(buf);
+ length = DATA_LEN(buf);
#ifdef MLX5_PMD_SOFT_COUNTERS
- length += DATA_LEN(buf);
+ total_length += length;
#endif
- ++j;
- }
- /* Update DS field in WQE. */
- wqe->ctrl[1] &= htonl(0xffffffc0);
- wqe->ctrl[1] |= htonl(ds & 0x3f);
-skip_segs:
+ /* Store segment information. */
+ *dseg = (struct mlx5_wqe_data_seg) {
+ .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
+ .byte_count = htonl(length),
+ .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+ };
+ (*txq->elts)[elts_head] = buf;
+ elts_head = (elts_head + 1) & (elts_n - 1);
+ ++j;
+ --segs_n;
+ if (segs_n)
+ goto next_seg;
+ else
+ --pkts_n;
+next_pkt:
+ ++i;
+ wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
+ txq->wqe_ci += (ds + 3) / 4;
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment sent bytes counter. */
- txq->stats.obytes += length;
+ txq->stats.obytes += total_length;
#endif
- /* Increment consumer index. */
- txq->wqe_ci += (ds + 3) / 4;
- elts_head = elts_head_next;
- ++i;
} while (pkts_n);
/* Take a shortcut if nothing must be sent. */
if (unlikely(i == 0))
static inline void
mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
{
- uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+ uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
(volatile struct mlx5_wqe_data_seg (*)[])
- (uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)];
+ (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)];
mpw->state = MLX5_MPW_STATE_OPENED;
mpw->pkts_n = 0;
mpw->wqe->eseg.rsvd1 = 0;
mpw->wqe->eseg.rsvd2 = 0;
mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
- (txq->wqe_ci << 8) | MLX5_OPCODE_LSO_MPW);
+ (txq->wqe_ci << 8) | MLX5_OPCODE_TSO);
mpw->wqe->ctrl[2] = 0;
mpw->wqe->ctrl[3] = 0;
mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
{
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
- const unsigned int elts_n = txq->elts_n;
+ const unsigned int elts_n = 1 << txq->elts_n;
unsigned int i = 0;
unsigned int j = 0;
unsigned int max;
static inline void
mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
{
- uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+ uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
struct mlx5_wqe_inl_small *inl;
mpw->state = MLX5_MPW_INL_STATE_OPENED;
mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
(txq->wqe_ci << 8) |
- MLX5_OPCODE_LSO_MPW);
+ MLX5_OPCODE_TSO);
mpw->wqe->ctrl[2] = 0;
mpw->wqe->ctrl[3] = 0;
mpw->wqe->eseg.mss = htons(length);
{
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
- const unsigned int elts_n = txq->elts_n;
+ const unsigned int elts_n = 1 << txq->elts_n;
unsigned int i = 0;
unsigned int j = 0;
unsigned int max;
addr = rte_pktmbuf_mtod(buf, uintptr_t);
(*txq->elts)[elts_head] = buf;
/* Maximum number of bytes before wrapping. */
- max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] -
+ max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] -
(uintptr_t)mpw.data.raw);
if (length > max) {
rte_memcpy((void *)(uintptr_t)mpw.data.raw,
mpw.data.raw += length;
}
if ((uintptr_t)mpw.data.raw ==
- (uintptr_t)&(*txq->wqes)[txq->wqe_n])
+ (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n])
mpw.data.raw =
(volatile void *)&(*txq->wqes)[0];
++mpw.pkts_n;
* Pointer to RX queue.
* @param cqe
* CQE to process.
+ * @param[out] rss_hash
+ * Packet RSS Hash result.
*
* @return
* Packet size in bytes (0 if there is none), -1 in case of completion
*/
static inline int
mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe,
- uint16_t cqe_cnt)
+ uint16_t cqe_cnt, uint32_t *rss_hash)
{
struct rxq_zip *zip = &rxq->zip;
uint16_t cqe_n = cqe_cnt + 1;
(uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].cqe64);
len = ntohl((*mc)[zip->ai & 7].byte_cnt);
+ *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result);
if ((++zip->ai & 7) == 0) {
/*
* Increment consumer index to skip the number of
zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
/* Get packet size to return. */
len = ntohl((*mc)[0].byte_cnt);
+ *rss_hash = ntohl((*mc)[0].rx_hash_result);
zip->ai = 1;
} else {
len = ntohl(cqe->byte_cnt);
+ *rss_hash = ntohl(cqe->rx_hash_res);
}
/* Error while receiving packet. */
if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct rxq *rxq = dpdk_rxq;
- const unsigned int wqe_cnt = rxq->elts_n - 1;
- const unsigned int cqe_cnt = rxq->cqe_n - 1;
+ const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
+ const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
const unsigned int sges_n = rxq->sges_n;
struct rte_mbuf *pkt = NULL;
struct rte_mbuf *seg = NULL;
&(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
unsigned int i = 0;
unsigned int rq_ci = rxq->rq_ci << sges_n;
- int len;
+ int len; /* keep its value across iterations. */
while (pkts_n) {
unsigned int idx = rq_ci & wqe_cnt;
volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
struct rte_mbuf *rep = (*rxq->elts)[idx];
+ uint32_t rss_hash_res = 0;
if (pkt)
NEXT(seg) = rep;
}
if (!pkt) {
cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
- len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt);
- if (len == 0) {
+ len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt,
+ &rss_hash_res);
+ if (!len) {
rte_mbuf_refcnt_set(rep, 0);
__rte_mbuf_raw_free(rep);
break;
/* Update packet information. */
pkt->packet_type = 0;
pkt->ol_flags = 0;
+ if (rxq->rss_hash) {
+ pkt->hash.rss = rss_hash_res;
+ pkt->ol_flags = PKT_RX_RSS_HASH;
+ }
if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
rxq->crc_present) {
if (rxq->csum) {
pkt->packet_type =
rxq_cq_to_pkt_type(cqe);
- pkt->ol_flags =
+ pkt->ol_flags |=
rxq_cq_to_ol_flags(rxq, cqe);
}
if (cqe->l4_hdr_type_etc &