{
unsigned int count = MLX5_TX_COMP_MAX_CQE;
volatile struct mlx5_cqe *last_cqe = NULL;
- uint16_t ci = txq->cq_ci;
+ bool ring_doorbell = false;
int ret;
static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
do {
volatile struct mlx5_cqe *cqe;
- cqe = &txq->cqes[ci & txq->cqe_m];
- ret = check_cqe(cqe, txq->cqe_s, ci);
+ cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
+ ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
if (likely(ret != MLX5_CQE_STATUS_ERR)) {
/* No new CQEs in completion queue. */
* here, before we might perform SQ reset.
*/
rte_wmb();
- txq->cq_ci = ci;
ret = mlx5_tx_error_cqe_handle
(txq, (volatile struct mlx5_err_cqe *)cqe);
if (unlikely(ret < 0)) {
* MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
* The send queue is supposed to be empty.
*/
- ++ci;
- txq->cq_pi = ci;
+ ring_doorbell = true;
+ ++txq->cq_ci;
+ txq->cq_pi = txq->cq_ci;
last_cqe = NULL;
continue;
}
/* Normal transmit completion. */
- MLX5_ASSERT(ci != txq->cq_pi);
- MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) ==
+ MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
+ MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
cqe->wqe_counter);
- ++ci;
+ ring_doorbell = true;
+ ++txq->cq_ci;
last_cqe = cqe;
/*
* We have to restrict the amount of processed CQEs
if (likely(--count == 0))
break;
} while (true);
- if (likely(ci != txq->cq_ci)) {
- /*
- * Update completion queue consuming index
- * and ring doorbell to notify hardware.
- */
+ if (likely(ring_doorbell)) {
+ /* Ring doorbell to notify hardware. */
rte_compiler_barrier();
- txq->cq_ci = ci;
- *txq->cq_db = rte_cpu_to_be_32(ci);
+ *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
mlx5_tx_comp_flush(txq, last_cqe, olx);
}
}
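
The hunk above replaces the saved-index comparison (ci != txq->cq_ci) with an explicit ring_doorbell flag: the consumer index is now advanced in place and the error-handling branch syncs it on its own, so comparing against a local copy taken on entry no longer reliably tells whether a doorbell is due. A minimal, self-contained sketch of the flag-based pattern follows; struct toy_cq, its fields, and the poll callback are illustrative stand-ins, not mlx5 definitions.

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative CQ state; field names are stand-ins, not mlx5's. */
    struct toy_cq {
            uint16_t ci;            /* consumer index */
            volatile uint32_t *db;  /* doorbell record */
    };

    static void
    toy_cq_drain(struct toy_cq *cq, int (*poll)(uint16_t ci))
    {
            bool ring_doorbell = false;

            while (poll(cq->ci) > 0) {      /* CQE is owned by software */
                    ++cq->ci;               /* consume it in place ... */
                    ring_doorbell = true;   /* ... and remember that we did */
            }
            if (ring_doorbell)              /* ring once, however ci was reached */
                    *cq->db = cq->ci;
    }
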
(uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) {
volatile struct mlx5_wqe *last = loc->wqe_last;
+ MLX5_ASSERT(last);
txq->elts_comp = head;
if (MLX5_TXOFF_CONFIG(INLINE))
txq->wqe_comp = txq->wqe_ci;
unsigned int part;
uint8_t *pdst;
- dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
- pdst = &dseg->inline_data[0];
+ if (!MLX5_TXOFF_CONFIG(MPW)) {
+ /* Store the descriptor byte counter for eMPW sessions. */
+ dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
+ pdst = &dseg->inline_data[0];
+ } else {
+ /* The byte counter of the whole legacy MPW session is stored on close. */
+ pdst = (uint8_t *)dseg;
+ }
/*
* The WQEBB space availability is checked by caller.
* Here we should be aware of WQE ring buffer wraparound only.
len -= part;
if (likely(!len)) {
pdst += part;
- pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
+ if (!MLX5_TXOFF_CONFIG(MPW))
+ pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
/* Note: no final wraparound check here. */
return (struct mlx5_wqe_dseg *)pdst;
}
static_assert(MLX5_DSEG_MIN_INLINE_SIZE ==
(2 * RTE_ETHER_ADDR_LEN),
"invalid Data Segment data size");
- dseg->bcount = rte_cpu_to_be_32((len + sizeof(struct rte_vlan_hdr)) |
- MLX5_ETH_WQE_DATA_INLINE);
- pdst = &dseg->inline_data[0];
+ if (!MLX5_TXOFF_CONFIG(MPW)) {
+ /* Store the descriptor byte counter for eMPW sessions. */
+ dseg->bcount = rte_cpu_to_be_32
+ ((len + sizeof(struct rte_vlan_hdr)) |
+ MLX5_ETH_WQE_DATA_INLINE);
+ pdst = &dseg->inline_data[0];
+ } else {
+ /* The byte counter of the whole legacy MPW session is stored on close. */
+ pdst = (uint8_t *)dseg;
+ }
memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE);
buf += MLX5_DSEG_MIN_INLINE_SIZE;
pdst += MLX5_DSEG_MIN_INLINE_SIZE;
len -= part;
if (likely(!len)) {
pdst += part;
- pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
+ if (!MLX5_TXOFF_CONFIG(MPW))
+ pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
/* Note: no final wraparound check here. */
return (struct mlx5_wqe_dseg *)pdst;
}
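
Both inline Data Segment hunks above make the same layout change for legacy MPW: the per-descriptor byte counter and the 16-byte alignment of pdst are skipped, because a single counter covering the whole inline session is written when the session is closed. A rough, self-contained sketch of that open/append/close sequence; DATA_INLINE_FLAG, the toy_mpw_* helpers, and the omitted endianness conversion are simplifications, not the driver's API.

    #include <stdint.h>
    #include <string.h>

    /* Stand-in for MLX5_ETH_WQE_DATA_INLINE; the value is only illustrative. */
    #define DATA_INLINE_FLAG 0x80000000u

    /* Open: write a zero byte-count placeholder, data starts right behind it. */
    static uint8_t *
    toy_mpw_open(uint8_t *dseg)
    {
            memset(dseg, 0, sizeof(uint32_t));
            return dseg + sizeof(uint32_t);
    }

    /* Append: packets are packed back-to-back, no per-packet 16-byte alignment. */
    static uint8_t *
    toy_mpw_append(uint8_t *dst, const void *pkt, uint32_t len)
    {
            memcpy(dst, pkt, len);
            return dst + len;
    }

    /* Close: store the counter for the whole session (endianness omitted here). */
    static void
    toy_mpw_close(uint8_t *dseg, uint32_t total_len)
    {
            uint32_t bcount = total_len | DATA_INLINE_FLAG;

            memcpy(dseg, &bcount, sizeof(bcount));
    }
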
* Total size of descriptor/data in bytes.
* @param slen
* Accumulated statistics, data bytes sent.
+ * @param wqem
+ * The base WQE of the eMPW/MPW session to complete.
* @param olx
* Configured Tx offloads mask. It is fully defined at
* compile time and may be used for optimization.
struct mlx5_txq_local *restrict loc,
unsigned int len,
unsigned int slen,
+ struct mlx5_wqe *restrict wqem,
unsigned int olx __rte_unused)
{
+ struct mlx5_wqe_dseg *dseg = &wqem->dseg[0];
+
MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE));
- MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0);
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Update sent data bytes counter. */
txq->stats.obytes += slen;
#else
(void)slen;
#endif
- len = len / MLX5_WSEG_SIZE + 2;
- loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len);
+ if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) {
+ /*
+ * The legacy MPW session contains inline packets: set the
+ * length of its single inline data segment and align the
+ * total length to the segment size.
+ */
+ MLX5_ASSERT(len > sizeof(dseg->bcount));
+ dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) |
+ MLX5_ETH_WQE_DATA_INLINE);
+ len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2;
+ } else {
+ /*
+ * The session is not a legacy MPW one, or it contains
+ * data buffer pointer segments.
+ */
+ MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0);
+ len = len / MLX5_WSEG_SIZE + 2;
+ }
+ wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len);
txq->wqe_ci += (len + 3) / 4;
loc->wqe_free -= (len + 3) / 4;
+ loc->wqe_last = wqem;
}
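
The closing arithmetic above converts the accumulated descriptor/data byte count into the DS count of the Control Segment and into consumed WQEBBs. A small sketch of that conversion, assuming the usual 16-byte WQE segment (MLX5_WSEG_SIZE) and four segments per 64-byte WQEBB; the TOY_* constants and toy_empw_ds are local stand-ins:

    /* Stand-ins for the 16B WQE segment and the 4 segments of a WQEBB. */
    #define TOY_WSEG_SIZE   16u
    #define TOY_WQEBB_SEGS  4u

    /* DS count: inline sessions round the data up to whole segments,
     * pointer sessions already carry a multiple of the segment size;
     * +2 accounts for the Control and Ethernet segments of the title WQEBB.
     */
    static unsigned int
    toy_empw_ds(unsigned int len, int inline_session)
    {
            unsigned int ds = inline_session ?
                    (len + TOY_WSEG_SIZE - 1) / TOY_WSEG_SIZE :
                    len / TOY_WSEG_SIZE;

            return ds + 2;
    }

    /* Example: 100 bytes of inline descriptor/data -> 7 data segments + 2 = 9 DS,
     * occupying (9 + 3) / 4 = 3 WQEBBs, matching the wqe_ci/wqe_free update above.
     */
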
/**
pkts_n -= loc->pkts_sent;
for (;;) {
struct mlx5_wqe_dseg *restrict dseg;
- struct mlx5_wqe_eseg *restrict eseg;
+ struct mlx5_wqe *restrict wqem;
enum mlx5_txcmp_code ret;
unsigned int room, part, nlim;
unsigned int slen = 0;
return MLX5_TXCMP_CODE_EXIT;
if (likely(pkts_n > 1))
rte_prefetch0(*pkts);
- loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m);
+ wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m);
/*
* Build eMPW title WQEBB:
* - Control Segment, eMPW opcode, zero DS
* - Ethernet Segment, no inline
*/
- mlx5_tx_cseg_init(txq, loc, loc->wqe_last, 0,
+ mlx5_tx_cseg_init(txq, loc, wqem, 0,
MLX5_OPCODE_ENHANCED_MPSW, olx);
- mlx5_tx_eseg_none(txq, loc, loc->wqe_last,
+ mlx5_tx_eseg_none(txq, loc, wqem,
olx & ~MLX5_TXOFF_CONFIG_VLAN);
- eseg = &loc->wqe_last->eseg;
- dseg = &loc->wqe_last->dseg[0];
+ dseg = &wqem->dseg[0];
/* Store the packet length for legacy MPW. */
if (MLX5_TXOFF_CONFIG(MPW))
- eseg->mss = rte_cpu_to_be_16
- (rte_pktmbuf_data_len(loc->mbuf));
+ wqem->eseg.mss = rte_cpu_to_be_16
+ (rte_pktmbuf_data_len(loc->mbuf));
room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE,
loc->wqe_free) * MLX5_WQE_SIZE -
MLX5_WQE_CSEG_SIZE -
MLX5_WQE_ESEG_SIZE;
+ /* Limit the room used by legacy MPW sessions for performance reasons. */
+ if (MLX5_TXOFF_CONFIG(MPW))
+ room = RTE_MIN(room,
+ RTE_MAX(txq->inlen_empw +
+ sizeof(dseg->bcount) +
+ (MLX5_TXOFF_CONFIG(VLAN) ?
+ sizeof(struct rte_vlan_hdr) : 0),
+ MLX5_MPW_INLINE_MAX_PACKETS *
+ MLX5_WQE_DSEG_SIZE));
/* Build WQE till we have space, packets and resources. */
part = room;
for (;;) {
* We have some successfully built
* packet Data Segments to send.
*/
- mlx5_tx_idone_empw(txq, loc, part, slen, olx);
+ mlx5_tx_idone_empw(txq, loc, part,
+ slen, wqem, olx);
return MLX5_TXCMP_CODE_ERROR;
}
/* Inline or not inline - that's the Question. */
if (dlen > txq->inlen_empw ||
loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE)
goto pointer_empw;
+ if (MLX5_TXOFF_CONFIG(MPW)) {
+ if (dlen > txq->inlen_send)
+ goto pointer_empw;
+ tlen = dlen;
+ if (part == room) {
+ /* Open new inline MPW session. */
+ tlen += sizeof(dseg->bcount);
+ dseg->bcount = RTE_BE32(0);
+ dseg = RTE_PTR_ADD
+ (dseg, sizeof(dseg->bcount));
+ } else {
+ /*
+ * Pointer and inline descriptors cannot be
+ * intermixed within a legacy MPW session.
+ */
+ if (wqem->dseg[0].bcount)
+ break;
+ }
+ } else {
+ tlen = sizeof(dseg->bcount) + dlen;
+ }
/* Inline entire packet, optional VLAN insertion. */
- tlen = sizeof(dseg->bcount) + dlen;
if (MLX5_TXOFF_CONFIG(VLAN) &&
loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
/*
dseg = mlx5_tx_dseg_empw(txq, loc, dseg,
dptr, dlen, olx);
}
- tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE);
+ if (!MLX5_TXOFF_CONFIG(MPW))
+ tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE);
MLX5_ASSERT(room >= tlen);
room -= tlen;
/*
rte_pktmbuf_free_seg(loc->mbuf);
goto next_mbuf;
pointer_empw:
+ /*
+ * Pointer and inline descriptors cannot be
+ * intermixed within a legacy MPW session.
+ */
+ if (MLX5_TXOFF_CONFIG(MPW) &&
+ part != room &&
+ wqem->dseg[0].bcount == RTE_BE32(0))
+ break;
/*
* Not inlinable VLAN packets are
* processed outside of this routine.
* continue build descriptors.
*/
part -= room;
- mlx5_tx_idone_empw(txq, loc, part, slen, olx);
+ mlx5_tx_idone_empw(txq, loc, part,
+ slen, wqem, olx);
return MLX5_TXCMP_CODE_EXIT;
}
loc->mbuf = *pkts++;
*/
if (ret == MLX5_TXCMP_CODE_MULTI) {
part -= room;
- mlx5_tx_idone_empw(txq, loc, part, slen, olx);
+ mlx5_tx_idone_empw(txq, loc, part,
+ slen, wqem, olx);
if (unlikely(!loc->elts_free ||
!loc->wqe_free))
return MLX5_TXCMP_CODE_EXIT;
MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1);
if (ret == MLX5_TXCMP_CODE_TSO) {
part -= room;
- mlx5_tx_idone_empw(txq, loc, part, slen, olx);
+ mlx5_tx_idone_empw(txq, loc, part,
+ slen, wqem, olx);
if (unlikely(!loc->elts_free ||
!loc->wqe_free))
return MLX5_TXCMP_CODE_EXIT;
}
if (ret == MLX5_TXCMP_CODE_SINGLE) {
part -= room;
- mlx5_tx_idone_empw(txq, loc, part, slen, olx);
+ mlx5_tx_idone_empw(txq, loc, part,
+ slen, wqem, olx);
if (unlikely(!loc->elts_free ||
!loc->wqe_free))
return MLX5_TXCMP_CODE_EXIT;
if (ret != MLX5_TXCMP_CODE_EMPW) {
MLX5_ASSERT(false);
part -= room;
- mlx5_tx_idone_empw(txq, loc, part, slen, olx);
+ mlx5_tx_idone_empw(txq, loc, part,
+ slen, wqem, olx);
return MLX5_TXCMP_CODE_ERROR;
}
/* Check if we have minimal room left. */
* - software parser settings
* - packets length (legacy MPW only)
*/
- if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx))
+ if (!mlx5_tx_match_empw(txq, &wqem->eseg,
+ loc, dlen, olx))
break;
/* Packet attributes match, continue the same eMPW. */
if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
part -= room;
if (unlikely(!part))
return MLX5_TXCMP_CODE_EXIT;
- mlx5_tx_idone_empw(txq, loc, part, slen, olx);
+ mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx);
if (unlikely(!loc->elts_free ||
!loc->wqe_free))
return MLX5_TXCMP_CODE_EXIT;
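
Several hunks above enforce the same rule for legacy MPW: a session holds either inline descriptors or pointer descriptors, never both, and a packet longer than the inline limit must take the pointer path. A condensed sketch of that decision; the toy_* names and the inlen_limit parameter are hypothetical, not taken from the driver:

    enum toy_empw_action { TOY_EMPW_INLINE, TOY_EMPW_POINTER, TOY_EMPW_CLOSE };

    /* dlen: packet data length; inlen_limit: inline length limit;
     * started/is_inline: state of the currently open session, if any.
     */
    static enum toy_empw_action
    toy_legacy_mpw_choose(unsigned int dlen, unsigned int inlen_limit,
                          int started, int is_inline)
    {
            if (dlen > inlen_limit) {
                    /* Needs a pointer descriptor ... */
                    if (started && is_inline)
                            return TOY_EMPW_CLOSE;  /* ... cannot mix with inline. */
                    return TOY_EMPW_POINTER;
            }
            /* Short enough to inline ... */
            if (started && !is_inline)
                    return TOY_EMPW_CLOSE;          /* ... cannot mix with pointers. */
            return TOY_EMPW_INLINE;
    }
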
/*
* Generate routines with Legacy Multi-Packet Write support.
- * This mode is supported by ConnectX-4LX only and imposes
+ * This mode is supported by ConnectX-4 Lx only and imposes
* offload limitations, not supported:
* - ACL/Flows (metadata are becoming meaningless)
* - WQE Inline headers