#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */
+#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */
/* The most common offload groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
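These bits are OR-ed into the compile-time constant olx mask passed to every
generated burst routine. A minimal sketch of how the routines test them,
assuming the MLX5_TXOFF_CONFIG() helper used throughout this code is a plain
bit test against olx:

    /* With 'olx' a compile-time constant, the test is folded by the
     * compiler and the unused branch is eliminated entirely. */
    #define MLX5_TXOFF_CONFIG(config) \
            ((olx & MLX5_TXOFF_CONFIG_##config) != 0)

    if (MLX5_TXOFF_CONFIG(EMPW) && MLX5_TXOFF_CONFIG(MPW)) {
            /* Build legacy MPW descriptors. */
    }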
}
pkt = seg;
assert(len >= (rxq->crc_present << 2));
- pkt->ol_flags = 0;
+ pkt->ol_flags &= EXT_ATTACHED_MBUF;
/* If compressed, take hash result from mini-CQE. */
rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ?
cqe->rx_hash_res :
* Pointer to TX queue structure.
* @param loc
* Pointer to burst routine local context.
- * @param multi,
- * Routine is called from multi-segment sending loop,
- * do not correct the elts_head according to the pkts_copy.
* @param olx
* Configured Tx offloads mask. It is fully defined at
* compile time and may be used for optimization.
static __rte_always_inline void
mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq,
struct mlx5_txq_local *restrict loc,
- bool multi,
unsigned int olx)
{
uint16_t head = txq->elts_head;
unsigned int part;
- part = (MLX5_TXOFF_CONFIG(INLINE) || multi) ?
+ part = MLX5_TXOFF_CONFIG(INLINE) ?
0 : loc->pkts_sent - loc->pkts_copy;
head += part;
if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH ||
{
struct mlx5_wqe_cseg *restrict cs = &wqe->cseg;
+ /* For legacy MPW replace the EMPW by TSO with modifier. */
+ if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW)
+ opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24;
cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode);
cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
txq->wqe_ci += (ds + 3) / 4;
loc->wqe_free -= (ds + 3) / 4;
- /* Request CQE generation if limits are reached. */
- mlx5_tx_request_completion(txq, loc, true, olx);
return MLX5_TXCMP_CODE_MULTI;
}
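Throughout these paths the WQE size is accounted in 16-byte data segments
(ds) while the producer index wqe_ci advances in 64-byte WQE building blocks
of four segments each, hence the (ds + 3) / 4 ceiling. A minimal sketch of
the bookkeeping, with a hypothetical helper name:

    /* Advance the producer index and the free WQE counter after
     * building a WQE of 'ds' 16-byte segments; one 64-byte WQEBB
     * holds four segments, so round up. */
    static inline void
    tx_advance_wqe(struct mlx5_txq_data *txq,
                   struct mlx5_txq_local *loc, unsigned int ds)
    {
            unsigned int wqebbs = (ds + 3) / 4;

            txq->wqe_ci += wqebbs;
            loc->wqe_free -= wqebbs;
    }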
} while (true);
txq->wqe_ci += (ds + 3) / 4;
loc->wqe_free -= (ds + 3) / 4;
- /* Request CQE generation if limits are reached. */
- mlx5_tx_request_completion(txq, loc, true, olx);
return MLX5_TXCMP_CODE_MULTI;
}
wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
txq->wqe_ci += (ds + 3) / 4;
loc->wqe_free -= (ds + 3) / 4;
- /* Request CQE generation if limits are reached. */
- mlx5_tx_request_completion(txq, loc, true, olx);
return MLX5_TXCMP_CODE_MULTI;
}
--loc->elts_free;
++loc->pkts_sent;
--pkts_n;
- /* Request CQE generation if limits are reached. */
- mlx5_tx_request_completion(txq, loc, false, olx);
if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
return MLX5_TXCMP_CODE_EXIT;
loc->mbuf = *pkts++;
/**
 * Check whether the next packet attributes match the eMPW batch ones.
+ * In addition, for legacy MPW the packet length is checked.
*
* @param txq
* Pointer to TX queue structure.
* Pointer to Ethernet Segment of eMPW batch.
* @param loc
* Pointer to burst routine local context.
+ * @param dlen
+ * Length of previous packet in MPW descriptor.
* @param olx
* Configured Tx offloads mask. It is fully defined at
* compile time and may be used for optimization.
mlx5_tx_match_empw(struct mlx5_txq_data *restrict txq __rte_unused,
struct mlx5_wqe_eseg *restrict es,
struct mlx5_txq_local *restrict loc,
+ uint32_t dlen,
unsigned int olx)
{
uint8_t swp_flags = 0;
es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ?
*RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0))
return false;
+ /* Legacy MPW can send packets of the same length only. */
+ if (MLX5_TXOFF_CONFIG(MPW) &&
+ dlen != rte_pktmbuf_data_len(loc->mbuf))
+ return false;
/* There must be no VLAN packets in the eMPW loop. */
if (MLX5_TXOFF_CONFIG(VLAN))
assert(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT));
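A legacy MPW descriptor records a single data length (stored in eseg->mss,
see below) for all packets of the batch, which is why the added length check
must reject any size change. A standalone sketch of the predicate, with a
hypothetical helper name:

    /* A legacy MPW batch may be continued only by a packet whose
     * data length equals the one recorded in the open descriptor. */
    static inline bool
    tx_mpw_len_matches(const struct rte_mbuf *mbuf, uint32_t dlen)
    {
            return dlen == rte_pktmbuf_data_len(mbuf);
    }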
struct mlx5_txq_local *restrict loc,
unsigned int ds,
unsigned int slen,
- unsigned int olx)
+ unsigned int olx __rte_unused)
{
assert(!MLX5_TXOFF_CONFIG(INLINE));
#ifdef MLX5_PMD_SOFT_COUNTERS
loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
txq->wqe_ci += (ds + 3) / 4;
loc->wqe_free -= (ds + 3) / 4;
- /* Request CQE generation if limits are reached. */
- mlx5_tx_request_completion(txq, loc, false, olx);
}
/*
loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len);
txq->wqe_ci += (len + 3) / 4;
loc->wqe_free -= (len + 3) / 4;
- /* Request CQE generation if limits are reached. */
- mlx5_tx_request_completion(txq, loc, false, olx);
}
/**
next_empw:
assert(NB_SEGS(loc->mbuf) == 1);
- part = RTE_MIN(pkts_n, MLX5_EMPW_MAX_PACKETS);
+ part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ?
+ MLX5_MPW_MAX_PACKETS :
+ MLX5_EMPW_MAX_PACKETS);
if (unlikely(loc->elts_free < part)) {
/* We have not enough elts to save all mbufs. */
if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS))
eseg = &loc->wqe_last->eseg;
dseg = &loc->wqe_last->dseg[0];
loop = part;
+ /* Store the packet length for legacy MPW. */
+ if (MLX5_TXOFF_CONFIG(MPW))
+ eseg->mss = rte_cpu_to_be_16
+ (rte_pktmbuf_data_len(loc->mbuf));
for (;;) {
uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf);
#ifdef MLX5_PMD_SOFT_COUNTERS
* - check sum settings
* - metadata value
* - software parser settings
+ * - packet length (legacy MPW only)
*/
- if (!mlx5_tx_match_empw(txq, eseg, loc, olx)) {
+ if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) {
assert(loop);
part -= loop;
mlx5_tx_sdone_empw(txq, loc, part, slen, olx);
txq->wqe_ci += (2 + part + 3) / 4;
loc->wqe_free -= (2 + part + 3) / 4;
pkts_n -= part;
- /* Request CQE generation if limits are reached. */
- mlx5_tx_request_completion(txq, loc, false, olx);
if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
return MLX5_TXCMP_CODE_EXIT;
loc->mbuf = *pkts++;
* Limits the amount of packets in one WQE
 * to improve CQE generation latency.
*/
- nlim = RTE_MIN(pkts_n, MLX5_EMPW_MAX_PACKETS);
+ nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ?
+ MLX5_MPW_INLINE_MAX_PACKETS :
+ MLX5_EMPW_MAX_PACKETS);
/* Check whether we have the minimal amount of WQEs. */
if (unlikely(loc->wqe_free <
((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4)))
olx & ~MLX5_TXOFF_CONFIG_VLAN);
eseg = &loc->wqe_last->eseg;
dseg = &loc->wqe_last->dseg[0];
+ /* Store the packet length for legacy MPW. */
+ if (MLX5_TXOFF_CONFIG(MPW))
+ eseg->mss = rte_cpu_to_be_16
+ (rte_pktmbuf_data_len(loc->mbuf));
room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE,
loc->wqe_free) * MLX5_WQE_SIZE -
MLX5_WQE_CSEG_SIZE -
* - check sum settings
* - metadata value
* - software parser settings
+ * - packet length (legacy MPW only)
*/
- if (!mlx5_tx_match_empw(txq, eseg, loc, olx))
+ if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx))
break;
/* Packet attributes match, continue the same eMPW. */
if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
* free the packet immediately.
*/
rte_pktmbuf_free_seg(loc->mbuf);
- } else if (!MLX5_TXOFF_CONFIG(EMPW) &&
- txq->inlen_mode) {
+ } else if ((!MLX5_TXOFF_CONFIG(EMPW) ||
+ MLX5_TXOFF_CONFIG(MPW)) &&
+ txq->inlen_mode) {
/*
 * If minimal inlining is requested, the eMPW
 * feature should be disabled because the data is
}
++loc->pkts_sent;
--pkts_n;
- /* Request CQE generation if limits are reached. */
- mlx5_tx_request_completion(txq, loc, false, olx);
if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
return MLX5_TXCMP_CODE_EXIT;
loc->mbuf = *pkts++;
/* Take a shortcut if nothing is sent. */
if (unlikely(loc.pkts_sent == loc.pkts_loop))
goto burst_exit;
+ /* Request CQE generation if limits are reached. */
+ mlx5_tx_request_completion(txq, &loc, olx);
/*
* Ring QP doorbell immediately after WQE building completion
* to improve latencies. The pure software related data treatment
* impact under heavy loading conditions but the explicit write
* memory barrier is not required and it may improve core
* performance.
+ *
+ * - the legacy behaviour (prior to the 19.08 release) was to use
+ * some heuristics to decide whether the write memory barrier
+ * should be performed. This behaviour is enabled by specifying
+ * tx_db_nc=2; the write barrier is skipped if the application
+ * provides the full recommended burst of packets, on the
+ * assumption that more packets are coming and the write barrier
+ * will be issued on the next burst (after descriptor writing,
+ * at least).
*/
- mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc);
+ mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc &&
+ (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
/* Not all of the mbufs may be stored into elts yet. */
part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
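The doorbell condition a few lines above can be read as a small predicate; a
sketch assuming txq->db_nc marks a non-cached doorbell mapping and
txq->db_heu enables the legacy heuristics (hypothetical helper name):

    /* An explicit write memory barrier is needed unless the doorbell
     * is mapped non-cached, or the heuristics are enabled and the
     * application supplied a full default burst (the barrier is then
     * deferred to the next burst). */
    static inline bool
    tx_doorbell_needs_wmb(const struct mlx5_txq_data *txq,
                          unsigned int pkts_n)
    {
            if (txq->db_nc)
                    return false;
            if (txq->db_heu && (pkts_n % MLX5_TX_DEFAULT_BURST) == 0)
                    return false;
            return true;
    }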
MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
MLX5_TXOFF_CONFIG_METADATA)
+/*
+ * Generate routines with Legacy Multi-Packet Write support.
+ * This mode is supported by ConnectX-4 Lx only and imposes
+ * offload limitations; the following are not supported:
+ * - ACL/Flows (metadata become meaningless)
+ * - WQE Inline headers
+ * - SRIOV (E-Switch offloads)
+ * - VLAN insertion
+ * - tunnel encapsulation/decapsulation
+ * - TSO
+ */
+MLX5_TXOFF_DECL(none_mpw,
+ MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
+ MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_DECL(mci_mpw,
+ MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
+ MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
+ MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_DECL(mc_mpw,
+ MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
+ MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_DECL(i_mpw,
+ MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
+ MLX5_TXOFF_CONFIG_MPW)
+
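Each MLX5_TXOFF_DECL() above presumably expands into a thin wrapper that
instantiates the common burst template with its constant offload mask, so
every routine is specialized at compile time. An illustrative (assumed)
expansion for none_mpw:

    /* Assumed expansion of MLX5_TXOFF_DECL(none_mpw, ...). */
    static uint16_t
    mlx5_tx_burst_none_mpw(void *dpdk_txq, struct rte_mbuf **pkts,
                           uint16_t pkts_n)
    {
            return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)dpdk_txq,
                                      pkts, pkts_n,
                                      MLX5_TXOFF_CONFIG_NONE |
                                      MLX5_TXOFF_CONFIG_EMPW |
                                      MLX5_TXOFF_CONFIG_MPW);
    }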
/*
* Array of declared and compiled Tx burst function and corresponding
* supported offloads set. The array is used to select the Tx burst
MLX5_TXOFF_CONFIG_INLINE |
MLX5_TXOFF_CONFIG_METADATA)
-
MLX5_TXOFF_INFO(mtv,
MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
MLX5_TXOFF_CONFIG_VLAN |
MLX5_TXOFF_INFO(iv,
MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
MLX5_TXOFF_CONFIG_METADATA)
+
+MLX5_TXOFF_INFO(none_mpw,
+ MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
+ MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_INFO(mci_mpw,
+ MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
+ MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
+ MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_INFO(mc_mpw,
+ MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
+ MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_INFO(i_mpw,
+ MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
+ MLX5_TXOFF_CONFIG_MPW)
};
/**
if (config->mps == MLX5_MPW_ENHANCED &&
config->txq_inline_min <= 0) {
/*
- * The NIC supports Enhanced Multi-Packet Write.
- * We do not support legacy MPW due to its
- * hardware related problems, so we just ignore
- * legacy MLX5_MPW settings. There should be no
- * minimal required inline data.
+ * The NIC supports Enhanced Multi-Packet Write
+ * and does not require minimal inline data.
*/
olx |= MLX5_TXOFF_CONFIG_EMPW;
}
/* We should support Flow metadata. */
olx |= MLX5_TXOFF_CONFIG_METADATA;
}
+ if (config->mps == MLX5_MPW) {
+ /*
+ * The NIC supports Legacy Multi-Packet Write.
+ * The MLX5_TXOFF_CONFIG_MPW flag controls the
+ * descriptor building method in combination
+ * with MLX5_TXOFF_CONFIG_EMPW.
+ */
+ if (!(olx & (MLX5_TXOFF_CONFIG_TSO |
+ MLX5_TXOFF_CONFIG_SWP |
+ MLX5_TXOFF_CONFIG_VLAN |
+ MLX5_TXOFF_CONFIG_METADATA)))
+ olx |= MLX5_TXOFF_CONFIG_EMPW |
+ MLX5_TXOFF_CONFIG_MPW;
+ }
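As a worked example of the selection above: on a legacy MPW NIC with none of
TSO/SWP/VLAN/metadata requested, the block yields a mask matching the *_mpw
entries declared earlier, e.g.:

    /* No extra offloads requested: the scan below selects the
     * none_mpw routine declared above. */
    olx |= MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW;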
/*
* Scan the routines table to find the minimal
* satisfying routine with requested offloads.
DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)");
if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA)
DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)");
- if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW)
- DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)");
+ if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) {
+ if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW)
+ DRV_LOG(DEBUG, "\tMPW (Legacy MPW)");
+ else
+ DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)");
+ }
return txoff_func[m].func;
}