X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=0b8563491926449c49acae46236b407f34a5ebcb;hb=8b581c690a54be065ecb7b7fd9979c8e86898234;hp=1ea5960df69f5e0dc331f28a674d02816e2b3d44;hpb=8409a28573d39ef15da795cadae88825373f1326;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 1ea5960df6..0b85634919 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -62,6 +62,7 @@ enum mlx5_txcmp_code { #define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/ #define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */ #define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/ +#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/ /* The most common offloads groups. */ #define MLX5_TXOFF_CONFIG_NONE 0 @@ -1340,7 +1341,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) } pkt = seg; assert(len >= (rxq->crc_present << 2)); - pkt->ol_flags = 0; + pkt->ol_flags &= EXT_ATTACHED_MBUF; /* If compressed, take hash result from mini-CQE. */ rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? cqe->rx_hash_res : @@ -2144,9 +2145,6 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, * Pointer to TX queue structure. * @param loc * Pointer to burst routine local context. - * @param multi, - * Routine is called from multi-segment sending loop, - * do not correct the elts_head according to the pkts_copy. * @param olx * Configured Tx offloads mask. It is fully defined at * compile time and may be used for optimization. @@ -2154,13 +2152,12 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, static __rte_always_inline void mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, struct mlx5_txq_local *restrict loc, - bool multi, unsigned int olx) { uint16_t head = txq->elts_head; unsigned int part; - part = (MLX5_TXOFF_CONFIG(INLINE) || multi) ? + part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc->pkts_sent - loc->pkts_copy; head += part; if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || @@ -2240,6 +2237,9 @@ mlx5_tx_cseg_init(struct mlx5_txq_data *restrict txq, { struct mlx5_wqe_cseg *restrict cs = &wqe->cseg; + /* For legacy MPW replace the EMPW by TSO with modifier. */ + if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) + opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << @@ -3116,8 +3116,6 @@ mlx5_tx_packet_multi_tso(struct mlx5_txq_data *restrict txq, wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; - /* Request CQE generation if limits are reached. */ - mlx5_tx_request_completion(txq, loc, true, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3226,8 +3224,6 @@ mlx5_tx_packet_multi_send(struct mlx5_txq_data *restrict txq, } while (true); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; - /* Request CQE generation if limits are reached. */ - mlx5_tx_request_completion(txq, loc, true, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3384,8 +3380,6 @@ do_align: wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; - /* Request CQE generation if limits are reached. 
*/
-	mlx5_tx_request_completion(txq, loc, true, olx);
 	return MLX5_TXCMP_CODE_MULTI;
 }
 
@@ -3595,8 +3589,6 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq,
 		--loc->elts_free;
 		++loc->pkts_sent;
 		--pkts_n;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -3669,6 +3661,7 @@ mlx5_tx_able_to_empw(struct mlx5_txq_data *restrict txq,
 
 /**
  * Check the next packet attributes to match with the eMPW batch ones.
+ * In addition, for legacy MPW the packet length is checked as well.
  *
  * @param txq
  *   Pointer to TX queue structure.
@@ -3676,6 +3669,8 @@ mlx5_tx_able_to_empw(struct mlx5_txq_data *restrict txq,
  *   Pointer to Ethernet Segment of eMPW batch.
  * @param loc
  *   Pointer to burst routine local context.
+ * @param dlen
+ *   Length of previous packet in MPW descriptor.
  * @param olx
  *   Configured Tx offloads mask. It is fully defined at
  *   compile time and may be used for optimization.
@@ -3688,6 +3683,7 @@ static __rte_always_inline bool
 mlx5_tx_match_empw(struct mlx5_txq_data *restrict txq __rte_unused,
 		   struct mlx5_wqe_eseg *restrict es,
 		   struct mlx5_txq_local *restrict loc,
+		   uint32_t dlen,
 		   unsigned int olx)
 {
 	uint8_t swp_flags = 0;
@@ -3706,6 +3702,10 @@ mlx5_tx_match_empw(struct mlx5_txq_data *restrict txq __rte_unused,
 	    es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ?
 			     *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0))
 		return false;
+	/* Legacy MPW can only send packets with the same length. */
+	if (MLX5_TXOFF_CONFIG(MPW) &&
+	    dlen != rte_pktmbuf_data_len(loc->mbuf))
+		return false;
 	/* There must be no VLAN packets in eMPW loop. */
 	if (MLX5_TXOFF_CONFIG(VLAN))
 		assert(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT));
@@ -3738,7 +3738,7 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq,
 		   struct mlx5_txq_local *restrict loc,
 		   unsigned int ds,
 		   unsigned int slen,
-		   unsigned int olx)
+		   unsigned int olx __rte_unused)
 {
 	assert(!MLX5_TXOFF_CONFIG(INLINE));
 #ifdef MLX5_PMD_SOFT_COUNTERS
@@ -3753,8 +3753,6 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq,
 	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 	txq->wqe_ci += (ds + 3) / 4;
 	loc->wqe_free -= (ds + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, false, olx);
 }
 
 /*
@@ -3797,8 +3795,6 @@ mlx5_tx_idone_empw(struct mlx5_txq_data *restrict txq,
 	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len);
 	txq->wqe_ci += (len + 3) / 4;
 	loc->wqe_free -= (len + 3) / 4;
-	/* Request CQE generation if limits are reached. */
-	mlx5_tx_request_completion(txq, loc, false, olx);
 }
 
 /**
@@ -3876,7 +3872,9 @@ mlx5_tx_burst_empw_simple(struct mlx5_txq_data *restrict txq,
 
 next_empw:
 	assert(NB_SEGS(loc->mbuf) == 1);
-	part = RTE_MIN(pkts_n, MLX5_EMPW_MAX_PACKETS);
+	part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ?
+			       MLX5_MPW_MAX_PACKETS :
+			       MLX5_EMPW_MAX_PACKETS);
 	if (unlikely(loc->elts_free < part)) {
 		/* We have no enough elts to save all mbufs. */
 		if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS))
@@ -3906,6 +3904,10 @@ next_empw:
 		eseg = &loc->wqe_last->eseg;
 		dseg = &loc->wqe_last->dseg[0];
 		loop = part;
+		/* Store the packet length for legacy MPW. */
+		if (MLX5_TXOFF_CONFIG(MPW))
+			eseg->mss = rte_cpu_to_be_16
+					(rte_pktmbuf_data_len(loc->mbuf));
 		for (;;) {
 			uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf);
 #ifdef MLX5_PMD_SOFT_COUNTERS
@@ -3964,8 +3966,9 @@ next_empw:
 			 *	- check sum settings
 			 *	- metadata value
 			 *	- software parser settings
+			 *	- packet length (legacy MPW only)
 			 */
-			if (!mlx5_tx_match_empw(txq, eseg, loc, olx)) {
+			if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) {
 				assert(loop);
 				part -= loop;
 				mlx5_tx_sdone_empw(txq, loc, part, slen, olx);
@@ -3992,8 +3995,6 @@ next_empw:
 		txq->wqe_ci += (2 + part + 3) / 4;
 		loc->wqe_free -= (2 + part + 3) / 4;
 		pkts_n -= part;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -4040,7 +4041,9 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq,
 	 * Limits the amount of packets in one WQE
 	 * to improve CQE latency generation.
 	 */
-	nlim = RTE_MIN(pkts_n, MLX5_EMPW_MAX_PACKETS);
+	nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ?
+			       MLX5_MPW_INLINE_MAX_PACKETS :
+			       MLX5_EMPW_MAX_PACKETS);
 	/* Check whether we have minimal amount WQEs */
 	if (unlikely(loc->wqe_free <
 		    ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4)))
@@ -4059,6 +4062,10 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq,
 					  olx & ~MLX5_TXOFF_CONFIG_VLAN);
 		eseg = &loc->wqe_last->eseg;
 		dseg = &loc->wqe_last->dseg[0];
+		/* Store the packet length for legacy MPW. */
+		if (MLX5_TXOFF_CONFIG(MPW))
+			eseg->mss = rte_cpu_to_be_16
+					(rte_pktmbuf_data_len(loc->mbuf));
 		room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE,
 			       loc->wqe_free) * MLX5_WQE_SIZE -
 			       MLX5_WQE_CSEG_SIZE -
@@ -4209,8 +4216,9 @@ next_mbuf:
 			 *	- check sum settings
 			 *	- metadata value
 			 *	- software parser settings
+			 *	- packet length (legacy MPW only)
 			 */
-			if (!mlx5_tx_match_empw(txq, eseg, loc, olx))
+			if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx))
 				break;
 			/* Packet attributes match, continue the same eMPW. */
 			if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
@@ -4313,8 +4321,9 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq,
 			 * free the packet immediately.
 			 */
 			rte_pktmbuf_free_seg(loc->mbuf);
-		} else if (!MLX5_TXOFF_CONFIG(EMPW) &&
-			   txq->inlen_mode) {
+		} else if ((!MLX5_TXOFF_CONFIG(EMPW) ||
+			     MLX5_TXOFF_CONFIG(MPW)) &&
+			   txq->inlen_mode) {
 			/*
 			 * If minimal inlining is requested the eMPW
 			 * feature should be disabled due to data is
@@ -4469,8 +4478,6 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq,
 		}
 		++loc->pkts_sent;
 		--pkts_n;
-		/* Request CQE generation if limits are reached. */
-		mlx5_tx_request_completion(txq, loc, false, olx);
 		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
 			return MLX5_TXCMP_CODE_EXIT;
 		loc->mbuf = *pkts++;
@@ -4749,6 +4756,8 @@ enter_send_single:
 	/* Take a shortcut if nothing is sent. */
 	if (unlikely(loc.pkts_sent == loc.pkts_loop))
 		goto burst_exit;
+	/* Request CQE generation if limits are reached. */
+	mlx5_tx_request_completion(txq, &loc, olx);
 	/*
 	 * Ring QP doorbell immediately after WQE building completion
 	 * to improve latencies. The pure software related data treatment
@@ -4769,8 +4778,18 @@ enter_send_single:
 	 * impact under heavy loading conditions but the explicit write
 	 * memory barrier is not required and it may improve core
 	 * performance.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to
	 *   use some heuristics to decide whether the write memory
	 *   barrier should be performed.
+	 *   This behaviour is selected by specifying tx_db_nc=2; the
+	 *   write barrier is skipped if the application provides the
+	 *   full recommended burst of packets, on the assumption that
+	 *   more packets are coming and the barrier will be issued on
+	 *   the next burst (after descriptor writing, at least).
 	 */
-	mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc);
+	mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc &&
+			(!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
 	/* Not all of the mbufs may be stored into elts yet. */
 	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
 	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
@@ -4938,6 +4957,34 @@ MLX5_TXOFF_DECL(iv,
 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
 		MLX5_TXOFF_CONFIG_METADATA)
 
+/*
+ * Generate routines with Legacy Multi-Packet Write support.
+ * This mode is supported by ConnectX-4 Lx only and imposes
+ * offload limitations; the following are not supported:
+ *  - ACL/Flows (metadata become meaningless)
+ *  - WQE Inline headers
+ *  - SRIOV (E-Switch offloads)
+ *  - VLAN insertion
+ *  - tunnel encapsulation/decapsulation
+ *  - TSO
+ */
+MLX5_TXOFF_DECL(none_mpw,
+		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
+		MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_DECL(mci_mpw,
+		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
+		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
+		MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_DECL(mc_mpw,
+		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
+		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_DECL(i_mpw,
+		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
+		MLX5_TXOFF_CONFIG_MPW)
+
 /*
  * Array of declared and compiled Tx burst function and corresponding
  * supported offloads set. The array is used to select the Tx burst
@@ -5040,7 +5087,6 @@ MLX5_TXOFF_INFO(mti,
 		MLX5_TXOFF_CONFIG_INLINE |
 		MLX5_TXOFF_CONFIG_METADATA)
-
 
 MLX5_TXOFF_INFO(mtv,
 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
 		MLX5_TXOFF_CONFIG_VLAN |
@@ -5081,6 +5127,23 @@ MLX5_TXOFF_INFO(v,
 MLX5_TXOFF_INFO(iv,
 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
 		MLX5_TXOFF_CONFIG_METADATA)
+
+MLX5_TXOFF_INFO(none_mpw,
+		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
+		MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_INFO(mci_mpw,
+		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
+		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
+		MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_INFO(mc_mpw,
+		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
+		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)
+
+MLX5_TXOFF_INFO(i_mpw,
+		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
+		MLX5_TXOFF_CONFIG_MPW)
 };
 
 /**
@@ -5163,11 +5226,8 @@ mlx5_select_tx_function(struct rte_eth_dev *dev)
 		if (config->mps == MLX5_MPW_ENHANCED &&
 		    config->txq_inline_min <= 0) {
 			/*
-			 * The NIC supports Enhanced Multi-Packet Write.
-			 * We do not support legacy MPW due to its
-			 * hardware related problems, so we just ignore
-			 * legacy MLX5_MPW settings. There should be no
-			 * minimal required inline data.
+			 * The NIC supports Enhanced Multi-Packet Write
+			 * and does not require minimal inline data.
 			 */
 			olx |= MLX5_TXOFF_CONFIG_EMPW;
 		}
@@ -5175,6 +5235,20 @@ mlx5_select_tx_function(struct rte_eth_dev *dev)
 			/* We should support Flow metadata. */
 			olx |= MLX5_TXOFF_CONFIG_METADATA;
 		}
+		if (config->mps == MLX5_MPW) {
+			/*
+			 * The NIC supports Legacy Multi-Packet Write.
+			 * The MLX5_TXOFF_CONFIG_MPW controls the
+			 * descriptor building method in combination
+			 * with MLX5_TXOFF_CONFIG_EMPW.
+ */ + if (!(olx & (MLX5_TXOFF_CONFIG_TSO | + MLX5_TXOFF_CONFIG_SWP | + MLX5_TXOFF_CONFIG_VLAN | + MLX5_TXOFF_CONFIG_METADATA))) + olx |= MLX5_TXOFF_CONFIG_EMPW | + MLX5_TXOFF_CONFIG_MPW; + } /* * Scan the routines table to find the minimal * satisfying routine with requested offloads. @@ -5243,7 +5317,11 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)"); if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA) DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)"); - if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) - DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)"); + if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) { + if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW) + DRV_LOG(DEBUG, "\tMPW (Legacy MPW)"); + else + DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)"); + } return txoff_func[m].func; }