X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=1b71e94221a130bc1987fa8367de20f45c0da18d;hb=1af745211344622b50ffd7d67a618dcdce85fef8;hp=371f96c30f92bb2c75cecb2ca4e27f10d5f61b04;hpb=8e46d4e18f0961f022218c4b693cd56434a444f0;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 371f96c30f..1b71e94221 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -7,17 +7,6 @@ #include #include -/* Verbs header. */ -/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ -#ifdef PEDANTIC -#pragma GCC diagnostic ignored "-Wpedantic" -#endif -#include -#include -#ifdef PEDANTIC -#pragma GCC diagnostic error "-Wpedantic" -#endif - #include #include #include @@ -27,12 +16,14 @@ #include #include +#include #include #include #include #include "mlx5_defs.h" #include "mlx5.h" +#include "mlx5_mr.h" #include "mlx5_utils.h" #include "mlx5_rxtx.h" #include "mlx5_autoconf.h" @@ -65,6 +56,7 @@ enum mlx5_txcmp_code { #define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */ #define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/ #define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/ +#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/ /* The most common offloads groups. */ #define MLX5_TXOFF_CONFIG_NONE 0 @@ -112,13 +104,13 @@ mlx5_queue_state_modify(struct rte_eth_dev *dev, struct mlx5_mp_arg_queue_state_modify *sm); static inline void -mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp, - volatile struct mlx5_cqe *restrict cqe, +mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, + volatile struct mlx5_cqe *__rte_restrict cqe, uint32_t phcsum); static inline void -mlx5_lro_update_hdr(uint8_t *restrict padd, - volatile struct mlx5_cqe *restrict cqe, +mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, + volatile struct mlx5_cqe *__rte_restrict cqe, uint32_t len); uint32_t mlx5_ptype_table[] __rte_cache_aligned = { @@ -373,7 +365,7 @@ mlx5_set_swp_types_table(void) * Software Parser flags are set by pointer. */ static __rte_always_inline uint32_t -txq_mbuf_to_swp(struct mlx5_txq_local *restrict loc, +txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, uint8_t *swp_flags, unsigned int olx) { @@ -536,6 +528,89 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) return RTE_ETH_RX_DESC_AVAIL; } +/** + * DPDK callback to get the RX queue information + * + * @param dev + * Pointer to the device structure. + * + * @param rx_queue_id + * Rx queue identificator. + * + * @param qinfo + * Pointer to the RX queue information structure. + * + * @return + * None. + */ + +void +mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id, + struct rte_eth_rxq_info *qinfo) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id]; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); + + if (!rxq) + return; + qinfo->mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? + rxq->mprq_mp : rxq->mp; + qinfo->conf.rx_thresh.pthresh = 0; + qinfo->conf.rx_thresh.hthresh = 0; + qinfo->conf.rx_thresh.wthresh = 0; + qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh; + qinfo->conf.rx_drop_en = 1; + qinfo->conf.rx_deferred_start = rxq_ctrl ? 
0 : 1; + qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; + qinfo->scattered_rx = dev->data->scattered_rx; + qinfo->nb_desc = 1 << rxq->elts_n; +} + +/** + * DPDK callback to get the RX packet burst mode information + * + * @param dev + * Pointer to the device structure. + * + * @param rx_queue_id + * Rx queue identificatior. + * + * @param mode + * Pointer to the burts mode information. + * + * @return + * 0 as success, -EINVAL as failure. + */ + +int +mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, + uint16_t rx_queue_id __rte_unused, + struct rte_eth_burst_mode *mode) +{ + eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; + + if (pkt_burst == mlx5_rx_burst) { + snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); + } else if (pkt_burst == mlx5_rx_burst_mprq) { + snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); + } else if (pkt_burst == mlx5_rx_burst_vec) { +#if defined RTE_ARCH_X86_64 + snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE"); +#elif defined RTE_ARCH_ARM64 + snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon"); +#elif defined RTE_ARCH_PPC_64 + snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); +#else + return -EINVAL; +#endif + } else { + return -EINVAL; + } + return 0; +} + /** * DPDK callback to get the number of used descriptors in a RX queue * @@ -663,7 +738,7 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) * the error completion entry is handled successfully. */ static int -mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq, +mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, volatile struct mlx5_err_cqe *err_cqe) { if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { @@ -860,43 +935,79 @@ mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; struct mlx5_txq_ctrl *txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); - struct ibv_qp_attr mod = { - .qp_state = IBV_QPS_RESET, - .port_num = (uint8_t)priv->ibv_port, - }; - struct ibv_qp *qp = txq_ctrl->obj->qp; - ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); - if (ret) { - DRV_LOG(ERR, "Cannot change the Tx QP state to RESET " - "%s", strerror(errno)); - rte_errno = errno; - return ret; - } - mod.qp_state = IBV_QPS_INIT; - ret = mlx5_glue->modify_qp(qp, &mod, - (IBV_QP_STATE | IBV_QP_PORT)); - if (ret) { - DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s", - strerror(errno)); - rte_errno = errno; - return ret; - } - mod.qp_state = IBV_QPS_RTR; - ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); - if (ret) { - DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s", - strerror(errno)); - rte_errno = errno; - return ret; - } - mod.qp_state = IBV_QPS_RTS; - ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); - if (ret) { - DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s", - strerror(errno)); - rte_errno = errno; - return ret; + if (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ) { + struct mlx5_devx_modify_sq_attr msq_attr = { 0 }; + + /* Change queue state to reset. */ + msq_attr.sq_state = MLX5_SQC_STATE_ERR; + msq_attr.state = MLX5_SQC_STATE_RST; + ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq_devx, + &msq_attr); + if (ret) { + DRV_LOG(ERR, "Cannot change the " + "Tx QP state to RESET %s", + strerror(errno)); + rte_errno = errno; + return ret; + } + /* Change queue state to ready. 
*/ + msq_attr.sq_state = MLX5_SQC_STATE_RST; + msq_attr.state = MLX5_SQC_STATE_RDY; + ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq_devx, + &msq_attr); + if (ret) { + DRV_LOG(ERR, "Cannot change the " + "Tx QP state to READY %s", + strerror(errno)); + rte_errno = errno; + return ret; + } + } else { + struct ibv_qp_attr mod = { + .qp_state = IBV_QPS_RESET, + .port_num = (uint8_t)priv->dev_port, + }; + struct ibv_qp *qp = txq_ctrl->obj->qp; + + MLX5_ASSERT + (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_IBV); + + ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); + if (ret) { + DRV_LOG(ERR, "Cannot change the " + "Tx QP state to RESET %s", + strerror(errno)); + rte_errno = errno; + return ret; + } + mod.qp_state = IBV_QPS_INIT; + ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); + if (ret) { + DRV_LOG(ERR, "Cannot change the " + "Tx QP state to INIT %s", + strerror(errno)); + rte_errno = errno; + return ret; + } + mod.qp_state = IBV_QPS_RTR; + ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); + if (ret) { + DRV_LOG(ERR, "Cannot change the " + "Tx QP state to RTR %s", + strerror(errno)); + rte_errno = errno; + return ret; + } + mod.qp_state = IBV_QPS_RTS; + ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); + if (ret) { + DRV_LOG(ERR, "Cannot change the " + "Tx QP state to RTS %s", + strerror(errno)); + rte_errno = errno; + return ret; + } } } return 0; @@ -917,6 +1028,7 @@ static int mlx5_queue_state_modify(struct rte_eth_dev *dev, struct mlx5_mp_arg_queue_state_modify *sm) { + struct mlx5_priv *priv = dev->data->dev_private; int ret = 0; switch (rte_eal_process_type()) { @@ -924,7 +1036,7 @@ mlx5_queue_state_modify(struct rte_eth_dev *dev, ret = mlx5_queue_state_modify_primary(dev, sm); break; case RTE_PROC_SECONDARY: - ret = mlx5_mp_req_queue_state_modify(dev, sm); + ret = mlx5_mp_req_queue_state_modify(&priv->mp_id, sm); break; default: break; @@ -1254,9 +1366,10 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); } } - if (rte_flow_dynf_metadata_avail() && cqe->flow_table_metadata) { - pkt->ol_flags |= PKT_RX_DYNF_METADATA; - *RTE_FLOW_DYNF_METADATA(pkt) = cqe->flow_table_metadata; + if (rxq->dynf_meta && cqe->flow_table_metadata) { + pkt->ol_flags |= rxq->flow_meta_mask; + *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = + cqe->flow_table_metadata; } if (rxq->csum) pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); @@ -1266,7 +1379,11 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); } if (rxq->hw_timestamp) { - pkt->timestamp = rte_be_to_cpu_64(cqe->timestamp); + uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); + + if (rxq->rt_timestamp) + ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); + pkt->timestamp = ts; pkt->ol_flags |= PKT_RX_TIMESTAMP; } } @@ -1422,8 +1539,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) * The L3 pseudo-header checksum. 
*/ static inline void -mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp, - volatile struct mlx5_cqe *restrict cqe, +mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, + volatile struct mlx5_cqe *__rte_restrict cqe, uint32_t phcsum) { uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & @@ -1443,7 +1560,7 @@ mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp, if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) tcp->tcp_flags |= RTE_TCP_PSH_FLAG; tcp->cksum = 0; - csum += rte_raw_cksum(tcp, (tcp->data_off & 0xF) * 4); + csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); csum = (~csum) & 0xffff; if (csum == 0) @@ -1464,8 +1581,8 @@ mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp, * The packet length. */ static inline void -mlx5_lro_update_hdr(uint8_t *restrict padd, - volatile struct mlx5_cqe *restrict cqe, +mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, + volatile struct mlx5_cqe *__rte_restrict cqe, uint32_t len) { union { @@ -1575,21 +1692,20 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int i = 0; uint32_t rq_ci = rxq->rq_ci; uint16_t consumed_strd = rxq->consumed_strd; - uint16_t headroom_sz = rxq->strd_headroom_en * RTE_PKTMBUF_HEADROOM; struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; while (i < pkts_n) { struct rte_mbuf *pkt; void *addr; int ret; - unsigned int len; + uint32_t len; uint16_t strd_cnt; uint16_t strd_idx; uint32_t offset; uint32_t byte_cnt; + int32_t hdrm_overlap; volatile struct mlx5_mini_cqe8 *mcqe = NULL; uint32_t rss_hash_res = 0; - uint8_t lro_num_seg; if (consumed_strd == strd_n) { /* Replace WQE only if the buffer is still in use. */ @@ -1636,18 +1752,6 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) MLX5_ASSERT(strd_idx < strd_n); MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask)); - lro_num_seg = cqe->lro_num_seg; - /* - * Currently configured to receive a packet per a stride. But if - * MTU is adjusted through kernel interface, device could - * consume multiple strides without raising an error. In this - * case, the packet should be dropped because it is bigger than - * the max_rx_pkt_len. - */ - if (unlikely(!lro_num_seg && strd_cnt > 1)) { - ++rxq->stats.idropped; - continue; - } pkt = rte_pktmbuf_alloc(rxq->mp); if (unlikely(pkt == NULL)) { ++rxq->stats.rx_nombuf; @@ -1659,23 +1763,57 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) len -= RTE_ETHER_CRC_LEN; offset = strd_idx * strd_sz + strd_shift; addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset); + hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz; /* * Memcpy packets to the target mbuf if: * - The size of packet is smaller than mprq_max_memcpy_len. * - Out of buffer in the Mempool for Multi-Packet RQ. + * - The packet's stride overlaps a headroom and scatter is off. */ - if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) { - /* - * When memcpy'ing packet due to out-of-buffer, the - * packet must be smaller than the target mbuf. 
- */ - if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) { + if (len <= rxq->mprq_max_memcpy_len || + rxq->mprq_repl == NULL || + (hdrm_overlap > 0 && !rxq->strd_scatter_en)) { + if (likely(rte_pktmbuf_tailroom(pkt) >= len)) { + rte_memcpy(rte_pktmbuf_mtod(pkt, void *), + addr, len); + DATA_LEN(pkt) = len; + } else if (rxq->strd_scatter_en) { + struct rte_mbuf *prev = pkt; + uint32_t seg_len = + RTE_MIN(rte_pktmbuf_tailroom(pkt), len); + uint32_t rem_len = len - seg_len; + + rte_memcpy(rte_pktmbuf_mtod(pkt, void *), + addr, seg_len); + DATA_LEN(pkt) = seg_len; + while (rem_len) { + struct rte_mbuf *next = + rte_pktmbuf_alloc(rxq->mp); + + if (unlikely(next == NULL)) { + rte_pktmbuf_free(pkt); + ++rxq->stats.rx_nombuf; + goto out; + } + NEXT(prev) = next; + SET_DATA_OFF(next, 0); + addr = RTE_PTR_ADD(addr, seg_len); + seg_len = RTE_MIN + (rte_pktmbuf_tailroom(next), + rem_len); + rte_memcpy + (rte_pktmbuf_mtod(next, void *), + addr, seg_len); + DATA_LEN(next) = seg_len; + rem_len -= seg_len; + prev = next; + ++NB_SEGS(pkt); + } + } else { rte_pktmbuf_free_seg(pkt); ++rxq->stats.idropped; continue; } - rte_memcpy(rte_pktmbuf_mtod(pkt, void *), addr, len); - DATA_LEN(pkt) = len; } else { rte_iova_t buf_iova; struct rte_mbuf_ext_shared_info *shinfo; @@ -1686,7 +1824,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_atomic16_add_return(&buf->refcnt, 1); MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf->refcnt) <= strd_n + 1); - buf_addr = RTE_PTR_SUB(addr, headroom_sz); + buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM); /* * MLX5 device doesn't use iova but it is necessary in a * case where the Rx packet is transmitted via a @@ -1705,43 +1843,42 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova, buf_len, shinfo); /* Set mbuf head-room. */ - pkt->data_off = headroom_sz; + SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM); MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF); - /* - * Prevent potential overflow due to MTU change through - * kernel interface. - */ - if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) { - rte_pktmbuf_free_seg(pkt); - ++rxq->stats.idropped; - continue; - } + MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >= + len - (hdrm_overlap > 0 ? hdrm_overlap : 0)); DATA_LEN(pkt) = len; /* - * LRO packet may consume all the stride memory, in this - * case packet head-room space is not guaranteed so must - * to add an empty mbuf for the head-room. + * Copy the last fragment of a packet (up to headroom + * size bytes) in case there is a stride overlap with + * a next packet's headroom. Allocate a separate mbuf + * to store this fragment and link it. Scatter is on. 
*/ - if (!rxq->strd_headroom_en) { - struct rte_mbuf *headroom_mbuf = - rte_pktmbuf_alloc(rxq->mp); + if (hdrm_overlap > 0) { + MLX5_ASSERT(rxq->strd_scatter_en); + struct rte_mbuf *seg = + rte_pktmbuf_alloc(rxq->mp); - if (unlikely(headroom_mbuf == NULL)) { + if (unlikely(seg == NULL)) { rte_pktmbuf_free_seg(pkt); ++rxq->stats.rx_nombuf; break; } - PORT(pkt) = rxq->port_id; - NEXT(headroom_mbuf) = pkt; - pkt = headroom_mbuf; + SET_DATA_OFF(seg, 0); + rte_memcpy(rte_pktmbuf_mtod(seg, void *), + RTE_PTR_ADD(addr, len - hdrm_overlap), + hdrm_overlap); + DATA_LEN(seg) = hdrm_overlap; + DATA_LEN(pkt) = len - hdrm_overlap; + NEXT(pkt) = seg; NB_SEGS(pkt) = 2; } } rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); - if (lro_num_seg > 1) { + if (cqe->lro_num_seg > 1) { mlx5_lro_update_hdr(addr, cqe, len); pkt->ol_flags |= PKT_RX_LRO; - pkt->tso_segsz = strd_sz; + pkt->tso_segsz = len / cqe->lro_num_seg; } PKT_LEN(pkt) = len; PORT(pkt) = rxq->port_id; @@ -1753,6 +1890,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) *(pkts++) = pkt; ++i; } +out: /* Update the consumer indexes. */ rxq->consumed_strd = consumed_strd; rte_cio_wmb(); @@ -1858,7 +1996,7 @@ mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_free_mbuf(struct rte_mbuf **restrict pkts, +mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts, unsigned int pkts_n, unsigned int olx __rte_unused) { @@ -1963,7 +2101,7 @@ mlx5_tx_free_mbuf(struct rte_mbuf **restrict pkts, * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_free_elts(struct mlx5_txq_data *restrict txq, +mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, uint16_t tail, unsigned int olx __rte_unused) { @@ -2004,8 +2142,8 @@ mlx5_tx_free_elts(struct mlx5_txq_data *restrict txq, * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_copy_elts(struct mlx5_txq_data *restrict txq, - struct rte_mbuf **restrict pkts, +mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, + struct rte_mbuf **__rte_restrict pkts, unsigned int pkts_n, unsigned int olx __rte_unused) { @@ -2041,7 +2179,7 @@ mlx5_tx_copy_elts(struct mlx5_txq_data *restrict txq, * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq, +mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, volatile struct mlx5_cqe *last_cqe, unsigned int olx __rte_unused) { @@ -2072,12 +2210,12 @@ mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq, * routine smaller, simple and faster - from experiments. */ static void -mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, +mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, unsigned int olx __rte_unused) { unsigned int count = MLX5_TX_COMP_MAX_CQE; volatile struct mlx5_cqe *last_cqe = NULL; - uint16_t ci = txq->cq_ci; + bool ring_doorbell = false; int ret; static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); @@ -2085,8 +2223,8 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, do { volatile struct mlx5_cqe *cqe; - cqe = &txq->cqes[ci & txq->cqe_m]; - ret = check_cqe(cqe, txq->cqe_s, ci); + cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; + ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { if (likely(ret != MLX5_CQE_STATUS_ERR)) { /* No new CQEs in completion queue. 
*/ @@ -2100,7 +2238,6 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, * here, before we might perform SQ reset. */ rte_wmb(); - txq->cq_ci = ci; ret = mlx5_tx_error_cqe_handle (txq, (volatile struct mlx5_err_cqe *)cqe); if (unlikely(ret < 0)) { @@ -2116,16 +2253,18 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. * The send queue is supposed to be empty. */ - ++ci; - txq->cq_pi = ci; + ring_doorbell = true; + ++txq->cq_ci; + txq->cq_pi = txq->cq_ci; last_cqe = NULL; continue; } /* Normal transmit completion. */ - MLX5_ASSERT(ci != txq->cq_pi); - MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) == + MLX5_ASSERT(txq->cq_ci != txq->cq_pi); + MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == cqe->wqe_counter); - ++ci; + ring_doorbell = true; + ++txq->cq_ci; last_cqe = cqe; /* * We have to restrict the amount of processed CQEs @@ -2138,14 +2277,10 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, if (likely(--count == 0)) break; } while (true); - if (likely(ci != txq->cq_ci)) { - /* - * Update completion queue consuming index - * and ring doorbell to notify hardware. - */ + if (likely(ring_doorbell)) { + /* Ring doorbell to notify hardware. */ rte_compiler_barrier(); - txq->cq_ci = ci; - *txq->cq_db = rte_cpu_to_be_32(ci); + *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); mlx5_tx_comp_flush(txq, last_cqe, olx); } } @@ -2164,8 +2299,8 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, +mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx) { uint16_t head = txq->elts_head; @@ -2179,6 +2314,7 @@ mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { volatile struct mlx5_wqe *last = loc->wqe_last; + MLX5_ASSERT(last); txq->elts_comp = head; if (MLX5_TXOFF_CONFIG(INLINE)) txq->wqe_comp = txq->wqe_ci; @@ -2211,7 +2347,7 @@ mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) { - struct mlx5_txq_data *restrict txq = tx_queue; + struct mlx5_txq_data *__rte_restrict txq = tx_queue; uint16_t used; mlx5_tx_handle_completion(txq, 0); @@ -2242,14 +2378,14 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_cseg_init(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc __rte_unused, - struct mlx5_wqe *restrict wqe, +mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc __rte_unused, + struct mlx5_wqe *__rte_restrict wqe, unsigned int ds, unsigned int opcode, unsigned int olx __rte_unused) { - struct mlx5_wqe_cseg *restrict cs = &wqe->cseg; + struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; /* For legacy MPW replace the EMPW by TSO with modifier. */ if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) @@ -2261,6 +2397,37 @@ mlx5_tx_cseg_init(struct mlx5_txq_data *restrict txq, cs->misc = RTE_BE32(0); } +/** + * Build the Synchronize Queue Segment with specified completion index. + * + * @param txq + * Pointer to TX queue structure. + * @param loc + * Pointer to burst routine local context. 
+ * @param wqe + * Pointer to WQE to fill with built Control Segment. + * @param wci + * Completion index in Clock Queue to wait. + * @param olx + * Configured Tx offloads mask. It is fully defined at + * compile time and may be used for optimization. + */ +static __rte_always_inline void +mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, + struct mlx5_txq_local *restrict loc __rte_unused, + struct mlx5_wqe *restrict wqe, + unsigned int wci, + unsigned int olx __rte_unused) +{ + struct mlx5_wqe_qseg *qs; + + qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); + qs->max_index = rte_cpu_to_be_32(wci); + qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq->id); + qs->reserved0 = RTE_BE32(0); + qs->reserved1 = RTE_BE32(0); +} + /** * Build the Ethernet Segment without inlined data. * Supports Software Parser, Checksums and VLAN @@ -2277,12 +2444,12 @@ mlx5_tx_cseg_init(struct mlx5_txq_data *restrict txq, * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_eseg_none(struct mlx5_txq_data *restrict txq __rte_unused, - struct mlx5_txq_local *restrict loc, - struct mlx5_wqe *restrict wqe, +mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, + struct mlx5_txq_local *__rte_restrict loc, + struct mlx5_wqe *__rte_restrict wqe, unsigned int olx) { - struct mlx5_wqe_eseg *restrict es = &wqe->eseg; + struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; uint32_t csum; /* @@ -2335,13 +2502,13 @@ mlx5_tx_eseg_none(struct mlx5_txq_data *restrict txq __rte_unused, * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_eseg_dmin(struct mlx5_txq_data *restrict txq __rte_unused, - struct mlx5_txq_local *restrict loc, - struct mlx5_wqe *restrict wqe, +mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, + struct mlx5_txq_local *__rte_restrict loc, + struct mlx5_wqe *__rte_restrict wqe, unsigned int vlan, unsigned int olx) { - struct mlx5_wqe_eseg *restrict es = &wqe->eseg; + struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; uint32_t csum; uint8_t *psrc, *pdst; @@ -2419,15 +2586,15 @@ mlx5_tx_eseg_dmin(struct mlx5_txq_data *restrict txq __rte_unused, * Pointer to the next Data Segment (aligned and wrapped around). */ static __rte_always_inline struct mlx5_wqe_dseg * -mlx5_tx_eseg_data(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, - struct mlx5_wqe *restrict wqe, +mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, + struct mlx5_wqe *__rte_restrict wqe, unsigned int vlan, unsigned int inlen, unsigned int tso, unsigned int olx) { - struct mlx5_wqe_eseg *restrict es = &wqe->eseg; + struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; uint32_t csum; uint8_t *psrc, *pdst; unsigned int part; @@ -2545,7 +2712,7 @@ mlx5_tx_eseg_data(struct mlx5_txq_data *restrict txq, */ static __rte_always_inline unsigned int mlx5_tx_mseg_memcpy(uint8_t *pdst, - struct mlx5_txq_local *restrict loc, + struct mlx5_txq_local *__rte_restrict loc, unsigned int len, unsigned int must, unsigned int olx __rte_unused) @@ -2642,15 +2809,15 @@ mlx5_tx_mseg_memcpy(uint8_t *pdst, * wrapping check on its own). 
*/ static __rte_always_inline struct mlx5_wqe_dseg * -mlx5_tx_eseg_mdat(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, - struct mlx5_wqe *restrict wqe, +mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, + struct mlx5_wqe *__rte_restrict wqe, unsigned int vlan, unsigned int inlen, unsigned int tso, unsigned int olx) { - struct mlx5_wqe_eseg *restrict es = &wqe->eseg; + struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; uint32_t csum; uint8_t *pdst; unsigned int part, tlen = 0; @@ -2746,9 +2913,9 @@ mlx5_tx_eseg_mdat(struct mlx5_txq_data *restrict txq, * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_dseg_ptr(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, - struct mlx5_wqe_dseg *restrict dseg, +mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, + struct mlx5_wqe_dseg *__rte_restrict dseg, uint8_t *buf, unsigned int len, unsigned int olx __rte_unused) @@ -2780,9 +2947,9 @@ mlx5_tx_dseg_ptr(struct mlx5_txq_data *restrict txq, * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_dseg_iptr(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, - struct mlx5_wqe_dseg *restrict dseg, +mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, + struct mlx5_wqe_dseg *__rte_restrict dseg, uint8_t *buf, unsigned int len, unsigned int olx __rte_unused) @@ -2856,9 +3023,9 @@ mlx5_tx_dseg_iptr(struct mlx5_txq_data *restrict txq, * last packet in the eMPW session. */ static __rte_always_inline struct mlx5_wqe_dseg * -mlx5_tx_dseg_empw(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc __rte_unused, - struct mlx5_wqe_dseg *restrict dseg, +mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc __rte_unused, + struct mlx5_wqe_dseg *__rte_restrict dseg, uint8_t *buf, unsigned int len, unsigned int olx __rte_unused) @@ -2866,8 +3033,14 @@ mlx5_tx_dseg_empw(struct mlx5_txq_data *restrict txq, unsigned int part; uint8_t *pdst; - dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); - pdst = &dseg->inline_data[0]; + if (!MLX5_TXOFF_CONFIG(MPW)) { + /* Store the descriptor byte counter for eMPW sessions. */ + dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); + pdst = &dseg->inline_data[0]; + } else { + /* The entire legacy MPW session counter is stored on close. */ + pdst = (uint8_t *)dseg; + } /* * The WQEBB space availability is checked by caller. * Here we should be aware of WQE ring buffer wraparound only. @@ -2879,7 +3052,8 @@ mlx5_tx_dseg_empw(struct mlx5_txq_data *restrict txq, len -= part; if (likely(!len)) { pdst += part; - pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); + if (!MLX5_TXOFF_CONFIG(MPW)) + pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); /* Note: no final wraparound check here. */ return (struct mlx5_wqe_dseg *)pdst; } @@ -2912,9 +3086,9 @@ mlx5_tx_dseg_empw(struct mlx5_txq_data *restrict txq, * Ring buffer wraparound check is needed. 
*/ static __rte_always_inline struct mlx5_wqe_dseg * -mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc __rte_unused, - struct mlx5_wqe_dseg *restrict dseg, +mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc __rte_unused, + struct mlx5_wqe_dseg *__rte_restrict dseg, uint8_t *buf, unsigned int len, unsigned int olx __rte_unused) @@ -2927,9 +3101,16 @@ mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, static_assert(MLX5_DSEG_MIN_INLINE_SIZE == (2 * RTE_ETHER_ADDR_LEN), "invalid Data Segment data size"); - dseg->bcount = rte_cpu_to_be_32((len + sizeof(struct rte_vlan_hdr)) | - MLX5_ETH_WQE_DATA_INLINE); - pdst = &dseg->inline_data[0]; + if (!MLX5_TXOFF_CONFIG(MPW)) { + /* Store the descriptor byte counter for eMPW sessions. */ + dseg->bcount = rte_cpu_to_be_32 + ((len + sizeof(struct rte_vlan_hdr)) | + MLX5_ETH_WQE_DATA_INLINE); + pdst = &dseg->inline_data[0]; + } else { + /* The entire legacy MPW session counter is stored on close. */ + pdst = (uint8_t *)dseg; + } memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); buf += MLX5_DSEG_MIN_INLINE_SIZE; pdst += MLX5_DSEG_MIN_INLINE_SIZE; @@ -2952,7 +3133,8 @@ mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, len -= part; if (likely(!len)) { pdst += part; - pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); + if (!MLX5_TXOFF_CONFIG(MPW)) + pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); /* Note: no final wraparound check here. */ return (struct mlx5_wqe_dseg *)pdst; } @@ -2992,15 +3174,15 @@ mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, * Actual size of built WQE in segments. */ static __rte_always_inline unsigned int -mlx5_tx_mseg_build(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, - struct mlx5_wqe *restrict wqe, +mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, + struct mlx5_wqe *__rte_restrict wqe, unsigned int vlan, unsigned int inlen, unsigned int tso, unsigned int olx __rte_unused) { - struct mlx5_wqe_dseg *restrict dseg; + struct mlx5_wqe_dseg *__rte_restrict dseg; unsigned int ds; MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); @@ -3083,6 +3265,59 @@ dseg_done: return ds; } +/** + * The routine checks timestamp flag in the current packet, + * and push WAIT WQE into the queue if scheduling is required. + * + * @param txq + * Pointer to TX queue structure. + * @param loc + * Pointer to burst routine local context. + * @param olx + * Configured Tx offloads mask. It is fully defined at + * compile time and may be used for optimization. + * + * @return + * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. + * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. + * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. + * Local context variables partially updated. + */ +static __rte_always_inline enum mlx5_txcmp_code +mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, + struct mlx5_txq_local *restrict loc, + unsigned int olx) +{ + if (MLX5_TXOFF_CONFIG(TXPP) && + loc->mbuf->ol_flags & txq->ts_mask) { + struct mlx5_wqe *wqe; + uint64_t ts; + int32_t wci; + + /* + * Estimate the required space quickly and roughly. + * We would like to ensure the packet can be pushed + * to the queue and we won't get the orphan WAIT WQE. + */ + if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || + loc->elts_free < NB_SEGS(loc->mbuf)) + return MLX5_TXCMP_CODE_EXIT; + /* Convert the timestamp into completion to wait. 
*/ + ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); + wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); + if (unlikely(wci < 0)) + return MLX5_TXCMP_CODE_SINGLE; + /* Build the WAIT WQE with specified completion. */ + wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); + mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); + mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); + ++txq->wqe_ci; + --loc->wqe_free; + return MLX5_TXCMP_CODE_MULTI; + } + return MLX5_TXCMP_CODE_SINGLE; +} + /** * Tx one packet function for multi-segment TSO. Supports all * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, @@ -3105,13 +3340,23 @@ dseg_done: * Local context variables partially updated. */ static __rte_always_inline enum mlx5_txcmp_code -mlx5_tx_packet_multi_tso(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, +mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx) { - struct mlx5_wqe *restrict wqe; + struct mlx5_wqe *__rte_restrict wqe; unsigned int ds, dlen, inlen, ntcp, vlan = 0; + if (MLX5_TXOFF_CONFIG(TXPP)) { + enum mlx5_txcmp_code wret; + + /* Generate WAIT for scheduling if requested. */ + wret = mlx5_tx_schedule_send(txq, loc, olx); + if (wret == MLX5_TXCMP_CODE_EXIT) + return MLX5_TXCMP_CODE_EXIT; + if (wret == MLX5_TXCMP_CODE_ERROR) + return MLX5_TXCMP_CODE_ERROR; + } /* * Calculate data length to be inlined to estimate * the required space in WQE ring buffer. @@ -3194,15 +3439,25 @@ mlx5_tx_packet_multi_tso(struct mlx5_txq_data *restrict txq, * Local context variables partially updated. */ static __rte_always_inline enum mlx5_txcmp_code -mlx5_tx_packet_multi_send(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, +mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx) { - struct mlx5_wqe_dseg *restrict dseg; - struct mlx5_wqe *restrict wqe; + struct mlx5_wqe_dseg *__rte_restrict dseg; + struct mlx5_wqe *__rte_restrict wqe; unsigned int ds, nseg; MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); + if (MLX5_TXOFF_CONFIG(TXPP)) { + enum mlx5_txcmp_code wret; + + /* Generate WAIT for scheduling if requested. */ + wret = mlx5_tx_schedule_send(txq, loc, olx); + if (wret == MLX5_TXCMP_CODE_EXIT) + return MLX5_TXCMP_CODE_EXIT; + if (wret == MLX5_TXCMP_CODE_ERROR) + return MLX5_TXCMP_CODE_ERROR; + } /* * No inline at all, it means the CPU cycles saving * is prioritized at configuration, we should not @@ -3302,15 +3557,25 @@ mlx5_tx_packet_multi_send(struct mlx5_txq_data *restrict txq, * Local context variables partially updated. */ static __rte_always_inline enum mlx5_txcmp_code -mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, +mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx) { - struct mlx5_wqe *restrict wqe; + struct mlx5_wqe *__rte_restrict wqe; unsigned int ds, inlen, dlen, vlan = 0; MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); + if (MLX5_TXOFF_CONFIG(TXPP)) { + enum mlx5_txcmp_code wret; + + /* Generate WAIT for scheduling if requested. */ + wret = mlx5_tx_schedule_send(txq, loc, olx); + if (wret == MLX5_TXCMP_CODE_EXIT) + return MLX5_TXCMP_CODE_EXIT; + if (wret == MLX5_TXCMP_CODE_ERROR) + return MLX5_TXCMP_CODE_ERROR; + } /* * First calculate data length to be inlined * to estimate the required space for WQE. 
@@ -3467,10 +3732,10 @@ do_align: * Local context variables updated. */ static __rte_always_inline enum mlx5_txcmp_code -mlx5_tx_burst_mseg(struct mlx5_txq_data *restrict txq, - struct rte_mbuf **restrict pkts, +mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, + struct rte_mbuf **__rte_restrict pkts, unsigned int pkts_n, - struct mlx5_txq_local *restrict loc, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx) { MLX5_ASSERT(loc->elts_free && loc->wqe_free); @@ -3556,10 +3821,10 @@ mlx5_tx_burst_mseg(struct mlx5_txq_data *restrict txq, * Local context variables updated. */ static __rte_always_inline enum mlx5_txcmp_code -mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, - struct rte_mbuf **restrict pkts, +mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, + struct rte_mbuf **__rte_restrict pkts, unsigned int pkts_n, - struct mlx5_txq_local *restrict loc, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx) { MLX5_ASSERT(loc->elts_free && loc->wqe_free); @@ -3567,12 +3832,22 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; for (;;) { - struct mlx5_wqe_dseg *restrict dseg; - struct mlx5_wqe *restrict wqe; + struct mlx5_wqe_dseg *__rte_restrict dseg; + struct mlx5_wqe *__rte_restrict wqe; unsigned int ds, dlen, hlen, ntcp, vlan = 0; uint8_t *dptr; MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); + if (MLX5_TXOFF_CONFIG(TXPP)) { + enum mlx5_txcmp_code wret; + + /* Generate WAIT for scheduling if requested. */ + wret = mlx5_tx_schedule_send(txq, loc, olx); + if (wret == MLX5_TXCMP_CODE_EXIT) + return MLX5_TXCMP_CODE_EXIT; + if (wret == MLX5_TXCMP_CODE_ERROR) + return MLX5_TXCMP_CODE_ERROR; + } dlen = rte_pktmbuf_data_len(loc->mbuf); if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { @@ -3680,8 +3955,8 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. */ static __rte_always_inline enum mlx5_txcmp_code -mlx5_tx_able_to_empw(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, +mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx, bool newp) { @@ -3735,9 +4010,9 @@ mlx5_tx_able_to_empw(struct mlx5_txq_data *restrict txq, * false - no match, eMPW should be restarted. */ static __rte_always_inline bool -mlx5_tx_match_empw(struct mlx5_txq_data *restrict txq __rte_unused, - struct mlx5_wqe_eseg *restrict es, - struct mlx5_txq_local *restrict loc, +mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_wqe_eseg *__rte_restrict es, + struct mlx5_txq_local *__rte_restrict loc, uint32_t dlen, unsigned int olx) { @@ -3764,6 +4039,10 @@ mlx5_tx_match_empw(struct mlx5_txq_data *restrict txq __rte_unused, /* There must be no VLAN packets in eMPW loop. */ if (MLX5_TXOFF_CONFIG(VLAN)) MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT)); + /* Check if the scheduling is requested. */ + if (MLX5_TXOFF_CONFIG(TXPP) && + loc->mbuf->ol_flags & txq->ts_mask) + return false; return true; } @@ -3789,8 +4068,8 @@ mlx5_tx_match_empw(struct mlx5_txq_data *restrict txq __rte_unused, * false - no match, eMPW should be restarted. 
*/ static __rte_always_inline void -mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, +mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, unsigned int ds, unsigned int slen, unsigned int olx __rte_unused) @@ -3823,6 +4102,8 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, * Total size of descriptor/data in bytes. * @param slen * Accumulated statistics, data bytes sent. + * @param wqem + * The base WQE for the eMPW/MPW descriptor. * @param olx * Configured Tx offloads mask. It is fully defined at * compile time and may be used for optimization. @@ -3832,24 +4113,44 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, * false - no match, eMPW should be restarted. */ static __rte_always_inline void -mlx5_tx_idone_empw(struct mlx5_txq_data *restrict txq, - struct mlx5_txq_local *restrict loc, +mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, + struct mlx5_txq_local *__rte_restrict loc, unsigned int len, unsigned int slen, + struct mlx5_wqe *__rte_restrict wqem, unsigned int olx __rte_unused) { + struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; + MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); - MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); #ifdef MLX5_PMD_SOFT_COUNTERS /* Update sent data bytes counter. */ txq->stats.obytes += slen; #else (void)slen; #endif - len = len / MLX5_WSEG_SIZE + 2; - loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); + if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { + /* + * If the legacy MPW session contains the inline packets + * we should set the only inline data segment length + * and align the total length to the segment size. + */ + MLX5_ASSERT(len > sizeof(dseg->bcount)); + dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | + MLX5_ETH_WQE_DATA_INLINE); + len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; + } else { + /* + * The session is not legacy MPW or contains the + * data buffer pointer segments. + */ + MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); + len = len / MLX5_WSEG_SIZE + 2; + } + wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); txq->wqe_ci += (len + 3) / 4; loc->wqe_free -= (len + 3) / 4; + loc->wqe_last = wqem; } /** @@ -3900,10 +4201,10 @@ mlx5_tx_idone_empw(struct mlx5_txq_data *restrict txq, * No VLAN insertion is supported. */ static __rte_always_inline enum mlx5_txcmp_code -mlx5_tx_burst_empw_simple(struct mlx5_txq_data *restrict txq, - struct rte_mbuf **restrict pkts, +mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, + struct rte_mbuf **__rte_restrict pkts, unsigned int pkts_n, - struct mlx5_txq_local *restrict loc, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx) { /* @@ -3919,14 +4220,24 @@ mlx5_tx_burst_empw_simple(struct mlx5_txq_data *restrict txq, pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; for (;;) { - struct mlx5_wqe_dseg *restrict dseg; - struct mlx5_wqe_eseg *restrict eseg; + struct mlx5_wqe_dseg *__rte_restrict dseg; + struct mlx5_wqe_eseg *__rte_restrict eseg; enum mlx5_txcmp_code ret; unsigned int part, loop; unsigned int slen = 0; next_empw: MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); + if (MLX5_TXOFF_CONFIG(TXPP)) { + enum mlx5_txcmp_code wret; + + /* Generate WAIT for scheduling if requested. */ + wret = mlx5_tx_schedule_send(txq, loc, olx); + if (wret == MLX5_TXCMP_CODE_EXIT) + return MLX5_TXCMP_CODE_EXIT; + if (wret == MLX5_TXCMP_CODE_ERROR) + return MLX5_TXCMP_CODE_ERROR; + } part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 
MLX5_MPW_MAX_PACKETS : MLX5_EMPW_MAX_PACKETS); @@ -4022,6 +4333,7 @@ next_empw: * - metadata value * - software parser settings * - packets length (legacy MPW only) + * - scheduling is not required */ if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { MLX5_ASSERT(loop); @@ -4066,10 +4378,10 @@ next_empw: * with inlining, optionally supports VLAN insertion. */ static __rte_always_inline enum mlx5_txcmp_code -mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, - struct rte_mbuf **restrict pkts, +mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, + struct rte_mbuf **__rte_restrict pkts, unsigned int pkts_n, - struct mlx5_txq_local *restrict loc, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx) { /* @@ -4085,13 +4397,23 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; for (;;) { - struct mlx5_wqe_dseg *restrict dseg; - struct mlx5_wqe_eseg *restrict eseg; + struct mlx5_wqe_dseg *__rte_restrict dseg; + struct mlx5_wqe *__rte_restrict wqem; enum mlx5_txcmp_code ret; unsigned int room, part, nlim; unsigned int slen = 0; MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); + if (MLX5_TXOFF_CONFIG(TXPP)) { + enum mlx5_txcmp_code wret; + + /* Generate WAIT for scheduling if requested. */ + wret = mlx5_tx_schedule_send(txq, loc, olx); + if (wret == MLX5_TXCMP_CODE_EXIT) + return MLX5_TXCMP_CODE_EXIT; + if (wret == MLX5_TXCMP_CODE_ERROR) + return MLX5_TXCMP_CODE_ERROR; + } /* * Limits the amount of packets in one WQE * to improve CQE latency generation. @@ -4105,26 +4427,34 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, return MLX5_TXCMP_CODE_EXIT; if (likely(pkts_n > 1)) rte_prefetch0(*pkts); - loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); + wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); /* * Build eMPW title WQEBB: * - Control Segment, eMPW opcode, zero DS * - Ethernet Segment, no inline */ - mlx5_tx_cseg_init(txq, loc, loc->wqe_last, 0, + mlx5_tx_cseg_init(txq, loc, wqem, 0, MLX5_OPCODE_ENHANCED_MPSW, olx); - mlx5_tx_eseg_none(txq, loc, loc->wqe_last, + mlx5_tx_eseg_none(txq, loc, wqem, olx & ~MLX5_TXOFF_CONFIG_VLAN); - eseg = &loc->wqe_last->eseg; - dseg = &loc->wqe_last->dseg[0]; + dseg = &wqem->dseg[0]; /* Store the packet length for legacy MPW. */ if (MLX5_TXOFF_CONFIG(MPW)) - eseg->mss = rte_cpu_to_be_16 - (rte_pktmbuf_data_len(loc->mbuf)); + wqem->eseg.mss = rte_cpu_to_be_16 + (rte_pktmbuf_data_len(loc->mbuf)); room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, loc->wqe_free) * MLX5_WQE_SIZE - MLX5_WQE_CSEG_SIZE - MLX5_WQE_ESEG_SIZE; + /* Limit the room for legacy MPW sessions for performance. */ + if (MLX5_TXOFF_CONFIG(MPW)) + room = RTE_MIN(room, + RTE_MAX(txq->inlen_empw + + sizeof(dseg->bcount) + + (MLX5_TXOFF_CONFIG(VLAN) ? + sizeof(struct rte_vlan_hdr) : 0), + MLX5_MPW_INLINE_MAX_PACKETS * + MLX5_WQE_DSEG_SIZE)); /* Build WQE till we have space, packets and resources. */ part = room; for (;;) { @@ -4148,15 +4478,36 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, * We have some successfully built * packet Data Segments to send. */ - mlx5_tx_idone_empw(txq, loc, part, slen, olx); + mlx5_tx_idone_empw(txq, loc, part, + slen, wqem, olx); return MLX5_TXCMP_CODE_ERROR; } /* Inline or not inline - that's the Question. 
*/ if (dlen > txq->inlen_empw || loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) goto pointer_empw; + if (MLX5_TXOFF_CONFIG(MPW)) { + if (dlen > txq->inlen_send) + goto pointer_empw; + tlen = dlen; + if (part == room) { + /* Open new inline MPW session. */ + tlen += sizeof(dseg->bcount); + dseg->bcount = RTE_BE32(0); + dseg = RTE_PTR_ADD + (dseg, sizeof(dseg->bcount)); + } else { + /* + * No pointer and inline descriptor + * intermix for legacy MPW sessions. + */ + if (wqem->dseg[0].bcount) + break; + } + } else { + tlen = sizeof(dseg->bcount) + dlen; + } /* Inline entire packet, optional VLAN insertion. */ - tlen = sizeof(dseg->bcount) + dlen; if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { /* @@ -4182,7 +4533,8 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, dseg = mlx5_tx_dseg_empw(txq, loc, dseg, dptr, dlen, olx); } - tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); + if (!MLX5_TXOFF_CONFIG(MPW)) + tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); MLX5_ASSERT(room >= tlen); room -= tlen; /* @@ -4192,6 +4544,14 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, rte_pktmbuf_free_seg(loc->mbuf); goto next_mbuf; pointer_empw: + /* + * No pointer and inline descriptor + * intermix for legacy MPW sessions. + */ + if (MLX5_TXOFF_CONFIG(MPW) && + part != room && + wqem->dseg[0].bcount == RTE_BE32(0)) + break; /* * Not inlinable VLAN packets are * proceeded outside of this routine. @@ -4220,7 +4580,8 @@ next_mbuf: * continue build descriptors. */ part -= room; - mlx5_tx_idone_empw(txq, loc, part, slen, olx); + mlx5_tx_idone_empw(txq, loc, part, + slen, wqem, olx); return MLX5_TXCMP_CODE_EXIT; } loc->mbuf = *pkts++; @@ -4234,7 +4595,8 @@ next_mbuf: */ if (ret == MLX5_TXCMP_CODE_MULTI) { part -= room; - mlx5_tx_idone_empw(txq, loc, part, slen, olx); + mlx5_tx_idone_empw(txq, loc, part, + slen, wqem, olx); if (unlikely(!loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; @@ -4243,7 +4605,8 @@ next_mbuf: MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); if (ret == MLX5_TXCMP_CODE_TSO) { part -= room; - mlx5_tx_idone_empw(txq, loc, part, slen, olx); + mlx5_tx_idone_empw(txq, loc, part, + slen, wqem, olx); if (unlikely(!loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; @@ -4251,7 +4614,8 @@ next_mbuf: } if (ret == MLX5_TXCMP_CODE_SINGLE) { part -= room; - mlx5_tx_idone_empw(txq, loc, part, slen, olx); + mlx5_tx_idone_empw(txq, loc, part, + slen, wqem, olx); if (unlikely(!loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; @@ -4260,7 +4624,8 @@ next_mbuf: if (ret != MLX5_TXCMP_CODE_EMPW) { MLX5_ASSERT(false); part -= room; - mlx5_tx_idone_empw(txq, loc, part, slen, olx); + mlx5_tx_idone_empw(txq, loc, part, + slen, wqem, olx); return MLX5_TXCMP_CODE_ERROR; } /* Check if we have minimal room left. */ @@ -4274,8 +4639,10 @@ next_mbuf: * - metadata value * - software parser settings * - packets length (legacy MPW only) + * - scheduling is not required */ - if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) + if (!mlx5_tx_match_empw(txq, &wqem->eseg, + loc, dlen, olx)) break; /* Packet attributes match, continue the same eMPW. */ if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) @@ -4289,7 +4656,7 @@ next_mbuf: part -= room; if (unlikely(!part)) return MLX5_TXCMP_CODE_EXIT; - mlx5_tx_idone_empw(txq, loc, part, slen, olx); + mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); if (unlikely(!loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; @@ -4303,10 +4670,10 @@ next_mbuf: * Data inlining and VLAN insertion are supported. 
*/ static __rte_always_inline enum mlx5_txcmp_code -mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, - struct rte_mbuf **restrict pkts, +mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, + struct rte_mbuf **__rte_restrict pkts, unsigned int pkts_n, - struct mlx5_txq_local *restrict loc, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx) { /* @@ -4318,10 +4685,20 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; for (;;) { - struct mlx5_wqe *restrict wqe; + struct mlx5_wqe *__rte_restrict wqe; enum mlx5_txcmp_code ret; MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); + if (MLX5_TXOFF_CONFIG(TXPP)) { + enum mlx5_txcmp_code wret; + + /* Generate WAIT for scheduling if requested. */ + wret = mlx5_tx_schedule_send(txq, loc, olx); + if (wret == MLX5_TXCMP_CODE_EXIT) + return MLX5_TXCMP_CODE_EXIT; + if (wret == MLX5_TXCMP_CODE_ERROR) + return MLX5_TXCMP_CODE_ERROR; + } if (MLX5_TXOFF_CONFIG(INLINE)) { unsigned int inlen, vlan = 0; @@ -4362,6 +4739,8 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, txq->inlen_mode) || (MLX5_TXOFF_CONFIG(MPW) && txq->inlen_mode)) { + if (inlen <= txq->inlen_send) + goto single_inline; /* * The hardware requires the * minimal inline data header. @@ -4378,6 +4757,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, } goto single_no_inline; } +single_inline: /* * Completely inlined packet data WQE: * - Control Segment, SEND opcode @@ -4416,7 +4796,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, * not contain inlined data for eMPW due to * segment shared for all packets. */ - struct mlx5_wqe_dseg *restrict dseg; + struct mlx5_wqe_dseg *__rte_restrict dseg; unsigned int ds; uint8_t *dptr; @@ -4579,10 +4959,10 @@ single_no_inline: } static __rte_always_inline enum mlx5_txcmp_code -mlx5_tx_burst_single(struct mlx5_txq_data *restrict txq, - struct rte_mbuf **restrict pkts, +mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, + struct rte_mbuf **__rte_restrict pkts, unsigned int pkts_n, - struct mlx5_txq_local *restrict loc, + struct mlx5_txq_local *__rte_restrict loc, unsigned int olx) { enum mlx5_txcmp_code ret; @@ -4633,8 +5013,8 @@ ordinary_send: * Number of packets successfully transmitted (<= pkts_n). */ static __rte_always_inline uint16_t -mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, - struct rte_mbuf **restrict pkts, +mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, + struct rte_mbuf **__rte_restrict pkts, uint16_t pkts_n, unsigned int olx) { @@ -4664,7 +5044,7 @@ send_loop: /* * Calculate the number of available resources - elts and WQEs. * There are two possible different scenarios: - * - no data inlining into WQEs, one WQEBB may contains upto + * - no data inlining into WQEs, one WQEBB may contains up to * four packets, in this case elts become scarce resource * - data inlining into WQEs, one packet may require multiple * WQEBBs, the WQEs become the limiting factor. @@ -5046,9 +5426,48 @@ MLX5_TXOFF_DECL(iv, MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA) +/* Generate routines with timestamp scheduling. 
*/ +MLX5_TXOFF_DECL(full_ts_nompw, + MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) + +MLX5_TXOFF_DECL(full_ts_nompwi, + MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | + MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | + MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | + MLX5_TXOFF_CONFIG_TXPP) + +MLX5_TXOFF_DECL(full_ts, + MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | + MLX5_TXOFF_CONFIG_EMPW) + +MLX5_TXOFF_DECL(full_ts_noi, + MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | + MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | + MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | + MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) + +MLX5_TXOFF_DECL(none_ts, + MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | + MLX5_TXOFF_CONFIG_EMPW) + +MLX5_TXOFF_DECL(mdi_ts, + MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | + MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) + +MLX5_TXOFF_DECL(mti_ts, + MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | + MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | + MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) + +MLX5_TXOFF_DECL(mtiv_ts, + MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | + MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | + MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | + MLX5_TXOFF_CONFIG_EMPW) + /* * Generate routines with Legacy Multi-Packet Write support. - * This mode is supported by ConnectX-4LX only and imposes + * This mode is supported by ConnectX-4 Lx only and imposes * offload limitations, not supported: * - ACL/Flows (metadata are becoming meaningless) * - WQE Inline headers @@ -5150,6 +5569,44 @@ MLX5_TXOFF_INFO(iv_empw, MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) +MLX5_TXOFF_INFO(full_ts_nompw, + MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) + +MLX5_TXOFF_INFO(full_ts_nompwi, + MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | + MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | + MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | + MLX5_TXOFF_CONFIG_TXPP) + +MLX5_TXOFF_INFO(full_ts, + MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | + MLX5_TXOFF_CONFIG_EMPW) + +MLX5_TXOFF_INFO(full_ts_noi, + MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | + MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | + MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | + MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) + +MLX5_TXOFF_INFO(none_ts, + MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | + MLX5_TXOFF_CONFIG_EMPW) + +MLX5_TXOFF_INFO(mdi_ts, + MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | + MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) + +MLX5_TXOFF_INFO(mti_ts, + MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | + MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | + MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) + +MLX5_TXOFF_INFO(mtiv_ts, + MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | + MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | + MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | + MLX5_TXOFF_CONFIG_EMPW) + MLX5_TXOFF_INFO(full, MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | @@ -5296,6 +5753,14 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) /* We should support VLAN insertion. 
*/ olx |= MLX5_TXOFF_CONFIG_VLAN; } + if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP && + rte_mbuf_dynflag_lookup + (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) > 0 && + rte_mbuf_dynfield_lookup + (RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) > 0) { + /* Offload configured, dynamic entities registered. */ + olx |= MLX5_TXOFF_CONFIG_TXPP; + } if (priv->txqs_n && (*priv->txqs)[0]) { struct mlx5_txq_data *txd = (*priv->txqs)[0]; @@ -5356,12 +5821,18 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) /* Does not meet requested offloads at all. */ continue; } + if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW) + /* Do not enable legacy MPW if not configured. */ + continue; if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW) /* Do not enable eMPW if not configured. */ continue; if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE) /* Do not enable inlining if not configured. */ continue; + if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP) + /* Do not enable scheduling if not configured. */ + continue; /* * Some routine meets the requirements. * Check whether it has minimal amount @@ -5406,6 +5877,8 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)"); if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA) DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)"); + if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TXPP) + DRV_LOG(DEBUG, "\tMETAD (tx Scheduling)"); if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) { if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW) DRV_LOG(DEBUG, "\tMPW (Legacy MPW)"); @@ -5414,3 +5887,94 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) } return txoff_func[m].func; } + +/** + * DPDK callback to get the TX queue information + * + * @param dev + * Pointer to the device structure. + * + * @param tx_queue_id + * Tx queue identificator. + * + * @param qinfo + * Pointer to the TX queue information structure. + * + * @return + * None. + */ + +void +mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, + struct rte_eth_txq_info *qinfo) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id]; + struct mlx5_txq_ctrl *txq_ctrl = + container_of(txq, struct mlx5_txq_ctrl, txq); + + if (!txq) + return; + qinfo->nb_desc = txq->elts_s; + qinfo->conf.tx_thresh.pthresh = 0; + qinfo->conf.tx_thresh.hthresh = 0; + qinfo->conf.tx_thresh.wthresh = 0; + qinfo->conf.tx_rs_thresh = 0; + qinfo->conf.tx_free_thresh = 0; + qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1; + qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads; +} + +/** + * DPDK callback to get the TX packet burst mode information + * + * @param dev + * Pointer to the device structure. + * + * @param tx_queue_id + * Tx queue identificatior. + * + * @param mode + * Pointer to the burts mode information. + * + * @return + * 0 as success, -EINVAL as failure. + */ + +int +mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, + uint16_t tx_queue_id __rte_unused, + struct rte_eth_burst_mode *mode) +{ + eth_tx_burst_t pkt_burst = dev->tx_pkt_burst; + unsigned int i, olx; + + for (i = 0; i < RTE_DIM(txoff_func); i++) { + if (pkt_burst == txoff_func[i].func) { + olx = txoff_func[i].olx; + snprintf(mode->info, sizeof(mode->info), + "%s%s%s%s%s%s%s%s%s", + (olx & MLX5_TXOFF_CONFIG_EMPW) ? + ((olx & MLX5_TXOFF_CONFIG_MPW) ? + "Legacy MPW" : "Enhanced MPW") : "No MPW", + (olx & MLX5_TXOFF_CONFIG_MULTI) ? + " + MULTI" : "", + (olx & MLX5_TXOFF_CONFIG_TSO) ? + " + TSO" : "", + (olx & MLX5_TXOFF_CONFIG_SWP) ? + " + SWP" : "", + (olx & MLX5_TXOFF_CONFIG_CSUM) ? 
+ " + CSUM" : "", + (olx & MLX5_TXOFF_CONFIG_INLINE) ? + " + INLINE" : "", + (olx & MLX5_TXOFF_CONFIG_VLAN) ? + " + VLAN" : "", + (olx & MLX5_TXOFF_CONFIG_METADATA) ? + " + METADATA" : "", + (olx & MLX5_TXOFF_CONFIG_TXPP) ? + " + TXPP" : ""); + return 0; + } + } + return -EINVAL; +}
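
The patch adds the query callbacks mlx5_rxq_info_get(), mlx5_rx_burst_mode_get(), mlx5_txq_info_get() and mlx5_tx_burst_mode_get(). A minimal, hypothetical application-side sketch (not part of this patch; port_id/queue_id and the helper name are placeholders) that reads the reported values back through the generic ethdev API could look like this:

#include <stdio.h>
#include <rte_ethdev.h>

/* Query the burst-mode strings and queue info served by the new
 * mlx5 callbacks via the generic ethdev entry points. */
static void
dump_mlx5_queue_info(uint16_t port_id, uint16_t queue_id)
{
	struct rte_eth_burst_mode mode;
	struct rte_eth_rxq_info rxq_info;
	struct rte_eth_txq_info txq_info;

	/* mlx5 Rx reports "Scalar", "Multi-Packet RQ" or "Vector ...". */
	if (rte_eth_rx_burst_mode_get(port_id, queue_id, &mode) == 0)
		printf("Rx burst mode: %s\n", mode.info);
	/* mlx5 Tx reports the MPW mode plus the compiled-in offload list. */
	if (rte_eth_tx_burst_mode_get(port_id, queue_id, &mode) == 0)
		printf("Tx burst mode: %s\n", mode.info);
	if (rte_eth_rx_queue_info_get(port_id, queue_id, &rxq_info) == 0)
		printf("Rx nb_desc: %u, scattered: %u\n",
		       rxq_info.nb_desc, rxq_info.scattered_rx);
	if (rte_eth_tx_queue_info_get(port_id, queue_id, &txq_info) == 0)
		printf("Tx nb_desc: %u\n", txq_info.nb_desc);
}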
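The mlx5_lro_update_tcp_hdr() hunk replaces "(tcp->data_off & 0xF) * 4" with "(tcp->data_off >> 4) * 4" because the TCP data offset is carried in the upper nibble of data_off, not the lower one. A tiny helper illustrating the corrected header-length computation (illustrative only, not part of the patch):

#include <rte_tcp.h>

/* TCP header length in bytes: data_off holds the offset (in 32-bit
 * words) in its upper 4 bits, so shift right by 4 before scaling. */
static inline unsigned int
tcp_hdr_len(const struct rte_tcp_hdr *tcp)
{
	return (unsigned int)(tcp->data_off >> 4) * 4;
}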
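Most of the new Tx code implements MLX5_TXOFF_CONFIG_TXPP: when DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP is enabled and the dynamic timestamp mbuf field and flag are registered, mlx5_tx_schedule_send() pushes a WAIT WQE built by mlx5_tx_wseg_init() ahead of the packet, and mlx5_select_tx_function() picks one of the *_ts routines. The following application-side sketch is hypothetical and not part of the patch: the helper names tx_timestamp_setup()/tx_at_time(), the 1/1 queue counts, and the interpretation of the stored value as the desired departure time in the unit expected by the PMD are all assumptions for illustration.

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>

static int ts_offset = -1;   /* dynamic timestamp field offset */
static uint64_t ts_flag;     /* dynamic "send on timestamp" flag mask */

/* Register the dynamic mbuf timestamp field/flag and request the
 * offload; mlx5_select_tx_function() above looks both of them up. */
static int
tx_timestamp_setup(uint16_t port_id, struct rte_eth_conf *conf)
{
	static const struct rte_mbuf_dynfield field_desc = {
		.name = RTE_MBUF_DYNFIELD_TIMESTAMP_NAME,
		.size = sizeof(uint64_t),
		.align = __alignof__(uint64_t),
	};
	static const struct rte_mbuf_dynflag flag_desc = {
		.name = RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME,
	};
	int off = rte_mbuf_dynfield_register(&field_desc);
	int bit = rte_mbuf_dynflag_register(&flag_desc);

	if (off < 0 || bit < 0)
		return -1;
	ts_offset = off;
	ts_flag = 1ULL << bit;
	conf->txmode.offloads |= DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP;
	return rte_eth_dev_configure(port_id, 1, 1, conf);
}

/* Stamp one mbuf with the requested departure time and send it;
 * the selected *_ts Tx routine emits a WAIT WQE before the packet. */
static uint16_t
tx_at_time(uint16_t port_id, struct rte_mbuf *m, uint64_t when)
{
	*RTE_MBUF_DYNFIELD(m, ts_offset, uint64_t *) = when;
	m->ol_flags |= ts_flag;
	return rte_eth_tx_burst(port_id, 0, &m, 1);
}

A real application would still set up and start the port as usual; for mlx5, the scheduling path is also expected to be enabled through the tx_pp device argument so the clock queue used by mlx5_txpp_convert_tx_ts() is created.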