X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=c76b9951bc4b9c02d29784088a2a075e97f89871;hb=18da3c854bb8105818dc23c36eecf3465596052a;hp=0b87be15b44f06e65ff59aaf2188214a06514bf7;hpb=ae3255bfd934be8f69ad4c960c755cddb817a12b;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 0b87be15b4..c76b9951bc 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -16,17 +16,15 @@ #include #include -#include -#include #include #include +#include "mlx5_autoconf.h" #include "mlx5_defs.h" #include "mlx5.h" #include "mlx5_mr.h" #include "mlx5_utils.h" #include "mlx5_rxtx.h" -#include "mlx5_autoconf.h" /* TX burst subroutines return codes. */ enum mlx5_txcmp_code { @@ -81,8 +79,59 @@ static uint16_t mlx5_tx_burst_##func(void *txq, \ #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx}, +/* static asserts */ +static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); +static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); +static_assert(MLX5_ESEG_MIN_INLINE_SIZE == + (sizeof(uint16_t) + + sizeof(rte_v128u32_t)), + "invalid Ethernet Segment data size"); +static_assert(MLX5_ESEG_MIN_INLINE_SIZE == + (sizeof(uint16_t) + + sizeof(struct rte_vlan_hdr) + + 2 * RTE_ETHER_ADDR_LEN), + "invalid Ethernet Segment data size"); +static_assert(MLX5_ESEG_MIN_INLINE_SIZE == + (sizeof(uint16_t) + + sizeof(rte_v128u32_t)), + "invalid Ethernet Segment data size"); +static_assert(MLX5_ESEG_MIN_INLINE_SIZE == + (sizeof(uint16_t) + + sizeof(struct rte_vlan_hdr) + + 2 * RTE_ETHER_ADDR_LEN), + "invalid Ethernet Segment data size"); +static_assert(MLX5_ESEG_MIN_INLINE_SIZE == + (sizeof(uint16_t) + + sizeof(rte_v128u32_t)), + "invalid Ethernet Segment data size"); +static_assert(MLX5_ESEG_MIN_INLINE_SIZE == + (sizeof(uint16_t) + + sizeof(struct rte_vlan_hdr) + + 2 * RTE_ETHER_ADDR_LEN), + "invalid Ethernet Segment data size"); +static_assert(MLX5_DSEG_MIN_INLINE_SIZE == + (2 * RTE_ETHER_ADDR_LEN), + "invalid Data Segment data size"); +static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); +static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); +static_assert((sizeof(struct rte_vlan_hdr) + + sizeof(struct rte_ether_hdr)) == + MLX5_ESEG_MIN_INLINE_SIZE, + "invalid min inline data size"); +static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <= + MLX5_DSEG_MAX, "invalid WQE max size"); +static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE, + "invalid WQE Control Segment size"); +static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE, + "invalid WQE Ethernet Segment size"); +static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE, + "invalid WQE Data Segment size"); +static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE, + "invalid WQE size"); + static __rte_always_inline uint32_t -rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe); +rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, + volatile struct mlx5_mini_cqe8 *mcqe); static __rte_always_inline int mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, @@ -93,11 +142,8 @@ rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); static __rte_always_inline void rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, - volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res); - -static __rte_always_inline void -mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, - const unsigned int strd_n); + volatile struct mlx5_cqe *cqe, + volatile struct mlx5_mini_cqe8 *mcqe); 
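For context on the `mcqe` parameters threaded through the Rx prototypes above: with CQE compression enabled, only the first completion of a session is a full CQE; the following ones arrive as 8-byte mini-CQEs, and which fields a mini-CQE carries depends on the negotiated format (MLX5_CQE_RESP_FORMAT_HASH, _FTAG_STRIDX and _L34H_STRIDX appear in later hunks). The sketch below uses reduced stand-in types rather than the driver's real layouts and shows the selection rule the reworked helpers apply, here for the RSS hash field; all names ending in _sketch are invented for illustration.

#include <stdint.h>
#include <stdio.h>

/* Reduced stand-ins for the driver structures (illustration only). */
struct cqe_sketch       { uint32_t rx_hash_res; };
struct mini_cqe8_sketch { uint32_t rx_hash_result; };

enum mcqe_fmt_sketch { FMT_HASH, FMT_FTAG_STRIDX, FMT_L34H_STRIDX };

/* Read a field from the mini-CQE only when the negotiated format
 * actually carries it; otherwise fall back to the session's full CQE. */
static uint32_t
rx_hash_sketch(const struct cqe_sketch *cqe,
	       const struct mini_cqe8_sketch *mcqe,
	       enum mcqe_fmt_sketch fmt)
{
	if (mcqe == NULL || fmt != FMT_HASH)
		return cqe->rx_hash_res;
	return mcqe->rx_hash_result;
}

int main(void)
{
	struct cqe_sketch c = { .rx_hash_res = 0x1234 };
	struct mini_cqe8_sketch m = { .rx_hash_result = 0x5678 };

	/* Uncompressed completion: hash comes from the full CQE. */
	printf("0x%x\n", rx_hash_sketch(&c, NULL, FMT_HASH));
	/* Compressed hash-format session: hash comes from the mini-CQE. */
	printf("0x%x\n", rx_hash_sketch(&c, &m, FMT_HASH));
	return 0;
}

This mirrors the `mcqe == NULL || rxq->mcqe_format != ...` tests that rxq_cq_to_mbuf(), rxq_cq_to_pkt_type() and mlx5_lro_update_hdr() perform in the hunks below.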
static int mlx5_queue_state_modify(struct rte_eth_dev *dev, @@ -106,12 +152,13 @@ mlx5_queue_state_modify(struct rte_eth_dev *dev, static inline void mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, volatile struct mlx5_cqe *__rte_restrict cqe, - uint32_t phcsum); + uint32_t phcsum, uint8_t l4_type); static inline void mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, volatile struct mlx5_cqe *__rte_restrict cqe, - uint32_t len); + volatile struct mlx5_mini_cqe8 *mcqe, + struct mlx5_rxq_data *rxq, uint32_t len); uint32_t mlx5_ptype_table[] __rte_cache_aligned = { [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ @@ -466,13 +513,15 @@ rx_queue_count(struct mlx5_rxq_data *rxq) struct rxq_zip *zip = &rxq->zip; volatile struct mlx5_cqe *cqe; const unsigned int cqe_n = (1 << rxq->cqe_n); + const unsigned int sges_n = (1 << rxq->sges_n); + const unsigned int elts_n = (1 << rxq->elts_n); + const unsigned int strd_n = (1 << rxq->strd_num_n); const unsigned int cqe_cnt = cqe_n - 1; - unsigned int cq_ci; - unsigned int used; + unsigned int cq_ci, used; /* if we are processing a compressed cqe */ if (zip->ai) { - used = zip->cqe_cnt - zip->ca; + used = zip->cqe_cnt - zip->ai; cq_ci = zip->cq_ci; } else { used = 0; @@ -492,7 +541,7 @@ rx_queue_count(struct mlx5_rxq_data *rxq) used += n; cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; } - used = RTE_MIN(used, (1U << rxq->elts_n) - 1); + used = RTE_MIN(used * sges_n, elts_n * strd_n); return used; } @@ -515,11 +564,12 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) container_of(rxq, struct mlx5_rxq_ctrl, rxq); struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv); - if (dev->rx_pkt_burst != mlx5_rx_burst) { + if (dev->rx_pkt_burst == NULL || + dev->rx_pkt_burst == removed_rx_burst) { rte_errno = ENOTSUP; return -rte_errno; } - if (offset >= (1 << rxq->elts_n)) { + if (offset >= (1 << rxq->cqe_n)) { rte_errno = EINVAL; return -rte_errno; } @@ -555,7 +605,7 @@ mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id, if (!rxq) return; - qinfo->mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? + qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ? rxq->mprq_mp : rxq->mp; qinfo->conf.rx_thresh.pthresh = 0; qinfo->conf.rx_thresh.hthresh = 0; @@ -565,7 +615,9 @@ mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id, qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1; qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; qinfo->scattered_rx = dev->data->scattered_rx; - qinfo->nb_desc = 1 << rxq->elts_n; + qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ? 
+ (1 << rxq->elts_n) * (1 << rxq->strd_num_n) : + (1 << rxq->elts_n); } /** @@ -590,7 +642,14 @@ mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, struct rte_eth_burst_mode *mode) { eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_rxq_data *rxq; + rxq = (*priv->rxqs)[rx_queue_id]; + if (!rxq) { + rte_errno = EINVAL; + return -rte_errno; + } if (pkt_burst == mlx5_rx_burst) { snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); } else if (pkt_burst == mlx5_rx_burst_mprq) { @@ -604,6 +663,16 @@ mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); #else return -EINVAL; +#endif + } else if (pkt_burst == mlx5_rx_burst_mprq_vec) { +#if defined RTE_ARCH_X86_64 + snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE"); +#elif defined RTE_ARCH_ARM64 + snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon"); +#elif defined RTE_ARCH_PPC_64 + snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec"); +#else + return -EINVAL; #endif } else { return -EINVAL; @@ -630,7 +699,8 @@ mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_rxq_data *rxq; - if (dev->rx_pkt_burst != mlx5_rx_burst) { + if (dev->rx_pkt_burst == NULL || + dev->rx_pkt_burst == removed_rx_burst) { rte_errno = ENOTSUP; return -rte_errno; } @@ -805,12 +875,19 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, * Packet type for struct rte_mbuf. */ static inline uint32_t -rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) +rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, + volatile struct mlx5_mini_cqe8 *mcqe) { uint8_t idx; - uint8_t pinfo = cqe->pkt_info; - uint16_t ptype = cqe->hdr_type_etc; + uint8_t ptype; + uint8_t pinfo = (cqe->pkt_info & 0x3) << 6; + /* Get l3/l4 header from mini-CQE in case L3/L4 format*/ + if (mcqe == NULL || + rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) + ptype = (cqe->hdr_type_etc & 0xfc00) >> 10; + else + ptype = mcqe->hdr_type >> 2; /* * The index to the array should have: * bit[1:0] = l3_hdr_type @@ -819,7 +896,7 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) * bit[6] = tunneled * bit[7] = outer_l3_type */ - idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10); + idx = pinfo | ptype; return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); } @@ -871,6 +948,8 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) rxq->zip = (struct rxq_zip){ .ai = 0, }; + rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ? + (wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0; /* Update doorbell counter. */ rxq->rq_ci = wqe_n >> rxq->sges_n; rte_io_wmb(); @@ -901,30 +980,7 @@ mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); - if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV) { - struct ibv_wq_attr mod = { - .attr_mask = IBV_WQ_ATTR_STATE, - .wq_state = sm->state, - }; - - ret = mlx5_glue->modify_wq(rxq_ctrl->obj->wq, &mod); - } else { /* rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ. 
*/ - struct mlx5_devx_modify_rq_attr rq_attr; - - memset(&rq_attr, 0, sizeof(rq_attr)); - if (sm->state == IBV_WQS_RESET) { - rq_attr.rq_state = MLX5_RQC_STATE_ERR; - rq_attr.state = MLX5_RQC_STATE_RST; - } else if (sm->state == IBV_WQS_RDY) { - rq_attr.rq_state = MLX5_RQC_STATE_RST; - rq_attr.state = MLX5_RQC_STATE_RDY; - } else if (sm->state == IBV_WQS_ERR) { - rq_attr.rq_state = MLX5_RQC_STATE_RDY; - rq_attr.state = MLX5_RQC_STATE_ERR; - } - ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, - &rq_attr); - } + ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, sm->state); if (ret) { DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s", sm->state, strerror(errno)); @@ -936,79 +992,11 @@ mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); - if (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ) { - struct mlx5_devx_modify_sq_attr msq_attr = { 0 }; - - /* Change queue state to reset. */ - msq_attr.sq_state = MLX5_SQC_STATE_ERR; - msq_attr.state = MLX5_SQC_STATE_RST; - ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq_devx, - &msq_attr); - if (ret) { - DRV_LOG(ERR, "Cannot change the " - "Tx QP state to RESET %s", - strerror(errno)); - rte_errno = errno; - return ret; - } - /* Change queue state to ready. */ - msq_attr.sq_state = MLX5_SQC_STATE_RST; - msq_attr.state = MLX5_SQC_STATE_RDY; - ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq_devx, - &msq_attr); - if (ret) { - DRV_LOG(ERR, "Cannot change the " - "Tx QP state to READY %s", - strerror(errno)); - rte_errno = errno; - return ret; - } - } else { - struct ibv_qp_attr mod = { - .qp_state = IBV_QPS_RESET, - .port_num = (uint8_t)priv->dev_port, - }; - struct ibv_qp *qp = txq_ctrl->obj->qp; - - MLX5_ASSERT - (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_IBV); - - ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); - if (ret) { - DRV_LOG(ERR, "Cannot change the " - "Tx QP state to RESET %s", - strerror(errno)); - rte_errno = errno; - return ret; - } - mod.qp_state = IBV_QPS_INIT; - ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); - if (ret) { - DRV_LOG(ERR, "Cannot change the " - "Tx QP state to INIT %s", - strerror(errno)); - rte_errno = errno; - return ret; - } - mod.qp_state = IBV_QPS_RTR; - ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); - if (ret) { - DRV_LOG(ERR, "Cannot change the " - "Tx QP state to RTR %s", - strerror(errno)); - rte_errno = errno; - return ret; - } - mod.qp_state = IBV_QPS_RTS; - ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); - if (ret) { - DRV_LOG(ERR, "Cannot change the " - "Tx QP state to RTS %s", - strerror(errno)); - rte_errno = errno; - return ret; - } - } + ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj, + MLX5_TXQ_MOD_ERR2RDY, + (uint8_t)priv->dev_port); + if (ret) + return ret; } return 0; } @@ -1065,7 +1053,8 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) { const uint16_t cqe_n = 1 << rxq->cqe_n; const uint16_t cqe_mask = cqe_n - 1; - const unsigned int wqe_n = 1 << rxq->elts_n; + const uint16_t wqe_n = 1 << rxq->elts_n; + const uint16_t strd_n = 1 << rxq->strd_num_n; struct mlx5_rxq_ctrl *rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); union { @@ -1129,21 +1118,27 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) &sm)) return -1; if (vec) { - const uint16_t q_mask = wqe_n - 1; - uint16_t elt_idx; + const uint32_t elts_n = + mlx5_rxq_mprq_enabled(rxq) ? + wqe_n * strd_n : wqe_n; + const uint32_t e_mask = elts_n - 1; + uint32_t elts_ci = + mlx5_rxq_mprq_enabled(rxq) ? 
+ rxq->elts_ci : rxq->rq_ci; + uint32_t elt_idx; struct rte_mbuf **elt; int i; - unsigned int n = wqe_n - (rxq->rq_ci - + unsigned int n = elts_n - (elts_ci - rxq->rq_pi); for (i = 0; i < (int)n; ++i) { - elt_idx = (rxq->rq_ci + i) & q_mask; + elt_idx = (elts_ci + i) & e_mask; elt = &(*rxq->elts)[elt_idx]; *elt = rte_mbuf_raw_alloc(rxq->mp); if (!*elt) { for (i--; i >= 0; --i) { - elt_idx = (rxq->rq_ci + - i) & q_mask; + elt_idx = (elts_ci + + i) & elts_n; elt = &(*rxq->elts) [elt_idx]; rte_pktmbuf_free_seg @@ -1152,7 +1147,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) return -1; } } - for (i = 0; i < (int)wqe_n; ++i) { + for (i = 0; i < (int)elts_n; ++i) { elt = &(*rxq->elts)[i]; DATA_LEN(*elt) = (uint16_t)((*elt)->buf_len - @@ -1160,7 +1155,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) } /* Padding with a fake mbuf for vec Rx. */ for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) - (*rxq->elts)[wqe_n + i] = + (*rxq->elts)[elts_n + i] = &rxq->fake_mbuf; } mlx5_rxq_initialize(rxq); @@ -1205,8 +1200,8 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, (volatile struct mlx5_mini_cqe8 (*)[8]) (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info); - - len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); + len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt & + rxq->byte_mask); *mcqe = &(*mc)[zip->ai & 7]; if ((++zip->ai & 7) == 0) { /* Invalidate consumed CQEs */ @@ -1245,6 +1240,7 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, } else { int ret; int8_t op_own; + uint32_t cq_ci; ret = check_cqe(cqe, cqe_n, rxq->cq_ci); if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { @@ -1258,14 +1254,19 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, return 0; } } - ++rxq->cq_ci; + /* + * Introduce the local variable to have queue cq_ci + * index in queue structure always consistent with + * actual CQE boundary (not pointing to the middle + * of compressed CQE session). + */ + cq_ci = rxq->cq_ci + 1; op_own = cqe->op_own; if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { volatile struct mlx5_mini_cqe8 (*mc)[8] = (volatile struct mlx5_mini_cqe8 (*)[8]) (uintptr_t)(&(*rxq->cqes) - [rxq->cq_ci & - cqe_cnt].pkt_info); + [cq_ci & cqe_cnt].pkt_info); /* Fix endianness. */ zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); @@ -1278,13 +1279,13 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, * 7 CQEs after the initial CQE instead of 8 * for subsequent ones. */ - zip->ca = rxq->cq_ci; + zip->ca = cq_ci; zip->na = zip->ca + 7; /* Compute the next non compressed CQE. */ - --rxq->cq_ci; zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; /* Get packet size to return. */ - len = rte_be_to_cpu_32((*mc)[0].byte_cnt); + len = rte_be_to_cpu_32((*mc)[0].byte_cnt & + rxq->byte_mask); *mcqe = &(*mc)[0]; zip->ai = 1; /* Prefetch all to be invalidated */ @@ -1296,6 +1297,7 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, ++idx; } } else { + rxq->cq_ci = cq_ci; len = rte_be_to_cpu_32(cqe->byte_cnt); } } @@ -1348,43 +1350,78 @@ rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) */ static inline void rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, - volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res) + volatile struct mlx5_cqe *cqe, + volatile struct mlx5_mini_cqe8 *mcqe) { /* Update packet information. 
*/ - pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe); - if (rss_hash_res && rxq->rss_hash) { - pkt->hash.rss = rss_hash_res; - pkt->ol_flags |= PKT_RX_RSS_HASH; + pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe); + + if (rxq->rss_hash) { + uint32_t rss_hash_res = 0; + + /* If compressed, take hash result from mini-CQE. */ + if (mcqe == NULL || + rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH) + rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); + else + rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result); + if (rss_hash_res) { + pkt->hash.rss = rss_hash_res; + pkt->ol_flags |= PKT_RX_RSS_HASH; + } } - if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { - pkt->ol_flags |= PKT_RX_FDIR; - if (cqe->sop_drop_qpn != - rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) { - uint32_t mark = cqe->sop_drop_qpn; - - pkt->ol_flags |= PKT_RX_FDIR_ID; - pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); + if (rxq->mark) { + uint32_t mark = 0; + + /* If compressed, take flow tag from mini-CQE. */ + if (mcqe == NULL || + rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) + mark = cqe->sop_drop_qpn; + else + mark = ((mcqe->byte_cnt_flow & 0xff) << 8) | + (mcqe->flow_tag_high << 16); + if (MLX5_FLOW_MARK_IS_VALID(mark)) { + pkt->ol_flags |= PKT_RX_FDIR; + if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) { + pkt->ol_flags |= PKT_RX_FDIR_ID; + pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); + } } } - if (rxq->dynf_meta && cqe->flow_table_metadata) { - pkt->ol_flags |= rxq->flow_meta_mask; - *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = - cqe->flow_table_metadata; + if (rxq->dynf_meta) { + uint32_t meta = cqe->flow_table_metadata & + rxq->flow_meta_port_mask; + + if (meta) { + pkt->ol_flags |= rxq->flow_meta_mask; + *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, + uint32_t *) = meta; + } } if (rxq->csum) pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); - if (rxq->vlan_strip && - (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) { - pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; - pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); + if (rxq->vlan_strip) { + bool vlan_strip; + + if (mcqe == NULL || + rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) + vlan_strip = cqe->hdr_type_etc & + RTE_BE16(MLX5_CQE_VLAN_STRIPPED); + else + vlan_strip = mcqe->hdr_type & + RTE_BE16(MLX5_CQE_VLAN_STRIPPED); + if (vlan_strip) { + pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; + pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); + } } if (rxq->hw_timestamp) { uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); if (rxq->rt_timestamp) ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); - pkt->timestamp = ts; - pkt->ol_flags |= PKT_RX_TIMESTAMP; + mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts); + pkt->ol_flags |= rxq->timestamp_rx_flag; } } @@ -1422,7 +1459,6 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; struct rte_mbuf *rep = (*rxq->elts)[idx]; volatile struct mlx5_mini_cqe8 *mcqe = NULL; - uint32_t rss_hash_res; if (pkt) NEXT(seg) = rep; @@ -1430,7 +1466,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_prefetch0(seg); rte_prefetch0(cqe); rte_prefetch0(wqe); - rep = rte_mbuf_raw_alloc(rxq->mp); + /* Allocate the buf from the same pool. 
*/ + rep = rte_mbuf_raw_alloc(seg->pool); if (unlikely(rep == NULL)) { ++rxq->stats.rx_nombuf; if (!pkt) { @@ -1448,6 +1485,9 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_mbuf_raw_free(pkt); pkt = rep; } + rq_ci >>= sges_n; + ++rq_ci; + rq_ci <<= sges_n; break; } if (!pkt) { @@ -1460,18 +1500,14 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) pkt = seg; MLX5_ASSERT(len >= (rxq->crc_present << 2)); pkt->ol_flags &= EXT_ATTACHED_MBUF; - /* If compressed, take hash result from mini-CQE. */ - rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? - cqe->rx_hash_res : - mcqe->rx_hash_result); - rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); + rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); if (rxq->crc_present) len -= RTE_ETHER_CRC_LEN; PKT_LEN(pkt) = len; if (cqe->lro_num_seg > 1) { mlx5_lro_update_hdr (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, - len); + mcqe, rxq, len); pkt->ol_flags |= PKT_RX_LRO; pkt->tso_segsz = len / cqe->lro_num_seg; } @@ -1541,10 +1577,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) static inline void mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, volatile struct mlx5_cqe *__rte_restrict cqe, - uint32_t phcsum) + uint32_t phcsum, uint8_t l4_type) { - uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & - MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; /* * The HW calculates only the TCP payload checksum, need to complete * the TCP header checksum and the L3 pseudo-header checksum. @@ -1583,7 +1617,8 @@ mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, static inline void mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, volatile struct mlx5_cqe *__rte_restrict cqe, - uint32_t len) + volatile struct mlx5_mini_cqe8 *mcqe, + struct mlx5_rxq_data *rxq, uint32_t len) { union { struct rte_ether_hdr *eth; @@ -1597,6 +1632,7 @@ mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, }; uint16_t proto = h.eth->ether_type; uint32_t phcsum; + uint8_t l4_type; h.eth++; while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || @@ -1618,7 +1654,14 @@ mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); h.ipv6++; } - mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum); + if (mcqe == NULL || + rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) + l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & + MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; + else + l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) & + MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; + mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type); } void @@ -1641,31 +1684,6 @@ mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) mlx5_mprq_buf_free_cb(NULL, buf); } -static inline void -mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, - const unsigned int strd_n) -{ - struct mlx5_mprq_buf *rep = rxq->mprq_repl; - volatile struct mlx5_wqe_data_seg *wqe = - &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg; - void *addr; - - MLX5_ASSERT(rep != NULL); - /* Replace MPRQ buf. */ - (*rxq->mprq_bufs)[rq_idx] = rep; - /* Replace WQE. */ - addr = mlx5_mprq_buf_addr(rep, strd_n); - wqe->addr = rte_cpu_to_be_64((uintptr_t)addr); - /* If there's only one MR, no need to replace LKey in WQE. */ - if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) - wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr); - /* Stash a mbuf for next replacement. 
*/ - if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep))) - rxq->mprq_repl = rep; - else - rxq->mprq_repl = NULL; -} - /** * DPDK callback for RX with Multi-Packet RQ support. * @@ -1683,12 +1701,10 @@ uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct mlx5_rxq_data *rxq = dpdk_rxq; - const unsigned int strd_n = 1 << rxq->strd_num_n; - const unsigned int strd_sz = 1 << rxq->strd_sz_n; - const unsigned int strd_shift = - MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en; - const unsigned int cq_mask = (1 << rxq->cqe_n) - 1; - const unsigned int wq_mask = (1 << rxq->elts_n) - 1; + const uint32_t strd_n = 1 << rxq->strd_num_n; + const uint32_t strd_sz = 1 << rxq->strd_sz_n; + const uint32_t cq_mask = (1 << rxq->cqe_n) - 1; + const uint32_t wq_mask = (1 << rxq->elts_n) - 1; volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; unsigned int i = 0; uint32_t rq_ci = rxq->rq_ci; @@ -1697,37 +1713,17 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) while (i < pkts_n) { struct rte_mbuf *pkt; - void *addr; int ret; uint32_t len; uint16_t strd_cnt; uint16_t strd_idx; - uint32_t offset; uint32_t byte_cnt; - int32_t hdrm_overlap; volatile struct mlx5_mini_cqe8 *mcqe = NULL; - uint32_t rss_hash_res = 0; + enum mlx5_rqx_code rxq_code; if (consumed_strd == strd_n) { - /* Replace WQE only if the buffer is still in use. */ - if (__atomic_load_n(&buf->refcnt, - __ATOMIC_RELAXED) > 1) { - mprq_buf_replace(rxq, rq_ci & wq_mask, strd_n); - /* Release the old buffer. */ - mlx5_mprq_buf_free(buf); - } else if (unlikely(rxq->mprq_repl == NULL)) { - struct mlx5_mprq_buf *rep; - - /* - * Currently, the MPRQ mempool is out of buffer - * and doing memcpy regardless of the size of Rx - * packet. Retry allocation to get back to - * normal. - */ - if (!rte_mempool_get(rxq->mprq_mp, - (void **)&rep)) - rxq->mprq_repl = rep; - } + /* Replace WQE if the buffer is still in use. */ + mprq_buf_replace(rxq, rq_ci & wq_mask); /* Advance to the next WQE. */ consumed_strd = 0; ++rq_ci; @@ -1738,19 +1734,23 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) if (!ret) break; byte_cnt = ret; - strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> - MLX5_MPRQ_STRIDE_NUM_SHIFT; + len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; + MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); + if (rxq->crc_present) + len -= RTE_ETHER_CRC_LEN; + if (mcqe && + rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) + strd_cnt = (len / strd_sz) + !!(len % strd_sz); + else + strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> + MLX5_MPRQ_STRIDE_NUM_SHIFT; MLX5_ASSERT(strd_cnt); consumed_strd += strd_cnt; if (byte_cnt & MLX5_MPRQ_FILLER_MASK) continue; - if (mcqe == NULL) { - rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); - strd_idx = rte_be_to_cpu_16(cqe->wqe_counter); - } else { - /* mini-CQE for MPRQ doesn't have hash result. */ - strd_idx = rte_be_to_cpu_16(mcqe->stride_idx); - } + strd_idx = rte_be_to_cpu_16(mcqe == NULL ? 
+ cqe->wqe_counter : + mcqe->stride_idx); MLX5_ASSERT(strd_idx < strd_n); MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask)); @@ -1763,122 +1763,23 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); if (rxq->crc_present) len -= RTE_ETHER_CRC_LEN; - offset = strd_idx * strd_sz + strd_shift; - addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset); - hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz; - /* - * Memcpy packets to the target mbuf if: - * - The size of packet is smaller than mprq_max_memcpy_len. - * - Out of buffer in the Mempool for Multi-Packet RQ. - * - The packet's stride overlaps a headroom and scatter is off. - */ - if (len <= rxq->mprq_max_memcpy_len || - rxq->mprq_repl == NULL || - (hdrm_overlap > 0 && !rxq->strd_scatter_en)) { - if (likely(rte_pktmbuf_tailroom(pkt) >= len)) { - rte_memcpy(rte_pktmbuf_mtod(pkt, void *), - addr, len); - DATA_LEN(pkt) = len; - } else if (rxq->strd_scatter_en) { - struct rte_mbuf *prev = pkt; - uint32_t seg_len = - RTE_MIN(rte_pktmbuf_tailroom(pkt), len); - uint32_t rem_len = len - seg_len; - - rte_memcpy(rte_pktmbuf_mtod(pkt, void *), - addr, seg_len); - DATA_LEN(pkt) = seg_len; - while (rem_len) { - struct rte_mbuf *next = - rte_pktmbuf_alloc(rxq->mp); - - if (unlikely(next == NULL)) { - rte_pktmbuf_free(pkt); - ++rxq->stats.rx_nombuf; - goto out; - } - NEXT(prev) = next; - SET_DATA_OFF(next, 0); - addr = RTE_PTR_ADD(addr, seg_len); - seg_len = RTE_MIN - (rte_pktmbuf_tailroom(next), - rem_len); - rte_memcpy - (rte_pktmbuf_mtod(next, void *), - addr, seg_len); - DATA_LEN(next) = seg_len; - rem_len -= seg_len; - prev = next; - ++NB_SEGS(pkt); - } - } else { - rte_pktmbuf_free_seg(pkt); + rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf, + strd_idx, strd_cnt); + if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) { + rte_pktmbuf_free_seg(pkt); + if (rxq_code == MLX5_RXQ_CODE_DROPPED) { ++rxq->stats.idropped; continue; } - } else { - rte_iova_t buf_iova; - struct rte_mbuf_ext_shared_info *shinfo; - uint16_t buf_len = strd_cnt * strd_sz; - void *buf_addr; - - /* Increment the refcnt of the whole chunk. */ - __atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED); - MLX5_ASSERT(__atomic_load_n(&buf->refcnt, - __ATOMIC_RELAXED) <= strd_n + 1); - buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM); - /* - * MLX5 device doesn't use iova but it is necessary in a - * case where the Rx packet is transmitted via a - * different PMD. - */ - buf_iova = rte_mempool_virt2iova(buf) + - RTE_PTR_DIFF(buf_addr, buf); - shinfo = &buf->shinfos[strd_idx]; - rte_mbuf_ext_refcnt_set(shinfo, 1); - /* - * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when - * attaching the stride to mbuf and more offload flags - * will be added below by calling rxq_cq_to_mbuf(). - * Other fields will be overwritten. - */ - rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova, - buf_len, shinfo); - /* Set mbuf head-room. */ - SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM); - MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF); - MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >= - len - (hdrm_overlap > 0 ? hdrm_overlap : 0)); - DATA_LEN(pkt) = len; - /* - * Copy the last fragment of a packet (up to headroom - * size bytes) in case there is a stride overlap with - * a next packet's headroom. Allocate a separate mbuf - * to store this fragment and link it. Scatter is on. 
- */ - if (hdrm_overlap > 0) { - MLX5_ASSERT(rxq->strd_scatter_en); - struct rte_mbuf *seg = - rte_pktmbuf_alloc(rxq->mp); - - if (unlikely(seg == NULL)) { - rte_pktmbuf_free_seg(pkt); - ++rxq->stats.rx_nombuf; - break; - } - SET_DATA_OFF(seg, 0); - rte_memcpy(rte_pktmbuf_mtod(seg, void *), - RTE_PTR_ADD(addr, len - hdrm_overlap), - hdrm_overlap); - DATA_LEN(seg) = hdrm_overlap; - DATA_LEN(pkt) = len - hdrm_overlap; - NEXT(pkt) = seg; - NB_SEGS(pkt) = 2; + if (rxq_code == MLX5_RXQ_CODE_NOMBUF) { + ++rxq->stats.rx_nombuf; + break; } } - rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); + rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); if (cqe->lro_num_seg > 1) { - mlx5_lro_update_hdr(addr, cqe, len); + mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *), + cqe, mcqe, rxq, len); pkt->ol_flags |= PKT_RX_LRO; pkt->tso_segsz = len / cqe->lro_num_seg; } @@ -1892,7 +1793,6 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) *(pkts++) = pkt; ++i; } -out: /* Update the consumer indexes. */ rxq->consumed_strd = consumed_strd; rte_io_wmb(); @@ -1974,6 +1874,14 @@ mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, return 0; } +__rte_weak uint16_t +mlx5_rx_burst_mprq_vec(void *dpdk_txq __rte_unused, + struct rte_mbuf **pkts __rte_unused, + uint16_t pkts_n __rte_unused) +{ + return 0; +} + __rte_weak int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) { @@ -1989,6 +1897,8 @@ mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) /** * Free the mbufs from the linear array of pointers. * + * @param txq + * Pointer to Tx queue structure. * @param pkts * Pointer to array of packets to be free. * @param pkts_n @@ -1998,7 +1908,8 @@ mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) * compile time and may be used for optimization. */ static __rte_always_inline void -mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts, +mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, + struct rte_mbuf **__rte_restrict pkts, unsigned int pkts_n, unsigned int olx __rte_unused) { @@ -2014,6 +1925,16 @@ mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts, */ MLX5_ASSERT(pkts); MLX5_ASSERT(pkts_n); + /* + * Free mbufs directly to the pool in bulk + * if fast free offload is engaged + */ + if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { + mbuf = *pkts; + pool = mbuf->pool; + rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); + return; + } for (;;) { for (;;) { /* @@ -2090,6 +2011,18 @@ mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts, } } } +/* + * No inline version to free buffers for optimal call + * on the tx_burst completion. + */ +static __rte_noinline void +__mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, + struct rte_mbuf **__rte_restrict pkts, + unsigned int pkts_n, + unsigned int olx __rte_unused) +{ + mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); +} /** * Free the mbuf from the elts ring buffer till new tail. 
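The hunk above passes the Tx queue into mlx5_tx_free_mbuf() so the completion path can take the new fast-free shortcut. A minimal standalone sketch of that shortcut follows, assuming only the DEV_TX_OFFLOAD_MBUF_FAST_FREE contract (every transmitted mbuf is direct, has a reference count of one and comes from a single mempool); the helper name is hypothetical, not driver code.

#include <rte_mbuf.h>
#include <rte_mempool.h>

/*
 * Under the fast-free contract the whole completed burst can be
 * returned to the mempool in one bulk call instead of walking the
 * array with rte_pktmbuf_free_seg() per mbuf.
 */
static void
tx_completion_fast_free_sketch(struct rte_mbuf **pkts, unsigned int pkts_n)
{
	if (pkts_n == 0)
		return;
	/* By contract, every mbuf shares the first one's pool. */
	rte_mempool_put_bulk(pkts[0]->pool, (void **)pkts, pkts_n);
}

Note that the driver engages this path only when txq->fast_free is set and multi-segment support is compiled out of the burst routine (!MLX5_TXOFF_CONFIG(MULTI)), since the single-pool, single-segment guarantee cannot hold for chained mbufs.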
@@ -2122,7 +2055,8 @@ mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, part = RTE_MIN(part, n_elts); MLX5_ASSERT(part); MLX5_ASSERT(part <= txq->elts_s); - mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m], + mlx5_tx_free_mbuf(txq, + &txq->elts[txq->elts_tail & txq->elts_m], part, olx); txq->elts_tail += part; n_elts -= part; @@ -2220,8 +2154,6 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, bool ring_doorbell = false; int ret; - static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); - static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); do { volatile struct mlx5_cqe *cqe; @@ -2263,8 +2195,10 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, } /* Normal transmit completion. */ MLX5_ASSERT(txq->cq_ci != txq->cq_pi); +#ifdef RTE_LIBRTE_MLX5_DEBUG MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == cqe->wqe_counter); +#endif ring_doorbell = true; ++txq->cq_ci; last_cqe = cqe; @@ -2425,7 +2359,7 @@ mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); qs->max_index = rte_cpu_to_be_32(wci); - qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq->id); + qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); qs->reserved0 = RTE_BE32(0); qs->reserved1 = RTE_BE32(0); } @@ -2529,15 +2463,6 @@ mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; - static_assert(MLX5_ESEG_MIN_INLINE_SIZE == - (sizeof(uint16_t) + - sizeof(rte_v128u32_t)), - "invalid Ethernet Segment data size"); - static_assert(MLX5_ESEG_MIN_INLINE_SIZE == - (sizeof(uint16_t) + - sizeof(struct rte_vlan_hdr) + - 2 * RTE_ETHER_ADDR_LEN), - "invalid Ethernet Segment data size"); psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); es->inline_data = *(unaligned_uint16_t *)psrc; @@ -2622,15 +2547,6 @@ mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; - static_assert(MLX5_ESEG_MIN_INLINE_SIZE == - (sizeof(uint16_t) + - sizeof(rte_v128u32_t)), - "invalid Ethernet Segment data size"); - static_assert(MLX5_ESEG_MIN_INLINE_SIZE == - (sizeof(uint16_t) + - sizeof(struct rte_vlan_hdr) + - 2 * RTE_ETHER_ADDR_LEN), - "invalid Ethernet Segment data size"); psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); es->inline_hdr_sz = rte_cpu_to_be_16(inlen); es->inline_data = *(unaligned_uint16_t *)psrc; @@ -2845,15 +2761,6 @@ mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 
*RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; - static_assert(MLX5_ESEG_MIN_INLINE_SIZE == - (sizeof(uint16_t) + - sizeof(rte_v128u32_t)), - "invalid Ethernet Segment data size"); - static_assert(MLX5_ESEG_MIN_INLINE_SIZE == - (sizeof(uint16_t) + - sizeof(struct rte_vlan_hdr) + - 2 * RTE_ETHER_ADDR_LEN), - "invalid Ethernet Segment data size"); MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); pdst = (uint8_t *)&es->inline_data; if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { @@ -3100,9 +3007,6 @@ mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, uint8_t *pdst; MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); - static_assert(MLX5_DSEG_MIN_INLINE_SIZE == - (2 * RTE_ETHER_ADDR_LEN), - "invalid Data Segment data size"); if (!MLX5_TXOFF_CONFIG(MPW)) { /* Store the descriptor byte counter for eMPW sessions. */ dseg->bcount = rte_cpu_to_be_32 @@ -4218,7 +4122,6 @@ mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); MLX5_ASSERT(loc->elts_free && loc->wqe_free); MLX5_ASSERT(pkts_n > loc->pkts_sent); - static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; for (;;) { @@ -4395,7 +4298,6 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); MLX5_ASSERT(loc->elts_free && loc->wqe_free); MLX5_ASSERT(pkts_n > loc->pkts_sent); - static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; for (;;) { @@ -4540,10 +4442,25 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, MLX5_ASSERT(room >= tlen); room -= tlen; /* - * Packet data are completely inlined, - * free the packet immediately. + * Packet data are completely inline, + * we can try to free the packet. + */ + if (likely(loc->pkts_sent == loc->mbuf_free)) { + /* + * All the packets from the burst beginning + * are inline, we can free mbufs directly + * from the origin array on tx_burst exit(). + */ + loc->mbuf_free++; + goto next_mbuf; + } + /* + * In order no to call rte_pktmbuf_free_seg() here, + * in the most inner loop (that might be very + * expensive) we just save the mbuf in elts. */ - rte_pktmbuf_free_seg(loc->mbuf); + txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; + loc->elts_free--; goto next_mbuf; pointer_empw: /* @@ -4565,6 +4482,7 @@ pointer_empw: mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); /* We have to store mbuf in elts.*/ txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; + loc->elts_free--; room -= MLX5_WQE_DSEG_SIZE; /* Ring buffer wraparound is checked at the loop end.*/ ++dseg; @@ -4574,7 +4492,6 @@ next_mbuf: slen += dlen; #endif loc->pkts_sent++; - loc->elts_free--; pkts_n--; if (unlikely(!pkts_n || !loc->elts_free)) { /* @@ -4709,10 +4626,6 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { vlan = sizeof(struct rte_vlan_hdr); inlen += vlan; - static_assert((sizeof(struct rte_vlan_hdr) + - sizeof(struct rte_ether_hdr)) == - MLX5_ESEG_MIN_INLINE_SIZE, - "invalid min inline data size"); } /* * If inlining is enabled at configuration time @@ -5028,6 +4941,8 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); if (unlikely(!pkts_n)) return 0; + if (MLX5_TXOFF_CONFIG(INLINE)) + loc.mbuf_free = 0; loc.pkts_sent = 0; loc.pkts_copy = 0; loc.wqe_last = NULL; @@ -5291,6 +5206,8 @@ burst_exit: /* Increment sent packets counter. 
*/ txq->stats.opackets += loc.pkts_sent; #endif + if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free) + __mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx); return loc.pkts_sent; } @@ -5715,16 +5632,6 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; unsigned int diff = 0, olx = 0, i, m; - static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <= - MLX5_DSEG_MAX, "invalid WQE max size"); - static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE, - "invalid WQE Control Segment size"); - static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE, - "invalid WQE Ethernet Segment size"); - static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE, - "invalid WQE Data Segment size"); - static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE, - "invalid WQE size"); MLX5_ASSERT(priv); if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) { /* We should support Multi-Segment Packets. */ @@ -5757,9 +5664,9 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) } if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP && rte_mbuf_dynflag_lookup - (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) > 0 && + (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) >= 0 && rte_mbuf_dynfield_lookup - (RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) > 0) { + (RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) >= 0) { /* Offload configured, dynamic entities registered. */ olx |= MLX5_TXOFF_CONFIG_TXPP; } @@ -5945,17 +5852,19 @@ mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, - uint16_t tx_queue_id __rte_unused, + uint16_t tx_queue_id, struct rte_eth_burst_mode *mode) { eth_tx_burst_t pkt_burst = dev->tx_pkt_burst; + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id]; unsigned int i, olx; for (i = 0; i < RTE_DIM(txoff_func); i++) { if (pkt_burst == txoff_func[i].func) { olx = txoff_func[i].olx; snprintf(mode->info, sizeof(mode->info), - "%s%s%s%s%s%s%s%s%s", + "%s%s%s%s%s%s%s%s%s%s", (olx & MLX5_TXOFF_CONFIG_EMPW) ? ((olx & MLX5_TXOFF_CONFIG_MPW) ? "Legacy MPW" : "Enhanced MPW") : "No MPW", @@ -5974,7 +5883,9 @@ mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, (olx & MLX5_TXOFF_CONFIG_METADATA) ? " + METADATA" : "", (olx & MLX5_TXOFF_CONFIG_TXPP) ? - " + TXPP" : ""); + " + TXPP" : "", + (txq && txq->fast_free) ? + " + Fast Free" : ""); return 0; } }
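As a closing illustration of the loc.mbuf_free accounting added to the eMPW inline path: while every packet since the beginning of the burst has been fully inlined into WQEs, its mbuf is only counted, and that leading run of the original pkts[] array is released in a single non-inlined call (__mlx5_tx_free_mbuf) at burst exit; once the run breaks, later mbufs are stashed in elts as before. A standalone sketch of the prefix-counting idea, with the bookkeeping reduced to plain integers rather than the driver's structures:

#include <stdio.h>

/* Count inlined packets while they form a prefix of the burst;
 * anything after the first break is retained (stashed in elts). */
static void
tx_burst_free_sketch(const int *inlined, unsigned int n)
{
	unsigned int pkts_sent = 0, mbuf_free = 0, stashed = 0;

	for (unsigned int i = 0; i < n; i++, pkts_sent++) {
		if (inlined[i] && pkts_sent == mbuf_free) {
			mbuf_free++;	/* still a prefix: defer the free */
			continue;
		}
		stashed++;		/* retained via the elts ring instead */
	}
	/* mbuf_free packets may now be bulk-freed from pkts[0..mbuf_free-1]. */
	printf("bulk-freed prefix: %u, stashed: %u\n", mbuf_free, stashed);
}

int main(void)
{
	const int inlined[] = { 1, 1, 1, 0, 1, 1 };

	tx_burst_free_sketch(inlined, 6);	/* prefix of 3 is deferred */
	return 0;
}

Keeping the flush routine out of line (__rte_noinline in the patch) keeps the hot send loop small while still paying the free cost only once per burst.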