X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=5eea932d44000ec8cda94c6d92c6b6001d5affc0;hb=26f1bae837eb;hp=b9ecc308a31cd7baeb35765766dfde78aad838cc;hpb=efa79e68c8cd41ba9ef082448b5c7ccc2a245ab9;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index b9ecc308a3..5eea932d44 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -3,7 +3,6 @@ * Copyright 2015-2019 Mellanox Technologies, Ltd */ -#include #include #include #include @@ -130,6 +129,7 @@ uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; uint64_t rte_net_mlx5_dynf_inline_mask; +#define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask /** * Build a table to translate Rx completion flags to packet type. @@ -536,6 +536,89 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) return RTE_ETH_RX_DESC_AVAIL; } +/** + * DPDK callback to get the RX queue information + * + * @param dev + * Pointer to the device structure. + * + * @param rx_queue_id + * Rx queue identificator. + * + * @param qinfo + * Pointer to the RX queue information structure. + * + * @return + * None. + */ + +void +mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id, + struct rte_eth_rxq_info *qinfo) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id]; + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); + + if (!rxq) + return; + qinfo->mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? + rxq->mprq_mp : rxq->mp; + qinfo->conf.rx_thresh.pthresh = 0; + qinfo->conf.rx_thresh.hthresh = 0; + qinfo->conf.rx_thresh.wthresh = 0; + qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh; + qinfo->conf.rx_drop_en = 1; + qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1; + qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; + qinfo->scattered_rx = dev->data->scattered_rx; + qinfo->nb_desc = 1 << rxq->elts_n; +} + +/** + * DPDK callback to get the RX packet burst mode information + * + * @param dev + * Pointer to the device structure. + * + * @param rx_queue_id + * Rx queue identificatior. + * + * @param mode + * Pointer to the burts mode information. + * + * @return + * 0 as success, -EINVAL as failure. + */ + +int +mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, + uint16_t rx_queue_id __rte_unused, + struct rte_eth_burst_mode *mode) +{ + eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; + + if (pkt_burst == mlx5_rx_burst) { + snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); + } else if (pkt_burst == mlx5_rx_burst_mprq) { + snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); + } else if (pkt_burst == mlx5_rx_burst_vec) { +#if defined RTE_ARCH_X86_64 + snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE"); +#elif defined RTE_ARCH_ARM64 + snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon"); +#elif defined RTE_ARCH_PPC_64 + snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); +#else + return -EINVAL; +#endif + } else { + return -EINVAL; + } + return 0; +} + /** * DPDK callback to get the number of used descriptors in a RX queue * @@ -783,7 +866,7 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) byte_count = DATA_LEN(buf); } /* scat->addr must be able to store a pointer. */ - assert(sizeof(scat->addr) >= sizeof(uintptr_t)); + MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); *scat = (struct mlx5_wqe_data_seg){ .addr = rte_cpu_to_be_64(addr), .byte_count = rte_cpu_to_be_32(byte_count), @@ -1324,7 +1407,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) break; } while (pkt != seg) { - assert(pkt != (*rxq->elts)[idx]); + MLX5_ASSERT(pkt != (*rxq->elts)[idx]); rep = NEXT(pkt); NEXT(pkt) = NULL; NB_SEGS(pkt) = 1; @@ -1341,7 +1424,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) break; } pkt = seg; - assert(len >= (rxq->crc_present << 2)); + MLX5_ASSERT(len >= (rxq->crc_present << 2)); pkt->ol_flags &= EXT_ATTACHED_MBUF; /* If compressed, take hash result from mini-CQE. */ rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? @@ -1532,7 +1615,7 @@ mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg; void *addr; - assert(rep != NULL); + MLX5_ASSERT(rep != NULL); /* Replace MPRQ buf. */ (*rxq->mprq_bufs)[rq_idx] = rep; /* Replace WQE. */ @@ -1622,7 +1705,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) byte_cnt = ret; strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> MLX5_MPRQ_STRIDE_NUM_SHIFT; - assert(strd_cnt); + MLX5_ASSERT(strd_cnt); consumed_strd += strd_cnt; if (byte_cnt & MLX5_MPRQ_FILLER_MASK) continue; @@ -1633,8 +1716,9 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* mini-CQE for MPRQ doesn't have hash result. */ strd_idx = rte_be_to_cpu_16(mcqe->stride_idx); } - assert(strd_idx < strd_n); - assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask)); + MLX5_ASSERT(strd_idx < strd_n); + MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & + wq_mask)); lro_num_seg = cqe->lro_num_seg; /* * Currently configured to receive a packet per a stride. But if @@ -1653,7 +1737,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) break; } len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; - assert((int)len >= (rxq->crc_present << 2)); + MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); if (rxq->crc_present) len -= RTE_ETHER_CRC_LEN; offset = strd_idx * strd_sz + strd_shift; @@ -1683,8 +1767,8 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Increment the refcnt of the whole chunk. */ rte_atomic16_add_return(&buf->refcnt, 1); - assert((uint16_t)rte_atomic16_read(&buf->refcnt) <= - strd_n + 1); + MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf->refcnt) <= + strd_n + 1); buf_addr = RTE_PTR_SUB(addr, headroom_sz); /* * MLX5 device doesn't use iova but it is necessary in a @@ -1705,7 +1789,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) buf_len, shinfo); /* Set mbuf head-room. */ pkt->data_off = headroom_sz; - assert(pkt->ol_flags == EXT_ATTACHED_MBUF); + MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF); /* * Prevent potential overflow due to MTU change through * kernel interface. @@ -1871,8 +1955,8 @@ mlx5_tx_free_mbuf(struct rte_mbuf **restrict pkts, * copying pointers to temporary array * for rte_mempool_put_bulk() calls. */ - assert(pkts); - assert(pkts_n); + MLX5_ASSERT(pkts); + MLX5_ASSERT(pkts_n); for (;;) { for (;;) { /* @@ -1881,7 +1965,7 @@ mlx5_tx_free_mbuf(struct rte_mbuf **restrict pkts, */ mbuf = rte_pktmbuf_prefree_seg(*pkts); if (likely(mbuf != NULL)) { - assert(mbuf == *pkts); + MLX5_ASSERT(mbuf == *pkts); if (likely(n_free != 0)) { if (unlikely(pool != mbuf->pool)) /* From different pool. */ @@ -1918,9 +2002,9 @@ mlx5_tx_free_mbuf(struct rte_mbuf **restrict pkts, * This loop is implemented to avoid multiple * inlining of rte_mempool_put_bulk(). */ - assert(pool); - assert(p_free); - assert(n_free); + MLX5_ASSERT(pool); + MLX5_ASSERT(p_free); + MLX5_ASSERT(n_free); /* * Free the array of pre-freed mbufs * belonging to the same memory pool. @@ -1968,8 +2052,8 @@ mlx5_tx_free_elts(struct mlx5_txq_data *restrict txq, { uint16_t n_elts = tail - txq->elts_tail; - assert(n_elts); - assert(n_elts <= txq->elts_s); + MLX5_ASSERT(n_elts); + MLX5_ASSERT(n_elts <= txq->elts_s); /* * Implement a loop to support ring buffer wraparound * with single inlining of mlx5_tx_free_mbuf(). @@ -1979,8 +2063,8 @@ mlx5_tx_free_elts(struct mlx5_txq_data *restrict txq, part = txq->elts_s - (txq->elts_tail & txq->elts_m); part = RTE_MIN(part, n_elts); - assert(part); - assert(part <= txq->elts_s); + MLX5_ASSERT(part); + MLX5_ASSERT(part <= txq->elts_s); mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m], part, olx); txq->elts_tail += part; @@ -2011,11 +2095,11 @@ mlx5_tx_copy_elts(struct mlx5_txq_data *restrict txq, unsigned int part; struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; - assert(pkts); - assert(pkts_n); + MLX5_ASSERT(pkts); + MLX5_ASSERT(pkts_n); part = txq->elts_s - (txq->elts_head & txq->elts_m); - assert(part); - assert(part <= txq->elts_s); + MLX5_ASSERT(part); + MLX5_ASSERT(part <= txq->elts_s); /* This code is a good candidate for vectorizing with SIMD. */ rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), (void *)pkts, @@ -2051,7 +2135,7 @@ mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq, tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m]; if (likely(tail != txq->elts_tail)) { mlx5_tx_free_elts(txq, tail, olx); - assert(tail == txq->elts_tail); + MLX5_ASSERT(tail == txq->elts_tail); } } } @@ -2089,7 +2173,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { if (likely(ret != MLX5_CQE_STATUS_ERR)) { /* No new CQEs in completion queue. */ - assert(ret == MLX5_CQE_STATUS_HW_OWN); + MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN); break; } /* @@ -2121,8 +2205,9 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, continue; } /* Normal transmit completion. */ - assert(ci != txq->cq_pi); - assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe->wqe_counter); + MLX5_ASSERT(ci != txq->cq_pi); + MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) == + cqe->wqe_counter); ++ci; last_cqe = cqe; /* @@ -2184,14 +2269,14 @@ mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET); /* Save elts_head in dedicated free on completion queue. */ -#ifdef NDEBUG - txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; -#else +#ifdef RTE_LIBRTE_MLX5_DEBUG txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | - (last->cseg.opcode >> 8) << 16; + (last->cseg.opcode >> 8) << 16; +#else + txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; #endif /* A CQE slot must always be available. */ - assert((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); + MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); } } @@ -2305,7 +2390,7 @@ mlx5_tx_eseg_none(struct mlx5_txq_data *restrict txq __rte_unused, * We should get here only if device support * this feature correctly. */ - assert(txq->vlan_en); + MLX5_ASSERT(txq->vlan_en); es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | loc->mbuf->vlan_tci); } else { @@ -2383,7 +2468,7 @@ mlx5_tx_eseg_dmin(struct mlx5_txq_data *restrict txq __rte_unused, loc->mbuf->vlan_tci); pdst += sizeof(struct rte_vlan_hdr); /* Copy the rest two bytes from packet data. */ - assert(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); + MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; } else { /* Fill the gap in the title WQEBB with inline data. */ @@ -2476,7 +2561,7 @@ mlx5_tx_eseg_data(struct mlx5_txq_data *restrict txq, loc->mbuf->vlan_tci); pdst += sizeof(struct rte_vlan_hdr); /* Copy the rest two bytes from packet data. */ - assert(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); + MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; psrc += sizeof(uint16_t); } else { @@ -2485,11 +2570,11 @@ mlx5_tx_eseg_data(struct mlx5_txq_data *restrict txq, psrc += sizeof(rte_v128u32_t); } pdst = (uint8_t *)(es + 2); - assert(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); - assert(pdst < (uint8_t *)txq->wqes_end); + MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); + MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); inlen -= MLX5_ESEG_MIN_INLINE_SIZE; if (!inlen) { - assert(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); + MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); return (struct mlx5_wqe_dseg *)pdst; } /* @@ -2530,21 +2615,30 @@ mlx5_tx_eseg_data(struct mlx5_txq_data *restrict txq, * Pointer to burst routine local context. * @param len * Length of data to be copied. + * @param must + * Length of data to be copied ignoring no inline hint. * @param olx * Configured Tx offloads mask. It is fully defined at * compile time and may be used for optimization. + * + * @return + * Number of actual copied data bytes. This is always greater than or + * equal to must parameter and might be lesser than len in no inline + * hint flag is encountered. */ -static __rte_always_inline void +static __rte_always_inline unsigned int mlx5_tx_mseg_memcpy(uint8_t *pdst, struct mlx5_txq_local *restrict loc, unsigned int len, + unsigned int must, unsigned int olx __rte_unused) { struct rte_mbuf *mbuf; - unsigned int part, dlen; + unsigned int part, dlen, copy = 0; uint8_t *psrc; - assert(len); + MLX5_ASSERT(len); + MLX5_ASSERT(must <= len); do { /* Allow zero length packets, must check first. */ dlen = rte_pktmbuf_data_len(loc->mbuf); @@ -2554,9 +2648,28 @@ mlx5_tx_mseg_memcpy(uint8_t *pdst, loc->mbuf = mbuf->next; rte_pktmbuf_free_seg(mbuf); loc->mbuf_off = 0; - assert(loc->mbuf_nseg > 1); - assert(loc->mbuf); + MLX5_ASSERT(loc->mbuf_nseg > 1); + MLX5_ASSERT(loc->mbuf); --loc->mbuf_nseg; + if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { + unsigned int diff; + + if (copy >= must) { + /* + * We already copied the minimal + * requested amount of data. + */ + return copy; + } + diff = must - copy; + if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { + /* + * Copy only the minimal required + * part of the data buffer. + */ + len = diff; + } + } continue; } dlen -= loc->mbuf_off; @@ -2564,6 +2677,7 @@ mlx5_tx_mseg_memcpy(uint8_t *pdst, loc->mbuf_off); part = RTE_MIN(len, dlen); rte_memcpy(pdst, psrc, part); + copy += part; loc->mbuf_off += part; len -= part; if (!len) { @@ -2574,10 +2688,10 @@ mlx5_tx_mseg_memcpy(uint8_t *pdst, loc->mbuf = mbuf->next; rte_pktmbuf_free_seg(mbuf); loc->mbuf_off = 0; - assert(loc->mbuf_nseg >= 1); + MLX5_ASSERT(loc->mbuf_nseg >= 1); --loc->mbuf_nseg; } - return; + return copy; } pdst += part; } while (true); @@ -2622,7 +2736,7 @@ mlx5_tx_eseg_mdat(struct mlx5_txq_data *restrict txq, struct mlx5_wqe_eseg *restrict es = &wqe->eseg; uint32_t csum; uint8_t *pdst; - unsigned int part; + unsigned int part, tlen = 0; /* * Calculate and set check sum flags first, uint32_t field @@ -2654,37 +2768,46 @@ mlx5_tx_eseg_mdat(struct mlx5_txq_data *restrict txq, sizeof(struct rte_vlan_hdr) + 2 * RTE_ETHER_ADDR_LEN), "invalid Ethernet Segment data size"); - assert(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); - es->inline_hdr_sz = rte_cpu_to_be_16(inlen); + MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); pdst = (uint8_t *)&es->inline_data; if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { /* Implement VLAN tag insertion as part inline data. */ - mlx5_tx_mseg_memcpy(pdst, loc, 2 * RTE_ETHER_ADDR_LEN, olx); + mlx5_tx_mseg_memcpy(pdst, loc, + 2 * RTE_ETHER_ADDR_LEN, + 2 * RTE_ETHER_ADDR_LEN, olx); pdst += 2 * RTE_ETHER_ADDR_LEN; *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 ((RTE_ETHER_TYPE_VLAN << 16) | loc->mbuf->vlan_tci); pdst += sizeof(struct rte_vlan_hdr); - inlen -= 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); + tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); } - assert(pdst < (uint8_t *)txq->wqes_end); + MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); /* * The WQEBB space availability is checked by caller. * Here we should be aware of WQE ring buffer wraparound only. */ part = (uint8_t *)txq->wqes_end - pdst; - part = RTE_MIN(part, inlen); - assert(part); + part = RTE_MIN(part, inlen - tlen); + MLX5_ASSERT(part); do { - mlx5_tx_mseg_memcpy(pdst, loc, part, olx); - inlen -= part; - if (likely(!inlen)) { - pdst += part; + unsigned int copy; + + /* + * Copying may be interrupted inside the routine + * if run into no inline hint flag. + */ + copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); + copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); + tlen += copy; + if (likely(inlen <= tlen) || copy < part) { + es->inline_hdr_sz = rte_cpu_to_be_16(tlen); + pdst += copy; pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); return (struct mlx5_wqe_dseg *)pdst; } pdst = (uint8_t *)txq->wqes; - part = inlen; + part = inlen - tlen; } while (true); } @@ -2714,7 +2837,7 @@ mlx5_tx_dseg_ptr(struct mlx5_txq_data *restrict txq, unsigned int olx __rte_unused) { - assert(len); + MLX5_ASSERT(len); dseg->bcount = rte_cpu_to_be_32(len); dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); @@ -2750,7 +2873,7 @@ mlx5_tx_dseg_iptr(struct mlx5_txq_data *restrict txq, { uintptr_t dst, src; - assert(len); + MLX5_ASSERT(len); if (len > MLX5_DSEG_MIN_INLINE_SIZE) { dseg->bcount = rte_cpu_to_be_32(len); dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); @@ -2764,7 +2887,7 @@ mlx5_tx_dseg_iptr(struct mlx5_txq_data *restrict txq, src = (uintptr_t)buf; if (len & 0x08) { #ifdef RTE_ARCH_STRICT_ALIGN - assert(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); + MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); *(uint32_t *)dst = *(unaligned_uint32_t *)src; dst += sizeof(uint32_t); src += sizeof(uint32_t); @@ -2883,7 +3006,7 @@ mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, unsigned int part; uint8_t *pdst; - assert(len > MLX5_ESEG_MIN_INLINE_SIZE); + MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); static_assert(MLX5_DSEG_MIN_INLINE_SIZE == (2 * RTE_ETHER_ADDR_LEN), "invalid Data Segment data size"); @@ -2895,7 +3018,7 @@ mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, pdst += MLX5_DSEG_MIN_INLINE_SIZE; len -= MLX5_DSEG_MIN_INLINE_SIZE; /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ - assert(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); + MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) pdst = (uint8_t *)txq->wqes; *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | @@ -2963,7 +3086,7 @@ mlx5_tx_mseg_build(struct mlx5_txq_data *restrict txq, struct mlx5_wqe_dseg *restrict dseg; unsigned int ds; - assert((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); + MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); loc->mbuf_nseg = NB_SEGS(loc->mbuf); loc->mbuf_off = 0; @@ -2984,8 +3107,8 @@ mlx5_tx_mseg_build(struct mlx5_txq_data *restrict txq, * Non-zero offset means there are some data * remained in the packet. */ - assert(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); - assert(rte_pktmbuf_data_len(loc->mbuf)); + MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); + MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, loc->mbuf_off); dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; @@ -2997,7 +3120,7 @@ mlx5_tx_mseg_build(struct mlx5_txq_data *restrict txq, dseg = (struct mlx5_wqe_dseg *)txq->wqes; mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); /* Store the mbuf to be freed on completion. */ - assert(loc->elts_free); + MLX5_ASSERT(loc->elts_free); txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; --loc->elts_free; ++dseg; @@ -3023,7 +3146,7 @@ mlx5_tx_mseg_build(struct mlx5_txq_data *restrict txq, (txq, loc, dseg, rte_pktmbuf_mtod(loc->mbuf, uint8_t *), rte_pktmbuf_data_len(loc->mbuf), olx); - assert(loc->elts_free); + MLX5_ASSERT(loc->elts_free); txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; --loc->elts_free; ++dseg; @@ -3090,7 +3213,7 @@ mlx5_tx_packet_multi_tso(struct mlx5_txq_data *restrict txq, inlen <= MLX5_ESEG_MIN_INLINE_SIZE || inlen > (dlen + vlan))) return MLX5_TXCMP_CODE_ERROR; - assert(inlen >= txq->inlen_mode); + MLX5_ASSERT(inlen >= txq->inlen_mode); /* * Check whether there are enough free WQEBBs: * - Control Segment @@ -3162,7 +3285,7 @@ mlx5_tx_packet_multi_send(struct mlx5_txq_data *restrict txq, struct mlx5_wqe *restrict wqe; unsigned int ds, nseg; - assert(NB_SEGS(loc->mbuf) > 1); + MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); /* * No inline at all, it means the CPU cycles saving * is prioritized at configuration, we should not @@ -3269,8 +3392,8 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq, struct mlx5_wqe *restrict wqe; unsigned int ds, inlen, dlen, vlan = 0; - assert(MLX5_TXOFF_CONFIG(INLINE)); - assert(NB_SEGS(loc->mbuf) > 1); + MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); + MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); /* * First calculate data length to be inlined * to estimate the required space for WQE. @@ -3282,8 +3405,9 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq, /* Check against minimal length. */ if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) return MLX5_TXCMP_CODE_ERROR; - assert(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); - if (inlen > txq->inlen_send) { + MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); + if (inlen > txq->inlen_send || + loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { struct rte_mbuf *mbuf; unsigned int nxlen; uintptr_t start; @@ -3294,11 +3418,13 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq, * inlining is required. */ if (txq->inlen_mode) { - assert(txq->inlen_mode >= MLX5_ESEG_MIN_INLINE_SIZE); - assert(txq->inlen_mode <= txq->inlen_send); + MLX5_ASSERT(txq->inlen_mode >= + MLX5_ESEG_MIN_INLINE_SIZE); + MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); inlen = txq->inlen_mode; } else { - if (!vlan || txq->vlan_en) { + if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || + !vlan || txq->vlan_en) { /* * VLAN insertion will be done inside by HW. * It is not utmost effective - VLAN flag is @@ -3327,7 +3453,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq, do { smlen = nxlen; mbuf = NEXT(mbuf); - assert(mbuf); + MLX5_ASSERT(mbuf); nxlen = rte_pktmbuf_data_len(mbuf); nxlen += smlen; } while (unlikely(nxlen < inlen)); @@ -3343,7 +3469,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq, inlen = nxlen; mbuf = NEXT(mbuf); /* There should be not end of packet. */ - assert(mbuf); + MLX5_ASSERT(mbuf); nxlen = inlen + rte_pktmbuf_data_len(mbuf); } while (unlikely(nxlen < txq->inlen_send)); } @@ -3371,7 +3497,7 @@ do_align: * Estimate the number of Data Segments conservatively, * supposing no any mbufs is being freed during inlining. */ - assert(inlen <= txq->inlen_send); + MLX5_ASSERT(inlen <= txq->inlen_send); ds = NB_SEGS(loc->mbuf) + 2 + (inlen - MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WSEG_SIZE + @@ -3430,14 +3556,14 @@ mlx5_tx_burst_mseg(struct mlx5_txq_data *restrict txq, struct mlx5_txq_local *restrict loc, unsigned int olx) { - assert(loc->elts_free && loc->wqe_free); - assert(pkts_n > loc->pkts_sent); + MLX5_ASSERT(loc->elts_free && loc->wqe_free); + MLX5_ASSERT(pkts_n > loc->pkts_sent); pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; for (;;) { enum mlx5_txcmp_code ret; - assert(NB_SEGS(loc->mbuf) > 1); + MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); /* * Estimate the number of free elts quickly but * conservatively. Some segment may be fully inlined @@ -3477,7 +3603,7 @@ mlx5_tx_burst_mseg(struct mlx5_txq_data *restrict txq, return MLX5_TXCMP_CODE_TSO; return MLX5_TXCMP_CODE_SINGLE; } - assert(false); + MLX5_ASSERT(false); } /** @@ -3519,8 +3645,8 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, struct mlx5_txq_local *restrict loc, unsigned int olx) { - assert(loc->elts_free && loc->wqe_free); - assert(pkts_n > loc->pkts_sent); + MLX5_ASSERT(loc->elts_free && loc->wqe_free); + MLX5_ASSERT(pkts_n > loc->pkts_sent); pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; for (;;) { @@ -3529,7 +3655,7 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, unsigned int ds, dlen, hlen, ntcp, vlan = 0; uint8_t *dptr; - assert(NB_SEGS(loc->mbuf) == 1); + MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); dlen = rte_pktmbuf_data_len(loc->mbuf); if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { @@ -3613,7 +3739,7 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, return MLX5_TXCMP_CODE_SINGLE; /* Continue with the next TSO packet. */ } - assert(false); + MLX5_ASSERT(false); } /** @@ -3720,7 +3846,7 @@ mlx5_tx_match_empw(struct mlx5_txq_data *restrict txq __rte_unused, return false; /* There must be no VLAN packets in eMPW loop. */ if (MLX5_TXOFF_CONFIG(VLAN)) - assert(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT)); + MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT)); return true; } @@ -3752,7 +3878,7 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, unsigned int slen, unsigned int olx __rte_unused) { - assert(!MLX5_TXOFF_CONFIG(INLINE)); + MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); #ifdef MLX5_PMD_SOFT_COUNTERS /* Update sent data bytes counter. */ txq->stats.obytes += slen; @@ -3795,8 +3921,8 @@ mlx5_tx_idone_empw(struct mlx5_txq_data *restrict txq, unsigned int slen, unsigned int olx __rte_unused) { - assert(MLX5_TXOFF_CONFIG(INLINE)); - assert((len % MLX5_WSEG_SIZE) == 0); + MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); + MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); #ifdef MLX5_PMD_SOFT_COUNTERS /* Update sent data bytes counter. */ txq->stats.obytes += slen; @@ -3868,10 +3994,10 @@ mlx5_tx_burst_empw_simple(struct mlx5_txq_data *restrict txq, * and sends single-segment packet with eMPW opcode * without data inlining. */ - assert(!MLX5_TXOFF_CONFIG(INLINE)); - assert(MLX5_TXOFF_CONFIG(EMPW)); - assert(loc->elts_free && loc->wqe_free); - assert(pkts_n > loc->pkts_sent); + MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); + MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); + MLX5_ASSERT(loc->elts_free && loc->wqe_free); + MLX5_ASSERT(pkts_n > loc->pkts_sent); static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; @@ -3883,7 +4009,7 @@ mlx5_tx_burst_empw_simple(struct mlx5_txq_data *restrict txq, unsigned int slen = 0; next_empw: - assert(NB_SEGS(loc->mbuf) == 1); + MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? MLX5_MPW_MAX_PACKETS : MLX5_EMPW_MAX_PACKETS); @@ -3949,7 +4075,7 @@ next_empw: return MLX5_TXCMP_CODE_EXIT; return MLX5_TXCMP_CODE_MULTI; } - assert(NB_SEGS(loc->mbuf) == 1); + MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); if (ret == MLX5_TXCMP_CODE_TSO) { part -= loop; mlx5_tx_sdone_empw(txq, loc, part, slen, olx); @@ -3967,7 +4093,7 @@ next_empw: return MLX5_TXCMP_CODE_SINGLE; } if (ret != MLX5_TXCMP_CODE_EMPW) { - assert(false); + MLX5_ASSERT(false); part -= loop; mlx5_tx_sdone_empw(txq, loc, part, slen, olx); return MLX5_TXCMP_CODE_ERROR; @@ -3981,7 +4107,7 @@ next_empw: * - packets length (legacy MPW only) */ if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { - assert(loop); + MLX5_ASSERT(loop); part -= loop; mlx5_tx_sdone_empw(txq, loc, part, slen, olx); if (unlikely(!loc->elts_free || @@ -3996,8 +4122,8 @@ next_empw: dseg = (struct mlx5_wqe_dseg *)txq->wqes; } /* eMPW is built successfully, update loop parameters. */ - assert(!loop); - assert(pkts_n >= part); + MLX5_ASSERT(!loop); + MLX5_ASSERT(pkts_n >= part); #ifdef MLX5_PMD_SOFT_COUNTERS /* Update sent data bytes counter. */ txq->stats.obytes += slen; @@ -4015,7 +4141,7 @@ next_empw: return ret; /* Continue sending eMPW batches. */ } - assert(false); + MLX5_ASSERT(false); } /** @@ -4034,10 +4160,10 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, * and sends single-segment packet with eMPW opcode * with data inlining. */ - assert(MLX5_TXOFF_CONFIG(INLINE)); - assert(MLX5_TXOFF_CONFIG(EMPW)); - assert(loc->elts_free && loc->wqe_free); - assert(pkts_n > loc->pkts_sent); + MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); + MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); + MLX5_ASSERT(loc->elts_free && loc->wqe_free); + MLX5_ASSERT(pkts_n > loc->pkts_sent); static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; @@ -4048,7 +4174,7 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, unsigned int room, part, nlim; unsigned int slen = 0; - assert(NB_SEGS(loc->mbuf) == 1); + MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); /* * Limits the amount of packets in one WQE * to improve CQE latency generation. @@ -4089,9 +4215,9 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); unsigned int tlen; - assert(room >= MLX5_WQE_DSEG_SIZE); - assert((room % MLX5_WQE_DSEG_SIZE) == 0); - assert((uintptr_t)dseg < (uintptr_t)txq->wqes_end); + MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); + MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); + MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); /* * Some Tx offloads may cause an error if * packet is not long enough, check against @@ -4109,7 +4235,8 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, return MLX5_TXCMP_CODE_ERROR; } /* Inline or not inline - that's the Question. */ - if (dlen > txq->inlen_empw) + if (dlen > txq->inlen_empw || + loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) goto pointer_empw; /* Inline entire packet, optional VLAN insertion. */ tlen = sizeof(dseg->bcount) + dlen; @@ -4120,8 +4247,9 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, * mlx5_tx_able_to_empw() and packet * fits into inline length guaranteed. */ - assert((dlen + sizeof(struct rte_vlan_hdr)) <= - txq->inlen_empw); + MLX5_ASSERT((dlen + + sizeof(struct rte_vlan_hdr)) <= + txq->inlen_empw); tlen += sizeof(struct rte_vlan_hdr); if (room < tlen) break; @@ -4138,7 +4266,7 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, dptr, dlen, olx); } tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); - assert(room >= tlen); + MLX5_ASSERT(room >= tlen); room -= tlen; /* * Packet data are completely inlined, @@ -4151,10 +4279,10 @@ pointer_empw: * Not inlinable VLAN packets are * proceeded outside of this routine. */ - assert(room >= MLX5_WQE_DSEG_SIZE); + MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); if (MLX5_TXOFF_CONFIG(VLAN)) - assert(!(loc->mbuf->ol_flags & - PKT_TX_VLAN_PKT)); + MLX5_ASSERT(!(loc->mbuf->ol_flags & + PKT_TX_VLAN_PKT)); mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); /* We have to store mbuf in elts.*/ txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; @@ -4195,7 +4323,7 @@ next_mbuf: return MLX5_TXCMP_CODE_EXIT; return MLX5_TXCMP_CODE_MULTI; } - assert(NB_SEGS(loc->mbuf) == 1); + MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); if (ret == MLX5_TXCMP_CODE_TSO) { part -= room; mlx5_tx_idone_empw(txq, loc, part, slen, olx); @@ -4213,7 +4341,7 @@ next_mbuf: return MLX5_TXCMP_CODE_SINGLE; } if (ret != MLX5_TXCMP_CODE_EMPW) { - assert(false); + MLX5_ASSERT(false); part -= room; mlx5_tx_idone_empw(txq, loc, part, slen, olx); return MLX5_TXCMP_CODE_ERROR; @@ -4240,7 +4368,7 @@ next_mbuf: * We get here to close an existing eMPW * session and start the new one. */ - assert(pkts_n); + MLX5_ASSERT(pkts_n); part -= room; if (unlikely(!part)) return MLX5_TXCMP_CODE_EXIT; @@ -4250,7 +4378,7 @@ next_mbuf: return MLX5_TXCMP_CODE_EXIT; /* Continue the loop with new eMPW session. */ } - assert(false); + MLX5_ASSERT(false); } /** @@ -4268,15 +4396,15 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, * Subroutine is the part of mlx5_tx_burst_single() * and sends single-segment packet with SEND opcode. */ - assert(loc->elts_free && loc->wqe_free); - assert(pkts_n > loc->pkts_sent); + MLX5_ASSERT(loc->elts_free && loc->wqe_free); + MLX5_ASSERT(pkts_n > loc->pkts_sent); pkts += loc->pkts_sent + 1; pkts_n -= loc->pkts_sent; for (;;) { struct mlx5_wqe *restrict wqe; enum mlx5_txcmp_code ret; - assert(NB_SEGS(loc->mbuf) == 1); + MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); if (MLX5_TXOFF_CONFIG(INLINE)) { unsigned int inlen, vlan = 0; @@ -4296,7 +4424,8 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, * Otherwise we would do extra check for data * size to avoid crashes due to length overflow. */ - assert(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); + MLX5_ASSERT(txq->inlen_send >= + MLX5_ESEG_MIN_INLINE_SIZE); if (inlen <= txq->inlen_send) { unsigned int seg_n, wqe_n; @@ -4305,6 +4434,33 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, /* Check against minimal length. */ if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) return MLX5_TXCMP_CODE_ERROR; + if (loc->mbuf->ol_flags & + PKT_TX_DYNF_NOINLINE) { + /* + * The hint flag not to inline packet + * data is set. Check whether we can + * follow the hint. + */ + if ((!MLX5_TXOFF_CONFIG(EMPW) && + txq->inlen_mode) || + (MLX5_TXOFF_CONFIG(MPW) && + txq->inlen_mode)) { + /* + * The hardware requires the + * minimal inline data header. + */ + goto single_min_inline; + } + if (MLX5_TXOFF_CONFIG(VLAN) && + vlan && !txq->vlan_en) { + /* + * We must insert VLAN tag + * by software means. + */ + goto single_part_inline; + } + goto single_no_inline; + } /* * Completely inlined packet data WQE: * - Control Segment, SEND opcode @@ -4354,10 +4510,11 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, * We should check the free space in * WQE ring buffer to inline partially. */ - assert(txq->inlen_send >= txq->inlen_mode); - assert(inlen > txq->inlen_mode); - assert(txq->inlen_mode >= - MLX5_ESEG_MIN_INLINE_SIZE); +single_min_inline: + MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); + MLX5_ASSERT(inlen > txq->inlen_mode); + MLX5_ASSERT(txq->inlen_mode >= + MLX5_ESEG_MIN_INLINE_SIZE); /* * Check whether there are enough free WQEBBs: * - Control Segment @@ -4400,7 +4557,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; /* We have to store mbuf in elts.*/ - assert(MLX5_TXOFF_CONFIG(INLINE)); + MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; --loc->elts_free; @@ -4421,6 +4578,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, * We also get here if VLAN insertion is not * supported by HW, the inline is enabled. */ +single_part_inline: wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); loc->wqe_last = wqe; mlx5_tx_cseg_init(txq, loc, wqe, 4, @@ -4433,14 +4591,14 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, * comparing with txq->inlen_send. We should * not get overflow here. */ - assert(inlen > MLX5_ESEG_MIN_INLINE_SIZE); + MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], dptr, dlen, olx); ++txq->wqe_ci; --loc->wqe_free; /* We have to store mbuf in elts.*/ - assert(MLX5_TXOFF_CONFIG(INLINE)); + MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; --loc->elts_free; @@ -4461,6 +4619,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, * - Ethernet Segment, optional VLAN, no inline * - Data Segment, pointer type */ +single_no_inline: wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); loc->wqe_last = wqe; mlx5_tx_cseg_init(txq, loc, wqe, 3, @@ -4477,7 +4636,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, * if no inlining is configured, this is done * by calling routine in a batch copy. */ - assert(!MLX5_TXOFF_CONFIG(INLINE)); + MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); --loc->elts_free; #ifdef MLX5_PMD_SOFT_COUNTERS /* Update sent data bytes counter. */ @@ -4499,7 +4658,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) return ret; } - assert(false); + MLX5_ASSERT(false); } static __rte_always_inline enum mlx5_txcmp_code @@ -4514,7 +4673,7 @@ mlx5_tx_burst_single(struct mlx5_txq_data *restrict txq, ret = mlx5_tx_able_to_empw(txq, loc, olx, false); if (ret == MLX5_TXCMP_CODE_SINGLE) goto ordinary_send; - assert(ret == MLX5_TXCMP_CODE_EMPW); + MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); for (;;) { /* Optimize for inline/no inline eMPW send. */ ret = (MLX5_TXOFF_CONFIG(INLINE)) ? @@ -4525,14 +4684,14 @@ mlx5_tx_burst_single(struct mlx5_txq_data *restrict txq, if (ret != MLX5_TXCMP_CODE_SINGLE) return ret; /* The resources to send one packet should remain. */ - assert(loc->elts_free && loc->wqe_free); + MLX5_ASSERT(loc->elts_free && loc->wqe_free); ordinary_send: ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); - assert(ret != MLX5_TXCMP_CODE_SINGLE); + MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); if (ret != MLX5_TXCMP_CODE_EMPW) return ret; /* The resources to send one packet should remain. */ - assert(loc->elts_free && loc->wqe_free); + MLX5_ASSERT(loc->elts_free && loc->wqe_free); } } @@ -4566,8 +4725,8 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, enum mlx5_txcmp_code ret; unsigned int part; - assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); - assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); + MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); + MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); if (unlikely(!pkts_n)) return 0; loc.pkts_sent = 0; @@ -4593,10 +4752,10 @@ send_loop: * - data inlining into WQEs, one packet may require multiple * WQEBBs, the WQEs become the limiting factor. */ - assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); + MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); loc.elts_free = txq->elts_s - (uint16_t)(txq->elts_head - txq->elts_tail); - assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); + MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); loc.wqe_free = txq->wqe_s - (uint16_t)(txq->wqe_ci - txq->wqe_pi); if (unlikely(!loc.elts_free || !loc.wqe_free)) @@ -4618,7 +4777,7 @@ send_loop: * per WQE, do it in dedicated routine. */ enter_send_multi: - assert(loc.pkts_sent >= loc.pkts_copy); + MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); part = loc.pkts_sent - loc.pkts_copy; if (!MLX5_TXOFF_CONFIG(INLINE) && part) { /* @@ -4632,7 +4791,7 @@ enter_send_multi: part, olx); loc.pkts_copy = loc.pkts_sent; } - assert(pkts_n > loc.pkts_sent); + MLX5_ASSERT(pkts_n > loc.pkts_sent); ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); if (!MLX5_TXOFF_CONFIG(INLINE)) loc.pkts_copy = loc.pkts_sent; @@ -4674,7 +4833,7 @@ enter_send_multi: goto enter_send_tso; } /* We must not get here. Something is going wrong. */ - assert(false); + MLX5_ASSERT(false); txq->stats.oerrors++; break; } @@ -4688,8 +4847,8 @@ enter_send_multi: * in dedicated branch. */ enter_send_tso: - assert(NB_SEGS(loc.mbuf) == 1); - assert(pkts_n > loc.pkts_sent); + MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); + MLX5_ASSERT(pkts_n > loc.pkts_sent); ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); /* * These returned code checks are supposed @@ -4712,7 +4871,7 @@ enter_send_tso: goto enter_send_multi; } /* We must not get here. Something is going wrong. */ - assert(false); + MLX5_ASSERT(false); txq->stats.oerrors++; break; } @@ -4725,7 +4884,7 @@ enter_send_tso: * offloads are requested at SQ configuration time). */ enter_send_single: - assert(pkts_n > loc.pkts_sent); + MLX5_ASSERT(pkts_n > loc.pkts_sent); ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); /* * These returned code checks are supposed @@ -4754,7 +4913,7 @@ enter_send_single: goto enter_send_tso; } /* We must not get here. Something is going wrong. */ - assert(false); + MLX5_ASSERT(false); txq->stats.oerrors++; break; } @@ -4764,7 +4923,8 @@ enter_send_single: * - doorbell the hardware * - copy the rest of mbufs to elts (if any) */ - assert(MLX5_TXOFF_CONFIG(INLINE) || loc.pkts_sent >= loc.pkts_copy); + MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || + loc.pkts_sent >= loc.pkts_copy); /* Take a shortcut if nothing is sent. */ if (unlikely(loc.pkts_sent == loc.pkts_loop)) goto burst_exit; @@ -4817,8 +4977,8 @@ enter_send_single: mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); loc.pkts_copy = loc.pkts_sent; } - assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); - assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); + MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); + MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); if (pkts_n > loc.pkts_sent) { /* * If burst size is large there might be no enough CQE @@ -5189,7 +5349,7 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) "invalid WQE Data Segment size"); static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE, "invalid WQE size"); - assert(priv); + MLX5_ASSERT(priv); if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) { /* We should support Multi-Segment Packets. */ olx |= MLX5_TXOFF_CONFIG_MULTI; @@ -5337,3 +5497,92 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) } return txoff_func[m].func; } + +/** + * DPDK callback to get the TX queue information + * + * @param dev + * Pointer to the device structure. + * + * @param tx_queue_id + * Tx queue identificator. + * + * @param qinfo + * Pointer to the TX queue information structure. + * + * @return + * None. + */ + +void +mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, + struct rte_eth_txq_info *qinfo) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id]; + struct mlx5_txq_ctrl *txq_ctrl = + container_of(txq, struct mlx5_txq_ctrl, txq); + + if (!txq) + return; + qinfo->nb_desc = txq->elts_s; + qinfo->conf.tx_thresh.pthresh = 0; + qinfo->conf.tx_thresh.hthresh = 0; + qinfo->conf.tx_thresh.wthresh = 0; + qinfo->conf.tx_rs_thresh = 0; + qinfo->conf.tx_free_thresh = 0; + qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1; + qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads; +} + +/** + * DPDK callback to get the TX packet burst mode information + * + * @param dev + * Pointer to the device structure. + * + * @param tx_queue_id + * Tx queue identificatior. + * + * @param mode + * Pointer to the burts mode information. + * + * @return + * 0 as success, -EINVAL as failure. + */ + +int +mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, + uint16_t tx_queue_id __rte_unused, + struct rte_eth_burst_mode *mode) +{ + eth_tx_burst_t pkt_burst = dev->tx_pkt_burst; + unsigned int i, olx; + + for (i = 0; i < RTE_DIM(txoff_func); i++) { + if (pkt_burst == txoff_func[i].func) { + olx = txoff_func[i].olx; + snprintf(mode->info, sizeof(mode->info), + "%s%s%s%s%s%s%s%s", + (olx & MLX5_TXOFF_CONFIG_EMPW) ? + ((olx & MLX5_TXOFF_CONFIG_MPW) ? + "Legacy MPW" : "Enhanced MPW") : "No MPW", + (olx & MLX5_TXOFF_CONFIG_MULTI) ? + " + MULTI" : "", + (olx & MLX5_TXOFF_CONFIG_TSO) ? + " + TSO" : "", + (olx & MLX5_TXOFF_CONFIG_SWP) ? + " + SWP" : "", + (olx & MLX5_TXOFF_CONFIG_CSUM) ? + " + CSUM" : "", + (olx & MLX5_TXOFF_CONFIG_INLINE) ? + " + INLINE" : "", + (olx & MLX5_TXOFF_CONFIG_VLAN) ? + " + VLAN" : "", + (olx & MLX5_TXOFF_CONFIG_METADATA) ? + " + METADATA" : ""); + return 0; + } + } + return -EINVAL; +}