X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=887e283c180301a3ed99cfd0b195ed1fd05c4df9;hb=9bf26e1318e3cd7a04115c8201255505e90cab83;hp=003eefdd40be0f2d482d21e78275cf8150e99e79;hpb=be39124e5b708bbf6ffe9c870ea3a6a6156e15b0;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 003eefdd40..887e283c18 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -107,6 +107,16 @@ static int mlx5_queue_state_modify(struct rte_eth_dev *dev, struct mlx5_mp_arg_queue_state_modify *sm); +static inline void +mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp, + volatile struct mlx5_cqe *restrict cqe, + uint32_t phcsum); + +static inline void +mlx5_lro_update_hdr(uint8_t *restrict padd, + volatile struct mlx5_cqe *restrict cqe, + uint32_t len); + uint32_t mlx5_ptype_table[] __rte_cache_aligned = { [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ }; @@ -573,18 +583,16 @@ mlx5_dump_debug_information(const char *fname, const char *hex_title, MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname); fd = fopen(path, "a+"); if (!fd) { - DRV_LOG(WARNING, "cannot open %s for debug dump\n", - path); + DRV_LOG(WARNING, "cannot open %s for debug dump", path); MKSTR(path2, "./%s", fname); fd = fopen(path2, "a+"); if (!fd) { - DRV_LOG(ERR, "cannot open %s for debug dump\n", - path2); + DRV_LOG(ERR, "cannot open %s for debug dump", path2); return; } - DRV_LOG(INFO, "New debug dump in file %s\n", path2); + DRV_LOG(INFO, "New debug dump in file %s", path2); } else { - DRV_LOG(INFO, "New debug dump in file %s\n", path); + DRV_LOG(INFO, "New debug dump in file %s", path); } if (hex_title) rte_hexdump(fd, hex_title, buf, hex_len); @@ -644,9 +652,10 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) * Pointer to the error CQE. * * @return - * The last Tx buffer element to free. + * Negative value if queue recovery failed, + * the last Tx buffer element to free otherwise. */ -uint16_t +int mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq, volatile struct mlx5_err_cqe *err_cqe) { @@ -696,6 +705,7 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq, return txq->elts_head; } /* Recovering failed - try again later on the same WQE. 
*/ + return -1; } else { txq->cq_ci++; } @@ -838,7 +848,7 @@ mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, &rq_attr); } if (ret) { - DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s\n", + DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s", sm->state, strerror(errno)); rte_errno = errno; return ret; @@ -851,12 +861,12 @@ mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, .qp_state = IBV_QPS_RESET, .port_num = (uint8_t)priv->ibv_port, }; - struct ibv_qp *qp = txq_ctrl->ibv->qp; + struct ibv_qp *qp = txq_ctrl->obj->qp; ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); if (ret) { DRV_LOG(ERR, "Cannot change the Tx QP state to RESET " - "%s\n", strerror(errno)); + "%s", strerror(errno)); rte_errno = errno; return ret; } @@ -864,7 +874,7 @@ mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, ret = mlx5_glue->modify_qp(qp, &mod, (IBV_QP_STATE | IBV_QP_PORT)); if (ret) { - DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s\n", + DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s", strerror(errno)); rte_errno = errno; return ret; @@ -872,7 +882,7 @@ mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, mod.qp_state = IBV_QPS_RTR; ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); if (ret) { - DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s\n", + DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s", strerror(errno)); rte_errno = errno; return ret; @@ -880,7 +890,7 @@ mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, mod.qp_state = IBV_QPS_RTS; ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); if (ret) { - DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s\n", + DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s", strerror(errno)); rte_errno = errno; return ret; @@ -928,14 +938,15 @@ mlx5_queue_state_modify(struct rte_eth_dev *dev, * * @param[in] rxq * Pointer to RX queue structure. - * @param[in] mbuf_prepare - * Whether to prepare mbufs for the RQ. + * @param[in] vec + * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. + * 0 when called from non-vectorized Rx burst. * * @return * -1 in case of recovery error, otherwise the CQE status. */ int -mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare) +mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) { const uint16_t cqe_n = 1 << rxq->cqe_n; const uint16_t cqe_mask = cqe_n - 1; @@ -1002,7 +1013,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare) if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) return -1; - if (mbuf_prepare) { + if (vec) { const uint16_t q_mask = wqe_n - 1; uint16_t elt_idx; struct rte_mbuf **elt; @@ -1026,6 +1037,16 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare) return -1; } } + for (i = 0; i < (int)wqe_n; ++i) { + elt = &(*rxq->elts)[i]; + DATA_LEN(*elt) = + (uint16_t)((*elt)->buf_len - + rte_pktmbuf_headroom(*elt)); + } + /* Padding with a fake mbuf for vec Rx. 
*/ + for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) + (*rxq->elts)[wqe_n + i] = + &rxq->fake_mbuf; } mlx5_rxq_initialize(rxq); rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; @@ -1323,6 +1344,13 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) if (rxq->crc_present) len -= RTE_ETHER_CRC_LEN; PKT_LEN(pkt) = len; + if (cqe->lro_num_seg > 1) { + mlx5_lro_update_hdr + (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, + len); + pkt->ol_flags |= PKT_RX_LRO; + pkt->tso_segsz = len / cqe->lro_num_seg; + } } DATA_LEN(rep) = DATA_LEN(seg); PKT_LEN(rep) = PKT_LEN(seg); @@ -1992,6 +2020,45 @@ mlx5_tx_copy_elts(struct mlx5_txq_data *restrict txq, (pkts_n - part) * sizeof(struct rte_mbuf *)); } +/** + * Update completion queue consuming index via doorbell + * and flush the completed data buffers. + * + * @param txq + * Pointer to TX queue structure. + * @param valid CQE pointer + * if not NULL update txq->wqe_pi and flush the buffers + * @param itail + * if not negative - flush the buffers till this index. + * @param olx + * Configured Tx offloads mask. It is fully defined at + * compile time and may be used for optimization. + */ +static __rte_always_inline void +mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq, + volatile struct mlx5_cqe *last_cqe, + int itail, + unsigned int olx __rte_unused) +{ + uint16_t tail; + + if (likely(last_cqe != NULL)) { + txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); + tail = ((volatile struct mlx5_wqe_cseg *) + (txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc; + } else if (itail >= 0) { + tail = (uint16_t)itail; + } else { + return; + } + rte_compiler_barrier(); + *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); + if (likely(tail != txq->elts_tail)) { + mlx5_tx_free_elts(txq, tail, olx); + assert(tail == txq->elts_tail); + } +} + /** * Manage TX completions. This routine checks the CQ for * arrived CQEs, deduces the last accomplished WQE in SQ, @@ -2010,13 +2077,14 @@ static void mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, unsigned int olx __rte_unused) { - bool update = false; + unsigned int count = MLX5_TX_COMP_MAX_CQE; + volatile struct mlx5_cqe *last_cqe = NULL; int ret; + static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); + static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); do { - volatile struct mlx5_wqe_cseg *cseg; volatile struct mlx5_cqe *cqe; - uint16_t tail; cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); @@ -2024,38 +2092,42 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, if (likely(ret != MLX5_CQE_STATUS_ERR)) { /* No new CQEs in completion queue. */ assert(ret == MLX5_CQE_STATUS_HW_OWN); - if (likely(update)) { - /* Update the consumer index. */ - rte_compiler_barrier(); - *txq->cq_db = - rte_cpu_to_be_32(txq->cq_ci); - } - return; + break; } - /* Some error occurred, try to restart. */ + /* + * Some error occurred, try to restart. + * We have no barrier after WQE related Doorbell + * written, make sure all writes are completed + * here, before we might perform SQ reset. + */ rte_wmb(); - tail = mlx5_tx_error_cqe_handle + ret = mlx5_tx_error_cqe_handle (txq, (volatile struct mlx5_err_cqe *)cqe); - } else { - /* Normal transmit completion. */ - ++txq->cq_ci; - rte_cio_rmb(); - txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter); - cseg = (volatile struct mlx5_wqe_cseg *) - (txq->wqes + (txq->wqe_pi & txq->wqe_m)); - tail = cseg->misc; + /* + * Flush buffers, update consuming index + * if recovery succeeded. 
Otherwise + * just try to recover later. + */ + last_cqe = NULL; + break; } + /* Normal transmit completion. */ + ++txq->cq_ci; + last_cqe = cqe; #ifndef NDEBUG if (txq->cq_pi) --txq->cq_pi; #endif - if (likely(tail != txq->elts_tail)) { - /* Free data buffers from elts. */ - mlx5_tx_free_elts(txq, tail, olx); - assert(tail == txq->elts_tail); - } - update = true; - } while (true); + /* + * We have to restrict the amount of processed CQEs + * in one tx_burst routine call. The CQ may be large + * and many CQEs may be updated by the NIC in one + * transaction. Buffers freeing is time consuming, + * multiple iterations may introduce significant + * latency. + */ + } while (--count); + mlx5_tx_comp_flush(txq, last_cqe, ret, olx); } /** @@ -2065,28 +2137,35 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, * * @param txq * Pointer to TX queue structure. - * @param n_mbuf - * Number of mbuf not stored yet in elts array. * @param loc * Pointer to burst routine local context. + * @param multi, + * Routine is called from multi-segment sending loop, + * do not correct the elts_head according to the pkts_copy. * @param olx * Configured Tx offloads mask. It is fully defined at * compile time and may be used for optimization. */ static __rte_always_inline void mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, - unsigned int n_mbuf, struct mlx5_txq_local *restrict loc, - unsigned int olx __rte_unused) + bool multi, + unsigned int olx) { - uint16_t head = txq->elts_head + n_mbuf; + uint16_t head = txq->elts_head; + unsigned int part; + part = (MLX5_TXOFF_CONFIG(INLINE) || multi) ? + 0 : loc->pkts_sent - loc->pkts_copy; + head += part; if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || - (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres) { + (MLX5_TXOFF_CONFIG(INLINE) && + (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { volatile struct mlx5_wqe *last = loc->wqe_last; txq->elts_comp = head; - txq->wqe_comp = txq->wqe_ci; + if (MLX5_TXOFF_CONFIG(INLINE)) + txq->wqe_comp = txq->wqe_ci; /* Request unconditional completion on last WQE. */ last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET); @@ -2200,8 +2279,8 @@ mlx5_tx_eseg_none(struct mlx5_txq_data *restrict txq __rte_unused, es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); /* Fill metadata field if needed. */ es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? - loc->mbuf->ol_flags & PKT_TX_METADATA ? - loc->mbuf->tx_metadata : 0 : 0; + loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? + *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; /* Engage VLAN tag insertion feature if requested. */ if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { @@ -2260,8 +2339,8 @@ mlx5_tx_eseg_dmin(struct mlx5_txq_data *restrict txq __rte_unused, es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); /* Fill metadata field if needed. */ es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? - loc->mbuf->ol_flags & PKT_TX_METADATA ? - loc->mbuf->tx_metadata : 0 : 0; + loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? + *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; static_assert(MLX5_ESEG_MIN_INLINE_SIZE == (sizeof(uint16_t) + sizeof(rte_v128u32_t)), @@ -2353,8 +2432,8 @@ mlx5_tx_eseg_data(struct mlx5_txq_data *restrict txq, es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); /* Fill metadata field if needed. */ es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? - loc->mbuf->ol_flags & PKT_TX_METADATA ? - loc->mbuf->tx_metadata : 0 : 0; + loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 
+ *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; static_assert(MLX5_ESEG_MIN_INLINE_SIZE == (sizeof(uint16_t) + sizeof(rte_v128u32_t)), @@ -2547,8 +2626,8 @@ mlx5_tx_eseg_mdat(struct mlx5_txq_data *restrict txq, es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); /* Fill metadata field if needed. */ es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? - loc->mbuf->ol_flags & PKT_TX_METADATA ? - loc->mbuf->tx_metadata : 0 : 0; + loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? + *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; static_assert(MLX5_ESEG_MIN_INLINE_SIZE == (sizeof(uint16_t) + sizeof(rte_v128u32_t)), @@ -2558,7 +2637,7 @@ mlx5_tx_eseg_mdat(struct mlx5_txq_data *restrict txq, sizeof(struct rte_vlan_hdr) + 2 * RTE_ETHER_ADDR_LEN), "invalid Ethernet Segment data size"); - assert(inlen > MLX5_ESEG_MIN_INLINE_SIZE); + assert(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); es->inline_hdr_sz = rte_cpu_to_be_16(inlen); pdst = (uint8_t *)&es->inline_data; if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { @@ -2666,27 +2745,33 @@ mlx5_tx_dseg_iptr(struct mlx5_txq_data *restrict txq, /* Unrolled implementation of generic rte_memcpy. */ dst = (uintptr_t)&dseg->inline_data[0]; src = (uintptr_t)buf; + if (len & 0x08) { #ifdef RTE_ARCH_STRICT_ALIGN - memcpy(dst, src, len); + assert(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); + *(uint32_t *)dst = *(unaligned_uint32_t *)src; + dst += sizeof(uint32_t); + src += sizeof(uint32_t); + *(uint32_t *)dst = *(unaligned_uint32_t *)src; + dst += sizeof(uint32_t); + src += sizeof(uint32_t); #else - if (len & 0x08) { - *(uint64_t *)dst = *(uint64_t *)src; + *(uint64_t *)dst = *(unaligned_uint64_t *)src; dst += sizeof(uint64_t); src += sizeof(uint64_t); +#endif } if (len & 0x04) { - *(uint32_t *)dst = *(uint32_t *)src; + *(uint32_t *)dst = *(unaligned_uint32_t *)src; dst += sizeof(uint32_t); src += sizeof(uint32_t); } if (len & 0x02) { - *(uint16_t *)dst = *(uint16_t *)src; + *(uint16_t *)dst = *(unaligned_uint16_t *)src; dst += sizeof(uint16_t); src += sizeof(uint16_t); } if (len & 0x01) *(uint8_t *)dst = *(uint8_t *)src; -#endif } /** @@ -2791,13 +2876,14 @@ mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); buf += MLX5_DSEG_MIN_INLINE_SIZE; pdst += MLX5_DSEG_MIN_INLINE_SIZE; + len -= MLX5_DSEG_MIN_INLINE_SIZE; /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ assert(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); + if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) + pdst = (uint8_t *)txq->wqes; *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | loc->mbuf->vlan_tci); pdst += sizeof(struct rte_vlan_hdr); - if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) - pdst = (uint8_t *)txq->wqes; /* * The WQEBB space availability is checked by caller. * Here we should be aware of WQE ring buffer wraparound only. @@ -3025,6 +3111,8 @@ mlx5_tx_packet_multi_tso(struct mlx5_txq_data *restrict txq, wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, true, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3133,6 +3221,8 @@ mlx5_tx_packet_multi_send(struct mlx5_txq_data *restrict txq, } while (true); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; + /* Request CQE generation if limits are reached. 
*/ + mlx5_tx_request_completion(txq, loc, true, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3289,6 +3379,8 @@ do_align: wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, true, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3370,7 +3462,7 @@ mlx5_tx_burst_mseg(struct mlx5_txq_data *restrict txq, continue; /* Here ends the series of multi-segment packets. */ if (MLX5_TXOFF_CONFIG(TSO) && - unlikely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) + unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) return MLX5_TXCMP_CODE_TSO; return MLX5_TXCMP_CODE_SINGLE; } @@ -3498,6 +3590,8 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, --loc->elts_free; ++loc->pkts_sent; --pkts_n; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, false, olx); if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; loc->mbuf = *pkts++; @@ -3506,7 +3600,7 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, if (MLX5_TXOFF_CONFIG(MULTI) && unlikely(NB_SEGS(loc->mbuf) > 1)) return MLX5_TXCMP_CODE_MULTI; - if (unlikely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) + if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) return MLX5_TXCMP_CODE_SINGLE; /* Continue with the next TSO packet. */ } @@ -3604,8 +3698,8 @@ mlx5_tx_match_empw(struct mlx5_txq_data *restrict txq __rte_unused, return false; /* Fill metadata field if needed. */ if (MLX5_TXOFF_CONFIG(METADATA) && - es->metadata != (loc->mbuf->ol_flags & PKT_TX_METADATA ? - loc->mbuf->tx_metadata : 0)) + es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? + *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0)) return false; /* There must be no VLAN packets in eMPW loop. */ if (MLX5_TXOFF_CONFIG(VLAN)) @@ -3639,7 +3733,7 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, struct mlx5_txq_local *restrict loc, unsigned int ds, unsigned int slen, - unsigned int olx __rte_unused) + unsigned int olx) { assert(!MLX5_TXOFF_CONFIG(INLINE)); #ifdef MLX5_PMD_SOFT_COUNTERS @@ -3654,6 +3748,8 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, false, olx); } /* @@ -3696,6 +3792,8 @@ mlx5_tx_idone_empw(struct mlx5_txq_data *restrict txq, loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); txq->wqe_ci += (len + 3) / 4; loc->wqe_free -= (len + 3) / 4; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, false, olx); } /** @@ -3867,6 +3965,7 @@ next_empw: if (unlikely(!loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; + pkts_n -= part; goto next_empw; } /* Packet attributes match, continue the same eMPW. */ @@ -3886,6 +3985,8 @@ next_empw: txq->wqe_ci += (2 + part + 3) / 4; loc->wqe_free -= (2 + part + 3) / 4; pkts_n -= part; + /* Request CQE generation if limits are reached. 
*/ + mlx5_tx_request_completion(txq, loc, false, olx); if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; loc->mbuf = *pkts++; @@ -3924,10 +4025,14 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, struct mlx5_wqe_dseg *restrict dseg; struct mlx5_wqe_eseg *restrict eseg; enum mlx5_txcmp_code ret; - unsigned int room, part; + unsigned int room, part, nlim; unsigned int slen = 0; -next_empw: + /* + * Limits the amount of packets in one WQE + * to improve CQE latency generation. + */ + nlim = RTE_MIN(pkts_n, MLX5_EMPW_MAX_PACKETS); /* Check whether we have minimal amount WQEs */ if (unlikely(loc->wqe_free < ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) @@ -4046,12 +4151,6 @@ next_mbuf: mlx5_tx_idone_empw(txq, loc, part, slen, olx); return MLX5_TXCMP_CODE_EXIT; } - /* Check if we have minimal room left. */ - if (room < MLX5_WQE_DSEG_SIZE) { - part -= room; - mlx5_tx_idone_empw(txq, loc, part, slen, olx); - goto next_empw; - } loc->mbuf = *pkts++; if (likely(pkts_n > 1)) rte_prefetch0(*pkts); @@ -4091,6 +4190,10 @@ next_mbuf: mlx5_tx_idone_empw(txq, loc, part, slen, olx); return MLX5_TXCMP_CODE_ERROR; } + /* Check if we have minimal room left. */ + nlim--; + if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) + break; /* * Check whether packet parameters coincide * within assumed eMPW batch: @@ -4116,7 +4219,7 @@ next_mbuf: if (unlikely(!loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; - goto next_empw; + /* Continue the loop with new eMPW session. */ } assert(false); } @@ -4357,6 +4460,8 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, } ++loc->pkts_sent; --pkts_n; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, false, olx); if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; loc->mbuf = *pkts++; @@ -4435,6 +4540,14 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); + if (unlikely(!pkts_n)) + return 0; + loc.pkts_sent = 0; + loc.pkts_copy = 0; + loc.wqe_last = NULL; + +send_loop: + loc.pkts_loop = loc.pkts_sent; /* * Check if there are some CQEs, if any: * - process an encountered errors @@ -4442,9 +4555,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, * - free related mbufs * - doorbell the NIC about processed CQEs */ - if (unlikely(!pkts_n)) - return 0; - rte_prefetch0(*pkts); + rte_prefetch0(*(pkts + loc.pkts_sent)); mlx5_tx_handle_completion(txq, olx); /* * Calculate the number of available resources - elts and WQEs. @@ -4461,10 +4572,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, loc.wqe_free = txq->wqe_s - (uint16_t)(txq->wqe_ci - txq->wqe_pi); if (unlikely(!loc.elts_free || !loc.wqe_free)) - return 0; - loc.pkts_sent = 0; - loc.pkts_copy = 0; - loc.wqe_last = NULL; + goto burst_exit; for (;;) { /* * Fetch the packet from array. Usually this is @@ -4630,11 +4738,8 @@ enter_send_single: */ assert(MLX5_TXOFF_CONFIG(INLINE) || loc.pkts_sent >= loc.pkts_copy); /* Take a shortcut if nothing is sent. */ - if (unlikely(loc.pkts_sent == 0)) - return 0; - /* Not all of the mbufs may be stored into elts yet. */ - part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; - mlx5_tx_request_completion(txq, part, &loc, olx); + if (unlikely(loc.pkts_sent == loc.pkts_loop)) + goto burst_exit; /* * Ring QP doorbell immediately after WQE building completion * to improve latencies. 
The pure software related data treatment @@ -4642,10 +4747,12 @@ enter_send_single: * processed in this thread only by the polling. */ mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, 0); + /* Not all of the mbufs may be stored into elts yet. */ + part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; if (!MLX5_TXOFF_CONFIG(INLINE) && part) { /* * There are some single-segment mbufs not stored in elts. - * It can be only if last packet was single-segment. + * It can be only if the last packet was single-segment. * The copying is gathered into one place due to it is * a good opportunity to optimize that with SIMD. * Unfortunately if inlining is enabled the gaps in @@ -4653,13 +4760,23 @@ enter_send_single: * inlined mbufs. */ mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); + loc.pkts_copy = loc.pkts_sent; + } + assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); + assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); + if (pkts_n > loc.pkts_sent) { + /* + * If burst size is large there might be no enough CQE + * fetched from completion queue and no enough resources + * freed to send all the packets. + */ + goto send_loop; } +burst_exit: #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent packets counter. */ txq->stats.opackets += loc.pkts_sent; #endif - assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); - assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); return loc.pkts_sent; } @@ -5030,7 +5147,7 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) */ olx |= MLX5_TXOFF_CONFIG_EMPW; } - if (tx_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) { + if (rte_flow_dynf_metadata_avail()) { /* We should support Flow metadata. */ olx |= MLX5_TXOFF_CONFIG_METADATA; } @@ -5106,5 +5223,3 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)"); return txoff_func[m].func; } - -
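
When a CQE reports more than one coalesced segment, the Rx burst code above calls mlx5_lro_update_hdr() to repair the packet headers, marks the mbuf with PKT_RX_LRO and stores an average segment size in tso_segsz. The toy model below covers only that mbuf bookkeeping, not the header rewrite; struct pkt and RX_LRO are invented stand-ins for rte_mbuf and PKT_RX_LRO, and the numbers in main() are arbitrary:

#include <stdint.h>
#include <stdio.h>

#define RX_LRO (1u << 0)	/* stand-in for PKT_RX_LRO */

struct pkt {
	uint32_t pkt_len;	/* total coalesced length taken from the CQE */
	uint32_t ol_flags;
	uint16_t tso_segsz;	/* average segment size reported to the app */
};

/* Same decision the Rx burst makes for a CQE with lro_num_seg > 1. */
static void
mark_lro(struct pkt *p, uint32_t len, uint8_t lro_num_seg)
{
	p->pkt_len = len;
	if (lro_num_seg > 1) {
		p->ol_flags |= RX_LRO;
		p->tso_segsz = len / lro_num_seg;
	}
}

int main(void)
{
	struct pkt p = { 0, 0, 0 };

	/* e.g. 20 TCP segments coalesced by the NIC into one buffer */
	mark_lro(&p, 29014, 20);
	printf("len=%u segsz=%u lro=%d\n", (unsigned int)p.pkt_len,
	       (unsigned int)p.tso_segsz, (p.ol_flags & RX_LRO) != 0);
	return 0;
}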
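
The reworked Tx completion handler above caps the number of CQEs processed per tx_burst() call (the counter initialized from MLX5_TX_COMP_MAX_CQE) and defers both the buffer freeing and the CQ consumer-index doorbell to the single mlx5_tx_comp_flush() call after the loop. As a rough illustration of that "bounded poll, single flush" shape, here is a standalone sketch over a plain software ring; every name in it (comp_ring, MAX_CQE_PER_POLL, ...) is an invented stand-in rather than an mlx5 or DPDK symbol:

#include <stdbool.h>
#include <stdint.h>

#define MAX_CQE_PER_POLL 16	/* cap the work done per burst call */

struct comp_entry {
	bool valid;		/* entry has been posted by the "hardware" */
	uint16_t last_done;	/* index of the last completed buffer */
};

struct comp_ring {
	struct comp_entry *entries;
	uint16_t size_mask;	/* ring size is a power of two */
	uint16_t ci;		/* software consumer index */
	uint16_t tail;		/* buffers are freed up to this index */
};

/* Stand-in for mlx5_tx_free_elts(): release buffers up to 'tail'. */
static void
free_buffers(struct comp_ring *ring, uint16_t tail)
{
	ring->tail = tail;
}

/*
 * Poll at most MAX_CQE_PER_POLL completions, remember only the newest one,
 * then free buffers and publish the consumer index once at the end (the
 * driver writes the CQ doorbell record at that point).
 */
static void
handle_completions(struct comp_ring *ring)
{
	const struct comp_entry *last = NULL;
	unsigned int count = MAX_CQE_PER_POLL;

	do {
		struct comp_entry *e =
			&ring->entries[ring->ci & ring->size_mask];

		if (!e->valid)
			break;		/* no new completions */
		e->valid = false;
		ring->ci++;
		last = e;		/* defer the flush to the end */
	} while (--count);
	if (last != NULL && last->last_done != ring->tail)
		free_buffers(ring, last->last_done);
}

int main(void)
{
	struct comp_entry entries[8] = { { true, 3 }, { true, 7 } };
	struct comp_ring ring = { entries, 7, 0, 0 };

	handle_completions(&ring);
	/* ring.ci == 2 and ring.tail == 7: one flush for two completions. */
	return 0;
}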
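
The inline-data copy in mlx5_tx_dseg_iptr() is rewritten above as an unrolled sequence that uses unaligned-typed loads (unaligned_uint64_t and friends) on the source side, with the 8-byte step split into two 32-bit copies on RTE_ARCH_STRICT_ALIGN builds. Below is a self-contained sketch of that unrolled tail copy: the unaligned_u* typedefs are local stand-ins mimicking DPDK's unaligned_uintNN_t from rte_common.h, the destination is assumed to be aligned (as the WQE inline area is treated in the driver), and none of the code is taken from the mlx5 PMD itself:

#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for DPDK's unaligned_uintNN_t typedefs (rte_common.h). */
typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
typedef uint32_t unaligned_u32 __attribute__((aligned(1)));
typedef uint16_t unaligned_u16 __attribute__((aligned(1)));

/*
 * Copy len (< 16) bytes from a possibly misaligned source into an aligned
 * destination with at most four stores, branching on the length bits the
 * same way the inline-data tail copy does.
 */
static void
copy_inline_tail(uint8_t *dst, const uint8_t *src, unsigned int len)
{
	if (len & 0x08) {
		*(uint64_t *)dst = *(const unaligned_u64 *)src;
		dst += sizeof(uint64_t);
		src += sizeof(uint64_t);
	}
	if (len & 0x04) {
		*(uint32_t *)dst = *(const unaligned_u32 *)src;
		dst += sizeof(uint32_t);
		src += sizeof(uint32_t);
	}
	if (len & 0x02) {
		*(uint16_t *)dst = *(const unaligned_u16 *)src;
		dst += sizeof(uint16_t);
		src += sizeof(uint16_t);
	}
	if (len & 0x01)
		*dst = *src;
}

int main(void)
{
	/* The source is offset by one byte on purpose to misalign it. */
	static const uint8_t src[16] = "_0123456789abcd";
	uint8_t dst[16] __attribute__((aligned(8))) = { 0 };

	copy_inline_tail(dst, src + 1, 15);
	printf("%s\n", (const char *)dst);	/* prints "0123456789abcd" */
	return 0;
}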