X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;ds=sidebyside;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=8acd8cd2a4a9802301163ea2c24b2cf514d2052c;hb=8d0715f054b44750c2e1553d6af29487bcea25d8;hp=6627b54c9b2603f9e548785ee0fcf81ea0390226;hpb=2579543f604daf459bd8c939c52c6fc43b235f43;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 6627b54c9b..8acd8cd2a4 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -2027,13 +2027,13 @@ static void mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, unsigned int olx __rte_unused) { + unsigned int count = MLX5_TX_COMP_MAX_CQE; bool update = false; + uint16_t tail = txq->elts_tail; int ret; do { - volatile struct mlx5_wqe_cseg *cseg; volatile struct mlx5_cqe *cqe; - uint16_t tail; cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); @@ -2041,19 +2041,21 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, if (likely(ret != MLX5_CQE_STATUS_ERR)) { /* No new CQEs in completion queue. */ assert(ret == MLX5_CQE_STATUS_HW_OWN); - if (likely(update)) { - /* Update the consumer index. */ - rte_compiler_barrier(); - *txq->cq_db = - rte_cpu_to_be_32(txq->cq_ci); - } - return; + break; } /* Some error occurred, try to restart. */ rte_wmb(); tail = mlx5_tx_error_cqe_handle (txq, (volatile struct mlx5_err_cqe *)cqe); + if (likely(tail != txq->elts_tail)) { + mlx5_tx_free_elts(txq, tail, olx); + assert(tail == txq->elts_tail); + } + /* Allow flushing all CQEs from the queue. */ + count = txq->cqe_s; } else { + volatile struct mlx5_wqe_cseg *cseg; + /* Normal transmit completion. */ ++txq->cq_ci; rte_cio_rmb(); @@ -2066,13 +2068,27 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, if (txq->cq_pi) --txq->cq_pi; #endif - if (likely(tail != txq->elts_tail)) { - /* Free data buffers from elts. */ - mlx5_tx_free_elts(txq, tail, olx); - assert(tail == txq->elts_tail); - } update = true; - } while (true); + /* + * We have to restrict the amount of processed CQEs + * in one tx_burst routine call. The CQ may be large + * and many CQEs may be updated by the NIC in one + * transaction. Buffers freeing is time consuming, + * multiple iterations may introduce significant + * latency. + */ + } while (--count); + if (likely(tail != txq->elts_tail)) { + /* Free data buffers from elts. */ + mlx5_tx_free_elts(txq, tail, olx); + assert(tail == txq->elts_tail); + } + if (likely(update)) { + /* Update the consumer index. */ + rte_compiler_barrier(); + *txq->cq_db = + rte_cpu_to_be_32(txq->cq_ci); + } } /** @@ -2082,8 +2098,6 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, * * @param txq * Pointer to TX queue structure. - * @param n_mbuf - * Number of mbuf not stored yet in elts array. * @param loc * Pointer to burst routine local context. * @param olx @@ -2092,18 +2106,23 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, */ static __rte_always_inline void mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, - unsigned int n_mbuf, struct mlx5_txq_local *restrict loc, - unsigned int olx __rte_unused) + unsigned int olx) { - uint16_t head = txq->elts_head + n_mbuf; + uint16_t head = txq->elts_head; + unsigned int part; + part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc->pkts_sent - + (MLX5_TXOFF_CONFIG(MULTI) ? loc->pkts_copy : 0); + head += part; if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || - (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres) { + (MLX5_TXOFF_CONFIG(INLINE) && + (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { volatile struct mlx5_wqe *last = loc->wqe_last; txq->elts_comp = head; - txq->wqe_comp = txq->wqe_ci; + if (MLX5_TXOFF_CONFIG(INLINE)) + txq->wqe_comp = txq->wqe_ci; /* Request unconditional completion on last WQE. */ last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET); @@ -3042,6 +3061,8 @@ mlx5_tx_packet_multi_tso(struct mlx5_txq_data *restrict txq, wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3150,6 +3171,8 @@ mlx5_tx_packet_multi_send(struct mlx5_txq_data *restrict txq, } while (true); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3306,6 +3329,8 @@ do_align: wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3515,6 +3540,8 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, --loc->elts_free; ++loc->pkts_sent; --pkts_n; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, olx); if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; loc->mbuf = *pkts++; @@ -3656,7 +3683,7 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, struct mlx5_txq_local *restrict loc, unsigned int ds, unsigned int slen, - unsigned int olx __rte_unused) + unsigned int olx) { assert(!MLX5_TXOFF_CONFIG(INLINE)); #ifdef MLX5_PMD_SOFT_COUNTERS @@ -3671,6 +3698,8 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, olx); } /* @@ -3713,6 +3742,8 @@ mlx5_tx_idone_empw(struct mlx5_txq_data *restrict txq, loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); txq->wqe_ci += (len + 3) / 4; loc->wqe_free -= (len + 3) / 4; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, olx); } /** @@ -3884,6 +3915,7 @@ next_empw: if (unlikely(!loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; + pkts_n -= part; goto next_empw; } /* Packet attributes match, continue the same eMPW. */ @@ -3903,6 +3935,8 @@ next_empw: txq->wqe_ci += (2 + part + 3) / 4; loc->wqe_free -= (2 + part + 3) / 4; pkts_n -= part; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, olx); if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; loc->mbuf = *pkts++; @@ -3941,10 +3975,14 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, struct mlx5_wqe_dseg *restrict dseg; struct mlx5_wqe_eseg *restrict eseg; enum mlx5_txcmp_code ret; - unsigned int room, part; + unsigned int room, part, nlim; unsigned int slen = 0; -next_empw: + /* + * Limits the amount of packets in one WQE + * to improve CQE latency generation. + */ + nlim = RTE_MIN(pkts_n, MLX5_EMPW_MAX_PACKETS); /* Check whether we have minimal amount WQEs */ if (unlikely(loc->wqe_free < ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) @@ -4063,12 +4101,6 @@ next_mbuf: mlx5_tx_idone_empw(txq, loc, part, slen, olx); return MLX5_TXCMP_CODE_EXIT; } - /* Check if we have minimal room left. */ - if (room < MLX5_WQE_DSEG_SIZE) { - part -= room; - mlx5_tx_idone_empw(txq, loc, part, slen, olx); - goto next_empw; - } loc->mbuf = *pkts++; if (likely(pkts_n > 1)) rte_prefetch0(*pkts); @@ -4108,6 +4140,10 @@ next_mbuf: mlx5_tx_idone_empw(txq, loc, part, slen, olx); return MLX5_TXCMP_CODE_ERROR; } + /* Check if we have minimal room left. */ + nlim--; + if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) + break; /* * Check whether packet parameters coincide * within assumed eMPW batch: @@ -4133,7 +4169,7 @@ next_mbuf: if (unlikely(!loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; - goto next_empw; + /* Continue the loop with new eMPW session. */ } assert(false); } @@ -4374,6 +4410,8 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, } ++loc->pkts_sent; --pkts_n; + /* Request CQE generation if limits are reached. */ + mlx5_tx_request_completion(txq, loc, olx); if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; loc->mbuf = *pkts++; @@ -4649,9 +4687,6 @@ enter_send_single: /* Take a shortcut if nothing is sent. */ if (unlikely(loc.pkts_sent == 0)) return 0; - /* Not all of the mbufs may be stored into elts yet. */ - part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; - mlx5_tx_request_completion(txq, part, &loc, olx); /* * Ring QP doorbell immediately after WQE building completion * to improve latencies. The pure software related data treatment @@ -4659,10 +4694,13 @@ enter_send_single: * processed in this thread only by the polling. */ mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, 0); + /* Not all of the mbufs may be stored into elts yet. */ + part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - + (MLX5_TXOFF_CONFIG(MULTI) ? loc.pkts_copy : 0); if (!MLX5_TXOFF_CONFIG(INLINE) && part) { /* * There are some single-segment mbufs not stored in elts. - * It can be only if last packet was single-segment. + * It can be only if the last packet was single-segment. * The copying is gathered into one place due to it is * a good opportunity to optimize that with SIMD. * Unfortunately if inlining is enabled the gaps in @@ -5123,5 +5161,3 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)"); return txoff_func[m].func; } - -