X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx.c;h=f5409774fcfc9958d3801094bc9a1611b40972bb;hb=8de0c42019260813b71699748a08bc94b608b5d9;hp=a4fc388d2293a751aeda88f4821ca3728a805c9d;hpb=5a93e173b874e549f0adac9796f7284837a98566;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index a4fc388d22..f5409774fc 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -654,9 +654,10 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) * Pointer to the error CQE. * * @return - * The last Tx buffer element to free. + * Negative value if queue recovery failed, + * the last Tx buffer element to free otherwise. */ -uint16_t +int mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq, volatile struct mlx5_err_cqe *err_cqe) { @@ -706,6 +707,7 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq, return txq->elts_head; } /* Recovering failed - try again later on the same WQE. */ + return -1; } else { txq->cq_ci++; } @@ -2009,6 +2011,45 @@ mlx5_tx_copy_elts(struct mlx5_txq_data *restrict txq, (pkts_n - part) * sizeof(struct rte_mbuf *)); } +/** + * Update completion queue consuming index via doorbell + * and flush the completed data buffers. + * + * @param txq + * Pointer to TX queue structure. + * @param valid CQE pointer + * if not NULL update txq->wqe_pi and flush the buffers + * @param itail + * if not negative - flush the buffers till this index. + * @param olx + * Configured Tx offloads mask. It is fully defined at + * compile time and may be used for optimization. + */ +static __rte_always_inline void +mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq, + volatile struct mlx5_cqe *last_cqe, + int itail, + unsigned int olx __rte_unused) +{ + uint16_t tail; + + if (likely(last_cqe != NULL)) { + txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); + tail = ((volatile struct mlx5_wqe_cseg *) + (txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc; + } else if (itail >= 0) { + tail = (uint16_t)itail; + } else { + return; + } + rte_compiler_barrier(); + *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); + if (likely(tail != txq->elts_tail)) { + mlx5_tx_free_elts(txq, tail, olx); + assert(tail == txq->elts_tail); + } +} + /** * Manage TX completions. This routine checks the CQ for * arrived CQEs, deduces the last accomplished WQE in SQ, @@ -2028,10 +2069,11 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, unsigned int olx __rte_unused) { unsigned int count = MLX5_TX_COMP_MAX_CQE; - bool update = false; - uint16_t tail = txq->elts_tail; + volatile struct mlx5_cqe *last_cqe = NULL; int ret; + static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); + static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); do { volatile struct mlx5_cqe *cqe; @@ -2043,32 +2085,30 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, assert(ret == MLX5_CQE_STATUS_HW_OWN); break; } - /* Some error occurred, try to restart. */ + /* + * Some error occurred, try to restart. + * We have no barrier after WQE related Doorbell + * written, make sure all writes are completed + * here, before we might perform SQ reset. + */ rte_wmb(); - tail = mlx5_tx_error_cqe_handle + ret = mlx5_tx_error_cqe_handle (txq, (volatile struct mlx5_err_cqe *)cqe); - if (likely(tail != txq->elts_tail)) { - mlx5_tx_free_elts(txq, tail, olx); - assert(tail == txq->elts_tail); - } - /* Allow flushing all CQEs from the queue. */ - count = txq->cqe_s; - } else { - volatile struct mlx5_wqe_cseg *cseg; - - /* Normal transmit completion. */ - ++txq->cq_ci; - rte_cio_rmb(); - txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter); - cseg = (volatile struct mlx5_wqe_cseg *) - (txq->wqes + (txq->wqe_pi & txq->wqe_m)); - tail = cseg->misc; + /* + * Flush buffers, update consuming index + * if recovery succeeded. Otherwise + * just try to recover later. + */ + last_cqe = NULL; + break; } + /* Normal transmit completion. */ + ++txq->cq_ci; + last_cqe = cqe; #ifndef NDEBUG if (txq->cq_pi) --txq->cq_pi; #endif - update = true; /* * We have to restrict the amount of processed CQEs * in one tx_burst routine call. The CQ may be large @@ -2078,17 +2118,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, * latency. */ } while (--count); - if (likely(tail != txq->elts_tail)) { - /* Free data buffers from elts. */ - mlx5_tx_free_elts(txq, tail, olx); - assert(tail == txq->elts_tail); - } - if (likely(update)) { - /* Update the consumer index. */ - rte_compiler_barrier(); - *txq->cq_db = - rte_cpu_to_be_32(txq->cq_ci); - } + mlx5_tx_comp_flush(txq, last_cqe, ret, olx); } /** @@ -2100,6 +2130,9 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, * Pointer to TX queue structure. * @param loc * Pointer to burst routine local context. + * @param multi, + * Routine is called from multi-segment sending loop, + * do not correct the elts_head according to the pkts_copy. * @param olx * Configured Tx offloads mask. It is fully defined at * compile time and may be used for optimization. @@ -2107,13 +2140,14 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, static __rte_always_inline void mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, struct mlx5_txq_local *restrict loc, + bool multi, unsigned int olx) { uint16_t head = txq->elts_head; unsigned int part; - part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc->pkts_sent - - (MLX5_TXOFF_CONFIG(MULTI) ? loc->pkts_copy : 0); + part = (MLX5_TXOFF_CONFIG(INLINE) || multi) ? + 0 : loc->pkts_sent - loc->pkts_copy; head += part; if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || (MLX5_TXOFF_CONFIG(INLINE) && @@ -2594,7 +2628,7 @@ mlx5_tx_eseg_mdat(struct mlx5_txq_data *restrict txq, sizeof(struct rte_vlan_hdr) + 2 * RTE_ETHER_ADDR_LEN), "invalid Ethernet Segment data size"); - assert(inlen > MLX5_ESEG_MIN_INLINE_SIZE); + assert(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); es->inline_hdr_sz = rte_cpu_to_be_16(inlen); pdst = (uint8_t *)&es->inline_data; if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { @@ -2827,13 +2861,14 @@ mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); buf += MLX5_DSEG_MIN_INLINE_SIZE; pdst += MLX5_DSEG_MIN_INLINE_SIZE; + len -= MLX5_DSEG_MIN_INLINE_SIZE; /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ assert(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); + if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) + pdst = (uint8_t *)txq->wqes; *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | loc->mbuf->vlan_tci); pdst += sizeof(struct rte_vlan_hdr); - if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) - pdst = (uint8_t *)txq->wqes; /* * The WQEBB space availability is checked by caller. * Here we should be aware of WQE ring buffer wraparound only. @@ -3062,7 +3097,7 @@ mlx5_tx_packet_multi_tso(struct mlx5_txq_data *restrict txq, txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; /* Request CQE generation if limits are reached. */ - mlx5_tx_request_completion(txq, loc, olx); + mlx5_tx_request_completion(txq, loc, true, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3172,7 +3207,7 @@ mlx5_tx_packet_multi_send(struct mlx5_txq_data *restrict txq, txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; /* Request CQE generation if limits are reached. */ - mlx5_tx_request_completion(txq, loc, olx); + mlx5_tx_request_completion(txq, loc, true, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3330,7 +3365,7 @@ do_align: txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; /* Request CQE generation if limits are reached. */ - mlx5_tx_request_completion(txq, loc, olx); + mlx5_tx_request_completion(txq, loc, true, olx); return MLX5_TXCMP_CODE_MULTI; } @@ -3412,7 +3447,7 @@ mlx5_tx_burst_mseg(struct mlx5_txq_data *restrict txq, continue; /* Here ends the series of multi-segment packets. */ if (MLX5_TXOFF_CONFIG(TSO) && - unlikely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) + unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) return MLX5_TXCMP_CODE_TSO; return MLX5_TXCMP_CODE_SINGLE; } @@ -3541,7 +3576,7 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, ++loc->pkts_sent; --pkts_n; /* Request CQE generation if limits are reached. */ - mlx5_tx_request_completion(txq, loc, olx); + mlx5_tx_request_completion(txq, loc, false, olx); if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; loc->mbuf = *pkts++; @@ -3550,7 +3585,7 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, if (MLX5_TXOFF_CONFIG(MULTI) && unlikely(NB_SEGS(loc->mbuf) > 1)) return MLX5_TXCMP_CODE_MULTI; - if (unlikely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) + if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) return MLX5_TXCMP_CODE_SINGLE; /* Continue with the next TSO packet. */ } @@ -3699,7 +3734,7 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, txq->wqe_ci += (ds + 3) / 4; loc->wqe_free -= (ds + 3) / 4; /* Request CQE generation if limits are reached. */ - mlx5_tx_request_completion(txq, loc, olx); + mlx5_tx_request_completion(txq, loc, false, olx); } /* @@ -3743,7 +3778,7 @@ mlx5_tx_idone_empw(struct mlx5_txq_data *restrict txq, txq->wqe_ci += (len + 3) / 4; loc->wqe_free -= (len + 3) / 4; /* Request CQE generation if limits are reached. */ - mlx5_tx_request_completion(txq, loc, olx); + mlx5_tx_request_completion(txq, loc, false, olx); } /** @@ -3936,7 +3971,7 @@ next_empw: loc->wqe_free -= (2 + part + 3) / 4; pkts_n -= part; /* Request CQE generation if limits are reached. */ - mlx5_tx_request_completion(txq, loc, olx); + mlx5_tx_request_completion(txq, loc, false, olx); if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; loc->mbuf = *pkts++; @@ -4411,7 +4446,7 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, ++loc->pkts_sent; --pkts_n; /* Request CQE generation if limits are reached. */ - mlx5_tx_request_completion(txq, loc, olx); + mlx5_tx_request_completion(txq, loc, false, olx); if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) return MLX5_TXCMP_CODE_EXIT; loc->mbuf = *pkts++; @@ -4490,6 +4525,14 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); + if (unlikely(!pkts_n)) + return 0; + loc.pkts_sent = 0; + loc.pkts_copy = 0; + loc.wqe_last = NULL; + +send_loop: + loc.pkts_loop = loc.pkts_sent; /* * Check if there are some CQEs, if any: * - process an encountered errors @@ -4497,9 +4540,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, * - free related mbufs * - doorbell the NIC about processed CQEs */ - if (unlikely(!pkts_n)) - return 0; - rte_prefetch0(*pkts); + rte_prefetch0(*(pkts + loc.pkts_sent)); mlx5_tx_handle_completion(txq, olx); /* * Calculate the number of available resources - elts and WQEs. @@ -4516,10 +4557,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, loc.wqe_free = txq->wqe_s - (uint16_t)(txq->wqe_ci - txq->wqe_pi); if (unlikely(!loc.elts_free || !loc.wqe_free)) - return 0; - loc.pkts_sent = 0; - loc.pkts_copy = 0; - loc.wqe_last = NULL; + return loc.pkts_sent; for (;;) { /* * Fetch the packet from array. Usually this is @@ -4685,8 +4723,8 @@ enter_send_single: */ assert(MLX5_TXOFF_CONFIG(INLINE) || loc.pkts_sent >= loc.pkts_copy); /* Take a shortcut if nothing is sent. */ - if (unlikely(loc.pkts_sent == 0)) - return 0; + if (unlikely(loc.pkts_sent == loc.pkts_loop)) + return loc.pkts_sent; /* * Ring QP doorbell immediately after WQE building completion * to improve latencies. The pure software related data treatment @@ -4695,8 +4733,7 @@ enter_send_single: */ mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, 0); /* Not all of the mbufs may be stored into elts yet. */ - part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - - (MLX5_TXOFF_CONFIG(MULTI) ? loc.pkts_copy : 0); + part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; if (!MLX5_TXOFF_CONFIG(INLINE) && part) { /* * There are some single-segment mbufs not stored in elts. @@ -4708,6 +4745,7 @@ enter_send_single: * inlined mbufs. */ mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); + loc.pkts_copy = loc.pkts_sent; } #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent packets counter. */ @@ -4715,6 +4753,14 @@ enter_send_single: #endif assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); + if (pkts_n > loc.pkts_sent) { + /* + * If burst size is large there might be no enough CQE + * fetched from completion queue and no enough resources + * freed to send all the packets. + */ + goto send_loop; + } return loc.pkts_sent; } @@ -5161,5 +5207,3 @@ mlx5_select_tx_function(struct rte_eth_dev *dev) DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)"); return txoff_func[m].func; } - -