X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx_vec_sse.h;h=df66c2fbde6a135c2a7d399692253a962110febb;hb=3669a1af893088064778f14fba0c2ca88819e095;hp=95e41d51af996cf32cffea58c7da535be6378cbe;hpb=380a7aab1ae2894fa598d8c51962778670996e4a;p=dpdk.git

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 95e41d51af..df66c2fbde 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -62,7 +62,7 @@
  * @param txq
  *   Pointer to TX queue structure.
  * @param dseg
- *   Pointer to buffer descriptor to be writen.
+ *   Pointer to buffer descriptor to be written.
  * @param pkts
  *   Pointer to array of packets to be sent.
  * @param n
@@ -135,6 +135,8 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 
 	assert(elts_n > pkts_n);
 	mlx5_tx_complete(txq);
+	/* A CQE slot must always be available. */
+	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
 	if (unlikely(!pkts_n))
 		return 0;
 	for (n = 0; n < pkts_n; ++n) {
@@ -148,7 +150,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 			8, 9, 10, 11, /* bswap32 */
 			4, 5, 6, 7, /* bswap32 */
 			0, 1, 2, 3 /* bswap32 */);
-		uint8_t cs_flags = 0;
+		uint8_t cs_flags;
 		uint16_t max_elts;
 		uint16_t max_wqe;
 		__m128i *t_wqe, *dseg;
@@ -170,22 +172,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 		}
 		wqe = &((volatile struct mlx5_wqe64 *)
 			txq->wqes)[wqe_ci & wq_mask].hdr;
-		if (buf->ol_flags &
-		     (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
-			const uint64_t is_tunneled =
-				buf->ol_flags & (PKT_TX_TUNNEL_GRE |
-						 PKT_TX_TUNNEL_VXLAN);
-
-			if (is_tunneled && txq->tunnel_en) {
-				cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
-					   MLX5_ETH_WQE_L4_INNER_CSUM;
-				if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
-					cs_flags |= MLX5_ETH_WQE_L3_CSUM;
-			} else {
-				cs_flags = MLX5_ETH_WQE_L3_CSUM |
-					   MLX5_ETH_WQE_L4_CSUM;
-			}
-		}
+		cs_flags = txq_ol_cksum_to_cs(txq, buf);
 		/* Title WQEBB pointer. */
 		t_wqe = (__m128i *)wqe;
 		dseg = (__m128i *)(wqe + 1);
@@ -221,7 +208,9 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
 		wqe->ctrl[2] = rte_cpu_to_be_32(8);
 		wqe->ctrl[3] = txq->elts_head;
 		txq->elts_comp = 0;
+#ifndef NDEBUG
 		++txq->cq_pi;
+#endif
 	}
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	txq->stats.opackets += n;
@@ -234,7 +223,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
  * Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet,
  * it returns to make it processed by txq_scatter_v(). All the packets in
  * the pkts list should be single segment packets having same offload flags.
- * This must be checked by txq_check_multiseg() and txq_calc_offload().
+ * This must be checked by txq_count_contig_single_seg() and txq_calc_offload().
  *
  * @param txq
  *   Pointer to TX queue structure.
@@ -283,6 +272,8 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	assert(elts_n > pkts_n);
 	mlx5_tx_complete(txq);
 	max_elts = (elts_n - (elts_head - txq->elts_tail));
+	/* A CQE slot must always be available. */
+	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
 	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
 	pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
 	assert(pkts_n <= MLX5_DSEG_MAX - nb_dword_in_hdr);
@@ -322,7 +313,9 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	} else {
 		/* Request a completion. */
 		txq->elts_comp = 0;
+#ifndef NDEBUG
 		++txq->cq_pi;
+#endif
 		comp_req = 8;
 	}
 	/* Fill CTRL in the header. */
@@ -344,7 +337,7 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	txq->wqe_ci += (nb_dword_in_hdr + pkts_n + (nb_dword_per_wqebb - 1)) /
 		       nb_dword_per_wqebb;
 	/* Ring QP doorbell. */
-	mlx5_tx_dbrec(txq, wqe);
+	mlx5_tx_dbrec_cond_wmb(txq, wqe, pkts_n < MLX5_VPMD_TX_MAX_BURST);
 	return pkts_n;
 }
 
@@ -591,7 +584,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
 			_mm_set_epi32(0xffffff00, 0xffffff00,
 				      0xffffff00, 0xffffff00);
 		const __m128i fdir_flags = _mm_set1_epi32(PKT_RX_FDIR);
-		const __m128i fdir_id_flags = _mm_set1_epi32(PKT_RX_FDIR_ID);
+		__m128i fdir_id_flags = _mm_set1_epi32(PKT_RX_FDIR_ID);
 		__m128i flow_tag, invalid_mask;
 
 		flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask);
@@ -601,7 +594,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
 				     _mm_andnot_si128(invalid_mask,
 						      fdir_flags));
 		/* Mask out invalid entries. */
-		flow_tag = _mm_andnot_si128(invalid_mask, flow_tag);
+		fdir_id_flags = _mm_andnot_si128(invalid_mask, fdir_id_flags);
 		/* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
 		ol_flags = _mm_or_si128(ol_flags,
 					_mm_andnot_si128(
@@ -669,12 +662,16 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
  *   Array to store received packets.
  * @param pkts_n
  *   Maximum number of packets in array.
+ * @param[out] err
+ *   Pointer to a flag. Set non-zero value if pkts array has at least one error
+ *   packet to handle.
  *
  * @return
  *   Number of packets received including errors (<= pkts_n).
  */
 static inline uint16_t
-rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
+	    uint64_t *err)
 {
 	const uint16_t q_n = 1 << rxq->cqe_n;
 	const uint16_t q_mask = q_n - 1;
@@ -836,7 +833,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		/* B.2 copy mbuf pointers. */
 		_mm_storeu_si128((__m128i *)&pkts[pos], mbp1);
 		_mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2);
-		rte_compiler_barrier();
+		rte_cio_rmb();
 		/* C.1 load remained CQE data and extract necessary fields. */
 		cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]);
 		cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]);
@@ -936,7 +933,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		opcode = _mm_packs_epi32(opcode, zero);
 		opcode = _mm_andnot_si128(invalid_mask, opcode);
 		/* D.4 mark if any error is set */
-		rxq->pending_err |= !!_mm_cvtsi128_si64(opcode);
+		*err |= _mm_cvtsi128_si64(opcode);
 		/* D.5 fill in mbuf - rearm_data and packet_type. */
 		rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]);
 		if (rxq->hw_timestamp) {
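
A note on the Tx changes: the assertions added in the @@ -135 and @@ -283 hunks check that a CQE slot is free before a completion is requested. (1u << txq->cqe_n) is the CQ size and (txq->cq_pi - txq->cq_ci) is the number of completions requested but not yet consumed, so the expression is zero exactly when the CQ would overflow. Because the ++txq->cq_pi increments are now wrapped in #ifndef NDEBUG, the counter is maintained purely for these assertions, and both compile away in release builds (assert() is a no-op under NDEBUG). A minimal standalone illustration of the wrap-safe arithmetic, using assumed values and an explicit cast that mirrors the uint16_t field types:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint16_t cqe_n = 6;     /* log2 of CQ size: 64 entries (assumed) */
	uint16_t cq_pi = 65535; /* completions requested, about to wrap */
	uint16_t cq_ci = 65533; /* completions consumed */

	/* 64 - (uint16_t)(65535 - 65533) = 62 free slots: holds. */
	assert((1u << cqe_n) - (uint16_t)(cq_pi - cq_ci));
	/* Had cq_pi - cq_ci reached 64, the expression would be zero
	 * and the assertion would fire. */
	return 0;
}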
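
The @@ -170 hunk replaces the open-coded checksum-flag ladder with a call to txq_ol_cksum_to_cs(). Reconstructing from the deleted lines, the helper can be expected to behave roughly as follows; this is a sketch inferred from the removed hunk (assuming the driver's internal headers for the types and flag macros), not a quote of the actual definition in mlx5_rxtx.h:

/* Sketch: translate mbuf ol_flags into WQE checksum flags, as the
 * removed inline block did.  Reconstructed, not copied from the tree. */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct mlx5_txq_data *txq, struct rte_mbuf *buf)
{
	uint8_t cs_flags = 0;

	if (buf->ol_flags &
	    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
		const uint64_t is_tunneled =
			buf->ol_flags & (PKT_TX_TUNNEL_GRE |
					 PKT_TX_TUNNEL_VXLAN);

		if (is_tunneled && txq->tunnel_en) {
			/* Checksum the inner headers; outer L3 on request. */
			cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
				   MLX5_ETH_WQE_L4_INNER_CSUM;
			if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
				cs_flags |= MLX5_ETH_WQE_L3_CSUM;
		} else {
			cs_flags = MLX5_ETH_WQE_L3_CSUM |
				   MLX5_ETH_WQE_L4_CSUM;
		}
	}
	return cs_flags;
}

Factoring this out lets txq_scatter_v() and the other burst paths share one translation instead of duplicating it, which is also why cs_flags no longer needs a zero initializer at its declaration.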
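
On the Rx side, the rxq_burst_v() signature change replaces the rxq->pending_err field with a caller-owned error flag: per the @@ -936 hunk, *err now ORs in the raw error opcode bits, so it is non-zero if any packet in the burst carried an error. A caller would use it along these lines; the wrapper shape and the rxq_handle_pending_error() name are assumptions for illustration, not taken from this diff:

/* Hypothetical wrapper showing the new out-parameter contract. */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint64_t err = 0; /* replaces the per-queue pending_err field */
	uint16_t nb_rx;

	nb_rx = rxq_burst_v(rxq, pkts, pkts_n, &err);
	if (unlikely(err))
		nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx);
	return nb_rx;
}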