X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;ds=sidebyside;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx_vec_sse.c;h=075dce9087d7e9be0e2bae6fd46973296df0d8a1;hb=c68f27a2a48f7c0276d4032e4ca8f11d4cb5ea9d;hp=74e595386c89ce1cd02674dae19d774e571120da;hpb=ea16068c00647fb6c7fe8704d8ad2adff6bf378f;p=dpdk.git

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 74e595386c..075dce9087 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -43,22 +43,14 @@
 #pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include
-#include
-#include
+#include
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
 #include
 #include
 #include
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
 
 #include "mlx5.h"
 #include "mlx5_utils.h"
@@ -119,8 +111,7 @@ txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
 }
 
 /**
- * Count the number of continuous single segment packets. The first packet must
- * be a single segment packet.
+ * Count the number of continuous single segment packets.
  *
  * @param pkts
  *   Pointer to array of packets.
@@ -137,9 +128,8 @@ txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
 
 	if (!pkts_n)
 		return 0;
-	assert(NB_SEGS(pkts[0]) == 1);
 	/* Count the number of continuous single segment packets. */
-	for (pos = 1; pos < pkts_n; ++pos)
+	for (pos = 0; pos < pkts_n; ++pos)
 		if (NB_SEGS(pkts[pos]) > 1)
 			break;
 	return pos;
@@ -257,6 +247,10 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (segs_n == 1 ||
 		    max_elts < segs_n || max_wqe < 2)
 			break;
+		if (segs_n > MLX5_MPW_DSEG_MAX) {
+			txq->stats.oerrors++;
+			break;
+		}
 		wqe = &((volatile struct mlx5_wqe64 *)
 			txq->wqes)[wqe_ci & wq_mask].hdr;
 		if (buf->ol_flags &
@@ -298,7 +292,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		/* Fill ESEG in the header. */
 		_mm_store_si128(t_wqe + 1,
 				_mm_set_epi16(0, 0, 0, 0,
-					      htons(len), cs_flags,
+					      rte_cpu_to_be_16(len), cs_flags,
 					      0, 0));
 		txq->wqe_ci = wqe_ci;
 	}
@@ -307,7 +301,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	txq->elts_comp += (uint16_t)(elts_head - txq->elts_head);
 	txq->elts_head = elts_head;
 	if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
-		wqe->ctrl[2] = htonl(8);
+		wqe->ctrl[2] = rte_cpu_to_be_32(8);
 		wqe->ctrl[3] = txq->elts_head;
 		txq->elts_comp = 0;
 		++txq->cq_pi;
@@ -374,6 +368,7 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	max_elts = (elts_n - (elts_head - txq->elts_tail));
 	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
 	pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+	assert(pkts_n <= MLX5_DSEG_MAX - nb_dword_in_hdr);
 	if (unlikely(!pkts_n))
 		return 0;
 	elts = &(*txq->elts)[elts_head & elts_m];
@@ -568,10 +563,11 @@ rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
 		return;
 	}
 	for (i = 0; i < n; ++i)
-		wq[i].addr = htonll(rte_pktmbuf_mtod(elts[i], uintptr_t));
+		wq[i].addr = rte_cpu_to_be_64((uintptr_t)elts[i]->buf_addr +
+					      RTE_PKTMBUF_HEADROOM);
 	rxq->rq_ci += n;
 	rte_wmb();
-	*rxq->rq_db = htonl(rxq->rq_ci);
+	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 
 /**
@@ -642,6 +638,13 @@ rxq_cq_decompress_v(struct rxq *rxq,
 			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
 			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+	/*
+	 * Not to overflow elts array. Decompress next time after mbuf
+	 * replenishment.
+	 */
+	if (unlikely(mcqe_n + MLX5_VPMD_DESCS_PER_LOOP >
+		     (uint16_t)(rxq->rq_ci - rxq->cq_ci)))
+		return;
 	/*
	 * A. load mCQEs into a 128bit register.
	 * B. store rearm data to mbuf.
@@ -828,8 +831,9 @@ rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
 	ptype = _mm_and_si128(ptype, ptype_mask);
 	pinfo = _mm_and_si128(pinfo, pinfo_mask);
 	pinfo = _mm_slli_epi32(pinfo, 16);
-	ptype = _mm_or_si128(ptype, pinfo);
-	ptype = _mm_srli_epi32(ptype, 10);
+	/* Merge the ptype and pinfo fields for the ol_flags calculation. */
+	pinfo = _mm_or_si128(ptype, pinfo);
+	ptype = _mm_srli_epi32(pinfo, 10);
 	ptype = _mm_packs_epi32(ptype, zero);
 	/* Errored packets will have RTE_PTYPE_ALL_MASK. */
 	op_err = _mm_srli_epi16(op_err, 8);
@@ -885,16 +889,28 @@ rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
 {
 	uint16_t n = 0;
 	unsigned int i;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+	uint32_t err_bytes = 0;
+#endif
 
 	for (i = 0; i < pkts_n; ++i) {
 		struct rte_mbuf *pkt = pkts[i];
 
-		if (pkt->packet_type == RTE_PTYPE_ALL_MASK)
+		if (pkt->packet_type == RTE_PTYPE_ALL_MASK) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+			err_bytes += PKT_LEN(pkt);
+#endif
 			rte_pktmbuf_free_seg(pkt);
-		else
+		} else {
 			pkts[n++] = pkt;
+		}
 	}
 	rxq->stats.idropped += (pkts_n - n);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+	/* Correct counters of errored completions. */
+	rxq->stats.ipackets -= (pkts_n - n);
+	rxq->stats.ibytes -= err_bytes;
+#endif
 	rxq->pending_err = 0;
 	return n;
 }
@@ -1019,8 +1035,10 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	}
 	elts_idx = rxq->rq_pi & q_mask;
 	elts = &(*rxq->elts)[elts_idx];
-	/* Not to overflow pkts array. */
-	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
+	pkts_n = RTE_MIN(pkts_n - rcvd_pkt,
+			 (uint16_t)(rxq->rq_ci - rxq->cq_ci));
+	/* Not to overflow pkts/elts array. */
+	pkts_n = RTE_ALIGN_FLOOR(pkts_n, MLX5_VPMD_DESCS_PER_LOOP);
 	/* Not to cross queue end. */
 	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
 	if (!pkts_n)
@@ -1241,7 +1259,7 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		}
 	}
 	rte_wmb();
-	*rxq->cq_db = htonl(rxq->cq_ci);
+	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
 	return rcvd_pkt;
 }
 
@@ -1309,7 +1327,8 @@ priv_check_raw_vec_tx_support(struct priv *priv)
 int __attribute__((cold))
 priv_check_vec_tx_support(struct priv *priv)
 {
-	if (priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
+	if (!priv->tx_vec_en ||
+	    priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
 	    priv->mps != MLX5_MPW_ENHANCED ||
 	    priv->tso)
 		return -ENOTSUP;
@@ -1328,7 +1347,9 @@ priv_check_vec_tx_support(struct priv *priv)
 int __attribute__((cold))
 rxq_check_vec_support(struct rxq *rxq)
 {
-	if (rxq->sges_n != 0)
+	struct rxq_ctrl *ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+
+	if (!ctrl->priv->rx_vec_en || rxq->sges_n != 0)
 		return -ENOTSUP;
 	return 1;
 }
@@ -1347,6 +1368,8 @@ priv_check_vec_rx_support(struct priv *priv)
 {
 	uint16_t i;
 
+	if (!priv->rx_vec_en)
+		return -ENOTSUP;
 	/* All the configured queues should support. */
 	for (i = 0; i < priv->rxqs_n; ++i) {
 		struct rxq *rxq = (*priv->rxqs)[i];
@@ -1358,41 +1381,3 @@ priv_check_vec_rx_support(struct priv *priv)
 			return -ENOTSUP;
 	}
 	return 1;
 }
-
-/**
- * Prepare for vectorized RX.
- *
- * @param priv
- *   Pointer to private structure.
- */
-void
-priv_prep_vec_rx_function(struct priv *priv)
-{
-	uint16_t i;
-
-	for (i = 0; i < priv->rxqs_n; ++i) {
-		struct rxq *rxq = (*priv->rxqs)[i];
-		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
-		const uint16_t desc = 1 << rxq->elts_n;
-		int j;
-
-		assert(rxq->elts_n == rxq->cqe_n);
-		/* Initialize default rearm_data for vPMD. */
-		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
-		rte_mbuf_refcnt_set(mbuf_init, 1);
-		mbuf_init->nb_segs = 1;
-		mbuf_init->port = rxq->port_id;
-		/*
-		 * prevent compiler reordering:
-		 * rearm_data covers previous fields.
-		 */
-		rte_compiler_barrier();
-		rxq->mbuf_initializer =
-			*(uint64_t *)&mbuf_init->rearm_data;
-		/* Padding with a fake mbuf for vectorized Rx. */
-		for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
-			(*rxq->elts)[desc + j] = &rxq->fake_mbuf;
-		/* Mark that it need to be cleaned up for rxq_alloc_elts(). */
-		rxq->trim_elts = 1;
-	}
-}
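
Note on the txq_check_multiseg() hunk above: the removed assert(NB_SEGS(pkts[0]) == 1) encoded the assumption that the first packet of a burst is single-segment, so the scan started at pos = 1 and a leading multi-segment packet could never be reported. Starting at pos = 0 drops that assumption. A minimal, self-contained sketch of the fixed counting logic; struct mbuf below is an illustrative stand-in for rte_mbuf, not DPDK's definition:

#include <assert.h>
#include <stdint.h>

/* Illustrative stand-in for rte_mbuf; only the segment count matters. */
struct mbuf {
	uint16_t nb_segs;
};

/* Count leading single-segment packets, as the fixed loop does. */
static uint16_t
check_multiseg(struct mbuf **pkts, uint16_t pkts_n)
{
	uint16_t pos;

	if (!pkts_n)
		return 0;
	for (pos = 0; pos < pkts_n; ++pos)
		if (pkts[pos]->nb_segs > 1)
			break;
	return pos;
}

int
main(void)
{
	struct mbuf multi = { 3 };
	struct mbuf single = { 1 };
	struct mbuf *burst[] = { &multi, &single, &single };

	/*
	 * A burst led by a multi-segment packet now yields 0, so the
	 * caller routes it to the scatter path; the old loop starting
	 * at index 1 would have miscounted it as a single-segment run.
	 */
	assert(check_multiseg(burst, 3) == 0);
	return 0;
}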
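The htons()/htonl()/htonll() to rte_cpu_to_be_16/32/64() conversions scattered through the patch are behavior-preserving: both families byte-swap on little-endian CPUs and are no-ops on big-endian ones, the difference being that the rte_ macros do not depend on libc networking headers. A self-contained illustration of the 32-bit case; cpu_to_be_32() below is my own demo equivalent (GCC/Clang builtins assumed), not the DPDK macro:

#include <arpa/inet.h>	/* htonl(), for comparison only */
#include <assert.h>
#include <stdint.h>

/* Demo equivalent of rte_cpu_to_be_32() for this sketch. */
static uint32_t
cpu_to_be_32(uint32_t x)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	return __builtin_bswap32(x);
#else
	return x;
#endif
}

int
main(void)
{
	/* Doorbell and WQE fields are written to the NIC in big endian. */
	uint32_t cq_ci = 8;

	assert(cpu_to_be_32(cq_ci) == htonl(cq_ci));
	return 0;
}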
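Both new Rx guards lean on the expression (uint16_t)(rxq->rq_ci - rxq->cq_ci): rq_ci and cq_ci are free-running indices that wrap at 65536, and truncating unsigned subtraction still yields the number of descriptors standing between consumer and producer. A small demonstration of that modular arithmetic (variable names are illustrative):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	/* Free-running 16-bit ring indices, producer already wrapped. */
	uint16_t rq_ci = 5;	/* producer index, past 65535 */
	uint16_t cq_ci = 65530;	/* consumer index, not yet wrapped */

	/*
	 * (uint16_t)(5 - 65530) == 11, the true count of outstanding
	 * descriptors despite the wraparound; this is what keeps the
	 * overflow checks in rxq_cq_decompress_v() and rxq_burst_v()
	 * correct without any explicit wrap handling.
	 */
	assert((uint16_t)(rq_ci - cq_ci) == 11);
	return 0;
}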