diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index ce92629ab0..c479697ac0 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright(c) 2019-2020 Broadcom All rights reserved. */
+/* Copyright(c) 2019-2021 Broadcom All rights reserved. */
 
 #include <inttypes.h>
 #include <stdbool.h>
@@ -27,11 +27,11 @@
 	uint32_t tmp, of; \
 	\
 	of = _mm_extract_epi32((rss_flags), (pi)) | \
-		bnxt_ol_flags_table[_mm_extract_epi32((ol_index), (pi))]; \
+		rxr->ol_flags_table[_mm_extract_epi32((ol_index), (pi))]; \
 	\
 	tmp = _mm_extract_epi32((errors), (pi)); \
 	if (tmp) \
-		of |= bnxt_ol_flags_err_table[tmp]; \
+		of |= rxr->ol_flags_err_table[tmp]; \
 	(ol_flags) = of; \
 }
@@ -54,7 +54,8 @@
 static inline void
 descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
-	       __m128i mbuf_init, struct rte_mbuf **mbuf)
+	       __m128i mbuf_init, struct rte_mbuf **mbuf,
+	       struct bnxt_rx_ring_info *rxr)
 {
 	const __m128i shuf_msk =
 		_mm_set_epi8(15, 14, 13, 12, /* rss */
@@ -65,31 +66,37 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
 	const __m128i flags_type_mask =
 		_mm_set1_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
 	const __m128i flags2_mask1 =
-		_mm_set1_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-			       RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
+		_mm_set1_epi32(CMPL_FLAGS2_VLAN_TUN_MSK);
 	const __m128i flags2_mask2 =
 		_mm_set1_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
 	const __m128i rss_mask =
 		_mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID);
 	__m128i t0, t1, flags_type, flags2, index, errors, rss_flags;
-	__m128i ptype_idx;
+	__m128i ptype_idx, is_tunnel;
 	uint32_t ol_flags;
 
+	/* Validate ptype table indexing at build time. */
+	bnxt_check_ptype_constants();
+
 	/* Compute packet type table indexes for four packets */
 	t0 = _mm_unpacklo_epi32(mm_rxcmp[0], mm_rxcmp[1]);
 	t1 = _mm_unpacklo_epi32(mm_rxcmp[2], mm_rxcmp[3]);
 	flags_type = _mm_unpacklo_epi64(t0, t1);
-	ptype_idx =
-		_mm_srli_epi32(_mm_and_si128(flags_type, flags_type_mask), 9);
+	ptype_idx = _mm_srli_epi32(_mm_and_si128(flags_type, flags_type_mask),
+		RX_PKT_CMPL_FLAGS_ITYPE_SFT - BNXT_PTYPE_TBL_TYPE_SFT);
 
 	t0 = _mm_unpacklo_epi32(mm_rxcmp1[0], mm_rxcmp1[1]);
 	t1 = _mm_unpacklo_epi32(mm_rxcmp1[2], mm_rxcmp1[3]);
 	flags2 = _mm_unpacklo_epi64(t0, t1);
 
 	ptype_idx = _mm_or_si128(ptype_idx,
-			_mm_srli_epi32(_mm_and_si128(flags2, flags2_mask1), 2));
+			_mm_srli_epi32(_mm_and_si128(flags2, flags2_mask1),
+				       RX_PKT_CMPL_FLAGS2_META_FORMAT_SFT -
+				       BNXT_PTYPE_TBL_VLAN_SFT));
 	ptype_idx = _mm_or_si128(ptype_idx,
-			_mm_srli_epi32(_mm_and_si128(flags2, flags2_mask2), 7));
+			_mm_srli_epi32(_mm_and_si128(flags2, flags2_mask2),
+				       RX_PKT_CMPL_FLAGS2_IP_TYPE_SFT -
+				       BNXT_PTYPE_TBL_IP_VER_SFT));
 
 	/* Extract RSS valid flags for four packets. */
 	rss_flags = _mm_srli_epi32(_mm_and_si128(flags_type, rss_mask), 9);
@@ -99,6 +106,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
 	t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]);
 
 	/* Compute ol_flags and checksum error indexes for four packets. */
+	is_tunnel = _mm_and_si128(flags2, _mm_set1_epi32(4));
+	is_tunnel = _mm_slli_epi32(is_tunnel, 3);
 	flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F));
 
 	errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4);
@@ -106,6 +115,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
 	errors = _mm_and_si128(errors, flags2);
 	index = _mm_andnot_si128(errors, flags2);
+	errors = _mm_or_si128(errors, _mm_srli_epi32(is_tunnel, 1));
+	index = _mm_or_si128(index, is_tunnel);
 
 	/* Update mbuf rearm_data for four packets. */
 	GET_OL_FLAGS(rss_flags, index, errors, 0, ol_flags);
@@ -138,9 +149,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
 	_mm_store_si128((void *)&mbuf[3]->rx_descriptor_fields1, t0);
 }
 
-uint16_t
-bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-		   uint16_t nb_pkts)
+static uint16_t
+recv_burst_vec_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	struct bnxt_rx_queue *rxq = rx_queue;
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -165,9 +175,6 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	if (rxq->rxrearm_nb >= rxq->rx_free_thresh)
 		bnxt_rxq_rearm(rxq, rxr);
 
-	/* Return no more than RTE_BNXT_MAX_RX_BURST per call. */
-	nb_pkts = RTE_MIN(nb_pkts, RTE_BNXT_MAX_RX_BURST);
-
 	cons = raw_cons & (cp_ring_size - 1);
 	mbcons = (raw_cons / 2) & (rx_ring_size - 1);
@@ -184,17 +191,20 @@
 	 * maximum number of packets to receive to be a multiple of the per-
 	 * loop count.
 	 */
-	if (nb_pkts < RTE_BNXT_DESCS_PER_LOOP)
-		desc_valid_mask >>= 16 * (RTE_BNXT_DESCS_PER_LOOP - nb_pkts);
-	else
-		nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_BNXT_DESCS_PER_LOOP);
+	if (nb_pkts < BNXT_RX_DESCS_PER_LOOP_VEC128) {
+		desc_valid_mask >>=
+			16 * (BNXT_RX_DESCS_PER_LOOP_VEC128 - nb_pkts);
+	} else {
+		nb_pkts =
+			RTE_ALIGN_FLOOR(nb_pkts, BNXT_RX_DESCS_PER_LOOP_VEC128);
+	}
 
 	/* Handle RX burst request */
-	for (i = 0; i < nb_pkts; i += RTE_BNXT_DESCS_PER_LOOP,
-	     cons += RTE_BNXT_DESCS_PER_LOOP * 2,
-	     mbcons += RTE_BNXT_DESCS_PER_LOOP) {
-		__m128i rxcmp1[RTE_BNXT_DESCS_PER_LOOP];
-		__m128i rxcmp[RTE_BNXT_DESCS_PER_LOOP];
+	for (i = 0; i < nb_pkts; i += BNXT_RX_DESCS_PER_LOOP_VEC128,
+	     cons += BNXT_RX_DESCS_PER_LOOP_VEC128 * 2,
+	     mbcons += BNXT_RX_DESCS_PER_LOOP_VEC128) {
+		__m128i rxcmp1[BNXT_RX_DESCS_PER_LOOP_VEC128];
+		__m128i rxcmp[BNXT_RX_DESCS_PER_LOOP_VEC128];
 		__m128i tmp0, tmp1, info3_v;
 		uint32_t num_valid;
@@ -209,7 +219,7 @@
 #endif
 
 		/* Prefetch four descriptor pairs for next iteration. */
-		if (i + RTE_BNXT_DESCS_PER_LOOP < nb_pkts) {
+		if (i + BNXT_RX_DESCS_PER_LOOP_VEC128 < nb_pkts) {
 			rte_prefetch0(&cp_desc_ring[cons + 8]);
 			rte_prefetch0(&cp_desc_ring[cons + 12]);
 		}
@@ -251,31 +261,17 @@
 		valid = _mm_cvtsi128_si64(_mm_packs_epi32(info3_v, info3_v));
 		num_valid = __builtin_popcountll(valid & desc_valid_mask);
 
-		switch (num_valid) {
-		case 4:
-			rxr->rx_buf_ring[mbcons + 3] = NULL;
-			/* FALLTHROUGH */
-		case 3:
-			rxr->rx_buf_ring[mbcons + 2] = NULL;
-			/* FALLTHROUGH */
-		case 2:
-			rxr->rx_buf_ring[mbcons + 1] = NULL;
-			/* FALLTHROUGH */
-		case 1:
-			rxr->rx_buf_ring[mbcons + 0] = NULL;
+		if (num_valid == 0)
 			break;
-		case 0:
-			goto out;
-		}
 
-		descs_to_mbufs(rxcmp, rxcmp1, mbuf_init, &rx_pkts[nb_rx_pkts]);
+		descs_to_mbufs(rxcmp, rxcmp1, mbuf_init, &rx_pkts[nb_rx_pkts],
+			       rxr);
 		nb_rx_pkts += num_valid;
 
-		if (num_valid < RTE_BNXT_DESCS_PER_LOOP)
+		if (num_valid < BNXT_RX_DESCS_PER_LOOP_VEC128)
 			break;
 	}
 
-out:
 	if (nb_rx_pkts) {
 		rxr->rx_raw_prod = RING_ADV(rxr->rx_raw_prod, nb_rx_pkts);
@@ -289,6 +285,27 @@ out:
 	return nb_rx_pkts;
 }
 
+uint16_t
+bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	uint16_t cnt = 0;
+
+	while (nb_pkts > RTE_BNXT_MAX_RX_BURST) {
+		uint16_t burst;
+
+		burst = recv_burst_vec_sse(rx_queue, rx_pkts + cnt,
+					   RTE_BNXT_MAX_RX_BURST);
+
+		cnt += burst;
+		nb_pkts -= burst;
+
+		if (burst < RTE_BNXT_MAX_RX_BURST)
+			return cnt;
+	}
+
+	return cnt + recv_burst_vec_sse(rx_queue, rx_pkts + cnt, nb_pkts);
+}
+
 static void
 bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
 {
@@ -330,12 +347,11 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
 
 static inline void
 bnxt_xmit_one(struct rte_mbuf *mbuf, struct tx_bd_long *txbd,
-	      struct bnxt_sw_tx_bd *tx_buf)
+	      struct rte_mbuf **tx_buf)
 {
 	__m128i desc;
 
-	tx_buf->mbuf = mbuf;
-	tx_buf->nr_bds = 1;
+	*tx_buf = mbuf;
 
 	desc = _mm_set_epi64x(mbuf->buf_iova + mbuf->data_off,
 			      bnxt_xmit_flags_len(mbuf->data_len,
@@ -352,7 +368,7 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
 	uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
 	struct tx_bd_long *txbd;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	uint16_t to_send;
 
 	tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
@@ -370,7 +386,7 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
 
 	/* Handle TX burst request */
 	to_send = nb_pkts;
-	while (to_send >= RTE_BNXT_DESCS_PER_LOOP) {
+	while (to_send >= BNXT_TX_DESCS_PER_LOOP) {
 		/* Prefetch next transmit buffer descriptors. */
 		rte_prefetch0(txbd + 4);
 		rte_prefetch0(txbd + 7);
@@ -380,8 +396,8 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
 		bnxt_xmit_one(tx_pkts[2], txbd++, tx_buf++);
 		bnxt_xmit_one(tx_pkts[3], txbd++, tx_buf++);
 
-		to_send -= RTE_BNXT_DESCS_PER_LOOP;
-		tx_pkts += RTE_BNXT_DESCS_PER_LOOP;
+		to_send -= BNXT_TX_DESCS_PER_LOOP;
+		tx_pkts += BNXT_TX_DESCS_PER_LOOP;
 	}
 
 	while (to_send) {
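
The reworked bnxt_recv_pkts_vec() in the hunk at old line 289 no longer silently truncates a large request to RTE_BNXT_MAX_RX_BURST (note the RTE_MIN() removed at old line 165); instead it loops over recv_burst_vec_sse() in chunks of at most RTE_BNXT_MAX_RX_BURST, stopping early when a short burst indicates the completion ring is exhausted. Below is a minimal standalone sketch of that chunking pattern, not driver code: MAX_BURST, recv_burst() and recv_pkts() are hypothetical stand-ins for RTE_BNXT_MAX_RX_BURST and recv_burst_vec_sse(), and the "ring" is reduced to a packet counter.

/* Standalone sketch of the burst-splitting pattern (illustrative only). */
#include <stdint.h>
#include <stdio.h>

#define MAX_BURST 32	/* stand-in for RTE_BNXT_MAX_RX_BURST */

/* Stub inner burst function: returns up to nb_pkts from a counted "ring". */
static uint16_t
recv_burst(uint16_t *ring_pkts, uint16_t nb_pkts)
{
	uint16_t n = nb_pkts < *ring_pkts ? nb_pkts : *ring_pkts;

	*ring_pkts -= n;
	return n;
}

/* Wrapper with the same shape as bnxt_recv_pkts_vec() after this patch. */
static uint16_t
recv_pkts(uint16_t *ring_pkts, uint16_t nb_pkts)
{
	uint16_t cnt = 0;

	while (nb_pkts > MAX_BURST) {
		uint16_t burst = recv_burst(ring_pkts, MAX_BURST);

		cnt += burst;
		nb_pkts -= burst;

		/* A short burst means the ring is empty; stop early. */
		if (burst < MAX_BURST)
			return cnt;
	}

	/* Final partial chunk of at most MAX_BURST packets. */
	return cnt + recv_burst(ring_pkts, nb_pkts);
}

int main(void)
{
	uint16_t ring = 75;

	/* Request 128 packets while only 75 are available: prints 75. */
	printf("%u\n", recv_pkts(&ring, 128));
	return 0;
}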