diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index 271e7908e3..6e56305326 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright(c) 2019-2020 Broadcom All rights reserved. */
+/* Copyright(c) 2019-2021 Broadcom All rights reserved. */
 
 #include <inttypes.h>
 #include <stdbool.h>
@@ -13,10 +13,10 @@
 #include "bnxt.h"
 #include "bnxt_cpr.h"
 #include "bnxt_ring.h"
-#include "bnxt_rxtx_vec_common.h"
 
 #include "bnxt_txq.h"
 #include "bnxt_txr.h"
+#include "bnxt_rxtx_vec_common.h"
 
 /*
  * RX Ring handling
@@ -27,11 +27,11 @@
 	uint32_t tmp, of; \
 \
 	of = _mm_extract_epi32((rss_flags), (pi)) | \
-	     bnxt_ol_flags_table[_mm_extract_epi32((ol_index), (pi))]; \
+	     rxr->ol_flags_table[_mm_extract_epi32((ol_index), (pi))]; \
 \
 	tmp = _mm_extract_epi32((errors), (pi)); \
 	if (tmp) \
-		of |= bnxt_ol_flags_err_table[tmp]; \
+		of |= rxr->ol_flags_err_table[tmp]; \
 	(ol_flags) = of; \
 }
 
@@ -54,7 +54,8 @@
 
 static inline void
 descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
-	       __m128i mbuf_init, struct rte_mbuf **mbuf)
+	       __m128i mbuf_init, struct rte_mbuf **mbuf,
+	       struct bnxt_rx_ring_info *rxr)
 {
 	const __m128i shuf_msk =
 		_mm_set_epi8(15, 14, 13, 12,          /* rss */
@@ -63,48 +64,39 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
 			     0xFF, 0xFF, 3, 2,        /* pkt_len */
 			     0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
 	const __m128i flags_type_mask =
-		_mm_set_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK,
-			      RX_PKT_CMPL_FLAGS_ITYPE_MASK,
-			      RX_PKT_CMPL_FLAGS_ITYPE_MASK,
-			      RX_PKT_CMPL_FLAGS_ITYPE_MASK);
+		_mm_set1_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
 	const __m128i flags2_mask1 =
-		_mm_set_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-			      RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
-			      RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-			      RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
-			      RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-			      RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
-			      RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-			      RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
+		_mm_set1_epi32(CMPL_FLAGS2_VLAN_TUN_MSK);
 	const __m128i flags2_mask2 =
-		_mm_set_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE,
-			      RX_PKT_CMPL_FLAGS2_IP_TYPE,
-			      RX_PKT_CMPL_FLAGS2_IP_TYPE,
-			      RX_PKT_CMPL_FLAGS2_IP_TYPE);
+		_mm_set1_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
 	const __m128i rss_mask =
-		_mm_set_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID,
-			      RX_PKT_CMPL_FLAGS_RSS_VALID,
-			      RX_PKT_CMPL_FLAGS_RSS_VALID,
-			      RX_PKT_CMPL_FLAGS_RSS_VALID);
+		_mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID);
 	__m128i t0, t1, flags_type, flags2, index, errors, rss_flags;
-	__m128i ptype_idx;
+	__m128i ptype_idx, is_tunnel;
 	uint32_t ol_flags;
 
+	/* Validate ptype table indexing at build time. */
+	bnxt_check_ptype_constants();
+
 	/* Compute packet type table indexes for four packets */
 	t0 = _mm_unpacklo_epi32(mm_rxcmp[0], mm_rxcmp[1]);
 	t1 = _mm_unpacklo_epi32(mm_rxcmp[2], mm_rxcmp[3]);
 	flags_type = _mm_unpacklo_epi64(t0, t1);
-	ptype_idx =
-		_mm_srli_epi32(_mm_and_si128(flags_type, flags_type_mask), 9);
+	ptype_idx = _mm_srli_epi32(_mm_and_si128(flags_type, flags_type_mask),
+		RX_PKT_CMPL_FLAGS_ITYPE_SFT - BNXT_PTYPE_TBL_TYPE_SFT);
 
 	t0 = _mm_unpacklo_epi32(mm_rxcmp1[0], mm_rxcmp1[1]);
 	t1 = _mm_unpacklo_epi32(mm_rxcmp1[2], mm_rxcmp1[3]);
 	flags2 = _mm_unpacklo_epi64(t0, t1);
 
 	ptype_idx = _mm_or_si128(ptype_idx,
-			_mm_srli_epi32(_mm_and_si128(flags2, flags2_mask1), 2));
+			_mm_srli_epi32(_mm_and_si128(flags2, flags2_mask1),
+				       RX_PKT_CMPL_FLAGS2_META_FORMAT_SFT -
+				       BNXT_PTYPE_TBL_VLAN_SFT));
 	ptype_idx = _mm_or_si128(ptype_idx,
-			_mm_srli_epi32(_mm_and_si128(flags2, flags2_mask2), 7));
+			_mm_srli_epi32(_mm_and_si128(flags2, flags2_mask2),
+				       RX_PKT_CMPL_FLAGS2_IP_TYPE_SFT -
+				       BNXT_PTYPE_TBL_IP_VER_SFT));
 
 	/* Extract RSS valid flags for four packets. */
 	rss_flags = _mm_srli_epi32(_mm_and_si128(flags_type, rss_mask), 9);
@@ -114,13 +106,17 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
 	t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]);
 
 	/* Compute ol_flags and checksum error indexes for four packets. */
-	flags2 = _mm_and_si128(flags2, _mm_set_epi32(0x1F, 0x1F, 0x1F, 0x1F));
+	is_tunnel = _mm_and_si128(flags2, _mm_set1_epi32(4));
+	is_tunnel = _mm_slli_epi32(is_tunnel, 3);
+	flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F));
 
 	errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4);
-	errors = _mm_and_si128(errors, _mm_set_epi32(0xF, 0xF, 0xF, 0xF));
+	errors = _mm_and_si128(errors, _mm_set1_epi32(0xF));
 	errors = _mm_and_si128(errors, flags2);
 
 	index = _mm_andnot_si128(errors, flags2);
+	errors = _mm_or_si128(errors, _mm_srli_epi32(is_tunnel, 1));
+	index = _mm_or_si128(index, is_tunnel);
 
 	/* Update mbuf rearm_data for four packets. */
 	GET_OL_FLAGS(rss_flags, index, errors, 0, ol_flags);
@@ -153,9 +149,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
 	_mm_store_si128((void *)&mbuf[3]->rx_descriptor_fields1, t0);
 }
 
-uint16_t
-bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-		   uint16_t nb_pkts)
+static uint16_t
+recv_burst_vec_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	struct bnxt_rx_queue *rxq = rx_queue;
 	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
@@ -165,16 +160,12 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
 	struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
 	uint64_t valid, desc_valid_mask = ~0ULL;
-	const __m128i info3_v_mask = _mm_set_epi32(CMPL_BASE_V, CMPL_BASE_V,
-						   CMPL_BASE_V, CMPL_BASE_V);
+	const __m128i info3_v_mask = _mm_set1_epi32(CMPL_BASE_V);
 	uint32_t raw_cons = cpr->cp_raw_cons;
 	uint32_t cons, mbcons;
 	int nb_rx_pkts = 0;
 	const __m128i valid_target =
-		_mm_set_epi32(!!(raw_cons & cp_ring_size),
-			      !!(raw_cons & cp_ring_size),
-			      !!(raw_cons & cp_ring_size),
-			      !!(raw_cons & cp_ring_size));
+		_mm_set1_epi32(!!(raw_cons & cp_ring_size));
 	int i;
 
 	/* If Rx Q was stopped return */
@@ -184,9 +175,6 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	if (rxq->rxrearm_nb >= rxq->rx_free_thresh)
 		bnxt_rxq_rearm(rxq, rxr);
 
-	/* Return no more than RTE_BNXT_MAX_RX_BURST per call. */
-	nb_pkts = RTE_MIN(nb_pkts, RTE_BNXT_MAX_RX_BURST);
-
 	cons = raw_cons & (cp_ring_size - 1);
 	mbcons = (raw_cons / 2) & (rx_ring_size - 1);
 
@@ -203,17 +191,20 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	 * maximum number of packets to receive to be a multiple of the per-
 	 * loop count.
 	 */
-	if (nb_pkts < RTE_BNXT_DESCS_PER_LOOP)
-		desc_valid_mask >>= 16 * (RTE_BNXT_DESCS_PER_LOOP - nb_pkts);
-	else
-		nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_BNXT_DESCS_PER_LOOP);
+	if (nb_pkts < BNXT_RX_DESCS_PER_LOOP_VEC128) {
+		desc_valid_mask >>=
+			16 * (BNXT_RX_DESCS_PER_LOOP_VEC128 - nb_pkts);
+	} else {
+		nb_pkts =
+			RTE_ALIGN_FLOOR(nb_pkts, BNXT_RX_DESCS_PER_LOOP_VEC128);
+	}
 
 	/* Handle RX burst request */
-	for (i = 0; i < nb_pkts; i += RTE_BNXT_DESCS_PER_LOOP,
-	     cons += RTE_BNXT_DESCS_PER_LOOP * 2,
-	     mbcons += RTE_BNXT_DESCS_PER_LOOP) {
-		__m128i rxcmp1[RTE_BNXT_DESCS_PER_LOOP];
-		__m128i rxcmp[RTE_BNXT_DESCS_PER_LOOP];
+	for (i = 0; i < nb_pkts; i += BNXT_RX_DESCS_PER_LOOP_VEC128,
+	     cons += BNXT_RX_DESCS_PER_LOOP_VEC128 * 2,
+	     mbcons += BNXT_RX_DESCS_PER_LOOP_VEC128) {
+		__m128i rxcmp1[BNXT_RX_DESCS_PER_LOOP_VEC128];
+		__m128i rxcmp[BNXT_RX_DESCS_PER_LOOP_VEC128];
 		__m128i tmp0, tmp1, info3_v;
 		uint32_t num_valid;
 
@@ -228,7 +219,7 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 #endif
 
 		/* Prefetch four descriptor pairs for next iteration. */
-		if (i + RTE_BNXT_DESCS_PER_LOOP < nb_pkts) {
+		if (i + BNXT_RX_DESCS_PER_LOOP_VEC128 < nb_pkts) {
 			rte_prefetch0(&cp_desc_ring[cons + 8]);
 			rte_prefetch0(&cp_desc_ring[cons + 12]);
 		}
@@ -270,75 +261,47 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		valid = _mm_cvtsi128_si64(_mm_packs_epi32(info3_v, info3_v));
 		num_valid = __builtin_popcountll(valid & desc_valid_mask);
 
-		switch (num_valid) {
-		case 4:
-			rxr->rx_buf_ring[mbcons + 3] = NULL;
-			/* FALLTHROUGH */
-		case 3:
-			rxr->rx_buf_ring[mbcons + 2] = NULL;
-			/* FALLTHROUGH */
-		case 2:
-			rxr->rx_buf_ring[mbcons + 1] = NULL;
-			/* FALLTHROUGH */
-		case 1:
-			rxr->rx_buf_ring[mbcons + 0] = NULL;
+		if (num_valid == 0)
 			break;
-		case 0:
-			goto out;
-		}
 
-		descs_to_mbufs(rxcmp, rxcmp1, mbuf_init, &rx_pkts[nb_rx_pkts]);
+		descs_to_mbufs(rxcmp, rxcmp1, mbuf_init, &rx_pkts[nb_rx_pkts],
+			       rxr);
 		nb_rx_pkts += num_valid;
 
-		if (num_valid < RTE_BNXT_DESCS_PER_LOOP)
+		if (num_valid < BNXT_RX_DESCS_PER_LOOP_VEC128)
 			break;
 	}
 
-out:
 	if (nb_rx_pkts) {
-		rxr->rx_prod =
-			RING_ADV(rxr->rx_ring_struct, rxr->rx_prod, nb_rx_pkts);
+		rxr->rx_raw_prod = RING_ADV(rxr->rx_raw_prod, nb_rx_pkts);
 
 		rxq->rxrearm_nb += nb_rx_pkts;
 		cpr->cp_raw_cons += 2 * nb_rx_pkts;
-		cpr->valid =
-			!!(cpr->cp_raw_cons & cpr->cp_ring_struct->ring_size);
 		bnxt_db_cq(cpr);
 	}
 
 	return nb_rx_pkts;
 }
 
-static void
-bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, int nr_pkts)
+uint16_t
+bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	struct bnxt_tx_ring_info *txr = txq->tx_ring;
-	struct rte_mbuf **free = txq->free;
-	uint16_t cons = txr->tx_cons;
-	unsigned int blk = 0;
-	uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
-
-	while (nr_pkts--) {
-		struct bnxt_sw_tx_bd *tx_buf;
-		struct rte_mbuf *mbuf;
-
-		tx_buf = &txr->tx_buf_ring[cons];
-		cons = (cons + 1) & ring_mask;
-		mbuf = rte_pktmbuf_prefree_seg(tx_buf->mbuf);
-		if (unlikely(mbuf == NULL))
-			continue;
-		tx_buf->mbuf = NULL;
-
-		if (blk && mbuf->pool != free[0]->pool) {
-			rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
-			blk = 0;
-		}
-		free[blk++] = mbuf;
+	uint16_t cnt = 0;
+
+	while (nb_pkts > RTE_BNXT_MAX_RX_BURST) {
+		uint16_t burst;
+
+		burst = recv_burst_vec_sse(rx_queue, rx_pkts + cnt,
+					   RTE_BNXT_MAX_RX_BURST);
+
+		cnt += burst;
+		nb_pkts -= burst;
+
+		if (burst < RTE_BNXT_MAX_RX_BURST)
+			return cnt;
 	}
 
-	if (blk)
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
-
-	txr->tx_cons = cons;
+	return cnt + recv_burst_vec_sse(rx_queue, rx_pkts + cnt, nb_pkts);
 }
 
 static void
@@ -357,7 +320,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
 		cons = RING_CMPL(ring_mask, raw_cons);
 		txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];
 
-		if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
+		if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
 			break;
 
 		if (likely(CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2))
@@ -369,9 +332,11 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
 		raw_cons = NEXT_RAW_CMP(raw_cons);
 	} while (nb_tx_pkts < ring_mask);
 
-	cpr->valid = !!(raw_cons & cp_ring_struct->ring_size);
 	if (nb_tx_pkts) {
-		bnxt_tx_cmp_vec(txq, nb_tx_pkts);
+		if (txq->offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
+			bnxt_tx_cmp_vec_fast(txq, nb_tx_pkts);
+		else
+			bnxt_tx_cmp_vec(txq, nb_tx_pkts);
 		cpr->cp_raw_cons = raw_cons;
 		bnxt_db_cq(cpr);
 	}
@@ -379,12 +344,11 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
 
 static inline void
 bnxt_xmit_one(struct rte_mbuf *mbuf, struct tx_bd_long *txbd,
-	      struct bnxt_sw_tx_bd *tx_buf)
+	      struct rte_mbuf **tx_buf)
 {
 	__m128i desc;
 
-	tx_buf->mbuf = mbuf;
-	tx_buf->nr_bds = 1;
+	*tx_buf = mbuf;
 
 	desc = _mm_set_epi64x(mbuf->buf_iova + mbuf->data_off,
 			      bnxt_xmit_flags_len(mbuf->data_len,
@@ -399,11 +363,12 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
 			  uint16_t nb_pkts)
 {
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
-	uint16_t tx_prod = txr->tx_prod;
+	uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
 	struct tx_bd_long *txbd;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	uint16_t to_send;
 
+	tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
 	txbd = &txr->tx_desc_ring[tx_prod];
 	tx_buf = &txr->tx_buf_ring[tx_prod];
 
@@ -418,7 +383,7 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
 
 	/* Handle TX burst request */
 	to_send = nb_pkts;
-	while (to_send >= RTE_BNXT_DESCS_PER_LOOP) {
+	while (to_send >= BNXT_TX_DESCS_PER_LOOP) {
 		/* Prefetch next transmit buffer descriptors. */
 		rte_prefetch0(txbd + 4);
 		rte_prefetch0(txbd + 7);
@@ -428,8 +393,8 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
 		bnxt_xmit_one(tx_pkts[2], txbd++, tx_buf++);
 		bnxt_xmit_one(tx_pkts[3], txbd++, tx_buf++);
 
-		to_send -= RTE_BNXT_DESCS_PER_LOOP;
-		tx_pkts += RTE_BNXT_DESCS_PER_LOOP;
+		to_send -= BNXT_TX_DESCS_PER_LOOP;
+		tx_pkts += BNXT_TX_DESCS_PER_LOOP;
 	}
 
 	while (to_send) {
@@ -443,10 +408,10 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
 	txbd[-1].opaque = nb_pkts;
 	txbd[-1].flags_type &= ~TX_BD_LONG_FLAGS_NO_CMPL;
 
-	tx_prod = RING_ADV(txr->tx_ring_struct, tx_prod, nb_pkts);
-	bnxt_db_write(&txr->tx_db, tx_prod);
+	tx_raw_prod += nb_pkts;
+	bnxt_db_write(&txr->tx_db, tx_raw_prod);
 
-	txr->tx_prod = tx_prod;
+	txr->tx_raw_prod = tx_raw_prod;
 
 	return nb_pkts;
 }
@@ -483,8 +448,8 @@ bnxt_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 		 * Ensure that a ring wrap does not occur within a call to
 		 * bnxt_xmit_fixed_burst_vec().
 		 */
-		num = RTE_MIN(num,
-			      ring_size - (txr->tx_prod & (ring_size - 1)));
+		num = RTE_MIN(num, ring_size -
+				   (txr->tx_raw_prod & (ring_size - 1)));
 		ret = bnxt_xmit_fixed_burst_vec(txq, &tx_pkts[nb_sent], num);
 		nb_sent += ret;
 		nb_pkts -= ret;
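
Note: the BNXT_PTYPE_TBL_*_SFT constants introduced in this diff encode a packed index into the driver's packet-type mapping table, assembled from three fields of the Rx completion records. The scalar sketch below shows the per-packet equivalent of the four-wide SSE computation in descs_to_mbufs(). It is illustrative only: the helper name and standalone form are hypothetical, and it assumes the bnxt driver headers that define the RX_PKT_CMPL_* and BNXT_PTYPE_TBL_* identifiers (the same ones used in the diff) are included.

/* Scalar sketch; not driver code. Assumes bnxt driver headers. */
#include <stdint.h>

static inline uint32_t
sketch_ptype_table_index(uint32_t flags_type, uint32_t flags2)
{
	uint32_t idx;

	/* Item-type bits from flags_type of the first completion word,
	 * moved down from their hardware position to the table's type slot.
	 */
	idx = (flags_type & RX_PKT_CMPL_FLAGS_ITYPE_MASK) >>
	      (RX_PKT_CMPL_FLAGS_ITYPE_SFT - BNXT_PTYPE_TBL_TYPE_SFT);

	/* VLAN/tunnel metadata-format bits from flags2 of the second
	 * completion record.
	 */
	idx |= (flags2 & CMPL_FLAGS2_VLAN_TUN_MSK) >>
	       (RX_PKT_CMPL_FLAGS2_META_FORMAT_SFT - BNXT_PTYPE_TBL_VLAN_SFT);

	/* IPv4 vs. IPv6 indication. */
	idx |= (flags2 & RX_PKT_CMPL_FLAGS2_IP_TYPE) >>
	       (RX_PKT_CMPL_FLAGS2_IP_TYPE_SFT - BNXT_PTYPE_TBL_IP_VER_SFT);

	return idx;
}

Each subtraction relocates a hardware field from its completion-record bit position to its slot in the packed table index; the bnxt_check_ptype_constants() call added in descs_to_mbufs() verifies these shift relationships at build time.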