net/ice/base: support extended GPIO access
[dpdk.git] / drivers / net / iavf / iavf_rxtx_vec_avx2.c
index 9f467d4..8f28afc 100644 (file)
@@ -52,8 +52,8 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
                mb0 = rxp[0];
                mb1 = rxp[1];
 
-               /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
-               RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_physaddr) !=
+               /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+               RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
                                offsetof(struct rte_mbuf, buf_addr) + 8);
                vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
                vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
@@ -85,8 +85,8 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
                mb2 = rxp[2];
                mb3 = rxp[3];
 
-               /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
-               RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_physaddr) !=
+               /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+               RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
                                offsetof(struct rte_mbuf, buf_addr) + 8);
                vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
                vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
@@ -695,7 +695,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
                _mm256_set_epi8
                        (/* first descriptor */
                         0xFF, 0xFF,
-                        0xFF, 0xFF,    /* rss not supported */
+                        0xFF, 0xFF,    /* rss hash parsed separately */
                         11, 10,        /* octet 10~11, 16 bits vlan_macip */
                         5, 4,          /* octet 4~5, 16 bits data_len */
                         0xFF, 0xFF,    /* skip hi 16 bits pkt_len, zero out */
@@ -704,7 +704,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
                         0xFF, 0xFF,    /*pkt_type set as unknown */
                         /* second descriptor */
                         0xFF, 0xFF,
-                        0xFF, 0xFF,    /* rss not supported */
+                        0xFF, 0xFF,    /* rss hash parsed separately */
                         11, 10,        /* octet 10~11, 16 bits vlan_macip */
                         5, 4,          /* octet 4~5, 16 bits data_len */
                         0xFF, 0xFF,    /* skip hi 16 bits pkt_len, zero out */
@@ -991,6 +991,96 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
                                _mm256_extract_epi32(fdir_id0_7, 4);
                } /* if() on fdir_enabled */
 
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+               /**
+                * RSS hash parsing needs the 2nd 16B of each 32B descriptor;
+                * these extra loads cost performance whenever this path runs.
+                */
+               if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+                               DEV_RX_OFFLOAD_RSS_HASH) {
+                       /* load bottom half of every 32B desc */
+                       const __m128i raw_desc_bh7 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[7].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh6 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[6].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh5 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[5].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh4 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[4].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh3 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[3].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh2 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[2].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh1 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[1].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh0 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[0].wb.status_error1));
+
+                       __m256i raw_desc_bh6_7 =
+                               _mm256_inserti128_si256
+                                       (_mm256_castsi128_si256(raw_desc_bh6),
+                                       raw_desc_bh7, 1);
+                       __m256i raw_desc_bh4_5 =
+                               _mm256_inserti128_si256
+                                       (_mm256_castsi128_si256(raw_desc_bh4),
+                                       raw_desc_bh5, 1);
+                       __m256i raw_desc_bh2_3 =
+                               _mm256_inserti128_si256
+                                       (_mm256_castsi128_si256(raw_desc_bh2),
+                                       raw_desc_bh3, 1);
+                       __m256i raw_desc_bh0_1 =
+                               _mm256_inserti128_si256
+                                       (_mm256_castsi128_si256(raw_desc_bh0),
+                                       raw_desc_bh1, 1);
+
+                       /**
+                        * shift the 32b RSS hash value into the
+                        * highest 32b of each 128b lane before masking
+                        */
+                       __m256i rss_hash6_7 =
+                               _mm256_slli_epi64(raw_desc_bh6_7, 32);
+                       __m256i rss_hash4_5 =
+                               _mm256_slli_epi64(raw_desc_bh4_5, 32);
+                       __m256i rss_hash2_3 =
+                               _mm256_slli_epi64(raw_desc_bh2_3, 32);
+                       __m256i rss_hash0_1 =
+                               _mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+                       __m256i rss_hash_msk =
+                               _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+                                                0xFFFFFFFF, 0, 0, 0);
+
+                       rss_hash6_7 = _mm256_and_si256
+                                       (rss_hash6_7, rss_hash_msk);
+                       rss_hash4_5 = _mm256_and_si256
+                                       (rss_hash4_5, rss_hash_msk);
+                       rss_hash2_3 = _mm256_and_si256
+                                       (rss_hash2_3, rss_hash_msk);
+                       rss_hash0_1 = _mm256_and_si256
+                                       (rss_hash0_1, rss_hash_msk);
+
+                       mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+                       mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+                       mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+                       mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+               } /* if() on RSS hash parsing */
+#endif
+
                /**
                 * At this point, we have the 8 sets of flags in the low 16-bits
                 * of each 32-bit value in vlan0.
@@ -1301,7 +1391,7 @@ iavf_vtx1(volatile struct iavf_tx_desc *txdp,
                 ((uint64_t)pkt->data_len << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT));
 
        __m128i descriptor = _mm_set_epi64x(high_qw,
-                               pkt->buf_physaddr + pkt->data_off);
+                               pkt->buf_iova + pkt->data_off);
        _mm_store_si128((__m128i *)txdp, descriptor);
 }
 
@@ -1340,15 +1430,15 @@ iavf_vtx(volatile struct iavf_tx_desc *txdp,
                __m256i desc2_3 =
                        _mm256_set_epi64x
                                (hi_qw3,
-                                pkt[3]->buf_physaddr + pkt[3]->data_off,
+                                pkt[3]->buf_iova + pkt[3]->data_off,
                                 hi_qw2,
-                                pkt[2]->buf_physaddr + pkt[2]->data_off);
+                                pkt[2]->buf_iova + pkt[2]->data_off);
                __m256i desc0_1 =
                        _mm256_set_epi64x
                                (hi_qw1,
-                                pkt[1]->buf_physaddr + pkt[1]->data_off,
+                                pkt[1]->buf_iova + pkt[1]->data_off,
                                 hi_qw0,
-                                pkt[0]->buf_physaddr + pkt[0]->data_off);
+                                pkt[0]->buf_iova + pkt[0]->data_off);
                _mm256_store_si256((void *)(txdp + 2), desc2_3);
                _mm256_store_si256((void *)txdp, desc0_1);
        }