net/ice: support flex Rx descriptor RxDID22
authorJunyu Jiang <junyux.jiang@intel.com>
Wed, 16 Sep 2020 03:09:58 +0000 (03:09 +0000)
committerFerruh Yigit <ferruh.yigit@intel.com>
Fri, 18 Sep 2020 16:55:11 +0000 (18:55 +0200)
This patch supports RxDID #22 by the following changes:
- add structure and macro definition for RxDID #22.
- support RxDID #22 format in normal path.
- change RSS hash parsing from RxDID #22 in AVX/SSE data path.

Signed-off-by: Junyu Jiang <junyux.jiang@intel.com>
Acked-by: Leyi Rong <leyi.rong@intel.com>
drivers/net/ice/ice_ethdev.c
drivers/net/ice/ice_ethdev.h
drivers/net/ice/ice_rxtx.c
drivers/net/ice/ice_rxtx.h
drivers/net/ice/ice_rxtx_vec_avx2.c
drivers/net/ice/ice_rxtx_vec_sse.c

index c42581e..097b720 100644 (file)
@@ -2147,6 +2147,24 @@ ice_rss_ctx_init(struct ice_pf *pf)
        ICE_HASH_CFG_RESET(&pf->gtpu_hash_ctx.ipv6_tcp);
 }
 
+static uint64_t
+ice_get_supported_rxdid(struct ice_hw *hw)
+{
+       uint64_t supported_rxdid = 0; /* bitmap for supported RXDID */
+       uint32_t regval;
+       int i;
+
+       supported_rxdid |= BIT(ICE_RXDID_LEGACY_1);
+
+       for (i = ICE_RXDID_FLEX_NIC; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) {
+               regval = ICE_READ_REG(hw, GLFLXP_RXDID_FLAGS(i, 0));
+               if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S)
+                       & GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M)
+                       supported_rxdid |= BIT(i);
+       }
+       return supported_rxdid;
+}
+
 static int
 ice_dev_init(struct rte_eth_dev *dev)
 {
@@ -2298,6 +2316,8 @@ ice_dev_init(struct rte_eth_dev *dev)
                return ret;
        }
 
+       pf->supported_rxdid = ice_get_supported_rxdid(hw);
+
        return 0;
 
 err_pf_setup:
index 243a023..e8c9971 100644 (file)
 #define ICE_RXTX_BYTES_HIGH(bytes) ((bytes) & ~ICE_40_BIT_MASK)
 #define ICE_RXTX_BYTES_LOW(bytes) ((bytes) & ICE_40_BIT_MASK)
 
+/* Max number of flexible descriptor rxdid */
+#define ICE_FLEX_DESC_RXDID_MAX_NUM 64
+
 /* DDP package type */
 enum ice_pkg_type {
        ICE_PKG_TYPE_UNKNOWN,
@@ -435,6 +438,7 @@ struct ice_pf {
        bool init_link_up;
        uint64_t old_rx_bytes;
        uint64_t old_tx_bytes;
+       uint64_t supported_rxdid; /* bitmap for supported RXDID */
 };
 
 #define ICE_MAX_QUEUE_NUM  2048
index fecb134..fef6ad4 100644 (file)
@@ -63,7 +63,7 @@ static inline uint8_t
 ice_proto_xtr_type_to_rxdid(uint8_t xtr_type)
 {
        static uint8_t rxdid_map[] = {
-               [PROTO_XTR_NONE]      = ICE_RXDID_COMMS_GENERIC,
+               [PROTO_XTR_NONE]      = ICE_RXDID_COMMS_OVS,
                [PROTO_XTR_VLAN]      = ICE_RXDID_COMMS_AUX_VLAN,
                [PROTO_XTR_IPV4]      = ICE_RXDID_COMMS_AUX_IPV4,
                [PROTO_XTR_IPV6]      = ICE_RXDID_COMMS_AUX_IPV6,
@@ -73,7 +73,7 @@ ice_proto_xtr_type_to_rxdid(uint8_t xtr_type)
        };
 
        return xtr_type < RTE_DIM(rxdid_map) ?
-                               rxdid_map[xtr_type] : ICE_RXDID_COMMS_GENERIC;
+                               rxdid_map[xtr_type] : ICE_RXDID_COMMS_OVS;
 }
 
 static enum ice_status
@@ -81,12 +81,13 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 {
        struct ice_vsi *vsi = rxq->vsi;
        struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
+       struct ice_pf *pf = ICE_VSI_TO_PF(vsi);
        struct rte_eth_dev *dev = ICE_VSI_TO_ETH_DEV(rxq->vsi);
        struct ice_rlan_ctx rx_ctx;
        enum ice_status err;
        uint16_t buf_size, len;
        struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
-       uint32_t rxdid = ICE_RXDID_COMMS_GENERIC;
+       uint32_t rxdid = ICE_RXDID_COMMS_OVS;
        uint32_t regval;
 
        /* Set buffer size as the head split is disabled. */
@@ -151,6 +152,12 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
        PMD_DRV_LOG(DEBUG, "Port (%u) - Rx queue (%u) is set with RXDID : %u",
                    rxq->port_id, rxq->queue_id, rxdid);
 
+       if (!(pf->supported_rxdid & BIT(rxdid))) {
+               PMD_DRV_LOG(ERR, "currently package doesn't support RXDID (%u)",
+                           rxdid);
+               return -EINVAL;
+       }
+
        /* Enable Flexible Descriptors in the queue context which
         * allows this driver to select a specific receive descriptor format
         */
@@ -1338,7 +1345,7 @@ ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_flex_desc *rxdp)
 
 static void
 ice_rxd_to_proto_xtr(struct rte_mbuf *mb,
-                    volatile struct ice_32b_rx_flex_desc_comms *desc)
+                    volatile struct ice_32b_rx_flex_desc_comms_ovs *desc)
 {
        uint16_t stat_err = rte_le_to_cpu_16(desc->status_error1);
        uint32_t metadata = 0;
@@ -1376,8 +1383,9 @@ static inline void
 ice_rxd_to_pkt_fields(struct rte_mbuf *mb,
                      volatile union ice_rx_flex_desc *rxdp)
 {
-       volatile struct ice_32b_rx_flex_desc_comms *desc =
-                       (volatile struct ice_32b_rx_flex_desc_comms *)rxdp;
+       volatile struct ice_32b_rx_flex_desc_comms_ovs *desc =
+                       (volatile struct ice_32b_rx_flex_desc_comms_ovs *)rxdp;
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
        uint16_t stat_err;
 
        stat_err = rte_le_to_cpu_16(desc->status_error0);
@@ -1385,13 +1393,14 @@ ice_rxd_to_pkt_fields(struct rte_mbuf *mb,
                mb->ol_flags |= PKT_RX_RSS_HASH;
                mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
        }
+#endif
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
        if (desc->flow_id != 0xFFFFFFFF) {
                mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
                mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
        }
 
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
        if (unlikely(rte_net_ice_dynf_proto_xtr_metadata_avail()))
                ice_rxd_to_proto_xtr(mb, desc);
 #endif
index 2fdcfb7..e21ba15 100644 (file)
@@ -38,6 +38,8 @@
 
 #define ICE_FDIR_PKT_LEN       512
 
+#define ICE_RXDID_COMMS_OVS    22
+
 typedef void (*ice_rx_release_mbufs_t)(struct ice_rx_queue *rxq);
 typedef void (*ice_tx_release_mbufs_t)(struct ice_tx_queue *txq);
 
@@ -135,6 +137,46 @@ union ice_tx_offload {
        };
 };
 
+/* Rx Flex Descriptor for Comms Package Profile
+ * RxDID Profile ID 22 (swap Hash and FlowID)
+ * Flex-field 0: Flow ID lower 16-bits
+ * Flex-field 1: Flow ID upper 16-bits
+ * Flex-field 2: RSS hash lower 16-bits
+ * Flex-field 3: RSS hash upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct ice_32b_rx_flex_desc_comms_ovs {
+       /* Qword 0 */
+       u8 rxdid;
+       u8 mir_id_umb_cast;
+       __le16 ptype_flexi_flags0;
+       __le16 pkt_len;
+       __le16 hdr_len_sph_flex_flags1;
+
+       /* Qword 1 */
+       __le16 status_error0;
+       __le16 l2tag1;
+       __le32 flow_id;
+
+       /* Qword 2 */
+       __le16 status_error1;
+       u8 flexi_flags2;
+       u8 ts_low;
+       __le16 l2tag2_1st;
+       __le16 l2tag2_2nd;
+
+       /* Qword 3 */
+       __le32 rss_hash;
+       union {
+               struct {
+                       __le16 aux0;
+                       __le16 aux1;
+               } flex;
+               __le32 ts_high;
+       } flex_ts;
+};
+
 int ice_rx_queue_setup(struct rte_eth_dev *dev,
                       uint16_t queue_idx,
                       uint16_t nb_desc,
index b653805..96f6f2e 100644 (file)
@@ -191,8 +191,8 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
        const __m256i shuf_msk =
                _mm256_set_epi8
                        (/* first descriptor */
-                        15, 14,
-                        13, 12,        /* octet 12~15, 32 bits rss */
+                        0xFF, 0xFF,
+                        0xFF, 0xFF,    /* rss hash parsed separately */
                         11, 10,        /* octet 10~11, 16 bits vlan_macip */
                         5, 4,          /* octet 4~5, 16 bits data_len */
                         0xFF, 0xFF,    /* skip hi 16 bits pkt_len, zero out */
@@ -200,8 +200,8 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                         0xFF, 0xFF,    /* pkt_type set as unknown */
                         0xFF, 0xFF,    /*pkt_type set as unknown */
                         /* second descriptor */
-                        15, 14,
-                        13, 12,        /* octet 12~15, 32 bits rss */
+                        0xFF, 0xFF,
+                        0xFF, 0xFF,    /* rss hash parsed separately */
                         11, 10,        /* octet 10~11, 16 bits vlan_macip */
                         5, 4,          /* octet 4~5, 16 bits data_len */
                         0xFF, 0xFF,    /* skip hi 16 bits pkt_len, zero out */
@@ -461,6 +461,96 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                /* merge flags */
                const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
                                rss_vlan_flags);
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+               /**
+                * needs to load 2nd 16B of each desc for RSS hash parsing,
+                * will cause performance drop to get into this context.
+                */
+               if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+                               DEV_RX_OFFLOAD_RSS_HASH) {
+                       /* load bottom half of every 32B desc */
+                       const __m128i raw_desc_bh7 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[7].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh6 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[6].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh5 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[5].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh4 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[4].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh3 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[3].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh2 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[2].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh1 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[1].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh0 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[0].wb.status_error1));
+
+                       __m256i raw_desc_bh6_7 =
+                               _mm256_inserti128_si256
+                                       (_mm256_castsi128_si256(raw_desc_bh6),
+                                       raw_desc_bh7, 1);
+                       __m256i raw_desc_bh4_5 =
+                               _mm256_inserti128_si256
+                                       (_mm256_castsi128_si256(raw_desc_bh4),
+                                       raw_desc_bh5, 1);
+                       __m256i raw_desc_bh2_3 =
+                               _mm256_inserti128_si256
+                                       (_mm256_castsi128_si256(raw_desc_bh2),
+                                       raw_desc_bh3, 1);
+                       __m256i raw_desc_bh0_1 =
+                               _mm256_inserti128_si256
+                                       (_mm256_castsi128_si256(raw_desc_bh0),
+                                       raw_desc_bh1, 1);
+
+                       /**
+                        * to shift the 32b RSS hash value to the
+                        * highest 32b of each 128b before mask
+                        */
+                       __m256i rss_hash6_7 =
+                               _mm256_slli_epi64(raw_desc_bh6_7, 32);
+                       __m256i rss_hash4_5 =
+                               _mm256_slli_epi64(raw_desc_bh4_5, 32);
+                       __m256i rss_hash2_3 =
+                               _mm256_slli_epi64(raw_desc_bh2_3, 32);
+                       __m256i rss_hash0_1 =
+                               _mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+                       __m256i rss_hash_msk =
+                               _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+                                                0xFFFFFFFF, 0, 0, 0);
+
+                       rss_hash6_7 = _mm256_and_si256
+                                       (rss_hash6_7, rss_hash_msk);
+                       rss_hash4_5 = _mm256_and_si256
+                                       (rss_hash4_5, rss_hash_msk);
+                       rss_hash2_3 = _mm256_and_si256
+                                       (rss_hash2_3, rss_hash_msk);
+                       rss_hash0_1 = _mm256_and_si256
+                                       (rss_hash0_1, rss_hash_msk);
+
+                       mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+                       mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+                       mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+                       mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+               } /* if() on RSS hash parsing */
+#endif
                /**
                 * At this point, we have the 8 sets of flags in the low 16-bits
                 * of each 32-bit value in vlan0.
index 382ef31..fffb271 100644 (file)
@@ -230,7 +230,8 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
        const __m128i zero = _mm_setzero_si128();
        /* mask to shuffle from desc. to mbuf */
        const __m128i shuf_msk = _mm_set_epi8
-                       (15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+                       (0xFF, 0xFF,
+                        0xFF, 0xFF,  /* rss hash parsed separately */
                         11, 10,      /* octet 10~11, 16 bits vlan_macip */
                         5, 4,        /* octet 4~5, 16 bits data_len */
                         0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
@@ -321,7 +322,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
             pos += ICE_DESCS_PER_LOOP,
             rxdp += ICE_DESCS_PER_LOOP) {
                __m128i descs[ICE_DESCS_PER_LOOP];
-               __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+               __m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
                __m128i staterr, sterr_tmp1, sterr_tmp2;
                /* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
                __m128i mbp1;
@@ -367,8 +368,12 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                rte_compiler_barrier();
 
                /* D.1 pkt 3,4 convert format from desc to pktmbuf */
-               pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
-               pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+               pkt_mb3 = _mm_shuffle_epi8(descs[3], shuf_msk);
+               pkt_mb2 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+               /* D.1 pkt 1,2 convert format from desc to pktmbuf */
+               pkt_mb1 = _mm_shuffle_epi8(descs[1], shuf_msk);
+               pkt_mb0 = _mm_shuffle_epi8(descs[0], shuf_msk);
 
                /* C.1 4=>2 filter staterr info only */
                sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
@@ -378,12 +383,68 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                ice_rx_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
 
                /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-               pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
                pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+               pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
 
-               /* D.1 pkt 1,2 convert format from desc to pktmbuf */
-               pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
-               pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+               /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+               pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+               pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+               /**
+                * needs to load 2nd 16B of each desc for RSS hash parsing,
+                * will cause performance drop to get into this context.
+                */
+               if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+                               DEV_RX_OFFLOAD_RSS_HASH) {
+                       /* load bottom half of every 32B desc */
+                       const __m128i raw_desc_bh3 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[3].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh2 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[2].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh1 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[1].wb.status_error1));
+                       rte_compiler_barrier();
+                       const __m128i raw_desc_bh0 =
+                               _mm_load_si128
+                                       ((void *)(&rxdp[0].wb.status_error1));
+
+                       /**
+                        * to shift the 32b RSS hash value to the
+                        * highest 32b of each 128b before mask
+                        */
+                       __m128i rss_hash3 =
+                               _mm_slli_epi64(raw_desc_bh3, 32);
+                       __m128i rss_hash2 =
+                               _mm_slli_epi64(raw_desc_bh2, 32);
+                       __m128i rss_hash1 =
+                               _mm_slli_epi64(raw_desc_bh1, 32);
+                       __m128i rss_hash0 =
+                               _mm_slli_epi64(raw_desc_bh0, 32);
+
+                       __m128i rss_hash_msk =
+                               _mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
+
+                       rss_hash3 = _mm_and_si128
+                                       (rss_hash3, rss_hash_msk);
+                       rss_hash2 = _mm_and_si128
+                                       (rss_hash2, rss_hash_msk);
+                       rss_hash1 = _mm_and_si128
+                                       (rss_hash1, rss_hash_msk);
+                       rss_hash0 = _mm_and_si128
+                                       (rss_hash0, rss_hash_msk);
+
+                       pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
+                       pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
+                       pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
+                       pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
+               } /* if() on RSS hash parsing */
+#endif
 
                /* C.2 get 4 pkts staterr value  */
                staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
@@ -391,14 +452,10 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                /* D.3 copy final 3,4 data to rx_pkts */
                _mm_storeu_si128
                        ((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
-                        pkt_mb4);
+                        pkt_mb3);
                _mm_storeu_si128
                        ((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
-                        pkt_mb3);
-
-               /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
-               pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
-               pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+                        pkt_mb2);
 
                /* C* extract and record EOP bit */
                if (split_packet) {
@@ -422,9 +479,9 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                /* D.3 copy final 1,2 data to rx_pkts */
                _mm_storeu_si128
                        ((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
-                        pkt_mb2);
+                        pkt_mb1);
                _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
-                                pkt_mb1);
+                                pkt_mb0);
                ice_rx_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
                /* C.4 calc avaialbe number of desc */
                var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));