X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fice%2Fice_rxtx_vec_sse.c;h=6cd44c5847c984f7939c257af2c74b10b0288f59;hb=0663a84524e5c63cb737cd723b4ea33493e8d17a;hp=626364719bbc6a9f48e274530dc728efacf1fad0;hpb=bdc027118814781740065d962ce3dcb39ecebea0;p=dpdk.git diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c index 626364719b..6cd44c5847 100644 --- a/drivers/net/ice/ice_rxtx_vec_sse.c +++ b/drivers/net/ice/ice_rxtx_vec_sse.c @@ -14,10 +14,10 @@ static inline __m128i ice_flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3) { #define FDID_MIS_MAGIC 0xFFFFFFFF - RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2)); - RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13)); - const __m128i pkt_fdir_bit = _mm_set1_epi32(PKT_RX_FDIR | - PKT_RX_FDIR_ID); + RTE_BUILD_BUG_ON(RTE_MBUF_F_RX_FDIR != (1 << 2)); + RTE_BUILD_BUG_ON(RTE_MBUF_F_RX_FDIR_ID != (1 << 13)); + const __m128i pkt_fdir_bit = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR | + RTE_MBUF_F_RX_FDIR_ID); /* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */ const __m128i fdir_mis_mask = _mm_set1_epi32(FDID_MIS_MAGIC); __m128i fdir_mask = _mm_cmpeq_epi32(fdir_id0_3, @@ -114,46 +114,74 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4], * bit12 for RSS indication. * bit13 for VLAN indication. */ - const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070, - 0x3070, 0x3070); - - const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK | - PKT_RX_L4_CKSUM_MASK | - PKT_RX_EIP_CKSUM_BAD, - PKT_RX_IP_CKSUM_MASK | - PKT_RX_L4_CKSUM_MASK | - PKT_RX_EIP_CKSUM_BAD, - PKT_RX_IP_CKSUM_MASK | - PKT_RX_L4_CKSUM_MASK | - PKT_RX_EIP_CKSUM_BAD, - PKT_RX_IP_CKSUM_MASK | - PKT_RX_L4_CKSUM_MASK | - PKT_RX_EIP_CKSUM_BAD); + const __m128i desc_mask = _mm_set_epi32(0x30f0, 0x30f0, + 0x30f0, 0x30f0); + const __m128i cksum_mask = _mm_set_epi32(RTE_MBUF_F_RX_IP_CKSUM_MASK | + RTE_MBUF_F_RX_L4_CKSUM_MASK | + RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK | + RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD, + RTE_MBUF_F_RX_IP_CKSUM_MASK | + RTE_MBUF_F_RX_L4_CKSUM_MASK | + RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK | + RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD, + RTE_MBUF_F_RX_IP_CKSUM_MASK | + RTE_MBUF_F_RX_L4_CKSUM_MASK | + RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK | + RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD, + RTE_MBUF_F_RX_IP_CKSUM_MASK | + RTE_MBUF_F_RX_L4_CKSUM_MASK | + RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK | + RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD); /* map the checksum, rss and vlan fields to the checksum, rss * and vlan flag */ - const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, - /* shift right 1 bit to make sure it not exceed 255 */ - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1); + const __m128i cksum_flags = + _mm_set_epi8((RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | + RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD | + RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | RTE_MBUF_F_RX_L4_CKSUM_BAD | + RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | RTE_MBUF_F_RX_L4_CKSUM_BAD | + RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | RTE_MBUF_F_RX_L4_CKSUM_GOOD | + RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | RTE_MBUF_F_RX_L4_CKSUM_GOOD | + RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1, + /** + * shift right 20 bits to use the low two bits to indicate + * outer checksum status + * shift right 1 bit to make sure it not exceed 255 + */ + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | + RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | RTE_MBUF_F_RX_L4_CKSUM_BAD | + RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | RTE_MBUF_F_RX_L4_CKSUM_BAD | + RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | RTE_MBUF_F_RX_L4_CKSUM_GOOD | + RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1, + (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | RTE_MBUF_F_RX_L4_CKSUM_GOOD | + RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1); const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, - PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, - PKT_RX_RSS_HASH, 0); + RTE_MBUF_F_RX_RSS_HASH | RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED, + RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED, + RTE_MBUF_F_RX_RSS_HASH, 0); /* merge 4 descriptors */ flags = _mm_unpackhi_epi32(descs[0], descs[1]); @@ -166,6 +194,14 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4], flags = _mm_shuffle_epi8(cksum_flags, tmp_desc); /* then we shift left 1 bit */ flags = _mm_slli_epi32(flags, 1); + + __m128i l4_outer_mask = _mm_set_epi32(0x6, 0x6, 0x6, 0x6); + __m128i l4_outer_flags = _mm_and_si128(flags, l4_outer_mask); + l4_outer_flags = _mm_slli_epi32(l4_outer_flags, 20); + + __m128i l3_l4_mask = _mm_set_epi32(~0x6, ~0x6, ~0x6, ~0x6); + __m128i l3_l4_flags = _mm_and_si128(flags, l3_l4_mask); + flags = _mm_or_si128(l3_l4_flags, l4_outer_flags); /* we need to mask out the reduntant bits introduced by RSS or * VLAN fields. */ @@ -217,10 +253,10 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4], * appropriate flags means that we have to do a shift and blend for * each mbuf before we do the write. */ - rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10); - rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10); - rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10); - rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10); + rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x30); + rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x30); + rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x30); + rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x30); /* write the rearm data and the olflags in one write */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) != @@ -380,7 +416,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, /* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */ mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); /* Read desc statuses backwards to avoid race condition */ - /* A.1 load 4 pkts desc */ + /* A.1 load desc[3] */ descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); rte_compiler_barrier(); @@ -392,9 +428,9 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]); #endif + /* A.1 load desc[2-0] */ descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); rte_compiler_barrier(); - /* B.1 load 2 mbuf point */ descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); rte_compiler_barrier(); descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); @@ -442,8 +478,8 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, * needs to load 2nd 16B of each desc for RSS hash parsing, * will cause performance drop to get into this context. */ - if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads & - DEV_RX_OFFLOAD_RSS_HASH) { + if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads & + RTE_ETH_RX_OFFLOAD_RSS_HASH) { /* load bottom half of every 32B desc */ const __m128i raw_desc_bh3 = _mm_load_si128 @@ -509,7 +545,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, /* and with mask to extract bits, flipping 1-0 */ __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); /* the staterr values are not in order, as the count - * count of dd bits doesn't care. However, for end of + * of dd bits doesn't care. However, for end of * packet tracking, we do care, so shuffle. This also * compresses the 32-bit values to 8-bit */ @@ -666,7 +702,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh); if (txq->nb_tx_free < txq->tx_free_thresh) - ice_tx_free_bufs(txq); + ice_tx_free_bufs_vec(txq); nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts); nb_commit = nb_pkts;