diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index 626364719b..653bd28b41 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -114,39 +114,67 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
	 * bit12 for RSS indication.
	 * bit13 for VLAN indication.
	 */
-	const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
-						0x3070, 0x3070);
-
+	const __m128i desc_mask = _mm_set_epi32(0x30f0, 0x30f0,
+						0x30f0, 0x30f0);
 	const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
						 PKT_RX_L4_CKSUM_MASK |
-						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_OUTER_L4_CKSUM_MASK |
+						 PKT_RX_OUTER_IP_CKSUM_BAD,
						 PKT_RX_IP_CKSUM_MASK |
						 PKT_RX_L4_CKSUM_MASK |
-						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_OUTER_L4_CKSUM_MASK |
+						 PKT_RX_OUTER_IP_CKSUM_BAD,
						 PKT_RX_IP_CKSUM_MASK |
						 PKT_RX_L4_CKSUM_MASK |
-						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_OUTER_L4_CKSUM_MASK |
+						 PKT_RX_OUTER_IP_CKSUM_BAD,
						 PKT_RX_IP_CKSUM_MASK |
						 PKT_RX_L4_CKSUM_MASK |
-						 PKT_RX_EIP_CKSUM_BAD);
+						 PKT_RX_OUTER_L4_CKSUM_MASK |
+						 PKT_RX_OUTER_IP_CKSUM_BAD);
 
	/* map the checksum, rss and vlan fields to the checksum, rss
	 * and vlan flag
	 */
-	const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
-			/* shift right 1 bit to make sure it not exceed 255 */
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+	const __m128i cksum_flags =
+		_mm_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+		 PKT_RX_OUTER_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_OUTER_IP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_OUTER_IP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_OUTER_IP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		/**
+		 * shift right 20 bits to use the low two bits to indicate
+		 * outer checksum status
+		 * shift right 1 bit to make sure it not exceed 255
+		 */
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_OUTER_IP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_OUTER_IP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_OUTER_IP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_OUTER_IP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1);
 
	const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
			0, 0, 0, 0,
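A note on the rewritten lookup table: _mm_shuffle_epi8 returns one byte per descriptor lane, so every cksum_flags entry must fit in 8 bits. The inner L3/L4 flags are small enough once shifted right by 1; the new outer-L4 flags sit much higher in ol_flags (bits 21-22 in the mbuf layout of this era), so the table additionally pre-shifts them down by 20, parking them in the low bits of each byte. A standalone sketch, not part of the patch, with the flag values restated locally (assumed to mirror rte_mbuf_core.h) so it compiles without DPDK:

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

/* Assumed values, mirroring rte_mbuf_core.h of this period. */
#define PKT_RX_L4_CKSUM_BAD        (1ULL << 3)
#define PKT_RX_IP_CKSUM_BAD        (1ULL << 4)
#define PKT_RX_OUTER_IP_CKSUM_BAD  (1ULL << 5)
#define PKT_RX_IP_CKSUM_GOOD       (1ULL << 7)
#define PKT_RX_L4_CKSUM_GOOD       (1ULL << 8)
#define PKT_RX_OUTER_L4_CKSUM_BAD  (1ULL << 21)
#define PKT_RX_OUTER_L4_CKSUM_GOOD (1ULL << 22)

int main(void)
{
	/* Largest table entry: every "good/bad" status flag set at once,
	 * outer-L4 bits pre-shifted down by 20, whole value shifted by 1.
	 */
	uint64_t entry = (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
			  PKT_RX_OUTER_IP_CKSUM_BAD |
			  PKT_RX_L4_CKSUM_GOOD |
			  PKT_RX_IP_CKSUM_GOOD) >> 1;

	assert(entry <= 0xff);	/* must fit one _mm_set_epi8() byte lane */
	printf("largest entry = 0x%" PRIx64 "\n", entry);	/* 0xd2 */
	return 0;
}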
@@ -166,6 +194,14 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
	flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
	/* then we shift left 1 bit */
	flags = _mm_slli_epi32(flags, 1);
+
+	__m128i l4_outer_mask = _mm_set_epi32(0x6, 0x6, 0x6, 0x6);
+	__m128i l4_outer_flags = _mm_and_si128(flags, l4_outer_mask);
+	l4_outer_flags = _mm_slli_epi32(l4_outer_flags, 20);
+
+	__m128i l3_l4_mask = _mm_set_epi32(~0x6, ~0x6, ~0x6, ~0x6);
+	__m128i l3_l4_flags = _mm_and_si128(flags, l3_l4_mask);
+	flags = _mm_or_si128(l3_l4_flags, l4_outer_flags);
	/* we need to mask out the reduntant bits introduced by RSS or
	 * VLAN fields.
	 */
@@ -217,10 +253,10 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
	 * appropriate flags means that we have to do a shift and blend for
	 * each mbuf before we do the write.
	 */
-	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
-	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
-	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
-	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
+	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x30);
+	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x30);
+	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x30);
+	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x30);
 
	/* write the rearm data and the olflags in one write */
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
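The first hunk above undoes that packing at run time: _mm_slli_epi32(flags, 1) restores the original bit positions, leaving the outer-L4 status parked at bits 1-2 (mask 0x6), a slot no inner checksum flag occupies; the new lines carve those two bits out and shift them up by 20 into bits 21-22. That is also why the second hunk widens the _mm_blend_epi16 immediate from 0x10 to 0x30: the flag word now has bits set above bit 15, so two 16-bit lanes of ol_flags must be blended in instead of one. A scalar model of the recombination (illustration only, not driver code):

#include <assert.h>
#include <stdint.h>

/* Scalar model of the vector recombination step: 0x6 selects bits 1-2,
 * where the pre-shifted outer-L4 status was parked by the lookup table.
 */
static uint32_t restore_outer_l4(uint32_t flags)
{
	uint32_t l4_outer = (flags & 0x6) << 20;	/* bits 1-2 -> bits 21-22 */
	return (flags & ~(uint32_t)0x6) | l4_outer;	/* keep every other flag */
}

int main(void)
{
	/* Outer-L4-bad parked at bit 1, plus an untouched inner flag at
	 * bit 4 (PKT_RX_IP_CKSUM_BAD in the assumed layout above).
	 */
	assert(restore_outer_l4((1u << 1) | (1u << 4)) ==
	       ((1u << 21) | (1u << 4)));
	return 0;
}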
@@ -380,7 +416,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
		/* Read desc statuses backwards to avoid race condition */
-		/* A.1 load 4 pkts desc */
+		/* A.1 load desc[3] */
		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
		rte_compiler_barrier();
 
@@ -392,9 +428,9 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
 #endif
 
+		/* A.1 load desc[2-0] */
		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
		rte_compiler_barrier();
-		/* B.1 load 2 mbuf point */
		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
		rte_compiler_barrier();
		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
@@ -442,7 +478,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		 * needs to load 2nd 16B of each desc for RSS hash parsing,
		 * will cause performance drop to get into this context.
		 */
-		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+		if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
				DEV_RX_OFFLOAD_RSS_HASH) {
			/* load bottom half of every 32B desc */
			const __m128i raw_desc_bh3 =
@@ -509,7 +545,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		/* and with mask to extract bits, flipping 1-0 */
		__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
		/* the staterr values are not in order, as the count
-		 * count of dd bits doesn't care. However, for end of
+		 * of dd bits doesn't care. However, for end of
		 * packet tracking, we do care, so shuffle. This also
		 * compresses the 32-bit values to 8-bit
		 */
@@ -666,7 +702,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
 
	if (txq->nb_tx_free < txq->tx_free_thresh)
-		ice_tx_free_bufs(txq);
+		ice_tx_free_bufs_vec(txq);
 
	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	nb_commit = nb_pkts;
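On the transmit side, the rename to ice_tx_free_bufs_vec() is mechanical, but the surrounding flow is worth restating: the burst is first clamped to tx_rs_thresh, completed buffers are reclaimed once nb_tx_free drops below tx_free_thresh, and the burst is then clamped again to what is actually free. A standalone sketch of that shape, using a toy queue with hypothetical fields and a stand-in reclaim (not the driver's implementation):

#include <stdint.h>
#include <stdio.h>

/* Toy queue mirroring only the three fields the flow above touches. */
struct toy_txq {
	uint16_t nb_tx_free;	/* descriptors currently free	      */
	uint16_t tx_free_thresh;	/* reclaim when free count drops      */
	uint16_t tx_rs_thresh;	/* max burst / reclaim batch size     */
};

/* Stand-in for ice_tx_free_bufs_vec(): pretend one RS batch completed. */
static void toy_reclaim(struct toy_txq *q)
{
	q->nb_tx_free += q->tx_rs_thresh;
}

static uint16_t toy_clamp_burst(struct toy_txq *q, uint16_t nb_pkts)
{
	if (nb_pkts > q->tx_rs_thresh)
		nb_pkts = q->tx_rs_thresh;	/* first clamp */
	if (q->nb_tx_free < q->tx_free_thresh)
		toy_reclaim(q);
	return nb_pkts < q->nb_tx_free ? nb_pkts : q->nb_tx_free;
}

int main(void)
{
	struct toy_txq q = { .nb_tx_free = 8, .tx_free_thresh = 16,
			     .tx_rs_thresh = 32 };
	/* 64 requested -> clamped to 32; reclaim lifts nb_tx_free to 40,
	 * so the second clamp still commits the full 32.
	 */
	printf("commit %u packets\n", toy_clamp_burst(&q, 64));
	return 0;
}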