X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fice%2Fice_rxtx_vec_avx2.c;h=7838e17787019c7ce0b8a65f5e0c308e33691abd;hb=cb71192486c34eada5b65c6c46d32afd05cc091b;hp=5969a3048271dd5a961938712d35840c1804fd75;hpb=59852468c19c9e1e74f5b9ec1004c22d965aa06b;p=dpdk.git diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c index 5969a30482..7838e17787 100644 --- a/drivers/net/ice/ice_rxtx_vec_avx2.c +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c @@ -129,7 +129,7 @@ ice_rxq_rearm(struct ice_rx_queue *rxq) (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); /* Update the tail pointer on the NIC */ - ICE_PCI_REG_WRITE(rxq->qrx_tail, rx_id); + ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id); } static inline __m256i @@ -251,43 +251,88 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, * bit13 is for VLAN indication. */ const __m256i flags_mask = - _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13)); + _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13)); /** * data to be shuffled by the result of the flags mask shifted by 4 * bits. This gives use the l3_l4 flags. */ - const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, - /* shift right 1 bit to make sure it not exceed 255 */ - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, - /* second 128-bits */ - 0, 0, 0, 0, 0, 0, 0, 0, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1); + const __m256i l3_l4_flags_shuf = + _mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + /** + * second 128-bits + * shift right 20 bits to use the low two bits to indicate + * outer checksum status + * shift right 1 bit to make sure it not exceed 255 + */ + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1); const __m256i cksum_mask = - _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | - PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_EIP_CKSUM_BAD); + _mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK | + PKT_RX_L4_CKSUM_MASK | + PKT_RX_EIP_CKSUM_BAD | + PKT_RX_OUTER_L4_CKSUM_MASK); /** * data to be shuffled by result of flag mask, shifted down 12. * If RSS(bit12)/VLAN(bit13) are set, @@ -469,6 +514,15 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, __m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf, _mm256_srli_epi32(flag_bits, 4)); l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1); + + __m256i l4_outer_mask = _mm256_set1_epi32(0x6); + __m256i l4_outer_flags = + _mm256_and_si256(l3_l4_flags, l4_outer_mask); + l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20); + + __m256i l3_l4_mask = _mm256_set1_epi32(~0x6); + l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask); + l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags); l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask); /* set rss and vlan flags */ const __m256i rss_vlan_flag_bits = @@ -962,7 +1016,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts, txq->tx_tail = tx_id; - ICE_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); + ICE_PCI_REG_WC_WRITE(txq->qtx_tail, txq->tx_tail); return nb_pkts; }