X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fixgbe%2Fixgbe_rxtx_vec_neon.c;h=c541f537c7787f5e975280853f957bd7e4297e9b;hb=6ceb7ab83f168fa6b8e90e4bd5a1392de1a48c70;hp=e6d877af90cfc3b8e887506956ce325b704c48f4;hpb=ff1294ca53ac0338746b7a6c818d208ef16c58a0;p=dpdk.git diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c index e6d877af90..c541f537c7 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c @@ -3,7 +3,7 @@ */ #include -#include +#include #include #include @@ -83,14 +83,19 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq) static inline void desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2, - uint8x16_t staterr, uint8_t vlan_flags, struct rte_mbuf **rx_pkts) + uint8x16_t staterr, uint8_t vlan_flags, uint16_t udp_p_flag, + struct rte_mbuf **rx_pkts) { - uint8x16_t ptype; - uint8x16_t vtag; + uint16_t udp_p_flag_hi; + uint8x16_t ptype, udp_csum_skip; + uint32x4_t temp_udp_csum_skip = {0, 0, 0, 0}; + uint8x16_t vtag_lo, vtag_hi, vtag; + uint8x16_t temp_csum; + uint32x4_t csum = {0, 0, 0, 0}; union { - uint8_t e[4]; - uint32_t word; + uint16_t e[4]; + uint64_t word; } vol; const uint8x16_t rsstype_msk = { @@ -105,29 +110,100 @@ desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2, PKT_RX_RSS_HASH, 0, 0, 0, 0, 0, 0, PKT_RX_FDIR}; - const uint8x16_t vlan_msk = { + /* mask everything except vlan present and l4/ip csum error */ + const uint8x16_t vlan_csum_msk = { IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP, 0, 0, 0, 0, 0, 0, 0, 0, + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24, + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24, + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24, + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24}; + + /* map vlan present (0x8), IPE (0x2), L4E (0x1) to ol_flags */ + const uint8x16_t vlan_csum_map_lo = { + PKT_RX_IP_CKSUM_GOOD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + 0, 0, 0, 0, + vlan_flags | PKT_RX_IP_CKSUM_GOOD, + vlan_flags | PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, + vlan_flags | PKT_RX_IP_CKSUM_BAD, + vlan_flags | PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + 0, 0, 0, 0}; + + const uint8x16_t vlan_csum_map_hi = { + PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0, + PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0, + 0, 0, 0, 0, + PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0, + PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0, + 0, 0, 0, 0}; + + /* change mask from 0x200(IXGBE_RXDADV_PKTTYPE_UDP) to 0x2 */ + udp_p_flag_hi = udp_p_flag >> 8; + + /* mask everything except UDP header present if specified */ + const uint8x16_t udp_hdr_p_msk = { + 0, 0, 0, 0, + udp_p_flag_hi, udp_p_flag_hi, udp_p_flag_hi, udp_p_flag_hi, + 0, 0, 0, 0, 0, 0, 0, 0}; - const uint8x16_t vlan_map = { + const uint8x16_t udp_csum_bad_shuf = { + 0xFF, ~(uint8_t)PKT_RX_L4_CKSUM_BAD, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - vlan_flags, 0, 0, 0, 0, 0, 0, 0}; ptype = vzipq_u8(sterr_tmp1.val[0], sterr_tmp2.val[0]).val[0]; + + /* save the UDP header present information */ + udp_csum_skip = vandq_u8(ptype, udp_hdr_p_msk); + + /* move UDP header present information to low 32bits */ + temp_udp_csum_skip = vcopyq_laneq_u32(temp_udp_csum_skip, 0, + vreinterpretq_u32_u8(udp_csum_skip), 1); + ptype = vandq_u8(ptype, rsstype_msk); ptype = vqtbl1q_u8(rss_flags, ptype); - /* extract vlan_flags from IXGBE_RXD_STAT_VP bits of staterr */ - vtag = vandq_u8(staterr, vlan_msk); - vtag = vqtbl1q_u8(vlan_map, vtag); - vtag = vorrq_u8(ptype, vtag); + /* extract vlan_flags and csum_error from staterr */ + vtag = vandq_u8(staterr, vlan_csum_msk); - vol.word = vgetq_lane_u32(vreinterpretq_u32_u8(vtag), 0); + /* csum bits are in the most significant, to use shuffle we need to + * shift them. Change mask from 0xc0 to 0x03. + */ + temp_csum = vshrq_n_u8(vtag, 6); + + /* 'OR' the most significant 32 bits containing the checksum + * flags with the vlan present flags + * Then bits layout of each lane(8bits) will be 'xxxx,VP,x,IPE,L4E' + */ + csum = vsetq_lane_u32(vgetq_lane_u32(vreinterpretq_u32_u8(temp_csum), 3), csum, 0); + vtag = vorrq_u8(vreinterpretq_u8_u32(csum), vtag); + + /* convert L4 checksum correct type to vtag_hi */ + vtag_hi = vqtbl1q_u8(vlan_csum_map_hi, vtag); + vtag_hi = vshrq_n_u8(vtag_hi, 7); + + /* convert VP, IPE, L4E to vtag_lo */ + vtag_lo = vqtbl1q_u8(vlan_csum_map_lo, vtag); + vtag_lo = vorrq_u8(ptype, vtag_lo); + + /* convert the UDP header present 0x2 to 0x1 for aligning with each + * PKT_RX_L4_CKSUM_BAD value in low byte of 8 bits word ol_flag in + * vtag_lo (4x8). Then mask out the bad checksum value by shuffle and + * bit-mask. + */ + udp_csum_skip = vshrq_n_u8(vreinterpretq_u8_u32(temp_udp_csum_skip), 1); + udp_csum_skip = vqtbl1q_u8(udp_csum_bad_shuf, udp_csum_skip); + vtag_lo = vandq_u8(vtag_lo, udp_csum_skip); + + vtag = vzipq_u8(vtag_lo, vtag_hi).val[0]; + vol.word = vgetq_lane_u64(vreinterpretq_u64_u8(vtag), 0); rx_pkts[0]->ol_flags = vol.e[0]; rx_pkts[1]->ol_flags = vol.e[1]; @@ -227,6 +303,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16x8_t crc_adjust = {0, 0, rxq->crc_len, 0, rxq->crc_len, 0, 0, 0}; uint8_t vlan_flags; + uint16_t udp_p_flag = 0; /* Rx Descriptor UDP header present */ /* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP); @@ -251,6 +328,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) return 0; + if (rxq->rx_udp_csum_zero_err) + udp_p_flag = IXGBE_RXDADV_PKTTYPE_UDP; + /* Cache is empty -> need to scan the buffer rings, but first move * the next 'n' mbufs into the cache */ @@ -322,7 +402,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, /* set ol_flags with vlan packet type */ desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, vlan_flags, - &rx_pkts[pos]); + udp_p_flag, &rx_pkts[pos]); /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust); @@ -391,7 +471,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, * Notice: * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two - * - don't support ol_flags for rss and csum err */ uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -404,7 +483,6 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, * vPMD receive routine that reassembles scattered packets * * Notice: - * - don't support ol_flags for rss and csum err * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two */ @@ -600,11 +678,5 @@ ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq) int __rte_cold ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) { - struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; - - /* no csum error report support */ - if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM) - return -1; - return ixgbe_rx_vec_dev_conf_condition_check_default(dev); }