X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;ds=sidebyside;f=drivers%2Fnet%2Fixgbe%2Fixgbe_rxtx_vec_neon.c;h=feb86c61ee9aae481a4dfe25b70b98d8e2f49d69;hb=refs%2Fheads%2Fmempool_20200116b;hp=edb13835439a6c61251d4c4d99bea6c094232f28;hpb=ec3b1124d14d1481961a0d8b8163d378b5c63b04;p=dpdk.git

diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index edb1383543..feb86c61ee 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -142,8 +142,70 @@ desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
  * - don't support ol_flags for rss and csum err
  */
 
-#define IXGBE_VPMD_DESC_DD_MASK		0x01010101
 #define IXGBE_VPMD_DESC_EOP_MASK	0x02020202
+#define IXGBE_UINT8_BIT			(CHAR_BIT * sizeof(uint8_t))
+
+static inline uint32_t
+get_packet_type(uint32_t pkt_info,
+		uint32_t etqf_check,
+		uint32_t tunnel_check)
+{
+	if (etqf_check)
+		return RTE_PTYPE_UNKNOWN;
+
+	if (tunnel_check) {
+		pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
+		return ptype_table_tn[pkt_info];
+	}
+
+	pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
+	return ptype_table[pkt_info];
+}
+
+static inline void
+desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
+		struct rte_mbuf **rx_pkts)
+{
+	uint32x4_t etqf_check, tunnel_check;
+	uint32x4_t etqf_mask = vdupq_n_u32(0x8000);
+	uint32x4_t tunnel_mask = vdupq_n_u32(0x10000);
+	uint32x4_t ptype_mask = vdupq_n_u32((uint32_t)pkt_type_mask);
+	uint32x4_t ptype0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
+				vreinterpretq_u32_u64(descs[2])).val[0];
+	uint32x4_t ptype1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
+				vreinterpretq_u32_u64(descs[3])).val[0];
+
+	/* interleave low 32 bits,
+	 * now we have 4 ptypes in a NEON register
+	 */
+	ptype0 = vzipq_u32(ptype0, ptype1).val[0];
+
+	/* mask etqf bits */
+	etqf_check = vandq_u32(ptype0, etqf_mask);
+	/* mask tunnel bits */
+	tunnel_check = vandq_u32(ptype0, tunnel_mask);
+
+	/* shift right by IXGBE_PACKET_TYPE_SHIFT, and apply ptype mask */
+	ptype0 = vandq_u32(vshrq_n_u32(ptype0, IXGBE_PACKET_TYPE_SHIFT),
+			ptype_mask);
+
+	rx_pkts[0]->packet_type =
+		get_packet_type(vgetq_lane_u32(ptype0, 0),
+				vgetq_lane_u32(etqf_check, 0),
+				vgetq_lane_u32(tunnel_check, 0));
+	rx_pkts[1]->packet_type =
+		get_packet_type(vgetq_lane_u32(ptype0, 1),
+				vgetq_lane_u32(etqf_check, 1),
+				vgetq_lane_u32(tunnel_check, 1));
+	rx_pkts[2]->packet_type =
+		get_packet_type(vgetq_lane_u32(ptype0, 2),
+				vgetq_lane_u32(etqf_check, 2),
+				vgetq_lane_u32(tunnel_check, 2));
+	rx_pkts[3]->packet_type =
+		get_packet_type(vgetq_lane_u32(ptype0, 3),
+				vgetq_lane_u32(etqf_check, 3),
+				vgetq_lane_u32(tunnel_check, 3));
+}
 
 static inline uint16_t
 _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -211,16 +273,15 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		uint64x2_t mbp1, mbp2;
 		uint8x16_t staterr;
 		uint16x8_t tmp;
-		uint32_t var = 0;
 		uint32_t stat;
 
-		/* B.1 load 1 mbuf point */
+		/* B.1 load 2 mbuf point */
 		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
 
 		/* B.2 copy 2 mbuf point into rx_pkts */
 		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
 
-		/* B.1 load 1 mbuf point */
+		/* B.1 load 2 mbuf point */
 		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
 
 		/* A. load 4 pkts descs */
@@ -228,7 +289,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		descs[1] = vld1q_u64((uint64_t *)(rxdp + 1));
 		descs[2] = vld1q_u64((uint64_t *)(rxdp + 2));
 		descs[3] = vld1q_u64((uint64_t *)(rxdp + 3));
-		rte_smp_rmb();
 
 		/* B.2 copy 2 mbuf point into rx_pkts */
 		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
@@ -257,7 +317,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 		/* C.2 get 4 pkts staterr value */
 		staterr = vzipq_u8(sterr_tmp1.val[1], sterr_tmp2.val[1]).val[0];
-		stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
 
 		/* set ol_flags with vlan packet type */
 		desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr,
@@ -283,12 +342,20 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 		/* C* extract and record EOP bit */
 		if (split_packet) {
+			stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
 			/* and with mask to extract bits, flipping 1-0 */
 			*(int *)split_packet = ~stat & IXGBE_VPMD_DESC_EOP_MASK;
 
 			split_packet += RTE_IXGBE_DESCS_PER_LOOP;
 		}
 
+		/* C.4 expand DD bit to saturate UINT8 */
+		staterr = vshlq_n_u8(staterr, IXGBE_UINT8_BIT - 1);
+		staterr = vreinterpretq_u8_s8
+				(vshrq_n_s8(vreinterpretq_s8_u8(staterr),
+					IXGBE_UINT8_BIT - 1));
+		stat = ~vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
+
 		rte_prefetch_non_temporal(rxdp + RTE_IXGBE_DESCS_PER_LOOP);
 
 		/* D.3 copy final 1,2 data to rx_pkts */
@@ -297,18 +364,14 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		vst1q_u8((uint8_t *)&rx_pkts[pos]->rx_descriptor_fields1,
 			 pkt_mb1);
 
-		stat &= IXGBE_VPMD_DESC_DD_MASK;
+		desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]);
 
-		/* C.4 calc avaialbe number of desc */
-		if (likely(stat != IXGBE_VPMD_DESC_DD_MASK)) {
-			while (stat & 0x01) {
-				++var;
-				stat = stat >> 8;
-			}
-			nb_pkts_recd += var;
-			break;
-		} else {
+		/* C.5 calc available number of desc */
+		if (unlikely(stat == 0)) {
 			nb_pkts_recd += RTE_IXGBE_DESCS_PER_LOOP;
+		} else {
+			nb_pkts_recd += __builtin_ctz(stat) / IXGBE_UINT8_BIT;
+			break;
 		}
 	}
 
@@ -375,6 +438,7 @@ ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			i++;
 			if (i == nb_bufs)
 				return nb_bufs;
+			rxq->pkt_first_seg = rx_pkts[i];
 		}
 	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
 				      &split_flags[i]);
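
Not part of the patch: a minimal scalar sketch of the descriptor-count logic the diff switches to in steps C.4/C.5, for readers without a NEON reference handy. Each descriptor's DD bit is widened to a full 0x00/0xff byte (what the vshlq_n_u8/vshrq_n_s8 pair does per lane), the 32-bit word holding the four status bytes is inverted, and __builtin_ctz of the result divided by the byte width gives the number of leading completed descriptors. The helper name count_done and the UINT8_BIT macro below are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>
#include <limits.h>

#define UINT8_BIT	(CHAR_BIT * sizeof(uint8_t))	/* illustrative, mirrors IXGBE_UINT8_BIT */

/* dd[i] is the DD (descriptor done) bit of descriptor i in the 4-descriptor burst */
static uint32_t
count_done(const uint8_t dd[4])
{
	uint32_t stat = 0;
	int i;

	for (i = 0; i < 4; i++) {
		/* expand the DD bit to a saturated 0x00/0xff byte,
		 * as the NEON shift-left + arithmetic-shift-right pair does
		 */
		uint8_t byte = dd[i] ? 0xff : 0x00;

		stat |= (uint32_t)byte << (i * UINT8_BIT);
	}

	stat = ~stat;	/* done bytes become 0x00, not-done bytes 0xff */
	if (stat == 0)
		return 4;	/* all four descriptors are done */

	/* trailing zero bits / bits-per-byte = number of leading done descriptors */
	return __builtin_ctz(stat) / UINT8_BIT;
}

int
main(void)
{
	uint8_t dd[4] = {1, 1, 0, 1};	/* third descriptor not yet written back */

	printf("%" PRIu32 " packets received\n", count_done(dd));	/* prints 2 */
	return 0;
}

With dd = {1, 1, 0, 1} the inverted word is 0x00ff0000, ctz gives 16, and 16 / 8 = 2: only the two leading completed descriptors are counted, matching the early break in the C.5 branch of the patch.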