X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fhns3%2Fhns3_rxtx_vec_neon.h;h=74c848d5efcd40413eae70d64c10182469c04c2b;hb=ee930d38ffca82af6c70209be7a8f2b408dc5610;hp=e878ee1d24c804ddb932c66a51150020aa283322;hpb=e31f123db06b4400dbaa1298882401cebd541398;p=dpdk.git

diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h
index e878ee1d24..74c848d5ef 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h
+++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2020 Hisilicon Limited.
+ * Copyright(c) 2020-2021 HiSilicon Limited.
  */
 
 #ifndef _HNS3_RXTX_VEC_NEON_H_
@@ -42,7 +42,7 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,
 
 	nb_commit = RTE_MIN(txq->tx_bd_ready, nb_pkts);
 	if (unlikely(nb_commit == 0)) {
-		txq->queue_full_cnt++;
+		txq->dfx_stats.queue_full_cnt++;
 		return 0;
 	}
 	nb_tx = nb_commit;
@@ -61,6 +61,9 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,
 	for (i = 0; i < n; i++, tx_pkts++, tx_desc++) {
 		hns3_vec_tx(tx_desc, *tx_pkts);
 		tx_entry[i].mbuf = *tx_pkts;
+
+		/* Increment bytes counter */
+		txq->basic_stats.bytes += (*tx_pkts)->pkt_len;
 	}
 
 	nb_commit -= n;
@@ -72,14 +75,228 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,
 	for (i = 0; i < nb_commit; i++, tx_pkts++, tx_desc++) {
 		hns3_vec_tx(tx_desc, *tx_pkts);
 		tx_entry[i].mbuf = *tx_pkts;
+
+		/* Increment bytes counter */
+		txq->basic_stats.bytes += (*tx_pkts)->pkt_len;
 	}
 
 	next_to_use += nb_commit;
 	txq->next_to_use = next_to_use;
 	txq->tx_bd_ready -= nb_tx;
-	hns3_write_reg_opt(txq->io_tail_reg, nb_tx);
+	hns3_write_txq_tail_reg(txq, nb_tx);
 
 	return nb_tx;
 }
+
+static inline uint32_t
+hns3_desc_parse_field(struct hns3_rx_queue *rxq,
+		      struct hns3_entry *sw_ring,
+		      struct hns3_desc *rxdp,
+		      uint32_t bd_vld_num)
+{
+	uint32_t l234_info, ol_info, bd_base_info;
+	struct rte_mbuf *pkt;
+	uint32_t retcode = 0;
+	uint32_t i;
+	int ret;
+
+	for (i = 0; i < bd_vld_num; i++) {
+		pkt = sw_ring[i].mbuf;
+
+		/* init the last 64 bits of rte_mbuf.rearm_data */
+		pkt->ol_flags = PKT_RX_RSS_HASH;
+
+		l234_info = rxdp[i].rx.l234_info;
+		ol_info = rxdp[i].rx.ol_info;
+		bd_base_info = rxdp[i].rx.bd_base_info;
+		ret = hns3_handle_bdinfo(rxq, pkt, bd_base_info, l234_info);
+		if (unlikely(ret)) {
+			retcode |= 1u << i;
+			continue;
+		}
+
+		pkt->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info);
+
+		/* Increment bytes counter */
+		rxq->basic_stats.bytes += pkt->pkt_len;
+	}
+
+	return retcode;
+}
+
+static inline uint16_t
+hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
+		    struct rte_mbuf **__restrict rx_pkts,
+		    uint16_t nb_pkts,
+		    uint64_t *bd_err_mask)
+{
+	uint16_t rx_id = rxq->next_to_use;
+	struct hns3_entry *sw_ring = &rxq->sw_ring[rx_id];
+	struct hns3_desc *rxdp = &rxq->rx_ring[rx_id];
+	uint32_t bd_valid_num, parse_retcode;
+	uint16_t nb_rx = 0;
+	uint32_t pos;
+	int offset;
+
+	/* mask to shuffle from desc to mbuf's rx_descriptor_fields1 */
+	uint8x16_t shuf_desc_fields_msk = {
+		0xff, 0xff, 0xff, 0xff,  /* packet type init zero */
+		22, 23, 0xff, 0xff,      /* rx.pkt_len to rte_mbuf.pkt_len */
+		20, 21,                  /* size to rte_mbuf.data_len */
+		0xff, 0xff,              /* rte_mbuf.vlan_tci init zero */
+		8, 9, 10, 11,            /* rx.rss_hash to rte_mbuf.hash.rss */
+	};
+
+	uint16x8_t crc_adjust = {
+		0, 0,         /* ignore pkt_type field */
+		rxq->crc_len, /* sub crc on pkt_len */
+		0,            /* ignore high-16bits of pkt_len */
+		rxq->crc_len, /* sub crc on data_len */
+		0, 0, 0,      /* ignore non-length fields */
+	};
+
+	/* compile-time verification of the shuffle mask */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash.rss) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	for (pos = 0; pos < nb_pkts; pos += HNS3_DEFAULT_DESCS_PER_LOOP,
+	     rxdp += HNS3_DEFAULT_DESCS_PER_LOOP) {
+		uint64x2x2_t descs[HNS3_DEFAULT_DESCS_PER_LOOP];
+		uint8x16x2_t pkt_mbuf1, pkt_mbuf2, pkt_mbuf3, pkt_mbuf4;
+		uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		uint64x2_t mbp1, mbp2;
+		uint16x4_t bd_vld = {0};
+		uint16x8_t tmp;
+		uint64_t stat;
+
+		/* calculate how many BDs are valid */
+		bd_vld = vset_lane_u16(rxdp[0].rx.bdtype_vld_udp0, bd_vld, 0);
+		bd_vld = vset_lane_u16(rxdp[1].rx.bdtype_vld_udp0, bd_vld, 1);
+		bd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2);
+		bd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3);
+
+		/* load 2 mbuf pointers */
+		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
+
+		bd_vld = vshl_n_u16(bd_vld,
+				    HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B);
+		bd_vld = vreinterpret_u16_s16(
+				vshr_n_s16(vreinterpret_s16_u16(bd_vld),
+					   HNS3_UINT16_BIT - 1));
+		stat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0);
+
+		/* load 2 mbuf pointers again */
+		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
+
+		if (likely(stat == 0))
+			bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;
+		else
+			bd_valid_num = __builtin_ctzl(stat) / HNS3_UINT16_BIT;
+		if (bd_valid_num == 0)
+			break;
+
+		/* use offset to control the ordering of the data loads below */
+		offset = rxq->offset_table[bd_valid_num];
+
+		/* store 2 mbuf pointers into rx_pkts */
+		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
+
+		/* read the first two descs */
+		descs[0] = vld2q_u64((uint64_t *)(rxdp + offset));
+		descs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1));
+
+		/* store 2 mbuf pointers into rx_pkts again */
+		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
+
+		/* read the remaining two descs */
+		descs[2] = vld2q_u64((uint64_t *)(rxdp + offset + 2));
+		descs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3));
+
+		pkt_mbuf1.val[0] = vreinterpretq_u8_u64(descs[0].val[0]);
+		pkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]);
+		pkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]);
+		pkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]);
+
+		/* pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_desc_fields_msk);
+		pkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_desc_fields_msk);
+
+		/* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */
+		*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
+				rxq->mbuf_initializer;
+		*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
+				rxq->mbuf_initializer;
+
+		/* pkt 1,2 remove crc */
+		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
+		pkt_mb1 = vreinterpretq_u8_u16(tmp);
+		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
+		pkt_mb2 = vreinterpretq_u8_u16(tmp);
+
+		pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
+		pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
+		pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
+		pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);
+
+		/* pkt 3,4 convert format from desc to pktmbuf */
+		pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
+		pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);
+
+		/* pkt 1,2 save to rx_pkts mbuf */
+		vst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1,
+			 pkt_mb1);
+		vst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1,
+			 pkt_mb2);
+
+		/* pkt 3,4 remove crc */
+		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
+		pkt_mb3 = vreinterpretq_u8_u16(tmp);
+		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
+		pkt_mb4 = vreinterpretq_u8_u16(tmp);
+
+		/* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */
+		*(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data =
+				rxq->mbuf_initializer;
+		*(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data =
+				rxq->mbuf_initializer;
+
+		/* pkt 3,4 save to rx_pkts mbuf */
+		vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
+			 pkt_mb3);
+		vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
+			 pkt_mb4);
+
+		rte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP);
+
+		parse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos],
+					&rxdp[offset], bd_valid_num);
+		if (unlikely(parse_retcode))
+			(*bd_err_mask) |= ((uint64_t)parse_retcode) << pos;
+
+		rte_prefetch0(sw_ring[pos +
+			      HNS3_DEFAULT_DESCS_PER_LOOP + 0].mbuf);
+		rte_prefetch0(sw_ring[pos +
+			      HNS3_DEFAULT_DESCS_PER_LOOP + 1].mbuf);
+		rte_prefetch0(sw_ring[pos +
+			      HNS3_DEFAULT_DESCS_PER_LOOP + 2].mbuf);
+		rte_prefetch0(sw_ring[pos +
+			      HNS3_DEFAULT_DESCS_PER_LOOP + 3].mbuf);
+
+		nb_rx += bd_valid_num;
+		if (bd_valid_num < HNS3_DEFAULT_DESCS_PER_LOOP)
+			break;
+	}
+
+	rxq->rx_rearm_nb += nb_rx;
+	rxq->next_to_use += nb_rx;
+	if (rxq->next_to_use >= rxq->nb_rx_desc)
+		rxq->next_to_use = 0;
+
+	return nb_rx;
+}
 
 #endif /* _HNS3_RXTX_VEC_NEON_H_ */
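The densest step in hns3_recv_burst_vec above is the descriptor-valid accounting: each BD's VLD bit is shifted up to the sign position of its 16-bit lane (vshl_n_u16), an arithmetic shift right splats it across the whole lane (vshr_n_s16), and a count-trailing-zeros on the inverted 64-bit view of the four lanes yields the number of leading valid descriptors. The scalar sketch below reproduces that computation for reference. It is illustrative only and not part of the patch; VLD_BIT stands in for HNS3_RXD_VLD_B, and its value of 8 here is an assumption.

#include <stdint.h>

/*
 * Scalar sketch of the bd_valid_num computation above. Illustrative
 * only: VLD_BIT stands in for HNS3_RXD_VLD_B (the value 8 is an
 * assumption, not taken from the patch), and the group size of 4
 * matches HNS3_DEFAULT_DESCS_PER_LOOP.
 */
#define VLD_BIT	8

unsigned int
count_leading_valid_bds(const uint16_t bdtype_vld_udp0[4])
{
	uint64_t stat = 0;
	unsigned int i;

	/* Splat each BD's VLD bit across its 16-bit lane, as the
	 * vshl_n_u16 + vshr_n_s16 (sign-extend) pair does above.
	 */
	for (i = 0; i < 4; i++) {
		uint64_t lane = (bdtype_vld_udp0[i] >> VLD_BIT) & 1 ?
				0xFFFFull : 0;
		stat |= lane << (16 * i);
	}

	/* Invert so valid lanes become zero: stat == 0 means all four
	 * BDs are valid; otherwise the bit index of the first set bit,
	 * divided by the lane width, is the count of leading valid BDs.
	 */
	stat = ~stat;
	if (stat == 0)
		return 4;
	return __builtin_ctzll(stat) / 16;
}

Treating validity as a lane mask keeps the hot loop free of per-descriptor branches: one well-predicted branch per group of four descriptors decides whether the burst continues, a pattern similar to other vectorized RX paths in DPDK.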