net/hns3: add compile-time verification on Rx vector
[dpdk.git] / drivers / net / hns3 / hns3_rxtx_vec_neon.h
index e878ee1..e5c7d69 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2020 Hisilicon Limited.
+ * Copyright(c) 2020-2021 HiSilicon Limited.
  */
 
 #ifndef _HNS3_RXTX_VEC_NEON_H_
@@ -42,7 +42,7 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,
 
        nb_commit = RTE_MIN(txq->tx_bd_ready, nb_pkts);
        if (unlikely(nb_commit == 0)) {
-               txq->queue_full_cnt++;
+               txq->dfx_stats.queue_full_cnt++;
                return 0;
        }
        nb_tx = nb_commit;
@@ -61,6 +61,9 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,
                for (i = 0; i < n; i++, tx_pkts++, tx_desc++) {
                        hns3_vec_tx(tx_desc, *tx_pkts);
                        tx_entry[i].mbuf = *tx_pkts;
+
+                       /* Increment bytes counter */
+                       txq->basic_stats.bytes += (*tx_pkts)->pkt_len;
                }
 
                nb_commit -= n;
@@ -72,6 +75,9 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,
        for (i = 0; i < nb_commit; i++, tx_pkts++, tx_desc++) {
                hns3_vec_tx(tx_desc, *tx_pkts);
                tx_entry[i].mbuf = *tx_pkts;
+
+               /* Increment bytes counter */
+               txq->basic_stats.bytes += (*tx_pkts)->pkt_len;
        }
 
        next_to_use += nb_commit;
@@ -82,4 +88,215 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,
 
        return nb_tx;
 }
+
+static inline uint32_t
+hns3_desc_parse_field(struct hns3_rx_queue *rxq,
+                     struct hns3_entry *sw_ring,
+                     struct hns3_desc *rxdp,
+                     uint32_t   bd_vld_num)
+{
+       uint32_t l234_info, ol_info, bd_base_info;
+       struct rte_mbuf *pkt;
+       uint32_t retcode = 0;
+       uint32_t i;
+       int ret;
+
+       for (i = 0; i < bd_vld_num; i++) {
+               pkt = sw_ring[i].mbuf;
+
+               /* init rte_mbuf.rearm_data last 64-bit */
+               pkt->ol_flags = PKT_RX_RSS_HASH;
+
+               l234_info = rxdp[i].rx.l234_info;
+               ol_info = rxdp[i].rx.ol_info;
+               bd_base_info = rxdp[i].rx.bd_base_info;
+               ret = hns3_handle_bdinfo(rxq, pkt, bd_base_info, l234_info);
+               if (unlikely(ret)) {
+                       retcode |= 1u << i;
+                       continue;
+               }
+
+               pkt->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info);
+
+               /* Increment bytes counter */
+               rxq->basic_stats.bytes += pkt->pkt_len;
+       }
+
+       return retcode;
+}
+
+static inline uint16_t
+hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
+                   struct rte_mbuf **__restrict rx_pkts,
+                   uint16_t nb_pkts,
+                   uint64_t *bd_err_mask)
+{
+       uint16_t rx_id = rxq->next_to_use;
+       struct hns3_entry *sw_ring = &rxq->sw_ring[rx_id];
+       struct hns3_desc *rxdp = &rxq->rx_ring[rx_id];
+       uint32_t bd_valid_num, parse_retcode;
+       uint16_t nb_rx = 0;
+       uint32_t pos;
+       int offset;
+
+       /* mask to shuffle from desc to mbuf's rx_descriptor_fields1 */
+       uint8x16_t shuf_desc_fields_msk = {
+               0xff, 0xff, 0xff, 0xff,  /* packet type init zero */
+               22, 23, 0xff, 0xff,      /* rx.pkt_len to rte_mbuf.pkt_len */
+               20, 21,                  /* size to rte_mbuf.data_len */
+               0xff, 0xff,              /* rte_mbuf.vlan_tci init zero */
+               8, 9, 10, 11,            /* rx.rss_hash to rte_mbuf.hash.rss */
+       };
+
+       uint16x8_t crc_adjust = {
+               0, 0,         /* ignore pkt_type field */
+               rxq->crc_len, /* sub crc on pkt_len */
+               0,            /* ignore high-16bits of pkt_len */
+               rxq->crc_len, /* sub crc on data_len */
+               0, 0, 0,      /* ignore non-length fields */
+       };
+
+       /* compile-time verifies the shuffle mask */
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+                        offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+                        offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash.rss) !=
+                        offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+       for (pos = 0; pos < nb_pkts; pos += HNS3_DEFAULT_DESCS_PER_LOOP,
+                                    rxdp += HNS3_DEFAULT_DESCS_PER_LOOP) {
+               uint64x2x2_t descs[HNS3_DEFAULT_DESCS_PER_LOOP];
+               uint8x16x2_t pkt_mbuf1, pkt_mbuf2, pkt_mbuf3, pkt_mbuf4;
+               uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+               uint64x2_t mbp1, mbp2;
+               uint16x4_t bd_vld = {0};
+               uint16x8_t tmp;
+               uint64_t stat;
+
+               /* calc how many bd valid */
+               bd_vld = vset_lane_u16(rxdp[0].rx.bdtype_vld_udp0, bd_vld, 0);
+               bd_vld = vset_lane_u16(rxdp[1].rx.bdtype_vld_udp0, bd_vld, 1);
+               bd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2);
+               bd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3);
+
+               /* load 2 mbuf pointer */
+               mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
+
+               bd_vld = vshl_n_u16(bd_vld,
+                                   HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B);
+               bd_vld = vreinterpret_u16_s16(
+                               vshr_n_s16(vreinterpret_s16_u16(bd_vld),
+                                          HNS3_UINT16_BIT - 1));
+               stat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0);
+
+               /* load 2 mbuf pointer again */
+               mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
+
+               if (likely(stat == 0))
+                       bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;
+               else
+                       bd_valid_num = __builtin_ctzl(stat) / HNS3_UINT16_BIT;
+               if (bd_valid_num == 0)
+                       break;
+
+               /* use offset to control below data load oper ordering */
+               offset = rxq->offset_table[bd_valid_num];
+
+               /* store 2 mbuf pointer into rx_pkts */
+               vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
+
+               /* read first two descs */
+               descs[0] = vld2q_u64((uint64_t *)(rxdp + offset));
+               descs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1));
+
+               /* store 2 mbuf pointer into rx_pkts again */
+               vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
+
+               /* read remains two descs */
+               descs[2] = vld2q_u64((uint64_t *)(rxdp + offset + 2));
+               descs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3));
+
+               pkt_mbuf1.val[0] = vreinterpretq_u8_u64(descs[0].val[0]);
+               pkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]);
+               pkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]);
+               pkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]);
+
+               /* pkt 1,2 convert format from desc to pktmbuf */
+               pkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_desc_fields_msk);
+               pkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_desc_fields_msk);
+
+               /* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */
+               *(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
+                       rxq->mbuf_initializer;
+               *(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
+                       rxq->mbuf_initializer;
+
+               /* pkt 1,2 remove crc */
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
+               pkt_mb1 = vreinterpretq_u8_u16(tmp);
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
+               pkt_mb2 = vreinterpretq_u8_u16(tmp);
+
+               pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
+               pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
+               pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
+               pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);
+
+               /* pkt 3,4 convert format from desc to pktmbuf */
+               pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
+               pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);
+
+               /* pkt 1,2 save to rx_pkts mbuf */
+               vst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1,
+                        pkt_mb1);
+               vst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1,
+                        pkt_mb2);
+
+               /* pkt 3,4 remove crc */
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
+               pkt_mb3 = vreinterpretq_u8_u16(tmp);
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
+               pkt_mb4 = vreinterpretq_u8_u16(tmp);
+
+               /* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */
+               *(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data =
+                       rxq->mbuf_initializer;
+               *(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data =
+                       rxq->mbuf_initializer;
+
+               /* pkt 3,4 save to rx_pkts mbuf */
+               vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
+                        pkt_mb3);
+               vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
+                        pkt_mb4);
+
+               rte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP);
+
+               parse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos],
+                       &rxdp[offset], bd_valid_num);
+               if (unlikely(parse_retcode))
+                       (*bd_err_mask) |= ((uint64_t)parse_retcode) << pos;
+
+               rte_prefetch0(sw_ring[pos +
+                                     HNS3_DEFAULT_DESCS_PER_LOOP + 0].mbuf);
+               rte_prefetch0(sw_ring[pos +
+                                     HNS3_DEFAULT_DESCS_PER_LOOP + 1].mbuf);
+               rte_prefetch0(sw_ring[pos +
+                                     HNS3_DEFAULT_DESCS_PER_LOOP + 2].mbuf);
+               rte_prefetch0(sw_ring[pos +
+                                     HNS3_DEFAULT_DESCS_PER_LOOP + 3].mbuf);
+
+               nb_rx += bd_valid_num;
+               if (bd_valid_num < HNS3_DEFAULT_DESCS_PER_LOOP)
+                       break;
+       }
+
+       rxq->rx_rearm_nb += nb_rx;
+       rxq->next_to_use += nb_rx;
+       if (rxq->next_to_use >= rxq->nb_rx_desc)
+               rxq->next_to_use = 0;
+
+       return nb_rx;
+}
 #endif /* _HNS3_RXTX_VEC_NEON_H_ */