diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index 86fb3af..4c81ae9 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -5,13 +5,12 @@
 #include <stdint.h>
 #include <rte_ethdev_driver.h>
 #include <rte_malloc.h>
+#include <rte_vect.h>
 
 #include "ixgbe_ethdev.h"
 #include "ixgbe_rxtx.h"
 #include "ixgbe_rxtx_vec_common.h"
 
-#include <arm_neon.h>
-
 #pragma GCC diagnostic ignored "-Wcast-qual"
 
 static inline void
@@ -131,20 +130,78 @@ desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
        rx_pkts[3]->ol_flags = vol.e[3];
 }
 
-/*
+#define IXGBE_VPMD_DESC_EOP_MASK       0x02020202
+#define IXGBE_UINT8_BIT                        (CHAR_BIT * sizeof(uint8_t))
+
+static inline uint32_t
+get_packet_type(uint32_t pkt_info,
+               uint32_t etqf_check,
+               uint32_t tunnel_check)
+{
+       if (etqf_check)
+               return RTE_PTYPE_UNKNOWN;
+
+       if (tunnel_check) {
+               pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
+               return ptype_table_tn[pkt_info];
+       }
+
+       pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
+       return ptype_table[pkt_info];
+}
+
+static inline void
+desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
+               struct rte_mbuf **rx_pkts)
+{
+       uint32x4_t etqf_check, tunnel_check;
+       uint32x4_t etqf_mask = vdupq_n_u32(0x8000);
+       uint32x4_t tunnel_mask = vdupq_n_u32(0x10000);
+       uint32x4_t ptype_mask = vdupq_n_u32((uint32_t)pkt_type_mask);
+       uint32x4_t ptype0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
+                               vreinterpretq_u32_u64(descs[2])).val[0];
+       uint32x4_t ptype1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
+                               vreinterpretq_u32_u64(descs[3])).val[0];
+
+       /* interleave low 32 bits,
+        * now we have 4 ptypes in a NEON register
+        */
+       ptype0 = vzipq_u32(ptype0, ptype1).val[0];
+
+       /* mask etqf bits */
+       etqf_check = vandq_u32(ptype0, etqf_mask);
+       /* mask tunnel bits */
+       tunnel_check = vandq_u32(ptype0, tunnel_mask);
+
+       /* shift right by IXGBE_PACKET_TYPE_SHIFT, and apply ptype mask */
+       ptype0 = vandq_u32(vshrq_n_u32(ptype0, IXGBE_PACKET_TYPE_SHIFT),
+                       ptype_mask);
+
+       rx_pkts[0]->packet_type =
+               get_packet_type(vgetq_lane_u32(ptype0, 0),
+                               vgetq_lane_u32(etqf_check, 0),
+                               vgetq_lane_u32(tunnel_check, 0));
+       rx_pkts[1]->packet_type =
+               get_packet_type(vgetq_lane_u32(ptype0, 1),
+                               vgetq_lane_u32(etqf_check, 1),
+                               vgetq_lane_u32(tunnel_check, 1));
+       rx_pkts[2]->packet_type =
+               get_packet_type(vgetq_lane_u32(ptype0, 2),
+                               vgetq_lane_u32(etqf_check, 2),
+                               vgetq_lane_u32(tunnel_check, 2));
+       rx_pkts[3]->packet_type =
+               get_packet_type(vgetq_lane_u32(ptype0, 3),
+                               vgetq_lane_u32(etqf_check, 3),
+                               vgetq_lane_u32(tunnel_check, 3));
+}
+
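
/*
 * A minimal standalone model of the double-zip gather used by
 * desc_to_ptype_v() above: two vzipq_u32 steps collect the low 32-bit
 * word of four 128-bit descriptors into one register, in descriptor
 * order; each gathered word is then dispatched per lane through
 * get_packet_type(). Values are illustrative only; build for AArch64
 * with GCC/Clang.
 */
#include <arm_neon.h>
#include <stdio.h>

int main(void)
{
        uint64x2_t d0 = {0x10, 0}, d1 = {0x11, 0};
        uint64x2_t d2 = {0x12, 0}, d3 = {0x13, 0};

        /* pair lane 0 of (d0,d2) and of (d1,d3) */
        uint32x4_t p0 = vzipq_u32(vreinterpretq_u32_u64(d0),
                                  vreinterpretq_u32_u64(d2)).val[0];
        uint32x4_t p1 = vzipq_u32(vreinterpretq_u32_u64(d1),
                                  vreinterpretq_u32_u64(d3)).val[0];
        /* interleave once more: lanes are now {d0, d1, d2, d3} */
        uint32x4_t p = vzipq_u32(p0, p1).val[0];

        printf("%x %x %x %x\n",
               vgetq_lane_u32(p, 0), vgetq_lane_u32(p, 1),
               vgetq_lane_u32(p, 2), vgetq_lane_u32(p, 3)); /* 10 11 12 13 */
        return 0;
}
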
+/**
  * vPMD raw receive routine; only accepts nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP
  *
  * Notice:
  * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
- * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
- *   numbers of DD bit
  * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
- * - don't support ol_flags for rss and csum err
  */
-
-#define IXGBE_VPMD_DESC_DD_MASK                0x01010101
-#define IXGBE_VPMD_DESC_EOP_MASK       0x02020202
-
 static inline uint16_t
 _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                   uint16_t nb_pkts, uint8_t *split_packet)
@@ -165,9 +222,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
        uint16x8_t crc_adjust = {0, 0, rxq->crc_len, 0,
                                 rxq->crc_len, 0, 0, 0};
 
-       /* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
-       nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
-
        /* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
        nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
 
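
/*
 * With the RTE_MIN clamp removed above, the raw routine no longer caps
 * a call at RTE_IXGBE_MAX_RX_BURST; the loop itself stops at the first
 * incomplete descriptor (C.4/C.5 below), and the scattered path splits
 * oversized bursts in the wrapper added further down. Only the floor
 * alignment to the 4-descriptor loop unit remains. A scalar model of
 * that alignment: with a power-of-two unit, RTE_ALIGN_FLOOR reduces to
 * a mask. align_floor/DESCS_PER_LOOP are illustrative stand-ins.
 */
#include <stdint.h>
#include <stdio.h>

#define DESCS_PER_LOOP 4        /* stand-in for RTE_IXGBE_DESCS_PER_LOOP */

static uint16_t align_floor(uint16_t n)
{
        return n & ~(uint16_t)(DESCS_PER_LOOP - 1);
}

int main(void)
{
        /* 4 -> 4, 7 -> 4, 33 -> 32 */
        printf("%u %u %u\n", align_floor(4), align_floor(7), align_floor(33));
        return 0;
}
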
@@ -211,7 +265,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                uint64x2_t mbp1, mbp2;
                uint8x16_t staterr;
                uint16x8_t tmp;
-               uint32_t var = 0;
                uint32_t stat;
 
                /* B.1 load 2 mbuf pointers */
@@ -256,7 +309,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
                /* C.2 get 4 pkts staterr value  */
                staterr = vzipq_u8(sterr_tmp1.val[1], sterr_tmp2.val[1]).val[0];
-               stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
 
                /* set ol_flags with vlan packet type */
                desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr,
@@ -282,12 +334,20 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
                /* C* extract and record EOP bit */
                if (split_packet) {
+                       stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
                        /* and with mask to extract bits, flipping 1-0 */
                        *(int *)split_packet = ~stat & IXGBE_VPMD_DESC_EOP_MASK;
 
                        split_packet += RTE_IXGBE_DESCS_PER_LOOP;
                }
 
+               /* C.4 expand DD bit to saturate UINT8 */
+               staterr = vshlq_n_u8(staterr, IXGBE_UINT8_BIT - 1);
+               staterr = vreinterpretq_u8_s8
+                               (vshrq_n_s8(vreinterpretq_s8_u8(staterr),
+                                       IXGBE_UINT8_BIT - 1));
+               stat = ~vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
+
                rte_prefetch_non_temporal(rxdp + RTE_IXGBE_DESCS_PER_LOOP);
 
                /* D.3 copy final 1,2 data to rx_pkts */
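
/*
 * Standalone model of the C.4 step above: shifting each status byte
 * left by IXGBE_UINT8_BIT - 1 moves the DD bit (bit 0) into the sign
 * position, and an arithmetic shift right by the same amount
 * sign-extends it across the byte, so every lane saturates to 0x00 or
 * 0xFF. Lane 0 then holds one byte per descriptor; inverting it yields
 * a word whose first nonzero byte marks the first descriptor not yet
 * written back. Build for AArch64; values illustrative only.
 */
#include <arm_neon.h>
#include <stdio.h>

int main(void)
{
        /* bytes 0 and 1 have DD set, bytes 2 and 3 do not */
        uint8x16_t staterr = vreinterpretq_u8_u32(vdupq_n_u32(0x00000101));

        staterr = vshlq_n_u8(staterr, 7);               /* DD -> sign bit */
        staterr = vreinterpretq_u8_s8(
                        vshrq_n_s8(vreinterpretq_s8_u8(staterr), 7));

        uint32_t stat = ~vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
        printf("0x%08x\n", stat); /* 0xffff0000: first not-done desc is #2 */
        return 0;
}
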
@@ -296,18 +356,14 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                vst1q_u8((uint8_t *)&rx_pkts[pos]->rx_descriptor_fields1,
                         pkt_mb1);
 
-               stat &= IXGBE_VPMD_DESC_DD_MASK;
+               desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]);
 
-               /* C.4 calc avaialbe number of desc */
-               if (likely(stat != IXGBE_VPMD_DESC_DD_MASK)) {
-                       while (stat & 0x01) {
-                               ++var;
-                               stat = stat >> 8;
-                       }
-                       nb_pkts_recd += var;
-                       break;
-               } else {
+               /* C.5 calc available number of desc */
+               if (unlikely(stat == 0)) {
                        nb_pkts_recd += RTE_IXGBE_DESCS_PER_LOOP;
+               } else {
+                       nb_pkts_recd += __builtin_ctz(stat) / IXGBE_UINT8_BIT;
+                       break;
                }
        }
 
@@ -319,13 +375,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
        return nb_pkts_recd;
 }
 
-/*
+/**
  * vPMD receive routine; only accepts nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP
  *
  * Notice:
  * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
- * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
- *   numbers of DD bit
  * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
  * - ol_flags for RSS and checksum errors are not supported
  */
@@ -336,19 +390,17 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
        return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
-/*
+/**
  * vPMD receive routine that reassembles scattered packets
  *
  * Notice:
  * - ol_flags for RSS and checksum errors are not supported
  * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
- * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
- *   numbers of DD bit
  * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
  */
-uint16_t
-ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-               uint16_t nb_pkts)
+static uint16_t
+ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+                              uint16_t nb_pkts)
 {
        struct ixgbe_rx_queue *rxq = rx_queue;
        uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
@@ -374,11 +426,38 @@ ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                        i++;
                if (i == nb_bufs)
                        return nb_bufs;
+               rxq->pkt_first_seg = rx_pkts[i];
        }
        return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
                &split_flags[i]);
 }
 
+/**
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+                             uint16_t nb_pkts)
+{
+       uint16_t retval = 0;
+
+       while (nb_pkts > RTE_IXGBE_MAX_RX_BURST) {
+               uint16_t burst;
+
+               burst = ixgbe_recv_scattered_burst_vec(rx_queue,
+                                                      rx_pkts + retval,
+                                                      RTE_IXGBE_MAX_RX_BURST);
+               retval += burst;
+               nb_pkts -= burst;
+               if (burst < RTE_IXGBE_MAX_RX_BURST)
+                       return retval;
+       }
+
+       return retval + ixgbe_recv_scattered_burst_vec(rx_queue,
+                                                      rx_pkts + retval,
+                                                      nb_pkts);
+}
+
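
/*
 * General shape of the chunked receive added above, with hypothetical
 * names burst_fn/BURST_MAX/recv_chunked standing in for
 * ixgbe_recv_scattered_burst_vec and RTE_IXGBE_MAX_RX_BURST (32): large
 * bursts are fed to the fixed-size routine chunk by chunk, stopping
 * early once a chunk comes back short (the ring is drained). The loop
 * condition uses ">" so the tail call always gets nb_pkts <= BURST_MAX.
 * The rxq->pkt_first_seg assignment above records the first split mbuf
 * before handing off to reassemble_packets().
 */
#include <stdint.h>

#define BURST_MAX 32

typedef uint16_t (*burst_fn_t)(void *q, void **pkts, uint16_t n);

static uint16_t
recv_chunked(burst_fn_t burst_fn, void *q, void **pkts, uint16_t nb_pkts)
{
        uint16_t retval = 0;

        while (nb_pkts > BURST_MAX) {
                uint16_t burst = burst_fn(q, pkts + retval, BURST_MAX);

                retval += burst;
                nb_pkts -= burst;
                if (burst < BURST_MAX)  /* short chunk: ring drained */
                        return retval;
        }
        return retval + burst_fn(q, pkts + retval, nb_pkts);
}
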
 static inline void
 vtx1(volatile union ixgbe_adv_tx_desc *txdp,
                struct rte_mbuf *pkt, uint64_t flags)
@@ -466,25 +545,25 @@ ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
        return nb_pkts;
 }
 
-static void __attribute__((cold))
+static void __rte_cold
 ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
 {
        _ixgbe_tx_queue_release_mbufs_vec(txq);
 }
 
-void __attribute__((cold))
+void __rte_cold
 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
 {
        _ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
 
-static void __attribute__((cold))
+static void __rte_cold
 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
 {
        _ixgbe_tx_free_swring_vec(txq);
 }
 
-static void __attribute__((cold))
+static void __rte_cold
 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
 {
        _ixgbe_reset_tx_queue_vec(txq);
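
/*
 * The __attribute__((cold)) spellings below are replaced by DPDK's
 * portable __rte_cold macro. On GCC/Clang it expands to the same
 * attribute (a sketch; the exact definition lives in rte_common.h),
 * which places these rarely-run setup/teardown paths in the cold text
 * section and biases branch layout toward the hot path.
 */
#ifndef __rte_cold
#define __rte_cold __attribute__((cold))
#endif
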
@@ -496,19 +575,19 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
        .reset = ixgbe_reset_tx_queue,
 };
 
-int __attribute__((cold))
+int __rte_cold
 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
 {
        return ixgbe_rxq_vec_setup_default(rxq);
 }
 
-int __attribute__((cold))
+int __rte_cold
 ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
 {
        return ixgbe_txq_vec_setup_default(txq, &vec_txq_ops);
 }
 
-int __attribute__((cold))
+int __rte_cold
 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
 {
        struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;