ixgbe: rework vector pmd following mbuf changes
[dpdk.git] / lib / librte_pmd_ixgbe / ixgbe_rxtx_vec.c
index b8721dd..d53e239 100644 (file)
 #include "ixgbe_ethdev.h"
 #include "ixgbe_rxtx.h"
 
-#include <nmmintrin.h>
+#include <tmmintrin.h>
 
 #ifndef __INTEL_COMPILER
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static struct rte_mbuf mb_def = {
-
-       .ol_flags = 0,
-       {
-               .pkt = {
-                       .data_len = 0,
-                       .pkt_len = 0,
-
-                       .vlan_macip = {
-                               .data = 0,
-                       },
-                       .hash = {
-                               .rss = 0,
-                       },
-
-                       .nb_segs = 1,
-                       .in_port = 0,
-
-                       .next = NULL,
-                       .data = NULL,
-               },
-       },
-};
-
 static inline void
 ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
 {
@@ -76,7 +52,6 @@ ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
        volatile union ixgbe_adv_rx_desc *rxdp;
        struct igb_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
        struct rte_mbuf *mb0, *mb1;
-       __m128i def_low;
        __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
                        RTE_PKTMBUF_HEADROOM);
 
@@ -87,8 +62,6 @@ ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
 
        rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
-       def_low = _mm_load_si128((__m128i *)&(mb_def.pkt));
-
        /* Initialize the mbufs in vector, process 2 mbufs in one loop */
        for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
                __m128i dma_addr0, dma_addr1;
@@ -97,33 +70,25 @@ ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
                mb0 = rxep[0].mbuf;
                mb1 = rxep[1].mbuf;
 
+               /* flush mbuf with pkt template */
+               mb0->rearm_data[0] = rxq->mbuf_initializer;
+               mb1->rearm_data[0] = rxq->mbuf_initializer;
+
                /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
                vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
                vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));
 
-               /* calc va/pa of pkt data point */
-               vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
-               vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
-
                /* convert pa to dma_addr hdr/data */
                dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
                dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
 
-               /* fill va into t0 def pkt template */
-               vaddr0 = _mm_unpacklo_epi64(def_low, vaddr0);
-               vaddr1 = _mm_unpacklo_epi64(def_low, vaddr1);
+               /* add headroom to pa values */
+               dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+               dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
 
                /* flush desc with pa dma_addr */
                _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
                _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-
-               /* flush mbuf with pkt template */
-               _mm_store_si128((__m128i *)&mb0->pkt, vaddr0);
-               _mm_store_si128((__m128i *)&mb1->pkt, vaddr1);
-
-               /* update refcnt per pkt */
-               rte_mbuf_refcnt_set(mb0, 1);
-               rte_mbuf_refcnt_set(mb1, 1);
        }
 
        rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
@@ -145,7 +110,7 @@ ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
  * gives improved performance, at the cost of losing the offload info
  * in the received packet
  */
-#ifndef RTE_IXGBE_RX_OLFLAGS_DISABLE
+#ifdef RTE_IXGBE_RX_OLFLAGS_ENABLE
 
 #define OLFLAGS_MASK     ((uint16_t)(PKT_RX_VLAN_PKT | PKT_RX_IPV4_HDR |\
                                     PKT_RX_IPV4_HDR_EXT | PKT_RX_IPV6_HDR |\
@@ -210,7 +175,13 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
        int pos;
        uint64_t var;
        __m128i shuf_msk;
-       __m128i in_port;
+       __m128i crc_adjust = _mm_set_epi16(
+                               0, 0, 0, 0, /* ignore non-length fields */
+                               0,          /* ignore high-16bits of pkt_len */
+                               -rxq->crc_len, /* sub crc on pkt_len */
+                               -rxq->crc_len, /* sub crc on data_len */
+                               0            /* ignore pkt_type field */
+                       );
        __m128i dd_check;
 
        if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
@@ -243,8 +214,8 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                15, 14,      /* octet 14~15, low 16 bits vlan_macip */
                0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
                13, 12,      /* octet 12~13, low 16 bits pkt_len */
-               0xFF, 0xFF,  /* skip nb_segs and in_port, zero out */
-               13, 12       /* octet 12~13, 16 bits data_len */
+               13, 12,      /* octet 12~13, 16 bits data_len */
+               0xFF, 0xFF   /* skip pkt_type field */
                );
 
 
@@ -252,9 +223,6 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
         * the next 'n' mbufs into the cache */
        sw_ring = &rxq->sw_ring[rxq->rx_tail];
 
-       /* in_port, nb_seg = 1, crc_len */
-       in_port = rxq->misc_info;
-
        /*
         * A. load 4 packet in one loop
         * B. copy 4 mbuf point from swring to rx_pkts
@@ -277,7 +245,7 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
 
                /* B.2 copy 2 mbuf point into rx_pkts  */
-               _mm_store_si128((__m128i *)&rx_pkts[pos], mbp1);
+               _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
 
                /* B.1 load 1 mbuf point */
                mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
@@ -288,7 +256,7 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
 
                /* B.2 copy 2 mbuf point into rx_pkts  */
-               _mm_store_si128((__m128i *)&rx_pkts[pos+2], mbp2);
+               _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
 
                /* avoid compiler reorder optimization */
                rte_compiler_barrier();
@@ -306,8 +274,8 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                desc_to_olflags_v(descs, &rx_pkts[pos]);
 
                /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-               pkt_mb4 = _mm_add_epi16(pkt_mb4, in_port);
-               pkt_mb3 = _mm_add_epi16(pkt_mb3, in_port);
+               pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
+               pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
 
                /* D.1 pkt 1,2 convert format from desc to pktmbuf */
                pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
@@ -318,27 +286,27 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
 
                /* D.3 copy final 3,4 data to rx_pkts */
-               _mm_storeu_si128((__m128i *)&(rx_pkts[pos+3]->pkt.data_len),
+               _mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
                                pkt_mb4);
-               _mm_storeu_si128((__m128i *)&(rx_pkts[pos+2]->pkt.data_len),
+               _mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
                                pkt_mb3);
 
                /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
-               pkt_mb2 = _mm_add_epi16(pkt_mb2, in_port);
-               pkt_mb1 = _mm_add_epi16(pkt_mb1, in_port);
+               pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
+               pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
 
                /* C.3 calc available number of desc */
                staterr = _mm_and_si128(staterr, dd_check);
                staterr = _mm_packs_epi32(staterr, zero);
 
                /* D.3 copy final 1,2 data to rx_pkts */
-               _mm_storeu_si128((__m128i *)&(rx_pkts[pos+1]->pkt.data_len),
+               _mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
                                pkt_mb2);
-               _mm_storeu_si128((__m128i *)&(rx_pkts[pos]->pkt.data_len),
+               _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
                                pkt_mb1);
 
                /* C.4 calc available number of desc */
-               var = _mm_popcnt_u64(_mm_cvtsi128_si64(staterr));
+               var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
                nb_pkts_recd += var;
                if (likely(var != RTE_IXGBE_DESCS_PER_LOOP))
                        break;
@@ -351,46 +319,19 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 
        return nb_pkts_recd;
 }
-
 static inline void
 vtx1(volatile union ixgbe_adv_tx_desc *txdp,
-               struct rte_mbuf *pkt, __m128i flags)
+               struct rte_mbuf *pkt, uint64_t flags)
 {
-       __m128i t0, t1, offset, ols, ba, ctl;
-
-       /* load buf_addr/buf_physaddr in t0 */
-       t0 = _mm_loadu_si128((__m128i *)&(pkt->buf_addr));
-       /* load data, ... pkt_len in t1 */
-       t1 = _mm_loadu_si128((__m128i *)&(pkt->pkt.data));
-
-       /* calc offset = (data - buf_adr) */
-       offset = _mm_sub_epi64(t1, t0);
-
-       /* cmd_type_len: pkt_len |= DCMD_DTYP_FLAGS */
-       ctl = _mm_or_si128(t1, flags);
-
-       /* reorder as buf_physaddr/buf_addr */
-       offset = _mm_shuffle_epi32(offset, 0x4E);
-
-       /* olinfo_stats: pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT */
-       ols = _mm_slli_epi32(t1, IXGBE_ADVTXD_PAYLEN_SHIFT);
-
-       /* buffer_addr = buf_physaddr + offset */
-       ba = _mm_add_epi64(t0, offset);
-
-       /* format cmd_type_len/olinfo_status */
-       ctl = _mm_unpackhi_epi32(ctl, ols);
-
-       /* format buf_physaddr/cmd_type_len */
-       ba = _mm_unpackhi_epi64(ba, ctl);
-
-       /* write desc */
-       _mm_store_si128((__m128i *)&txdp->read, ba);
+       __m128i descriptor = _mm_set_epi64x((uint64_t)pkt->pkt_len << 46 |
+                       flags | pkt->data_len,
+                       pkt->buf_physaddr + pkt->data_off);
+       _mm_store_si128((__m128i *)&txdp->read, descriptor);
 }
 
 static inline void
 vtx(volatile union ixgbe_adv_tx_desc *txdp,
-               struct rte_mbuf **pkt, uint16_t nb_pkts,  __m128i flags)
+               struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
 {
        int i;
        for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
@@ -404,7 +345,7 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
        struct igb_tx_entry_seq *txsp;
        uint32_t status;
        uint32_t n, k;
-#ifdef RTE_MBUF_SCATTER_GATHER
+#ifdef RTE_MBUF_REFCNT
        uint32_t i;
        int nb_free = 0;
        struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
@@ -427,7 +368,7 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
 
        while (n > 0) {
                k = RTE_MIN(n, txsp[n-1].same_pool);
-#ifdef RTE_MBUF_SCATTER_GATHER
+#ifdef RTE_MBUF_REFCNT
                for (i = 0; i < k; i++) {
                        m = __rte_pktmbuf_prefree_seg((txep+n-k+i)->mbuf);
                        if (m != NULL)
@@ -477,9 +418,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
        struct igb_tx_entry_v *txep;
        struct igb_tx_entry_seq *txsp;
        uint16_t n, nb_commit, tx_id;
-       __m128i flags = _mm_set_epi32(DCMD_DTYP_FLAGS, 0, 0, 0);
-       __m128i rs = _mm_set_epi32(IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS,
-                       0, 0, 0);
+       uint64_t flags = DCMD_DTYP_FLAGS;
+       uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
        int i;
 
        if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
@@ -631,6 +571,23 @@ static struct ixgbe_txq_ops vec_txq_ops = {
        .reset = ixgbe_reset_tx_queue,
 };
 
+int
+ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq)
+{
+       static struct rte_mbuf mb_def = {
+               .nb_segs = 1,
+               .data_off = RTE_PKTMBUF_HEADROOM,
+#ifdef RTE_MBUF_REFCNT
+               .refcnt = 1,
+#endif
+       };
+
+       mb_def.buf_len = rxq->mb_pool->elt_size - sizeof(struct rte_mbuf);
+       mb_def.port = rxq->port_id;
+       rxq->mbuf_initializer = *((uint64_t *)&mb_def.rearm_data);
+       return 0;
+}
+
 int ixgbe_txq_vec_setup(struct igb_tx_queue *txq,
                        unsigned int socket_id)
 {
@@ -658,28 +615,13 @@ int ixgbe_txq_vec_setup(struct igb_tx_queue *txq,
        return 0;
 }
 
-int ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq,
-                       __rte_unused unsigned int socket_id)
-{
-       rxq->misc_info =
-               _mm_set_epi16(
-                       0, 0, 0, 0, 0,
-                       (uint16_t)-rxq->crc_len, /* sub crc on pkt_len */
-                       (uint16_t)(rxq->port_id << 8 | 1),
-                       /* 8b port_id and 8b nb_seg*/
-                       (uint16_t)-rxq->crc_len  /* sub crc on data_len */
-                       );
-
-       return 0;
-}
-
 int ixgbe_rx_vec_condition_check(struct rte_eth_dev *dev)
 {
 #ifndef RTE_LIBRTE_IEEE1588
        struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
        struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf;
 
-#ifdef RTE_IXGBE_RX_OLFLAGS_DISABLE
+#ifndef RTE_IXGBE_RX_OLFLAGS_ENABLE
        /* without rx ol_flags, no VP flag report */
        if (rxmode->hw_vlan_strip != 0 ||
            rxmode->hw_vlan_extend != 0)
@@ -700,6 +642,7 @@ int ixgbe_rx_vec_condition_check(struct rte_eth_dev *dev)
 
        return 0;
 #else
+       RTE_SET_USED(dev);
        return -1;
 #endif
 }