ixgbe: improve slow-path perf with vector scattered Rx
[dpdk.git] / lib / librte_pmd_ixgbe / ixgbe_rxtx.c
index 575a014..a80cade 100644 (file)
@@ -142,10 +142,6 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
         */
        txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
 
-       /* prefetch the mbufs that are about to be freed */
-       for (i = 0; i < txq->tx_rs_thresh; ++i)
-               rte_prefetch0((txep + i)->mbuf);
-
        /* free buffers one at a time */
        if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
                for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
@@ -186,6 +182,7 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
                                ((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
                txdp->read.olinfo_status =
                                (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+               rte_prefetch0(&(*pkts)->pool);
        }
 }
 
@@ -205,6 +202,7 @@ tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
                        ((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
        txdp->read.olinfo_status =
                        (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+       rte_prefetch0(&(*pkts)->pool);
 }
 
 /*
@@ -358,7 +356,7 @@ ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 static inline void
 ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
                volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
-               uint16_t ol_flags, uint32_t vlan_macip_lens)
+               uint64_t ol_flags, uint32_t vlan_macip_lens)
 {
        uint32_t type_tucmd_mlhl;
        uint32_t mss_l4len_idx;
@@ -421,7 +419,7 @@ ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
  * or create a new context descriptor.
  */
 static inline uint32_t
-what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
+what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
                uint32_t vlan_macip_lens)
 {
        /* If match with the current used context */
@@ -444,7 +442,7 @@ what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
 }
 
 static inline uint32_t
-tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
+tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
 {
        static const uint32_t l4_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_TXSM};
        static const uint32_t l3_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_IXSM};
@@ -456,7 +454,7 @@ tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
 }
 
 static inline uint32_t
-tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
+tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
 {
        static const uint32_t vlan_cmd[2] = {0, IXGBE_ADVTXD_DCMD_VLE};
        return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
@@ -546,12 +544,12 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint32_t cmd_type_len;
        uint32_t pkt_len;
        uint16_t slen;
-       uint16_t ol_flags;
+       uint64_t ol_flags;
        uint16_t tx_id;
        uint16_t tx_last;
        uint16_t nb_tx;
        uint16_t nb_used;
-       uint16_t tx_ol_req;
+       uint64_t tx_ol_req;
        uint32_t ctx = 0;
        uint32_t new_ctx;
 
@@ -583,7 +581,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
 
                /* If hardware offload required */
-               tx_ol_req = (uint16_t)(ol_flags & PKT_TX_OFFLOAD_MASK);
+               tx_ol_req = ol_flags & PKT_TX_OFFLOAD_MASK;
                if (tx_ol_req) {
                        /* If new context need be built or reuse the exist ctx. */
                        ctx = what_advctx_update(txq, tx_ol_req,
@@ -813,19 +811,19 @@ end_of_tx:
  *  RX functions
  *
  **********************************************************************/
-static inline uint16_t
+static inline uint64_t
 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
 {
-       uint16_t pkt_flags;
+       uint64_t pkt_flags;
 
-       static uint16_t ip_pkt_types_map[16] = {
+       static uint64_t ip_pkt_types_map[16] = {
                0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
                PKT_RX_IPV6_HDR, 0, 0, 0,
                PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
                PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
        };
 
-       static uint16_t ip_rss_types_map[16] = {
+       static uint64_t ip_rss_types_map[16] = {
                0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
                0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
                PKT_RX_RSS_HASH, 0, 0, 0,
@@ -838,45 +836,44 @@ rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
                0, 0, 0, 0,
        };
 
-       pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
-                               ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
-                               ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
+       pkt_flags = (hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
+                       ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
+                       ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F];
 #else
-       pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
-                               ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
+       pkt_flags = (hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
+                       ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F];
 
 #endif
-       return (uint16_t)(pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF]);
+       return pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF];
 }
 
-static inline uint16_t
+static inline uint64_t
 rx_desc_status_to_pkt_flags(uint32_t rx_status)
 {
-       uint16_t pkt_flags;
+       uint64_t pkt_flags;
 
        /*
         * Check if VLAN present only.
         * Do not check whether L3/L4 rx checksum done by NIC or not,
         * That can be found from rte_eth_rxmode.hw_ip_checksum flag
         */
-       pkt_flags = (uint16_t)((rx_status & IXGBE_RXD_STAT_VP) ?
-                                               PKT_RX_VLAN_PKT : 0);
+       pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  PKT_RX_VLAN_PKT : 0;
 
 #ifdef RTE_LIBRTE_IEEE1588
        if (rx_status & IXGBE_RXD_STAT_TMST)
-               pkt_flags = (uint16_t)(pkt_flags | PKT_RX_IEEE1588_TMST);
+               pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
 #endif
        return pkt_flags;
 }
 
-static inline uint16_t
+static inline uint64_t
 rx_desc_error_to_pkt_flags(uint32_t rx_status)
 {
        /*
         * Bit 31: IPE, IPv4 checksum error
         * Bit 30: L4I, L4I integrity error
         */
-       static uint16_t error_to_pkt_flags_map[4] = {
+       static uint64_t error_to_pkt_flags_map[4] = {
                0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
                PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
        };
@@ -947,10 +944,10 @@ ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
                        mb->ol_flags  = rx_desc_hlen_type_rss_to_pkt_flags(
                                        rxdp[j].wb.lower.lo_dword.data);
                        /* reuse status field from scan list */
-                       mb->ol_flags = (uint16_t)(mb->ol_flags |
-                                       rx_desc_status_to_pkt_flags(s[j]));
-                       mb->ol_flags = (uint16_t)(mb->ol_flags |
-                                       rx_desc_error_to_pkt_flags(s[j]));
+                       mb->ol_flags = mb->ol_flags |
+                                       rx_desc_status_to_pkt_flags(s[j]);
+                       mb->ol_flags = mb->ol_flags |
+                                       rx_desc_error_to_pkt_flags(s[j]);
                }
 
                /* Move mbuf pointers from the S/W ring to the stage */
@@ -997,7 +994,7 @@ ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
                mb = rxep[i].mbuf;
                rte_mbuf_refcnt_set(mb, 1);
                mb->next = NULL;
-               mb->data = (char *)mb->buf_addr + RTE_PKTMBUF_HEADROOM;
+               mb->data_off = RTE_PKTMBUF_HEADROOM;
                mb->nb_segs = 1;
                mb->port = rxq->port_id;
 
@@ -1143,7 +1140,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
        uint16_t rx_id;
        uint16_t nb_rx;
        uint16_t nb_hold;
-       uint16_t pkt_flags;
+       uint64_t pkt_flags;
 
        nb_rx = 0;
        nb_hold = 0;
@@ -1248,8 +1245,8 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                 */
                pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
                                      rxq->crc_len);
-               rxm->data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
-               rte_packet_prefetch(rxm->data);
+               rxm->data_off = RTE_PKTMBUF_HEADROOM;
+               rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
                rxm->nb_segs = 1;
                rxm->next = NULL;
                rxm->pkt_len = pkt_len;
@@ -1261,10 +1258,8 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
 
                pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
-               pkt_flags = (uint16_t)(pkt_flags |
-                               rx_desc_status_to_pkt_flags(staterr));
-               pkt_flags = (uint16_t)(pkt_flags |
-                               rx_desc_error_to_pkt_flags(staterr));
+               pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
+               pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
                rxm->ol_flags = pkt_flags;
 
                if (likely(pkt_flags & PKT_RX_RSS_HASH))
@@ -1431,7 +1426,7 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                 */
                data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
                rxm->data_len = data_len;
-               rxm->data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+               rxm->data_off = RTE_PKTMBUF_HEADROOM;
 
                /*
                 * If this is the first buffer of the received packet,
@@ -1522,7 +1517,8 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                }
 
                /* Prefetch data of first segment, if configured to do so. */
-               rte_packet_prefetch(first_seg->data);
+               rte_packet_prefetch((char *)first_seg->buf_addr +
+                       first_seg->data_off);
 
                /*
                 * Store the mbuf address into the next entry of the array
@@ -1877,7 +1873,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
                PMD_INIT_LOG(INFO, "Using simple tx code path\n");
 #ifdef RTE_IXGBE_INC_VECTOR
                if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
-                   ixgbe_txq_vec_setup(txq, socket_id) == 0) {
+                   ixgbe_txq_vec_setup(txq) == 0) {
                        PMD_INIT_LOG(INFO, "Vector tx enabled.\n");
                        dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
                }
@@ -2168,7 +2164,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
                if (!ixgbe_rx_vec_condition_check(dev)) {
                        PMD_INIT_LOG(INFO, "Vector rx enabled, please make "
                                     "sure RX burst size no less than 32.\n");
-                       ixgbe_rxq_vec_setup(rxq, socket_id);
+                       ixgbe_rxq_vec_setup(rxq);
                        dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
                }
 #endif
@@ -3212,7 +3208,7 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
 
                rte_mbuf_refcnt_set(mbuf, 1);
                mbuf->next = NULL;
-               mbuf->data = (char *)mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
+               mbuf->data_off = RTE_PKTMBUF_HEADROOM;
                mbuf->nb_segs = 1;
                mbuf->port = rxq->port_id;
 
@@ -3480,12 +3476,20 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
                if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
                                2 * IXGBE_VLAN_TAG_SIZE) > buf_size){
                        dev->data->scattered_rx = 1;
+#ifdef RTE_IXGBE_INC_VECTOR
+                       dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
+#else
                        dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
+#endif
                }
        }
 
        if (dev->data->dev_conf.rxmode.enable_scatter) {
+#ifdef RTE_IXGBE_INC_VECTOR
+               dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
+#else
                dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
+#endif
                dev->data->scattered_rx = 1;
        }
 
@@ -3973,12 +3977,20 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
                if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
                                2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
                        dev->data->scattered_rx = 1;
+#ifdef RTE_IXGBE_INC_VECTOR
+                       dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
+#else
                        dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
+#endif
                }
        }
 
        if (dev->data->dev_conf.rxmode.enable_scatter) {
+#ifdef RTE_IXGBE_INC_VECTOR
+               dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
+#else
                dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
+#endif
                dev->data->scattered_rx = 1;
        }