ethdev: separate driver APIs
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index ff70c06..1217e5a 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,34 +1,5 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2016 Intel Corporation
  */
 
 #include <stdio.h>
@@ -46,7 +17,7 @@
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ether.h>
-#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
 #include <rte_tcp.h>
 #include <rte_sctp.h>
 #include <rte_udp.h>
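This include swap is the change named in the commit title: driver-only declarations move out of the public rte_ethdev.h into rte_ethdev_driver.h, which PMDs now include (and which itself pulls in rte_ethdev.h). A minimal sketch of the intended split; rte_eth_dev_allocate() is cited as one example of a driver-only symbol:

```c
/* Application code: only the public ethdev API. */
#include <rte_ethdev.h>

/* PMD code: the driver header adds driver-only declarations
 * (e.g. rte_eth_dev_allocate()) on top of the public API. */
#include <rte_ethdev_driver.h>
```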
@@ -61,7 +32,6 @@
 
 #define DEFAULT_TX_RS_THRESH   32
 #define DEFAULT_TX_FREE_THRESH 32
-#define I40E_MAX_PKT_TYPE      256
 
 #define I40E_TX_MAX_BURST  32
 
 #define I40E_TX_OFFLOAD_NOTSUP_MASK \
                (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
 
-static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
-                                     struct rte_mbuf **tx_pkts,
-                                     uint16_t nb_pkts);
-
 static inline void
 i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
 {
        if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
                (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
-               mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
+               mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
                mb->vlan_tci =
                        rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1);
                PMD_RX_LOG(DEBUG, "Descriptor l2tag1: %u",
@@ -458,6 +424,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
        int32_t s[I40E_LOOK_AHEAD], nb_dd;
        int32_t i, j, nb_rx = 0;
        uint64_t pkt_flags;
+       uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 
        rxdp = &rxq->rx_ring[rxq->rx_tail];
        rxep = &rxq->sw_ring[rxq->rx_tail];
@@ -506,9 +473,9 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
                        pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
                        pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
                        mb->packet_type =
-                               i40e_rxd_pkt_type_mapping((uint8_t)((qword1 &
-                                               I40E_RXD_QW1_PTYPE_MASK) >>
-                                               I40E_RXD_QW1_PTYPE_SHIFT));
+                               ptype_tbl[(uint8_t)((qword1 &
+                               I40E_RXD_QW1_PTYPE_MASK) >>
+                               I40E_RXD_QW1_PTYPE_SHIFT)];
                        if (pkt_flags & PKT_RX_RSS_HASH)
                                mb->hash.rss = rte_le_to_cpu_32(\
                                        rxdp[j].wb.qword0.hi_dword.rss);
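This hunk (and the matching ones in i40e_recv_pkts() and i40e_recv_scattered_pkts() below) replaces a per-packet call to i40e_rxd_pkt_type_mapping() with an indexed load from the adapter's ptype_tbl, which i40e_set_default_ptype_table() at the end of this patch fills once at init. A minimal sketch of that precomputed-table pattern, with hypothetical names standing in for the driver's helpers:

```c
#include <stdint.h>

#define MAX_PKT_TYPE 256	/* the hardware ptype field is 8 bits wide */

static uint32_t ptype_tbl[MAX_PKT_TYPE];

/* Hypothetical stand-in for the driver's branchy decode routine. */
static uint32_t decode_ptype_slow(uint8_t hw_ptype)
{
	return hw_ptype;	/* placeholder mapping */
}

/* Run once at init: precompute all 256 translations... */
static void init_ptype_tbl(void)
{
	int i;

	for (i = 0; i < MAX_PKT_TYPE; i++)
		ptype_tbl[i] = decode_ptype_slow((uint8_t)i);
}

/* ...so the Rx hot path pays a single table load per packet. */
static inline uint32_t lookup_ptype(uint64_t qword1, uint64_t mask,
				    uint32_t shift)
{
	return ptype_tbl[(uint8_t)((qword1 & mask) >> shift)];
}
```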
@@ -589,7 +556,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
                mb->nb_segs = 1;
                mb->port = rxq->port_id;
                dma_addr = rte_cpu_to_le_64(\
-                       rte_mbuf_data_dma_addr_default(mb));
+                       rte_mbuf_data_iova_default(mb));
                rxdp[i].read.hdr_addr = 0;
                rxdp[i].read.pkt_addr = dma_addr;
        }
@@ -610,6 +577,7 @@ static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
        struct i40e_rx_queue *rxq = (struct i40e_rx_queue *)rx_queue;
+       struct rte_eth_dev *dev;
        uint16_t nb_rx = 0;
 
        if (!nb_pkts)
@@ -627,9 +595,10 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                if (i40e_rx_alloc_bufs(rxq) != 0) {
                        uint16_t i, j;
 
-                       PMD_RX_LOG(DEBUG, "Rx mbuf alloc failed for "
-                                  "port_id=%u, queue_id=%u",
-                                  rxq->port_id, rxq->queue_id);
+                       dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+                       dev->data->rx_mbuf_alloc_failed +=
+                               rxq->rx_free_thresh;
+
                        rxq->rx_nb_avail = 0;
                        rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
                        for (i = 0, j = rxq->rx_tail; i < nb_rx; i++, j++)
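Here (and in the two allocation-failure hunks below) a debug log is traded for an increment of the ethdev-level rx_mbuf_alloc_failed counter; this bulk-alloc path charges the whole failed batch (rx_free_thresh buffers) at once. Applications observe the counter as rx_nombuf through the standard stats API, e.g.:

```c
#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>

/* Report Rx mbuf-pool exhaustion for one port. */
static void report_rx_nombuf(uint16_t port_id)
{
	struct rte_eth_stats stats;

	if (rte_eth_stats_get(port_id, &stats) == 0 && stats.rx_nombuf != 0)
		printf("port %" PRIu16 ": %" PRIu64 " Rx mbuf alloc failures\n",
		       port_id, stats.rx_nombuf);
}
```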
@@ -691,6 +660,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        union i40e_rx_desc rxd;
        struct i40e_rx_entry *sw_ring;
        struct i40e_rx_entry *rxe;
+       struct rte_eth_dev *dev;
        struct rte_mbuf *rxm;
        struct rte_mbuf *nmb;
        uint16_t nb_rx;
@@ -700,6 +670,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        uint16_t rx_id, nb_hold;
        uint64_t dma_addr;
        uint64_t pkt_flags;
+       uint32_t *ptype_tbl;
 
        nb_rx = 0;
        nb_hold = 0;
@@ -707,6 +678,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        rx_id = rxq->rx_tail;
        rx_ring = rxq->rx_ring;
        sw_ring = rxq->sw_ring;
+       ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 
        while (nb_rx < nb_pkts) {
                rxdp = &rx_ring[rx_id];
@@ -719,10 +691,13 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        break;
 
                nmb = rte_mbuf_raw_alloc(rxq->mp);
-               if (unlikely(!nmb))
+               if (unlikely(!nmb)) {
+                       dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+                       dev->data->rx_mbuf_alloc_failed++;
                        break;
-               rxd = *rxdp;
+               }
 
+               rxd = *rxdp;
                nb_hold++;
                rxe = &sw_ring[rx_id];
                rx_id++;
@@ -744,7 +719,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                rxm = rxe->mbuf;
                rxe->mbuf = nmb;
                dma_addr =
-                       rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+                       rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
                rxdp->read.hdr_addr = 0;
                rxdp->read.pkt_addr = dma_addr;
 
@@ -763,8 +738,8 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
                pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
                rxm->packet_type =
-                       i40e_rxd_pkt_type_mapping((uint8_t)((qword1 &
-                       I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT));
+                       ptype_tbl[(uint8_t)((qword1 &
+                       I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT)];
                if (pkt_flags & PKT_RX_RSS_HASH)
                        rxm->hash.rss =
                                rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
@@ -814,10 +789,12 @@ i40e_recv_scattered_pkts(void *rx_queue,
        struct rte_mbuf *nmb, *rxm;
        uint16_t rx_id = rxq->rx_tail;
        uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
+       struct rte_eth_dev *dev;
        uint32_t rx_status;
        uint64_t qword1;
        uint64_t dma_addr;
        uint64_t pkt_flags;
+       uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 
        while (nb_rx < nb_pkts) {
                rxdp = &rx_ring[rx_id];
@@ -830,8 +807,12 @@ i40e_recv_scattered_pkts(void *rx_queue,
                        break;
 
                nmb = rte_mbuf_raw_alloc(rxq->mp);
-               if (unlikely(!nmb))
+               if (unlikely(!nmb)) {
+                       dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
+                       dev->data->rx_mbuf_alloc_failed++;
                        break;
+               }
+
                rxd = *rxdp;
                nb_hold++;
                rxe = &sw_ring[rx_id];
@@ -855,7 +836,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
                rxm = rxe->mbuf;
                rxe->mbuf = nmb;
                dma_addr =
-                       rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+                       rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 
                /* Set data buffer address and data length of the mbuf */
                rxdp->read.hdr_addr = 0;
@@ -925,8 +906,8 @@ i40e_recv_scattered_pkts(void *rx_queue,
                pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
                pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
                first_seg->packet_type =
-                       i40e_rxd_pkt_type_mapping((uint8_t)((qword1 &
-                       I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT));
+                       ptype_tbl[(uint8_t)((qword1 &
+                       I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT)];
                if (pkt_flags & PKT_RX_RSS_HASH)
                        first_seg->hash.rss =
                                rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
@@ -1188,7 +1169,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
                        /* Setup TX Descriptor */
                        slen = m_seg->data_len;
-                       buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
+                       buf_dma_addr = rte_mbuf_data_iova(m_seg);
 
                        PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]:\n"
                                "buf_dma_addr: %#"PRIx64";\n"
@@ -1243,7 +1224,7 @@ end_of_tx:
        return nb_tx;
 }
 
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
 i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 {
        struct i40e_tx_entry *txep;
@@ -1287,7 +1268,7 @@ tx4(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkts)
        uint32_t i;
 
        for (i = 0; i < 4; i++, txdp++, pkts++) {
-               dma_addr = rte_mbuf_data_dma_addr(*pkts);
+               dma_addr = rte_mbuf_data_iova(*pkts);
                txdp->buffer_addr = rte_cpu_to_le_64(dma_addr);
                txdp->cmd_type_offset_bsz =
                        i40e_build_ctob((uint32_t)I40E_TD_CMD, 0,
@@ -1301,7 +1282,7 @@ tx1(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkts)
 {
        uint64_t dma_addr;
 
-       dma_addr = rte_mbuf_data_dma_addr(*pkts);
+       dma_addr = rte_mbuf_data_iova(*pkts);
        txdp->buffer_addr = rte_cpu_to_le_64(dma_addr);
        txdp->cmd_type_offset_bsz =
                i40e_build_ctob((uint32_t)I40E_TD_CMD, 0,
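tx4() and tx1() are the building blocks of the simple Tx path: each writes a buffer address plus one packed command/offset/size word per descriptor, and the caller unrolls the fill loop by four. A sketch mirroring that driving loop (the function name here is illustrative):

```c
/* Fill nb_pkts descriptors: a 4-way unrolled main loop via tx4(),
 * then tx1() for the 0-3 leftover packets. */
static inline void
fill_hw_ring(volatile struct i40e_tx_desc *txdp,
	     struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	const uint16_t mainpart = (uint16_t)(nb_pkts & ~0x3); /* multiple of 4 */
	uint16_t i;

	for (i = 0; i < mainpart; i += 4)
		tx4(&txdp[i], &pkts[i]);
	for (; i < nb_pkts; i++)
		tx1(&txdp[i], &pkts[i]);
}
```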
@@ -1459,13 +1440,10 @@ i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
                m = tx_pkts[i];
                ol_flags = m->ol_flags;
 
-               /**
-                * m->nb_segs is uint8_t, so nb_segs is always less than
-                * I40E_TX_MAX_SEG.
-                * We check only a condition for nb_segs > I40E_TX_MAX_MTU_SEG.
-                */
+               /* Check that m->nb_segs does not exceed the limits. */
                if (!(ol_flags & PKT_TX_TCP_SEG)) {
-                       if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+                       if (m->nb_segs > I40E_TX_MAX_SEG ||
+                           m->nb_segs > I40E_TX_MAX_MTU_SEG) {
                                rte_errno = -EINVAL;
                                return i;
                        }
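i40e_prep_pkts() is the driver's tx_pkt_prepare hook, reached via rte_eth_tx_prepare(); the tightened check now rejects an mbuf whose nb_segs exceeds either limit before it reaches the hardware. A sketch of the application-side usage (the wrapper name is illustrative):

```c
#include <rte_ethdev.h>

/* Validate a burst first, then transmit only the verified prefix. */
static uint16_t
send_checked(uint16_t port, uint16_t queue,
	     struct rte_mbuf **pkts, uint16_t n)
{
	uint16_t nb_prep = rte_eth_tx_prepare(port, queue, pkts, n);

	/* On failure, nb_prep indexes the first offending mbuf and
	 * rte_errno holds the reason (EINVAL here for bad nb_segs). */
	return rte_eth_tx_burst(port, queue, pkts, nb_prep);
}
```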
@@ -1594,7 +1572,7 @@ i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
                rxq = dev->data->rx_queues[rx_queue_id];
 
                /*
-               * rx_queue_id is queue id aplication refers to, while
+               * rx_queue_id is queue id application refers to, while
                * rxq->reg_idx is the real queue index.
                */
                err = i40e_switch_rx_queue(hw, rxq->reg_idx, FALSE);
@@ -1625,7 +1603,7 @@ i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
                txq = dev->data->tx_queues[tx_queue_id];
 
                /*
-               * tx_queue_id is queue id aplication refers to, while
+               * tx_queue_id is queue id application refers to, while
                * rxq->reg_idx is the real queue index.
                */
                err = i40e_switch_tx_queue(hw, txq->reg_idx, TRUE);
@@ -1650,7 +1628,7 @@ i40e_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
                txq = dev->data->tx_queues[tx_queue_id];
 
                /*
-               * tx_queue_id is queue id aplication refers to, while
+               * tx_queue_id is queue id application refers to, while
                * txq->reg_idx is the real queue index.
                */
                err = i40e_switch_tx_queue(hw, txq->reg_idx, FALSE);
@@ -1707,7 +1685,9 @@ i40e_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 #endif
            dev->rx_pkt_burst == i40e_recv_scattered_pkts ||
            dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
-           dev->rx_pkt_burst == i40e_recv_pkts_vec)
+           dev->rx_pkt_burst == i40e_recv_pkts_vec ||
+           dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx2 ||
+           dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2)
                return ptypes;
        return NULL;
 }
@@ -1720,36 +1700,42 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
                        const struct rte_eth_rxconf *rx_conf,
                        struct rte_mempool *mp)
 {
-       struct i40e_vsi *vsi;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_adapter *ad =
                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct i40e_vsi *vsi;
+       struct i40e_pf *pf = NULL;
+       struct i40e_vf *vf = NULL;
        struct i40e_rx_queue *rxq;
        const struct rte_memzone *rz;
        uint32_t ring_size;
        uint16_t len, i;
-       uint16_t base, bsf, tc_mapping;
-       int use_def_burst_func = 1;
+       uint16_t reg_idx, base, bsf, tc_mapping;
+       int q_offset, use_def_burst_func = 1;
 
        if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) {
-               struct i40e_vf *vf =
-                       I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+               vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
                vsi = &vf->vsi;
-       } else
+               if (!vsi)
+                       return -EINVAL;
+               reg_idx = queue_idx;
+       } else {
+               pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
                vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
-
-       if (vsi == NULL) {
-               PMD_DRV_LOG(ERR, "VSI not available or queue "
-                           "index exceeds the maximum");
-               return I40E_ERR_PARAM;
+               if (!vsi)
+                       return -EINVAL;
+               q_offset = i40e_get_queue_offset_by_qindex(pf, queue_idx);
+               if (q_offset < 0)
+                       return -EINVAL;
+               reg_idx = vsi->base_queue + q_offset;
        }
+
        if (nb_desc % I40E_ALIGN_RING_DESC != 0 ||
-                       (nb_desc > I40E_MAX_RING_DESC) ||
-                       (nb_desc < I40E_MIN_RING_DESC)) {
+           (nb_desc > I40E_MAX_RING_DESC) ||
+           (nb_desc < I40E_MIN_RING_DESC)) {
                PMD_DRV_LOG(ERR, "Number (%u) of receive descriptors is "
                            "invalid", nb_desc);
-               return I40E_ERR_PARAM;
+               return -EINVAL;
        }
 
        /* Free memory if needed */
@@ -1772,12 +1758,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->nb_rx_desc = nb_desc;
        rxq->rx_free_thresh = rx_conf->rx_free_thresh;
        rxq->queue_id = queue_idx;
-       if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF)
-               rxq->reg_idx = queue_idx;
-       else /* PF device */
-               rxq->reg_idx = vsi->base_queue +
-                       i40e_get_queue_offset_by_qindex(pf, queue_idx);
-
+       rxq->reg_idx = reg_idx;
        rxq->port_id = dev->data->port_id;
        rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
                                                        0 : ETHER_CRC_LEN);
@@ -1788,13 +1769,11 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
        /* Allocate the maximum number of RX ring hardware descriptors. */
        len = I40E_MAX_RING_DESC;
 
-#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
        /**
         * Allocating a little more memory because vectorized/bulk_alloc Rx
         * functions don't check boundaries each time.
         */
        len += RTE_PMD_I40E_RX_MAX_BURST;
-#endif
 
        ring_size = RTE_ALIGN(len * sizeof(union i40e_rx_desc),
                              I40E_DMA_MEM_ALIGN);
@@ -1810,14 +1789,10 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
        /* Zero all the descriptors in the ring. */
        memset(rz->addr, 0, ring_size);
 
-       rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+       rxq->rx_ring_phys_addr = rz->iova;
        rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
 
-#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
        len = (uint16_t)(nb_desc + RTE_PMD_I40E_RX_MAX_BURST);
-#else
-       len = nb_desc;
-#endif
 
        /* Allocate the software ring. */
        rxq->sw_ring =
@@ -2004,34 +1979,40 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
                        unsigned int socket_id,
                        const struct rte_eth_txconf *tx_conf)
 {
-       struct i40e_vsi *vsi;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       struct i40e_vsi *vsi;
+       struct i40e_pf *pf = NULL;
+       struct i40e_vf *vf = NULL;
        struct i40e_tx_queue *txq;
        const struct rte_memzone *tz;
        uint32_t ring_size;
        uint16_t tx_rs_thresh, tx_free_thresh;
-       uint16_t i, base, bsf, tc_mapping;
+       uint16_t reg_idx, i, base, bsf, tc_mapping;
+       int q_offset;
 
        if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) {
-               struct i40e_vf *vf =
-                       I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+               vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
                vsi = &vf->vsi;
-       } else
+               if (!vsi)
+                       return -EINVAL;
+               reg_idx = queue_idx;
+       } else {
+               pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
                vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
-
-       if (vsi == NULL) {
-               PMD_DRV_LOG(ERR, "VSI is NULL, or queue index (%u) "
-                           "exceeds the maximum", queue_idx);
-               return I40E_ERR_PARAM;
+               if (!vsi)
+                       return -EINVAL;
+               q_offset = i40e_get_queue_offset_by_qindex(pf, queue_idx);
+               if (q_offset < 0)
+                       return -EINVAL;
+               reg_idx = vsi->base_queue + q_offset;
        }
 
        if (nb_desc % I40E_ALIGN_RING_DESC != 0 ||
-                       (nb_desc > I40E_MAX_RING_DESC) ||
-                       (nb_desc < I40E_MIN_RING_DESC)) {
+           (nb_desc > I40E_MAX_RING_DESC) ||
+           (nb_desc < I40E_MIN_RING_DESC)) {
                PMD_DRV_LOG(ERR, "Number (%u) of transmit descriptors is "
                            "invalid", nb_desc);
-               return I40E_ERR_PARAM;
+               return -EINVAL;
        }
 
        /**
@@ -2140,18 +2121,13 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
        txq->hthresh = tx_conf->tx_thresh.hthresh;
        txq->wthresh = tx_conf->tx_thresh.wthresh;
        txq->queue_id = queue_idx;
-       if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF)
-               txq->reg_idx = queue_idx;
-       else /* PF device */
-               txq->reg_idx = vsi->base_queue +
-                       i40e_get_queue_offset_by_qindex(pf, queue_idx);
-
+       txq->reg_idx = reg_idx;
        txq->port_id = dev->data->port_id;
        txq->txq_flags = tx_conf->txq_flags;
        txq->vsi = vsi;
        txq->tx_deferred_start = tx_conf->tx_deferred_start;
 
-       txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+       txq->tx_ring_phys_addr = tz->iova;
        txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
 
        /* Allocate software ring */
@@ -2213,12 +2189,8 @@ i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
        if (mz)
                return mz;
 
-       if (rte_xen_dom0_supported())
-               mz = rte_memzone_reserve_bounded(name, len,
-                               socket_id, 0, I40E_RING_BASE_ALIGN, RTE_PGSIZE_2M);
-       else
-               mz = rte_memzone_reserve_aligned(name, len,
-                               socket_id, 0, I40E_RING_BASE_ALIGN);
+       mz = rte_memzone_reserve_aligned(name, len,
+                                        socket_id, 0, I40E_RING_BASE_ALIGN);
        return mz;
 }
 
@@ -2299,18 +2271,41 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
 void
 i40e_tx_queue_release_mbufs(struct i40e_tx_queue *txq)
 {
+       struct rte_eth_dev *dev;
        uint16_t i;
 
        if (!txq || !txq->sw_ring) {
                PMD_DRV_LOG(DEBUG, "Pointer to txq or sw_ring is NULL");
                return;
        }
 
+       dev = &rte_eth_devices[txq->port_id];
+
-       for (i = 0; i < txq->nb_tx_desc; i++) {
-               if (txq->sw_ring[i].mbuf) {
+       /**
+        *  vPMD tx will not set sw_ring's mbuf to NULL after free,
+        *  so the remaining mbufs need to be freed more carefully.
+        */
+       if (dev->tx_pkt_burst == i40e_xmit_pkts_vec_avx2 ||
+                       dev->tx_pkt_burst == i40e_xmit_pkts_vec) {
+               i = txq->tx_next_dd - txq->tx_rs_thresh + 1;
+               if (txq->tx_tail < i) {
+                       for (; i < txq->nb_tx_desc; i++) {
+                               rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
+                               txq->sw_ring[i].mbuf = NULL;
+                       }
+                       i = 0;
+               }
+               for (; i < txq->tx_tail; i++) {
                        rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
                        txq->sw_ring[i].mbuf = NULL;
                }
+       } else {
+               for (i = 0; i < txq->nb_tx_desc; i++) {
+                       if (txq->sw_ring[i].mbuf) {
+                               rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
+                               txq->sw_ring[i].mbuf = NULL;
+                       }
+               }
        }
 }
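The new branch exists because the vector Tx paths free mbufs in tx_rs_thresh batches without writing NULL back to sw_ring[], so only the span between the last completed clean-up point and tx_tail can still hold live mbufs. A worked example on hypothetical ring state: with nb_tx_desc = 512, tx_rs_thresh = 32, tx_next_dd = 479 and tx_tail = 16, the start index is 479 - 32 + 1 = 448; since tx_tail < 448, live mbufs occupy [448, 511] and [0, 15], so the walk runs to the ring end, wraps to 0, and stops at tx_tail. The same walk as a standalone sketch (the entry type is illustrative):

```c
#include <rte_mbuf.h>

struct sw_entry { struct rte_mbuf *mbuf; };	/* illustrative type */

/* Free the live span [start, tail) on a ring of nb_desc entries,
 * wrapping at the ring end when tail < start. */
static void
free_live_mbufs(struct sw_entry *ring, uint16_t nb_desc,
		uint16_t start, uint16_t tail)
{
	uint16_t i = start;

	if (tail < i) {
		for (; i < nb_desc; i++) {
			rte_pktmbuf_free_seg(ring[i].mbuf);
			ring[i].mbuf = NULL;
		}
		i = 0;
	}
	for (; i < tail; i++) {
		rte_pktmbuf_free_seg(ring[i].mbuf);
		ring[i].mbuf = NULL;
	}
}
```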
 
@@ -2423,7 +2418,7 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
                mbuf->port = rxq->port_id;
 
                dma_addr =
-                       rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
+                       rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
 
                rxd = &rxq->rx_ring[i];
                rxd->read.pkt_addr = dma_addr;
@@ -2466,7 +2461,7 @@ i40e_rx_queue_config(struct i40e_rx_queue *rxq)
        case I40E_FLAG_HEADER_SPLIT_DISABLED:
        default:
                rxq->rx_hdr_len = 0;
-               rxq->rx_buf_len = RTE_ALIGN(buf_size,
+               rxq->rx_buf_len = RTE_ALIGN_FLOOR(buf_size,
                        (1 << I40E_RXQ_CTX_DBUFF_SHIFT));
                rxq->hs_mode = i40e_header_split_none;
                break;
@@ -2667,7 +2662,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
        txq->reg_idx = pf->fdir.fdir_vsi->base_queue;
        txq->vsi = pf->fdir.fdir_vsi;
 
-       txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+       txq->tx_ring_phys_addr = tz->iova;
        txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
        /*
         * don't need to allocate software ring and reset for the fdir
@@ -2723,7 +2718,8 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
        rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
        rxq->vsi = pf->fdir.fdir_vsi;
 
-       rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+       rxq->rx_ring_phys_addr = rz->iova;
+       memset(rz->addr, 0, I40E_FDIR_NUM_RX_DESC * sizeof(union i40e_rx_desc));
        rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
 
        /*
@@ -2814,6 +2810,17 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                                     dev->data->port_id);
 
                        dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
+#ifdef RTE_ARCH_X86
+                       /*
+                        * since AVX frequency can be different from base
+                        * frequency, limit use of the AVX2 version to later
+                        * platforms, not all those that could theoretically
+                        * run it.
+                        */
+                       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+                               dev->rx_pkt_burst =
+                                       i40e_recv_scattered_pkts_vec_avx2;
+#endif
                } else {
                        PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
                                           "allocation callback (port=%d).",
@@ -2833,6 +2840,16 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                             dev->data->port_id);
 
                dev->rx_pkt_burst = i40e_recv_pkts_vec;
+#ifdef RTE_ARCH_X86
+               /*
+                * since AVX frequency can be different from base
+                * frequency, limit use of the AVX2 version to later
+                * platforms, not all those that could theoretically
+                * run it.
+                */
+               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+                       dev->rx_pkt_burst = i40e_recv_pkts_vec_avx2;
+#endif
        } else if (ad->rx_bulk_alloc_allowed) {
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
                                    "satisfied. Rx Burst Bulk Alloc function "
@@ -2853,7 +2870,9 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
                rx_using_sse =
                        (dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
-                        dev->rx_pkt_burst == i40e_recv_pkts_vec);
+                        dev->rx_pkt_burst == i40e_recv_pkts_vec ||
+                        dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx2 ||
+                        dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2);
 
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
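Every AVX2 hook-up in this patch gates on rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F): as the comments explain, AVX512F support is used as a proxy for cores recent enough that AVX2 code runs at or near base frequency. The runtime-dispatch pattern reduced to a sketch (the two burst functions are hypothetical):

```c
#include <stdint.h>
#include <rte_cpuflags.h>
#include <rte_mbuf.h>

typedef uint16_t (*rx_burst_t)(void *rxq, struct rte_mbuf **pkts,
			       uint16_t nb_pkts);

/* Hypothetical SSE and AVX2 builds of the same burst routine. */
uint16_t rx_burst_sse(void *rxq, struct rte_mbuf **pkts, uint16_t n);
uint16_t rx_burst_avx2(void *rxq, struct rte_mbuf **pkts, uint16_t n);

static rx_burst_t
select_rx_burst(void)
{
	/* AVX512F implies AVX2 without a heavy frequency penalty. */
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
		return rx_burst_avx2;
	return rx_burst_sse;
}
```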
@@ -2910,6 +2929,16 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
                if (ad->tx_vec_allowed) {
                        PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
                        dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+#ifdef RTE_ARCH_X86
+                       /*
+                        * since AVX frequency can be different from base
+                        * frequency, limit use of the AVX2 version to later
+                        * platforms, not all those that could theoretically
+                        * run it.
+                        */
+                       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+                               dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx2;
+#endif
                } else {
                        PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
                        dev->tx_pkt_burst = i40e_xmit_pkts_simple;
@@ -2922,6 +2951,75 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
        }
 }
 
+void __attribute__((cold))
+i40e_set_default_ptype_table(struct rte_eth_dev *dev)
+{
+       struct i40e_adapter *ad =
+               I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       int i;
+
+       for (i = 0; i < I40E_MAX_PKT_TYPE; i++)
+               ad->ptype_tbl[i] = i40e_get_default_pkt_type(i);
+}
+
+void __attribute__((cold))
+i40e_set_default_pctype_table(struct rte_eth_dev *dev)
+{
+       struct i40e_adapter *ad =
+                       I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       int i;
+
+       for (i = 0; i < I40E_FLOW_TYPE_MAX; i++)
+               ad->pctypes_tbl[i] = 0ULL;
+       ad->flow_types_mask = 0ULL;
+       ad->pctypes_mask = 0ULL;
+
+       ad->pctypes_tbl[RTE_ETH_FLOW_FRAG_IPV4] =
+                               (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4);
+       ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_UDP] =
+                               (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP);
+       ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_TCP] =
+                               (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
+       ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_SCTP] =
+                               (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP);
+       ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_OTHER] =
+                               (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
+       ad->pctypes_tbl[RTE_ETH_FLOW_FRAG_IPV6] =
+                               (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6);
+       ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_UDP] =
+                               (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP);
+       ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_TCP] =
+                               (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
+       ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_SCTP] =
+                               (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP);
+       ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_OTHER] =
+                               (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
+       ad->pctypes_tbl[RTE_ETH_FLOW_L2_PAYLOAD] =
+                               (1ULL << I40E_FILTER_PCTYPE_L2_PAYLOAD);
+
+       if (hw->mac.type == I40E_MAC_X722) {
+               ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_UDP] |=
+                       (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP);
+               ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_UDP] |=
+                       (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
+               ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_TCP] |=
+                       (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
+               ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_UDP] |=
+                       (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP);
+               ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_UDP] |=
+                       (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
+               ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_TCP] |=
+                       (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
+       }
+
+       for (i = 0; i < I40E_FLOW_TYPE_MAX; i++) {
+               if (ad->pctypes_tbl[i])
+                       ad->flow_types_mask |= (1ULL << i);
+               ad->pctypes_mask |= ad->pctypes_tbl[i];
+       }
+}
+
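i40e_set_default_pctype_table() expands each software flow type (RTE_ETH_FLOW_*) into a bitmask of hardware packet-classifier types, adds the X722-only pctypes where applicable, and caches two summary masks. A hedged sketch of how such a table is consulted (the helper name is hypothetical; the fields are the ones populated above):

```c
#include <stdint.h>

/* Expand one flow type into its configured pctype bits; return 0 when
 * the flow type is out of range or not populated for this MAC. */
static uint64_t
flow_type_to_pctypes(const struct i40e_adapter *ad, uint16_t flow_type)
{
	if (flow_type >= I40E_FLOW_TYPE_MAX ||
	    !(ad->flow_types_mask & (1ULL << flow_type)))
		return 0;

	return ad->pctypes_tbl[flow_type];
}
```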
 /* Stubs needed for linkage when CONFIG_RTE_I40E_INC_VECTOR is set to 'n' */
 int __attribute__((weak))
 i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
@@ -2947,6 +3045,22 @@ i40e_recv_scattered_pkts_vec(
        return 0;
 }
 
+uint16_t __attribute__((weak))
+i40e_recv_pkts_vec_avx2(void __rte_unused *rx_queue,
+                       struct rte_mbuf __rte_unused **rx_pkts,
+                       uint16_t __rte_unused nb_pkts)
+{
+       return 0;
+}
+
+uint16_t __attribute__((weak))
+i40e_recv_scattered_pkts_vec_avx2(void __rte_unused *rx_queue,
+                       struct rte_mbuf __rte_unused **rx_pkts,
+                       uint16_t __rte_unused nb_pkts)
+{
+       return 0;
+}
+
 int __attribute__((weak))
 i40e_rxq_vec_setup(struct i40e_rx_queue __rte_unused *rxq)
 {
@@ -2972,3 +3086,11 @@ i40e_xmit_fixed_burst_vec(void __rte_unused * tx_queue,
 {
        return 0;
 }
+
+uint16_t __attribute__((weak))
+i40e_xmit_pkts_vec_avx2(void __rte_unused * tx_queue,
+                         struct rte_mbuf __rte_unused **tx_pkts,
+                         uint16_t __rte_unused nb_pkts)
+{
+       return 0;
+}