net/i40e: add AVX512 vector path
[dpdk.git] / drivers / net / i40e / i40e_rxtx.c
index 6a66cec..25963b3 100644 (file)
@@ -23,6 +23,7 @@
 #include <rte_udp.h>
 #include <rte_ip.h>
 #include <rte_net.h>
+#include <rte_vect.h>
 
 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
 #define I40E_TX_OFFLOAD_NOTSUP_MASK \
                (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
 
+/*
+ * Fill in a power-monitor condition for an Rx queue.
+ *
+ * The condition points at the status_error_len qword of the descriptor
+ * found at the queue's current rx_tail. The DD status bit is used as both
+ * the mask and the expected value. Always returns 0.
+ */
+int
+i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
+{
+       struct i40e_rx_queue *rxq = rx_queue;
+       const uint16_t tail = rxq->rx_tail;
+       volatile union i40e_rx_desc *next_rxd = &rxq->rx_ring[tail];
+
+       /* the monitored field is accessed as a 64-bit quantity */
+       pmc->data_sz = sizeof(uint64_t);
+       pmc->addr = &next_rxd->wb.qword1.status_error_len;
+
+       /*
+        * a descriptor that was already written to is expected to have
+        * its DD bit set to 1, so only that bit is compared.
+        */
+       pmc->mask = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
+       pmc->val = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
+
+       return 0;
+}
+
 static inline void
 i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
 {
@@ -760,7 +786,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        if (nb_hold > rxq->rx_free_thresh) {
                rx_id = (uint16_t) ((rx_id == 0) ?
                        (rxq->nb_rx_desc - 1) : (rx_id - 1));
-               I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+               I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
                nb_hold = 0;
        }
        rxq->nb_rx_hold = nb_hold;
@@ -938,7 +964,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
        if (nb_hold > rxq->rx_free_thresh) {
                rx_id = (uint16_t)(rx_id == 0 ?
                        (rxq->nb_rx_desc - 1) : (rx_id - 1));
-               I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+               I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
                nb_hold = 0;
        }
        rxq->nb_rx_hold = nb_hold;
@@ -989,6 +1015,24 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
        return ctx_desc;
 }
 
+/* HW requires that Tx buffer size ranges from 1B up to (16K-1)B. */
+#define I40E_MAX_DATA_PER_TXD \
+       (I40E_TXD_QW1_TX_BUF_SZ_MASK >> I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
+/*
+ * Count the Tx data descriptors needed for one packet when a single
+ * descriptor may carry at most I40E_MAX_DATA_PER_TXD bytes: each segment
+ * of the mbuf chain adds DIV_ROUND_UP(data_len, I40E_MAX_DATA_PER_TXD).
+ */
+static inline uint16_t
+i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt)
+{
+       struct rte_mbuf *seg;
+       uint16_t nb_desc = 0;
+
+       for (seg = tx_pkt; seg != NULL; seg = seg->next)
+               nb_desc += DIV_ROUND_UP(seg->data_len,
+                                       I40E_MAX_DATA_PER_TXD);
+
+       return nb_desc;
+}
+
 uint16_t
 i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -1021,7 +1065,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
        /* Check if the descriptor ring needs to be cleaned. */
        if (txq->nb_tx_free < txq->tx_free_thresh)
-               i40e_xmit_cleanup(txq);
+               (void)i40e_xmit_cleanup(txq);
 
        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                td_cmd = 0;
@@ -1046,8 +1090,15 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                 * The number of descriptors that must be allocated for
                 * a packet equals to the number of the segments of that
                 * packet plus 1 context descriptor if needed.
+                * Recalculate the needed tx descs when TSO enabled in case
+                * the mbuf data size exceeds max data size that hw allows
+                * per tx desc.
                 */
-               nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+               if (ol_flags & PKT_TX_TCP_SEG)
+                       nb_used = (uint16_t)(i40e_calc_pkt_desc(tx_pkt) +
+                                            nb_ctx);
+               else
+                       nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
                tx_last = (uint16_t)(tx_id + nb_used - 1);
 
                /* Circular ring */
@@ -1160,6 +1211,24 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        slen = m_seg->data_len;
                        buf_dma_addr = rte_mbuf_data_iova(m_seg);
 
+                       while ((ol_flags & PKT_TX_TCP_SEG) &&
+                               unlikely(slen > I40E_MAX_DATA_PER_TXD)) {
+                               txd->buffer_addr =
+                                       rte_cpu_to_le_64(buf_dma_addr);
+                               txd->cmd_type_offset_bsz =
+                                       i40e_build_ctob(td_cmd,
+                                       td_offset, I40E_MAX_DATA_PER_TXD,
+                                       td_tag);
+
+                               buf_dma_addr += I40E_MAX_DATA_PER_TXD;
+                               slen -= I40E_MAX_DATA_PER_TXD;
+
+                               txe->last_id = tx_last;
+                               tx_id = txe->next_id;
+                               txe = txn;
+                               txd = &txr[tx_id];
+                               txn = &sw_ring[txe->next_id];
+                       }
                        PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]:\n"
                                "buf_dma_addr: %#"PRIx64";\n"
                                "td_cmd: %#x;\n"
@@ -1205,7 +1274,8 @@ end_of_tx:
                   (unsigned) txq->port_id, (unsigned) txq->queue_id,
                   (unsigned) tx_id, (unsigned) nb_tx);
 
-       I40E_PCI_REG_WRITE(txq->qtx_tail, tx_id);
+       rte_io_wmb();
+       I40E_PCI_REG_WC_WRITE_RELAXED(txq->qtx_tail, tx_id);
        txq->tx_tail = tx_id;
 
        return nb_tx;
@@ -1356,7 +1426,7 @@ tx_xmit_pkts(struct i40e_tx_queue *txq,
                txq->tx_tail = 0;
 
        /* Update the tx tail register */
-       I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+       I40E_PCI_REG_WC_WRITE(txq->qtx_tail, txq->tx_tail);
 
        return nb_pkts;
 }
@@ -1527,6 +1597,15 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        PMD_INIT_FUNC_TRACE();
 
        rxq = dev->data->rx_queues[rx_queue_id];
+       if (!rxq || !rxq->q_set) {
+               PMD_DRV_LOG(ERR, "RX queue %u not available or setup",
+                           rx_queue_id);
+               return -EINVAL;
+       }
+
+       if (rxq->rx_deferred_start)
+               PMD_DRV_LOG(WARNING, "RX queue %u is deferrd start",
+                           rx_queue_id);
 
        err = i40e_alloc_rx_queue_mbufs(rxq);
        if (err) {
@@ -1559,6 +1638,11 @@ i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
        rxq = dev->data->rx_queues[rx_queue_id];
+       if (!rxq || !rxq->q_set) {
+               PMD_DRV_LOG(ERR, "RX queue %u not available or setup",
+                               rx_queue_id);
+               return -EINVAL;
+       }
 
        /*
         * rx_queue_id is queue id application refers to, while
@@ -1587,6 +1671,15 @@ i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        PMD_INIT_FUNC_TRACE();
 
        txq = dev->data->tx_queues[tx_queue_id];
+       if (!txq || !txq->q_set) {
+               PMD_DRV_LOG(ERR, "TX queue %u is not available or setup",
+                           tx_queue_id);
+               return -EINVAL;
+       }
+
+       if (txq->tx_deferred_start)
+               PMD_DRV_LOG(WARNING, "TX queue %u is deferrd start",
+                           tx_queue_id);
 
        /*
         * tx_queue_id is queue id application refers to, while
@@ -1611,6 +1704,11 @@ i40e_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
        txq = dev->data->tx_queues[tx_queue_id];
+       if (!txq || !txq->q_set) {
+               PMD_DRV_LOG(ERR, "TX queue %u is not available or setup",
+                       tx_queue_id);
+               return -EINVAL;
+       }
 
        /*
         * tx_queue_id is queue id application refers to, while
@@ -1669,6 +1767,10 @@ i40e_dev_supported_ptypes_get(struct rte_eth_dev *dev)
            dev->rx_pkt_burst == i40e_recv_scattered_pkts ||
            dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
            dev->rx_pkt_burst == i40e_recv_pkts_vec ||
+#ifdef CC_AVX512_SUPPORT
+           dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx512 ||
+           dev->rx_pkt_burst == i40e_recv_pkts_vec_avx512 ||
+#endif
            dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx2 ||
            dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2)
                return ptypes;
@@ -2455,6 +2557,113 @@ i40e_tx_queue_release_mbufs(struct i40e_tx_queue *txq)
        }
 }
 
+/*
+ * Free transmitted mbufs held in the software ring of a Tx queue.
+ *
+ * The walk starts at the entry following tx_tail and follows next_id
+ * links. Each non-NULL mbuf segment is released with
+ * rte_pktmbuf_free_seg() and its slot cleared; a packet is counted when
+ * the entry's last_id equals its own index.
+ *
+ * txq:      Tx queue to clean.
+ * free_cnt: maximum number of packets to free; 0 is replaced by
+ *           txq->nb_tx_desc.
+ *
+ * Returns the number of packets counted, or 0 when the queue has no free
+ * descriptors and i40e_xmit_cleanup() returns non-zero.
+ */
+static int
+i40e_tx_done_cleanup_full(struct i40e_tx_queue *txq,
+                       uint32_t free_cnt)
+{
+       struct i40e_tx_entry *swr_ring = txq->sw_ring;
+       uint16_t i, tx_last, tx_id;
+       uint16_t nb_tx_free_last;
+       uint16_t nb_tx_to_clean;
+       uint32_t pkt_cnt;
+
+       /* Start freeing mbufs from the entry after tx_tail */
+       tx_last = txq->tx_tail;
+       tx_id  = swr_ring[tx_last].next_id;
+
+       /* No free descriptors: try one cleanup pass, give up if non-zero */
+       if (txq->nb_tx_free == 0 && i40e_xmit_cleanup(txq))
+               return 0;
+
+       nb_tx_to_clean = txq->nb_tx_free;
+       nb_tx_free_last = txq->nb_tx_free;
+       if (!free_cnt)
+               free_cnt = txq->nb_tx_desc;
+
+       /* Loop through swr_ring, freeing mbufs and counting the
+        * packets they complete.
+        */
+       for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
+               /*
+                * Visit at most nb_tx_to_clean entries, stopping early once
+                * free_cnt packets are counted or the walk reaches tx_last.
+                */
+               for (i = 0; i < nb_tx_to_clean &&
+                       pkt_cnt < free_cnt &&
+                       tx_id != tx_last; i++) {
+                       if (swr_ring[tx_id].mbuf != NULL) {
+                               rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
+                               swr_ring[tx_id].mbuf = NULL;
+
+                               /*
+                                * last segment in the packet,
+                                * increment packet count
+                                */
+                               pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
+                       }
+
+                       tx_id = swr_ring[tx_id].next_id;
+               }
+
+               /*
+                * Stop when fewer than tx_rs_thresh descriptors are in use
+                * or when the walk has come back to tx_last.
+                */
+               if (txq->tx_rs_thresh > txq->nb_tx_desc -
+                       txq->nb_tx_free || tx_id == tx_last)
+                       break;
+
+               if (pkt_cnt < free_cnt) {
+                       if (i40e_xmit_cleanup(txq))
+                               break;
+
+                       /* Next pass covers only the newly freed descriptors */
+                       nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
+                       nb_tx_free_last = txq->nb_tx_free;
+               }
+       }
+
+       return (int)pkt_cnt;
+}
+
+/*
+ * Free Tx buffers by calling i40e_tx_free_bufs() repeatedly.
+ *
+ * free_cnt is clamped to txq->nb_tx_desc (0 also means nb_tx_desc) and
+ * then rounded down to a multiple of txq->tx_rs_thresh. The loop ends
+ * when that budget is reached, when fewer than tx_rs_thresh descriptors
+ * are in use, or when i40e_tx_free_bufs() returns 0.
+ *
+ * Returns the sum of the values returned by i40e_tx_free_bufs().
+ */
+static int
+i40e_tx_done_cleanup_simple(struct i40e_tx_queue *txq,
+                       uint32_t free_cnt)
+{
+       int freed = 0;
+       int budget;
+       int batch;
+
+       if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
+               free_cnt = txq->nb_tx_desc;
+
+       budget = free_cnt - free_cnt % txq->tx_rs_thresh;
+
+       while (freed < budget) {
+               if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
+                       break;
+
+               batch = i40e_tx_free_bufs(txq);
+               if (batch == 0)
+                       break;
+
+               freed += batch;
+       }
+
+       return freed;
+}
+
+/*
+ * Tx done cleanup variant for the vector Tx path: both arguments are
+ * ignored and -ENOTSUP is always returned.
+ */
+static int
+i40e_tx_done_cleanup_vec(struct i40e_tx_queue *txq, uint32_t free_cnt)
+{
+       RTE_SET_USED(txq);
+       RTE_SET_USED(free_cnt);
+
+       return -ENOTSUP;
+}
+/*
+ * Tx done cleanup entry point: choose the cleanup routine from the Tx
+ * path flags of the adapter behind the queue's port.
+ *
+ *   tx_simple_allowed clear               -> i40e_tx_done_cleanup_full()
+ *   tx_simple_allowed and tx_vec_allowed  -> i40e_tx_done_cleanup_vec()
+ *   tx_simple_allowed only                -> i40e_tx_done_cleanup_simple()
+ *
+ * Returns the value returned by the selected routine.
+ */
+int
+i40e_tx_done_cleanup(void *txq, uint32_t free_cnt)
+{
+       struct i40e_tx_queue *queue = (struct i40e_tx_queue *)txq;
+       struct rte_eth_dev *port = &rte_eth_devices[queue->port_id];
+       struct i40e_adapter *adapter =
+               I40E_DEV_PRIVATE_TO_ADAPTER(port->data->dev_private);
+
+       if (!adapter->tx_simple_allowed)
+               return i40e_tx_done_cleanup_full(queue, free_cnt);
+
+       if (adapter->tx_vec_allowed)
+               return i40e_tx_done_cleanup_vec(queue, free_cnt);
+
+       return i40e_tx_done_cleanup_simple(queue, free_cnt);
+}
+
 void
 i40e_reset_tx_queue(struct i40e_tx_queue *txq)
 {
@@ -2749,6 +2958,7 @@ i40e_dev_free_queues(struct rte_eth_dev *dev)
                        continue;
                i40e_dev_rx_queue_release(dev->data->rx_queues[i]);
                dev->data->rx_queues[i] = NULL;
+               rte_eth_dma_zone_free(dev, "rx_ring", i);
        }
 
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
@@ -2756,19 +2966,17 @@ i40e_dev_free_queues(struct rte_eth_dev *dev)
                        continue;
                i40e_dev_tx_queue_release(dev->data->tx_queues[i]);
                dev->data->tx_queues[i] = NULL;
+               rte_eth_dma_zone_free(dev, "tx_ring", i);
        }
 }
 
-#define I40E_FDIR_NUM_TX_DESC  I40E_MIN_RING_DESC
-#define I40E_FDIR_NUM_RX_DESC  I40E_MIN_RING_DESC
-
 enum i40e_status_code
 i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
 {
        struct i40e_tx_queue *txq;
        const struct rte_memzone *tz = NULL;
-       uint32_t ring_size;
        struct rte_eth_dev *dev;
+       uint32_t ring_size;
 
        if (!pf) {
                PMD_DRV_LOG(ERR, "PF is not available");
@@ -2808,12 +3016,14 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
 
        txq->tx_ring_phys_addr = tz->iova;
        txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
+
        /*
         * don't need to allocate software ring and reset for the fdir
         * program queue just set the queue has been configured.
         */
        txq->q_set = TRUE;
        pf->fdir.txq = txq;
+       pf->fdir.txq_available_buf_count = I40E_FDIR_PRG_PKT_CNT;
 
        return I40E_SUCCESS;
 }
@@ -2914,41 +3124,49 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        qinfo->conf.offloads = txq->offloads;
 }
 
-static eth_rx_burst_t
-i40e_get_latest_rx_vec(bool scatter)
+static inline bool
+get_avx_supported(bool request_avx512)
 {
-#if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
-               return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
-                                i40e_recv_pkts_vec_avx2;
+#ifdef RTE_ARCH_X86
+       if (request_avx512) {
+               if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512 &&
+               rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+               rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)
+#ifdef CC_AVX512_SUPPORT
+                       return true;
+#else
+               PMD_DRV_LOG(NOTICE,
+                       "AVX512 is not supported in build env");
+               return false;
 #endif
-       return scatter ? i40e_recv_scattered_pkts_vec :
-                        i40e_recv_pkts_vec;
-}
-
-static eth_rx_burst_t
-i40e_get_recommend_rx_vec(bool scatter)
-{
-#if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-       /*
-        * since AVX frequency can be different to base frequency, limit
-        * use of AVX2 version to later plaforms, not all those that could
-        * theoretically run it.
-        */
-       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
-               return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
-                                i40e_recv_pkts_vec_avx2;
+       } else {
+               if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256 &&
+               rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 &&
+               rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+#ifdef CC_AVX2_SUPPORT
+                       return true;
+#else
+               PMD_DRV_LOG(NOTICE,
+                       "AVX2 is not supported in build env");
+               return false;
 #endif
-       return scatter ? i40e_recv_scattered_pkts_vec :
-                        i40e_recv_pkts_vec;
+       }
+#else
+       RTE_SET_USED(request_avx512);
+#endif /* RTE_ARCH_X86 */
+
+       return false;
 }
 
-void __attribute__((cold))
+
+void __rte_cold
 i40e_set_rx_function(struct rte_eth_dev *dev)
 {
        struct i40e_adapter *ad =
                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        uint16_t rx_using_sse, i;
+       bool use_avx2 = false;
+       bool use_avx512 = false;
        /* In order to allow Vector Rx there are a few configuration
         * conditions to be met and Rx Bulk Allocation should be allowed.
         */
@@ -2971,19 +3189,53 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                                        break;
                                }
                        }
+
+                       use_avx512 = get_avx_supported(1);
+
+                       if (!use_avx512)
+                               use_avx2 = get_avx_supported(0);
                }
        }
 
-       if (ad->rx_vec_allowed) {
-               /* Vec Rx path */
-               PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.",
-                               dev->data->port_id);
-               if (ad->use_latest_vec)
-                       dev->rx_pkt_burst =
-                       i40e_get_latest_rx_vec(dev->data->scattered_rx);
-               else
-                       dev->rx_pkt_burst =
-                       i40e_get_recommend_rx_vec(dev->data->scattered_rx);
+       if (ad->rx_vec_allowed  &&
+                       rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+               if (dev->data->scattered_rx) {
+                       if (use_avx512) {
+#ifdef CC_AVX512_SUPPORT
+                               PMD_DRV_LOG(NOTICE,
+                                       "Using AVX512 Vector Scattered Rx (port %d).",
+                                       dev->data->port_id);
+                               dev->rx_pkt_burst =
+                                       i40e_recv_scattered_pkts_vec_avx512;
+#endif
+                       } else {
+                               PMD_INIT_LOG(DEBUG,
+                                       "Using %sVector Scattered Rx (port %d).",
+                                       use_avx2 ? "avx2 " : "",
+                                       dev->data->port_id);
+                               dev->rx_pkt_burst = use_avx2 ?
+                                       i40e_recv_scattered_pkts_vec_avx2 :
+                                       i40e_recv_scattered_pkts_vec;
+                       }
+               } else {
+                       if (use_avx512) {
+#ifdef CC_AVX512_SUPPORT
+                               PMD_DRV_LOG(NOTICE,
+                                       "Using AVX512 Vector Rx (port %d).",
+                                       dev->data->port_id);
+                               dev->rx_pkt_burst =
+                                       i40e_recv_pkts_vec_avx512;
+#endif
+                       } else {
+                               PMD_INIT_LOG(DEBUG,
+                                       "Using %sVector Rx (port %d).",
+                                       use_avx2 ? "avx2 " : "",
+                                       dev->data->port_id);
+                               dev->rx_pkt_burst = use_avx2 ?
+                                       i40e_recv_pkts_vec_avx2 :
+                                       i40e_recv_pkts_vec;
+                       }
+               }
        } else if (!dev->data->scattered_rx && ad->rx_bulk_alloc_allowed) {
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
                                    "satisfied. Rx Burst Bulk Alloc function "
@@ -3005,6 +3257,10 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                rx_using_sse =
                        (dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
                         dev->rx_pkt_burst == i40e_recv_pkts_vec ||
+#ifdef CC_AVX512_SUPPORT
+                        dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx512 ||
+                        dev->rx_pkt_burst == i40e_recv_pkts_vec_avx512 ||
+#endif
                         dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx2 ||
                         dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2);
 
@@ -3017,52 +3273,52 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
        }
 }
 
-int
-i40e_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
-                      struct rte_eth_burst_mode *mode)
-{
-       eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
-       uint64_t options;
-
-       if (pkt_burst == i40e_recv_scattered_pkts)
-               options = RTE_ETH_BURST_SCALAR | RTE_ETH_BURST_SCATTERED;
-       else if (pkt_burst == i40e_recv_pkts_bulk_alloc)
-               options = RTE_ETH_BURST_SCALAR | RTE_ETH_BURST_BULK_ALLOC;
-       else if (pkt_burst == i40e_recv_pkts)
-               options = RTE_ETH_BURST_SCALAR;
+static const struct {
+       eth_rx_burst_t pkt_burst;
+       const char *info;
+} i40e_rx_burst_infos[] = {
+       { i40e_recv_scattered_pkts,          "Scalar Scattered" },
+       { i40e_recv_pkts_bulk_alloc,         "Scalar Bulk Alloc" },
+       { i40e_recv_pkts,                    "Scalar" },
 #ifdef RTE_ARCH_X86
-       else if (pkt_burst == i40e_recv_scattered_pkts_vec_avx2)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_AVX2 |
-                         RTE_ETH_BURST_SCATTERED;
-       else if (pkt_burst == i40e_recv_pkts_vec_avx2)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_AVX2;
-       else if (pkt_burst == i40e_recv_scattered_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_SSE |
-                         RTE_ETH_BURST_SCATTERED;
-       else if (pkt_burst == i40e_recv_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_SSE;
+#ifdef CC_AVX512_SUPPORT
+       { i40e_recv_scattered_pkts_vec_avx512, "Vector AVX512 Scattered" },
+       { i40e_recv_pkts_vec_avx512,           "Vector AVX512" },
+#endif
+       { i40e_recv_scattered_pkts_vec_avx2, "Vector AVX2 Scattered" },
+       { i40e_recv_pkts_vec_avx2,           "Vector AVX2" },
+       { i40e_recv_scattered_pkts_vec,      "Vector SSE Scattered" },
+       { i40e_recv_pkts_vec,                "Vector SSE" },
 #elif defined(RTE_ARCH_ARM64)
-       else if (pkt_burst == i40e_recv_scattered_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_NEON |
-                         RTE_ETH_BURST_SCATTERED;
-       else if (pkt_burst == i40e_recv_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_NEON;
+       { i40e_recv_scattered_pkts_vec,      "Vector Neon Scattered" },
+       { i40e_recv_pkts_vec,                "Vector Neon" },
 #elif defined(RTE_ARCH_PPC_64)
-       else if (pkt_burst == i40e_recv_scattered_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_ALTIVEC |
-                         RTE_ETH_BURST_SCATTERED;
-       else if (pkt_burst == i40e_recv_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_ALTIVEC;
+       { i40e_recv_scattered_pkts_vec,      "Vector AltiVec Scattered" },
+       { i40e_recv_pkts_vec,                "Vector AltiVec" },
 #endif
-       else
-               options = 0;
+};
 
-       mode->options = options;
+int
+i40e_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
+                      struct rte_eth_burst_mode *mode)
+{
+       eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
+       int ret = -EINVAL;
+       unsigned int i;
+
+       for (i = 0; i < RTE_DIM(i40e_rx_burst_infos); ++i) {
+               if (pkt_burst == i40e_rx_burst_infos[i].pkt_burst) {
+                       snprintf(mode->info, sizeof(mode->info), "%s",
+                                i40e_rx_burst_infos[i].info);
+                       ret = 0;
+                       break;
+               }
+       }
 
-       return options != 0 ? 0 : -EINVAL;
+       return ret;
 }
 
-void __attribute__((cold))
+void __rte_cold
 i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct i40e_tx_queue *txq)
 {
        struct i40e_adapter *ad =
@@ -3088,37 +3344,14 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct i40e_tx_queue *txq)
                                txq->queue_id);
 }
 
-static eth_tx_burst_t
-i40e_get_latest_tx_vec(void)
-{
-#if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
-               return i40e_xmit_pkts_vec_avx2;
-#endif
-       return i40e_xmit_pkts_vec;
-}
-
-static eth_tx_burst_t
-i40e_get_recommend_tx_vec(void)
-{
-#if defined(RTE_ARCH_X86) && defined(CC_AVX2_SUPPORT)
-       /*
-        * since AVX frequency can be different to base frequency, limit
-        * use of AVX2 version to later plaforms, not all those that could
-        * theoretically run it.
-        */
-       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
-               return i40e_xmit_pkts_vec_avx2;
-#endif
-       return i40e_xmit_pkts_vec;
-}
-
-void __attribute__((cold))
+void __rte_cold
 i40e_set_tx_function(struct rte_eth_dev *dev)
 {
        struct i40e_adapter *ad =
                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        int i;
+       bool use_avx2 = false;
+       bool use_avx512 = false;
 
        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
                if (ad->tx_vec_allowed) {
@@ -3131,18 +3364,31 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
                                        break;
                                }
                        }
+
+                       use_avx512 = get_avx_supported(1);
+
+                       if (!use_avx512)
+                               use_avx2 = get_avx_supported(0);
                }
        }
 
        if (ad->tx_simple_allowed) {
-               if (ad->tx_vec_allowed) {
-                       PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
-                       if (ad->use_latest_vec)
-                               dev->tx_pkt_burst =
-                                       i40e_get_latest_tx_vec();
-                       else
-                               dev->tx_pkt_burst =
-                                       i40e_get_recommend_tx_vec();
+               if (ad->tx_vec_allowed &&
+                               rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+                       if (use_avx512) {
+#ifdef CC_AVX512_SUPPORT
+                               PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).",
+                                           dev->data->port_id);
+                               dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx512;
+#endif
+                       } else {
+                               PMD_INIT_LOG(DEBUG, "Using %sVector Tx (port %d).",
+                                            use_avx2 ? "avx2 " : "",
+                                            dev->data->port_id);
+                               dev->tx_pkt_burst = use_avx2 ?
+                                                   i40e_xmit_pkts_vec_avx2 :
+                                                   i40e_xmit_pkts_vec;
+                       }
                } else {
                        PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
                        dev->tx_pkt_burst = i40e_xmit_pkts_simple;
@@ -3155,38 +3401,46 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
        }
 }
 
-int
-i40e_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
-                      struct rte_eth_burst_mode *mode)
-{
-       eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
-       uint64_t options;
-
-       if (pkt_burst == i40e_xmit_pkts_simple)
-               options = RTE_ETH_BURST_SCALAR | RTE_ETH_BURST_SIMPLE;
-       else if (pkt_burst == i40e_xmit_pkts)
-               options = RTE_ETH_BURST_SCALAR;
+static const struct {
+       eth_tx_burst_t pkt_burst;
+       const char *info;
+} i40e_tx_burst_infos[] = {
+       { i40e_xmit_pkts_simple,   "Scalar Simple" },
+       { i40e_xmit_pkts,          "Scalar" },
 #ifdef RTE_ARCH_X86
-       else if (pkt_burst == i40e_xmit_pkts_vec_avx2)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_AVX2;
-       else if (pkt_burst == i40e_xmit_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_SSE;
+#ifdef CC_AVX512_SUPPORT
+       { i40e_xmit_pkts_vec_avx512, "Vector AVX512" },
+#endif
+       { i40e_xmit_pkts_vec_avx2, "Vector AVX2" },
+       { i40e_xmit_pkts_vec,      "Vector SSE" },
 #elif defined(RTE_ARCH_ARM64)
-       else if (pkt_burst == i40e_xmit_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_NEON;
+       { i40e_xmit_pkts_vec,      "Vector Neon" },
 #elif defined(RTE_ARCH_PPC_64)
-       else if (pkt_burst == i40e_xmit_pkts_vec)
-               options = RTE_ETH_BURST_VECTOR | RTE_ETH_BURST_ALTIVEC;
+       { i40e_xmit_pkts_vec,      "Vector AltiVec" },
 #endif
-       else
-               options = 0;
+};
 
-       mode->options = options;
+int
+i40e_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
+                      struct rte_eth_burst_mode *mode)
+{
+       eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
+       int ret = -EINVAL;
+       unsigned int i;
+
+       for (i = 0; i < RTE_DIM(i40e_tx_burst_infos); ++i) {
+               if (pkt_burst == i40e_tx_burst_infos[i].pkt_burst) {
+                       snprintf(mode->info, sizeof(mode->info), "%s",
+                                i40e_tx_burst_infos[i].info);
+                       ret = 0;
+                       break;
+               }
+       }
 
-       return options != 0 ? 0 : -EINVAL;
+       return ret;
 }
 
-void __attribute__((cold))
+void __rte_cold
 i40e_set_default_ptype_table(struct rte_eth_dev *dev)
 {
        struct i40e_adapter *ad =
@@ -3197,7 +3451,7 @@ i40e_set_default_ptype_table(struct rte_eth_dev *dev)
                ad->ptype_tbl[i] = i40e_get_default_pkt_type(i);
 }
 
-void __attribute__((cold))
+void __rte_cold
 i40e_set_default_pctype_table(struct rte_eth_dev *dev)
 {
        struct i40e_adapter *ad =
@@ -3257,7 +3511,6 @@ i40e_set_default_pctype_table(struct rte_eth_dev *dev)
 }
 
 #ifndef RTE_LIBRTE_I40E_INC_VECTOR
-/* Stubs needed for linkage when CONFIG_RTE_LIBRTE_I40E_INC_VECTOR is set to 'n' */
 int
 i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
 {