ethdev: allow returning error on VLAN offload ops
[dpdk.git] / drivers / net / fm10k / fm10k_ethdev.c
index 421266b..7e52a2c 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  */
 
 #include <rte_ethdev.h>
+#include <rte_ethdev_pci.h>
 #include <rte_malloc.h>
 #include <rte_memzone.h>
 #include <rte_string_fns.h>
 #include <rte_dev.h>
 #include <rte_spinlock.h>
+#include <rte_kvargs.h>
 
 #include "fm10k.h"
 #include "base/fm10k_api.h"
 #define MAX_QUERY_SWITCH_STATE_TIMES 10
 /* Wait interval to get switch status */
 #define WAIT_SWITCH_MSG_US    100000
+/* Quiescence period for the switch, in microseconds */
+#define FM10K_SWITCH_QUIESCE_US 10000
 /* Number of chars per uint32 type */
 #define CHARS_PER_UINT32 (sizeof(uint32_t))
 #define BIT_MASK_PER_UINT32 ((1 << CHARS_PER_UINT32) - 1)
 
+/* default 1:1 map from queue ID to interrupt vector ID */
+#define Q2V(pci_dev, queue_id) ((pci_dev)->intr_handle.intr_vec[queue_id])
+
+/* First 64 Logical ports for PF/VMDQ, second 64 for Flow director */
+#define MAX_LPORT_NUM    128
+#define GLORT_FD_Q_BASE  0x40
+#define GLORT_PF_MASK    0xFFC0
+#define GLORT_FD_MASK    GLORT_PF_MASK
+#define GLORT_FD_INDEX   GLORT_FD_Q_BASE
+
 static void fm10k_close_mbx_service(struct fm10k_hw *hw);
 static void fm10k_dev_promiscuous_enable(struct rte_eth_dev *dev);
 static void fm10k_dev_promiscuous_disable(struct rte_eth_dev *dev);
@@ -69,6 +83,8 @@ static void fm10k_tx_queue_release(void *queue);
 static void fm10k_rx_queue_release(void *queue);
 static void fm10k_set_rx_function(struct rte_eth_dev *dev);
 static void fm10k_set_tx_function(struct rte_eth_dev *dev);
+static int fm10k_check_ftag(struct rte_devargs *devargs);
+static int fm10k_link_update(struct rte_eth_dev *dev, int wait_to_complete);
 
 struct fm10k_xstats_name_off {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
@@ -109,6 +125,8 @@ struct fm10k_xstats_name_off fm10k_hw_stats_tx_q_strings[] = {
 
 #define FM10K_NB_XSTATS (FM10K_NB_HW_XSTATS + FM10K_MAX_QUEUES_PF * \
                (FM10K_NB_RX_Q_XSTATS + FM10K_NB_TX_Q_XSTATS))
+static int
+fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
 
 static void
 fm10k_mbx_initlock(struct fm10k_hw *hw)
@@ -181,9 +199,9 @@ fm10k_tx_vec_condition_check(__rte_unused struct fm10k_tx_queue *txq)
 }
 
 uint16_t __attribute__((weak))
-fm10k_xmit_pkts_vec(__rte_unused void *tx_queue,
-               __rte_unused struct rte_mbuf **tx_pkts,
-               __rte_unused uint16_t nb_pkts)
+fm10k_xmit_fixed_burst_vec(__rte_unused void *tx_queue,
+                          __rte_unused struct rte_mbuf **tx_pkts,
+                          __rte_unused uint16_t nb_pkts)
 {
        return 0;
 }
@@ -517,8 +535,10 @@ fm10k_dev_rss_configure(struct rte_eth_dev *dev)
 
        if (dev->data->nb_rx_queues == 1 ||
            dev_conf->rxmode.mq_mode != ETH_MQ_RX_RSS ||
-           dev_conf->rx_adv_conf.rss_conf.rss_hf == 0)
+           dev_conf->rx_adv_conf.rss_conf.rss_hf == 0) {
+               FM10K_WRITE_REG(hw, FM10K_MRQC(0), 0);
                return;
+       }
 
        /* random key is rss_intel_key (default) or user provided (rss_key) */
        if (dev_conf->rx_adv_conf.rss_conf.rss_key == NULL)
@@ -571,22 +591,11 @@ fm10k_dev_rss_configure(struct rte_eth_dev *dev)
 }
 
 static void
-fm10k_dev_logic_port_update(struct rte_eth_dev *dev,
-       uint16_t nb_lport_old, uint16_t nb_lport_new)
+fm10k_dev_logic_port_update(struct rte_eth_dev *dev, uint16_t nb_lport_new)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t i;
 
-       fm10k_mbx_lock(hw);
-       /* Disable previous logic ports */
-       if (nb_lport_old)
-               hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map,
-                       nb_lport_old, false);
-       /* Enable new logic ports */
-       hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map,
-               nb_lport_new, true);
-       fm10k_mbx_unlock(hw);
-
        for (i = 0; i < nb_lport_new; i++) {
                /* Set unicast mode by default. App can change
                 * to other mode in other API func.
@@ -606,7 +615,7 @@ fm10k_dev_mq_rx_configure(struct rte_eth_dev *dev)
        struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
        struct fm10k_macvlan_filter_info *macvlan;
        uint16_t nb_queue_pools = 0; /* pool number in configuration */
-       uint16_t nb_lport_new, nb_lport_old;
+       uint16_t nb_lport_new;
 
        macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
        vmdq_conf = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
@@ -624,9 +633,8 @@ fm10k_dev_mq_rx_configure(struct rte_eth_dev *dev)
        if (macvlan->nb_queue_pools == nb_queue_pools)
                return;
 
-       nb_lport_old = macvlan->nb_queue_pools ? macvlan->nb_queue_pools : 1;
        nb_lport_new = nb_queue_pools ? nb_queue_pools : 1;
-       fm10k_dev_logic_port_update(dev, nb_lport_old, nb_lport_new);
+       fm10k_dev_logic_port_update(dev, nb_lport_new);
 
        /* reset MAC/VLAN as it's based on VMDQ or PF main VSI */
        memset(dev->data->mac_addrs, 0,
@@ -668,6 +676,19 @@ fm10k_dev_tx_init(struct rte_eth_dev *dev)
                        PMD_INIT_LOG(ERR, "failed to disable queue %d", i);
                        return -1;
                }
+               /* Enable use of the FTAG bit in the TX descriptor; the
+                * PFVTCTL register is read-only for VF.
+                */
+               if (fm10k_check_ftag(dev->device->devargs)) {
+                       if (hw->mac.type == fm10k_mac_pf) {
+                               FM10K_WRITE_REG(hw, FM10K_PFVTCTL(i),
+                                               FM10K_PFVTCTL_FTAG_DESC_ENABLE);
+                               PMD_INIT_LOG(DEBUG, "FTAG mode is enabled");
+                       } else {
+                               PMD_INIT_LOG(ERR, "VF FTAG is not supported.");
+                               return -ENOTSUP;
+                       }
+               }
 
                /* set location and size for descriptor ring */
                FM10K_WRITE_REG(hw, FM10K_TDBAL(i),
@@ -675,6 +696,10 @@ fm10k_dev_tx_init(struct rte_eth_dev *dev)
                FM10K_WRITE_REG(hw, FM10K_TDBAH(i),
                                base_addr >> (CHAR_BIT * sizeof(uint32_t)));
                FM10K_WRITE_REG(hw, FM10K_TDLEN(i), size);
+
+               /* assign default SGLORT for each TX queue by PF */
+               if (hw->mac.type == fm10k_mac_pf)
+                       FM10K_WRITE_REG(hw, FM10K_TX_SGLORT(i), hw->mac.dglort_map);
        }
 
        /* set up vector or scalar TX function as appropriate */
@@ -687,17 +712,37 @@ static int
 fm10k_dev_rx_init(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct fm10k_macvlan_filter_info *macvlan;
+       struct rte_pci_device *pdev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pdev->intr_handle;
        int i, ret;
        struct fm10k_rx_queue *rxq;
        uint64_t base_addr;
        uint32_t size;
        uint32_t rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
+       uint32_t logic_port = hw->mac.dglort_map;
        uint16_t buf_size;
-
-       /* Disable RXINT to avoid possible interrupt */
-       for (i = 0; i < hw->mac.max_queues; i++)
+       uint16_t queue_stride = 0;
+
+       /* enable RXINT for interrupt mode */
+       i = 0;
+       if (rte_intr_dp_is_en(intr_handle)) {
+               for (; i < dev->data->nb_rx_queues; i++) {
+                       FM10K_WRITE_REG(hw, FM10K_RXINT(i), Q2V(pdev, i));
+                       if (hw->mac.type == fm10k_mac_pf)
+                               FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, i)),
+                                       FM10K_ITR_AUTOMASK |
+                                       FM10K_ITR_MASK_CLEAR);
+                       else
+                               FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, i)),
+                                       FM10K_ITR_AUTOMASK |
+                                       FM10K_ITR_MASK_CLEAR);
+               }
+       }
+       /* Disable other RXINT to avoid possible interrupt */
+       for (; i < hw->mac.max_queues; i++)
                FM10K_WRITE_REG(hw, FM10K_RXINT(i),
-                               3 << FM10K_RXINT_TIMER_SHIFT);
+                       3 << FM10K_RXINT_TIMER_SHIFT);
 
        /* Setup RX queues */
        for (i = 0; i < dev->data->nb_rx_queues; ++i) {
@@ -732,7 +777,8 @@ fm10k_dev_rx_init(struct rte_eth_dev *dev)
                buf_size -= FM10K_RX_DATABUF_ALIGN;
 
                FM10K_WRITE_REG(hw, FM10K_SRRCTL(i),
-                               buf_size >> FM10K_SRRCTL_BSIZEPKT_SHIFT);
+                               (buf_size >> FM10K_SRRCTL_BSIZEPKT_SHIFT) |
+                               FM10K_SRRCTL_LOOPBACK_SUPPRESS);
 
                /* It adds dual VLAN length for supporting dual VLAN */
                if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
@@ -759,6 +805,18 @@ fm10k_dev_rx_init(struct rte_eth_dev *dev)
        /* Decide the best RX function */
        fm10k_set_rx_function(dev);
 
+       /* update RX_SGLORT for loopback suppression */
+       if (hw->mac.type != fm10k_mac_pf)
+               return 0;
+       macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
+       if (macvlan->nb_queue_pools)
+               queue_stride = dev->data->nb_rx_queues / macvlan->nb_queue_pools;
+       for (i = 0; i < dev->data->nb_rx_queues; ++i) {
+               if (i && queue_stride && !(i % queue_stride))
+                       logic_port++;
+               FM10K_WRITE_REG(hw, FM10K_RX_SGLORT(i), logic_port);
+       }
+
        return 0;
 }
 
@@ -895,7 +953,7 @@ fm10k_dev_promiscuous_enable(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        /* Return if it didn't acquire valid glort range */
-       if (!fm10k_glort_valid(hw))
+       if ((hw->mac.type == fm10k_mac_pf) && !fm10k_glort_valid(hw))
                return;
 
        fm10k_mbx_lock(hw);
@@ -917,7 +975,7 @@ fm10k_dev_promiscuous_disable(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        /* Return if it didn't acquire valid glort range */
-       if (!fm10k_glort_valid(hw))
+       if ((hw->mac.type == fm10k_mac_pf) && !fm10k_glort_valid(hw))
                return;
 
        if (dev->data->all_multicast == 1)
@@ -943,7 +1001,7 @@ fm10k_dev_allmulticast_enable(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        /* Return if it didn't acquire valid glort range */
-       if (!fm10k_glort_valid(hw))
+       if ((hw->mac.type == fm10k_mac_pf) && !fm10k_glort_valid(hw))
                return;
 
        /* If promiscuous mode is enabled, it doesn't make sense to enable
@@ -974,7 +1032,7 @@ fm10k_dev_allmulticast_disable(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        /* Return if it didn't acquire valid glort range */
-       if (!fm10k_glort_valid(hw))
+       if ((hw->mac.type == fm10k_mac_pf) && !fm10k_glort_valid(hw))
                return;
 
        if (dev->data->promiscuous) {
@@ -997,7 +1055,7 @@ static void
 fm10k_dev_dglort_map_configure(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       uint32_t dglortdec, pool_len, rss_len, i;
+       uint32_t dglortdec, pool_len, rss_len, i, dglortmask;
        uint16_t nb_queue_pools;
        struct fm10k_macvlan_filter_info *macvlan;
 
@@ -1005,16 +1063,24 @@ fm10k_dev_dglort_map_configure(struct rte_eth_dev *dev)
        nb_queue_pools = macvlan->nb_queue_pools;
        pool_len = nb_queue_pools ? fls(nb_queue_pools - 1) : 0;
        rss_len = fls(dev->data->nb_rx_queues - 1) - pool_len;
-       dglortdec = (rss_len << FM10K_DGLORTDEC_RSSLENGTH_SHIFT) | pool_len;
-
-       /* Establish only MAP 0 as valid */
-       FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(0), FM10K_DGLORTMAP_ANY);
 
+       /* GLORT 0x0-0x3F are used by PF and VMDQ, 0x40-0x7F are used by FD */
+       dglortdec = (rss_len << FM10K_DGLORTDEC_RSSLENGTH_SHIFT) | pool_len;
+       dglortmask = (GLORT_PF_MASK << FM10K_DGLORTMAP_MASK_SHIFT) |
+                       hw->mac.dglort_map;
+       FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(0), dglortmask);
        /* Configure VMDQ/RSS DGlort Decoder */
        FM10K_WRITE_REG(hw, FM10K_DGLORTDEC(0), dglortdec);
 
+       /* Flow Director configuration: only the queue number is valid. */
+       dglortdec = fls(dev->data->nb_rx_queues - 1);
+       dglortmask = (GLORT_FD_MASK << FM10K_DGLORTMAP_MASK_SHIFT) |
+                       (hw->mac.dglort_map + GLORT_FD_Q_BASE);
+       FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(1), dglortmask);
+       FM10K_WRITE_REG(hw, FM10K_DGLORTDEC(1), dglortdec);
+
        /* Invalidate all other GLORT entries */
-       for (i = 1; i < FM10K_DGLORT_COUNT; i++)
+       for (i = 2; i < FM10K_DGLORT_COUNT; i++)
                FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(i),
                                FM10K_DGLORTMAP_NONE);
 }
@@ -1053,6 +1119,9 @@ fm10k_dev_start(struct rte_eth_dev *dev)
                return diag;
        }
 
+       if (fm10k_dev_rxq_interrupt_setup(dev))
+               return -EIO;
+
        diag = fm10k_dev_rx_init(dev);
        if (diag) {
                PMD_INIT_LOG(ERR, "RX init failed: %d", diag);
@@ -1098,12 +1167,17 @@ fm10k_dev_start(struct rte_eth_dev *dev)
        if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_VMDQ_FLAG))
                fm10k_vlan_filter_set(dev, hw->mac.default_vid, true);
 
+       fm10k_link_update(dev, 0);
+
        return 0;
 }
 
 static void
 fm10k_dev_stop(struct rte_eth_dev *dev)
 {
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pdev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pdev->intr_handle;
        int i;
 
        PMD_INIT_FUNC_TRACE();
@@ -1115,6 +1189,24 @@ fm10k_dev_stop(struct rte_eth_dev *dev)
        if (dev->data->rx_queues)
                for (i = 0; i < dev->data->nb_rx_queues; i++)
                        fm10k_dev_rx_queue_stop(dev, i);
+
+       /* Disable datapath event */
+       if (rte_intr_dp_is_en(intr_handle)) {
+               for (i = 0; i < dev->data->nb_rx_queues; i++) {
+                       FM10K_WRITE_REG(hw, FM10K_RXINT(i),
+                               3 << FM10K_RXINT_TIMER_SHIFT);
+                       if (hw->mac.type == fm10k_mac_pf)
+                               FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, i)),
+                                       FM10K_ITR_MASK_SET);
+                       else
+                               FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, i)),
+                                       FM10K_ITR_MASK_SET);
+               }
+       }
+       /* Clean datapath event and queue/vec mapping */
+       rte_intr_efd_disable(intr_handle);
+       rte_free(intr_handle->intr_vec);
+       intr_handle->intr_vec = NULL;
 }
 
 static void
@@ -1142,18 +1234,17 @@ static void
 fm10k_dev_close(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       uint16_t nb_lport;
-       struct fm10k_macvlan_filter_info *macvlan;
 
        PMD_INIT_FUNC_TRACE();
 
-       macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
-       nb_lport = macvlan->nb_queue_pools ? macvlan->nb_queue_pools : 1;
        fm10k_mbx_lock(hw);
        hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map,
-               nb_lport, false);
+               MAX_LPORT_NUM, false);
        fm10k_mbx_unlock(hw);
 
+       /* allow 10ms for device to quiesce */
+       rte_delay_us(FM10K_SWITCH_QUIESCE_US);
+
        /* Stop mailbox service first */
        fm10k_close_mbx_service(hw);
        fm10k_dev_stop(dev);
@@ -1165,20 +1256,61 @@ static int
 fm10k_link_update(struct rte_eth_dev *dev,
        __rte_unused int wait_to_complete)
 {
+       struct fm10k_dev_info *dev_info =
+               FM10K_DEV_PRIVATE_TO_INFO(dev->data->dev_private);
        PMD_INIT_FUNC_TRACE();
 
-       /* The host-interface link is always up.  The speed is ~50Gbps per Gen3
-        * x8 PCIe interface. For now, we leave the speed undefined since there
-        * is no 50Gbps Ethernet. */
+       /* The speed is ~50Gbps per Gen3 x8 PCIe interface. For now, we
+        * leave the speed undefined since there is no 50Gbps Ethernet.
+        */
        dev->data->dev_link.link_speed  = 0;
        dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
-       dev->data->dev_link.link_status = 1;
+       dev->data->dev_link.link_status =
+               dev_info->sm_down ? ETH_LINK_DOWN : ETH_LINK_UP;
 
        return 0;
 }
 
+static int fm10k_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
+       struct rte_eth_xstat_name *xstats_names, __rte_unused unsigned limit)
+{
+       unsigned i, q;
+       unsigned count = 0;
+
+       if (xstats_names != NULL) {
+               /* Note: limit checked in rte_eth_xstats_get_names() */
+
+               /* Global stats */
+               for (i = 0; i < FM10K_NB_HW_XSTATS; i++) {
+                       snprintf(xstats_names[count].name,
+                               sizeof(xstats_names[count].name),
+                               "%s", fm10k_hw_stats_strings[count].name);
+                       count++;
+               }
+
+               /* PF queue stats */
+               for (q = 0; q < FM10K_MAX_QUEUES_PF; q++) {
+                       for (i = 0; i < FM10K_NB_RX_Q_XSTATS; i++) {
+                               snprintf(xstats_names[count].name,
+                                       sizeof(xstats_names[count].name),
+                                       "rx_q%u_%s", q,
+                                       fm10k_hw_stats_rx_q_strings[i].name);
+                               count++;
+                       }
+                       for (i = 0; i < FM10K_NB_TX_Q_XSTATS; i++) {
+                               snprintf(xstats_names[count].name,
+                                       sizeof(xstats_names[count].name),
+                                       "tx_q%u_%s", q,
+                                       fm10k_hw_stats_tx_q_strings[i].name);
+                               count++;
+                       }
+               }
+       }
+       return FM10K_NB_XSTATS;
+}
+
 static int
-fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstats *xstats,
+fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                 unsigned n)
 {
        struct fm10k_hw_stats *hw_stats =
@@ -1190,31 +1322,26 @@ fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstats *xstats,
 
        /* Global stats */
        for (i = 0; i < FM10K_NB_HW_XSTATS; i++) {
-               snprintf(xstats[count].name, sizeof(xstats[count].name),
-                        "%s", fm10k_hw_stats_strings[count].name);
                xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
                        fm10k_hw_stats_strings[count].offset);
+               xstats[count].id = count;
                count++;
        }
 
        /* PF queue stats */
        for (q = 0; q < FM10K_MAX_QUEUES_PF; q++) {
                for (i = 0; i < FM10K_NB_RX_Q_XSTATS; i++) {
-                       snprintf(xstats[count].name, sizeof(xstats[count].name),
-                                "rx_q%u_%s", q,
-                                fm10k_hw_stats_rx_q_strings[i].name);
                        xstats[count].value =
                                *(uint64_t *)(((char *)&hw_stats->q[q]) +
                                fm10k_hw_stats_rx_q_strings[i].offset);
+                       xstats[count].id = count;
                        count++;
                }
                for (i = 0; i < FM10K_NB_TX_Q_XSTATS; i++) {
-                       snprintf(xstats[count].name, sizeof(xstats[count].name),
-                                "tx_q%u_%s", q,
-                                fm10k_hw_stats_tx_q_strings[i].name);
                        xstats[count].value =
                                *(uint64_t *)(((char *)&hw_stats->q[q]) +
                                fm10k_hw_stats_tx_q_strings[i].offset);
+                       xstats[count].id = count;
                        count++;
                }
        }
@@ -1222,7 +1349,7 @@ fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstats *xstats,
        return FM10K_NB_XSTATS;
 }
 
-static void
+static int
 fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
        uint64_t ipackets, opackets, ibytes, obytes;
@@ -1252,6 +1379,7 @@ fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
        stats->opackets = opackets;
        stats->ibytes = ibytes;
        stats->obytes = obytes;
+       return 0;
 }
 
 static void
@@ -1272,16 +1400,18 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
        struct rte_eth_dev_info *dev_info)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pdev = RTE_ETH_DEV_TO_PCI(dev);
 
        PMD_INIT_FUNC_TRACE();
 
+       dev_info->pci_dev            = pdev;
        dev_info->min_rx_bufsize     = FM10K_MIN_RX_BUF_SIZE;
        dev_info->max_rx_pktlen      = FM10K_MAX_PKT_SIZE;
        dev_info->max_rx_queues      = hw->mac.max_queues;
        dev_info->max_tx_queues      = hw->mac.max_queues;
        dev_info->max_mac_addrs      = FM10K_MAX_MACADDR_NUM;
        dev_info->max_hash_mac_addrs = 0;
-       dev_info->max_vfs            = dev->pci_dev->max_vfs;
+       dev_info->max_vfs            = pdev->max_vfs;
        dev_info->vmdq_pool_base     = 0;
        dev_info->vmdq_queue_base    = 0;
        dev_info->max_vmdq_pools     = ETH_32_POOLS;
@@ -1332,8 +1462,63 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
                .nb_max = FM10K_MAX_TX_DESC,
                .nb_min = FM10K_MIN_TX_DESC,
                .nb_align = FM10K_MULT_TX_DESC,
+               .nb_seg_max = FM10K_TX_MAX_SEG,
+               .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
        };
+
+       dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
+                       ETH_LINK_SPEED_10G | ETH_LINK_SPEED_25G |
+                       ETH_LINK_SPEED_40G | ETH_LINK_SPEED_100G;
+}
+
+#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
+static const uint32_t *
+fm10k_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+       if (dev->rx_pkt_burst == fm10k_recv_pkts ||
+           dev->rx_pkt_burst == fm10k_recv_scattered_pkts) {
+               static uint32_t ptypes[] = {
+                       /* refers to rx_desc_to_ol_flags() */
+                       RTE_PTYPE_L2_ETHER,
+                       RTE_PTYPE_L3_IPV4,
+                       RTE_PTYPE_L3_IPV4_EXT,
+                       RTE_PTYPE_L3_IPV6,
+                       RTE_PTYPE_L3_IPV6_EXT,
+                       RTE_PTYPE_L4_TCP,
+                       RTE_PTYPE_L4_UDP,
+                       RTE_PTYPE_UNKNOWN
+               };
+
+               return ptypes;
+       } else if (dev->rx_pkt_burst == fm10k_recv_pkts_vec ||
+                  dev->rx_pkt_burst == fm10k_recv_scattered_pkts_vec) {
+               static uint32_t ptypes_vec[] = {
+                       /* refers to fm10k_desc_to_pktype_v() */
+                       RTE_PTYPE_L3_IPV4,
+                       RTE_PTYPE_L3_IPV4_EXT,
+                       RTE_PTYPE_L3_IPV6,
+                       RTE_PTYPE_L3_IPV6_EXT,
+                       RTE_PTYPE_L4_TCP,
+                       RTE_PTYPE_L4_UDP,
+                       RTE_PTYPE_TUNNEL_GENEVE,
+                       RTE_PTYPE_TUNNEL_NVGRE,
+                       RTE_PTYPE_TUNNEL_VXLAN,
+                       RTE_PTYPE_TUNNEL_GRE,
+                       RTE_PTYPE_UNKNOWN
+               };
+
+               return ptypes_vec;
+       }
+
+       return NULL;
+}
+#else
+static const uint32_t *
+fm10k_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
+{
+       return NULL;
 }
+#endif
 
 static int
 fm10k_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
@@ -1409,8 +1594,8 @@ fm10k_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
        return 0;
 }
 
-static void
-fm10k_vlan_offload_set(__rte_unused struct rte_eth_dev *dev, int mask)
+static int
+fm10k_vlan_offload_set(struct rte_eth_dev *dev, int mask)
 {
        if (mask & ETH_VLAN_STRIP_MASK) {
                if (!dev->data->dev_conf.rxmode.hw_vlan_strip)
@@ -1428,6 +1613,8 @@ fm10k_vlan_offload_set(__rte_unused struct rte_eth_dev *dev, int mask)
                if (!dev->data->dev_conf.rxmode.hw_vlan_filter)
                        PMD_INIT_LOG(ERR, "VLAN filter is always on in fm10k");
        }
+
+       return 0;
 }
 
 /* Add/Remove a MAC address, and update filters to main VSI */
@@ -1512,7 +1699,7 @@ static void fm10k_MAC_filter_set(struct rte_eth_dev *dev,
 }
 
 /* Add a MAC address, and update filters */
-static void
+static int
 fm10k_macaddr_add(struct rte_eth_dev *dev,
                struct ether_addr *mac_addr,
                uint32_t index,
@@ -1523,6 +1710,7 @@ fm10k_macaddr_add(struct rte_eth_dev *dev,
        macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
        fm10k_MAC_filter_set(dev, mac_addr->addr_bytes, TRUE, pool);
        macvlan->mac_vmdq_id[index] = pool;
+       return 0;
 }
 
 /* Remove a MAC address, and update filters */
@@ -1626,7 +1814,8 @@ fm10k_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
        const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct fm10k_dev_info *dev_info = FM10K_DEV_PRIVATE_TO_INFO(dev);
+       struct fm10k_dev_info *dev_info =
+               FM10K_DEV_PRIVATE_TO_INFO(dev->data->dev_private);
        struct fm10k_rx_queue *q;
        const struct rte_memzone *mz;
 
@@ -1704,7 +1893,7 @@ fm10k_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
                return -ENOMEM;
        }
        q->hw_ring = mz->addr;
-       q->hw_ring_phys_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
+       q->hw_ring_phys_addr = mz->phys_addr;
 
        /* Check if number of descs satisfied Vector requirement */
        if (!rte_is_power_of_2(nb_desc)) {
@@ -1864,7 +2053,7 @@ fm10k_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
                return -ENOMEM;
        }
        q->hw_ring = mz->addr;
-       q->hw_ring_phys_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
+       q->hw_ring_phys_addr = mz->phys_addr;
 
        /*
         * allocate memory for the RS bit tracker. Enough slots to hold the
@@ -1997,8 +2186,8 @@ fm10k_rss_hash_update(struct rte_eth_dev *dev,
 
        PMD_INIT_FUNC_TRACE();
 
-       if (rss_conf->rss_key_len < FM10K_RSSRK_SIZE *
-               FM10K_RSSRK_ENTRIES_PER_REG)
+       if (key && (rss_conf->rss_key_len < FM10K_RSSRK_SIZE *
+                               FM10K_RSSRK_ENTRIES_PER_REG))
                return -EINVAL;
 
        if (hf == 0)
@@ -2040,8 +2229,8 @@ fm10k_rss_hash_conf_get(struct rte_eth_dev *dev,
 
        PMD_INIT_FUNC_TRACE();
 
-       if (rss_conf->rss_key_len < FM10K_RSSRK_SIZE *
-                               FM10K_RSSRK_ENTRIES_PER_REG)
+       if (key && (rss_conf->rss_key_len < FM10K_RSSRK_SIZE *
+                               FM10K_RSSRK_ENTRIES_PER_REG))
                return -EINVAL;
 
        if (key != NULL)
@@ -2072,14 +2261,14 @@ fm10k_dev_enable_intr_pf(struct rte_eth_dev *dev)
        uint32_t int_map = FM10K_INT_MAP_IMMEDIATE;
 
        /* Bind all local non-queue interrupt to vector 0 */
-       int_map |= 0;
+       int_map |= FM10K_MISC_VEC_ID;
 
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_Mailbox), int_map);
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_PCIeFault), int_map);
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SwitchUpDown), int_map);
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SwitchEvent), int_map);
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SRAM), int_map);
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_VFLR), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_mailbox), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_pcie_fault), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_switch_up_down), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_switch_event), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_sram), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_vflr), int_map);
 
        /* Enable misc causes */
        FM10K_WRITE_REG(hw, FM10K_EIMR, FM10K_EIMR_ENABLE(PCA_FAULT) |
@@ -2103,14 +2292,14 @@ fm10k_dev_disable_intr_pf(struct rte_eth_dev *dev)
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t int_map = FM10K_INT_MAP_DISABLE;
 
-       int_map |= 0;
+       int_map |= FM10K_MISC_VEC_ID;
 
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_Mailbox), int_map);
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_PCIeFault), int_map);
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SwitchUpDown), int_map);
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SwitchEvent), int_map);
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SRAM), int_map);
-       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_VFLR), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_mailbox), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_pcie_fault), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_switch_up_down), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_switch_event), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_sram), int_map);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_vflr), int_map);
 
        /* Disable misc causes */
        FM10K_WRITE_REG(hw, FM10K_EIMR, FM10K_EIMR_DISABLE(PCA_FAULT) |
@@ -2134,7 +2323,7 @@ fm10k_dev_enable_intr_vf(struct rte_eth_dev *dev)
        uint32_t int_map = FM10K_INT_MAP_IMMEDIATE;
 
        /* Bind all local non-queue interrupt to vector 0 */
-       int_map |= 0;
+       int_map |= FM10K_MISC_VEC_ID;
 
        /* Only INT 0 available, other 15 are reserved. */
        FM10K_WRITE_REG(hw, FM10K_VFINT_MAP, int_map);
@@ -2151,7 +2340,7 @@ fm10k_dev_disable_intr_vf(struct rte_eth_dev *dev)
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t int_map = FM10K_INT_MAP_DISABLE;
 
-       int_map |= 0;
+       int_map |= FM10K_MISC_VEC_ID;
 
        /* Only INT 0 available, other 15 are reserved. */
        FM10K_WRITE_REG(hw, FM10K_VFINT_MAP, int_map);
@@ -2161,6 +2350,100 @@ fm10k_dev_disable_intr_vf(struct rte_eth_dev *dev)
        FM10K_WRITE_FLUSH(hw);
 }
 
+/*
+ * Unmask the Rx interrupt of a single queue.
+ *
+ * The queue is translated to its interrupt vector through Q2V(), the ITR
+ * register of that vector (FM10K_ITR on a PF, FM10K_VFITR otherwise) is
+ * written with AUTOMASK | MASK_CLEAR, and the interrupt handle of the PCI
+ * device is enabled again afterwards.
+ *
+ * Always returns 0.
+ */
+static int
+fm10k_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pdev = RTE_ETH_DEV_TO_PCI(dev);
+       uint32_t itr_reg;
+
+       /* PF and VF address the per-vector ITR through different macros */
+       itr_reg = (hw->mac.type == fm10k_mac_pf) ?
+               FM10K_ITR(Q2V(pdev, queue_id)) :
+               FM10K_VFITR(Q2V(pdev, queue_id));
+
+       /* Enable ITR */
+       FM10K_WRITE_REG(hw, itr_reg,
+               FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR);
+       rte_intr_enable(&pdev->intr_handle);
+
+       return 0;
+}
+
+/*
+ * Mask the Rx interrupt of a single queue.
+ *
+ * Writes MASK_SET to the ITR register of the vector the queue maps to
+ * (FM10K_ITR on a PF, FM10K_VFITR otherwise).
+ *
+ * Always returns 0.
+ */
+static int
+fm10k_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pdev = RTE_ETH_DEV_TO_PCI(dev);
+       uint32_t itr_reg;
+
+       /* PF and VF address the per-vector ITR through different macros */
+       itr_reg = (hw->mac.type == fm10k_mac_pf) ?
+               FM10K_ITR(Q2V(pdev, queue_id)) :
+               FM10K_VFITR(Q2V(pdev, queue_id));
+
+       /* Disable ITR */
+       FM10K_WRITE_REG(hw, itr_reg, FM10K_ITR_MASK_SET);
+
+       return 0;
+}
+
+/*
+ * Set up per-queue Rx interrupts for the port.
+ *
+ * Nothing is done (and 0 is returned) unless the interrupt handle supports
+ * multiple vectors and the application requested Rx queue interrupts via
+ * dev_conf.intr_conf.rxq.
+ *
+ * Otherwise all interrupts are disabled, the event fds are created, the
+ * queue-to-vector table is allocated and filled, and interrupts are enabled
+ * again. The re-enable step runs on the error paths as well.
+ *
+ * Returns 0 on success, -EIO when the event fds cannot be initialised and
+ * -ENOMEM when the queue-to-vector table cannot be allocated.
+ */
+static int
+fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev)
+{
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pdev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *handle = &pdev->intr_handle;
+       uint32_t nb_vec, vec;
+       uint16_t q;
+       int ret = 0;
+
+       /*
+        * The mailbox needs an interrupt of its own on fm10k, so Rx
+        * interrupt mode only works with drivers that provide multiple
+        * interrupt vectors, e.g. vfio-pci.
+        */
+       if (!rte_intr_cap_multiple(handle) ||
+                       dev->data->dev_conf.intr_conf.rxq == 0)
+               return 0;
+
+       nb_vec = dev->data->nb_rx_queues;
+
+       /* disable interrupt first */
+       rte_intr_disable(handle);
+       if (hw->mac.type == fm10k_mac_pf)
+               fm10k_dev_disable_intr_pf(dev);
+       else
+               fm10k_dev_disable_intr_vf(dev);
+
+       if (rte_intr_efd_enable(handle, nb_vec)) {
+               PMD_INIT_LOG(ERR, "Failed to init event fd");
+               ret = -EIO;
+       }
+
+       if (rte_intr_dp_is_en(handle) && ret == 0) {
+               handle->intr_vec = rte_zmalloc("intr_vec",
+                       dev->data->nb_rx_queues * sizeof(int), 0);
+               if (handle->intr_vec == NULL) {
+                       PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
+                               " intr_vec", dev->data->nb_rx_queues);
+                       rte_intr_efd_disable(handle);
+                       ret = -ENOMEM;
+               } else {
+                       /*
+                        * Hand out vectors from FM10K_RX_VEC_START upwards;
+                        * once the last event fd vector has been reached
+                        * the remaining queues share it.
+                        */
+                       vec = FM10K_RX_VEC_START;
+                       for (q = 0; q < dev->data->nb_rx_queues; q++) {
+                               handle->intr_vec[q] = vec;
+                               if (vec < handle->nb_efd - 1
+                                               + FM10K_RX_VEC_START)
+                                       vec++;
+                       }
+               }
+       }
+
+       /* bring the interrupts back, whatever the outcome above */
+       if (hw->mac.type == fm10k_mac_pf)
+               fm10k_dev_enable_intr_pf(dev);
+       else
+               fm10k_dev_enable_intr_vf(dev);
+       rte_intr_enable(handle);
+       hw->mac.ops.update_int_moderator(hw);
+
+       return ret;
+}
+
 static int
 fm10k_dev_handle_fault(struct fm10k_hw *hw, uint32_t eicr)
 {
@@ -2270,13 +2553,15 @@ error:
  *  void
  */
 static void
-fm10k_dev_interrupt_handler_pf(
-                       __rte_unused struct rte_intr_handle *handle,
-                       void *param)
+fm10k_dev_interrupt_handler_pf(void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t cause, status;
+       struct fm10k_dev_info *dev_info =
+               FM10K_DEV_PRIVATE_TO_INFO(dev->data->dev_private);
+       int status_mbx;
+       s32 err;
 
        if (hw->mac.type != fm10k_mac_pf)
                return;
@@ -2293,14 +2578,69 @@ fm10k_dev_interrupt_handler_pf(
        if (cause & FM10K_EICR_SWITCHNOTREADY)
                PMD_INIT_LOG(ERR, "INT: Switch is not ready");
 
-       if (cause & FM10K_EICR_SWITCHREADY)
+       if (cause & FM10K_EICR_SWITCHREADY) {
                PMD_INIT_LOG(INFO, "INT: Switch is ready");
+               if (dev_info->sm_down == 1) {
+                       fm10k_mbx_lock(hw);
+
+                       /* For recreating logical ports */
+                       status_mbx = hw->mac.ops.update_lport_state(hw,
+                                       hw->mac.dglort_map, MAX_LPORT_NUM, 1);
+                       if (status_mbx == FM10K_SUCCESS)
+                               PMD_INIT_LOG(INFO,
+                                       "INT: Recreated Logical port");
+                       else
+                               PMD_INIT_LOG(INFO,
+                                       "INT: Logical ports weren't recreated");
+
+                       status_mbx = hw->mac.ops.update_xcast_mode(hw,
+                               hw->mac.dglort_map, FM10K_XCAST_MODE_NONE);
+                       if (status_mbx != FM10K_SUCCESS)
+                               PMD_INIT_LOG(ERR, "Failed to set XCAST mode");
+
+                       fm10k_mbx_unlock(hw);
+
+                       /* first clear the internal SW recording structure */
+                       if (!(dev->data->dev_conf.rxmode.mq_mode &
+                                               ETH_MQ_RX_VMDQ_FLAG))
+                               fm10k_vlan_filter_set(dev, hw->mac.default_vid,
+                                       false);
+
+                       fm10k_MAC_filter_set(dev, hw->mac.addr, false,
+                                       MAIN_VSI_POOL_NUMBER);
+
+                       /*
+                        * Add default mac address and vlan for the logical
+                        * ports that have been created, leave to the
+                        * application to fully recover Rx filtering.
+                        */
+                       fm10k_MAC_filter_set(dev, hw->mac.addr, true,
+                                       MAIN_VSI_POOL_NUMBER);
+
+                       if (!(dev->data->dev_conf.rxmode.mq_mode &
+                                               ETH_MQ_RX_VMDQ_FLAG))
+                               fm10k_vlan_filter_set(dev, hw->mac.default_vid,
+                                       true);
+
+                       dev_info->sm_down = 0;
+                       _rte_eth_dev_callback_process(dev,
+                                       RTE_ETH_EVENT_INTR_LSC,
+                                       NULL, NULL);
+               }
+       }
 
        /* Handle mailbox message */
        fm10k_mbx_lock(hw);
-       hw->mbx.ops.process(hw, &hw->mbx);
+       err = hw->mbx.ops.process(hw, &hw->mbx);
        fm10k_mbx_unlock(hw);
 
+       if (err == FM10K_ERR_RESET_REQUESTED) {
+               PMD_INIT_LOG(INFO, "INT: Switch is down");
+               dev_info->sm_down = 1;
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+                               NULL, NULL);
+       }
+
        /* Handle SRAM error */
        if (cause & FM10K_EICR_SRAMERROR) {
                PMD_INIT_LOG(ERR, "INT: SRAM error on PEP");
@@ -2322,7 +2662,7 @@ fm10k_dev_interrupt_handler_pf(
        FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_AUTOMASK |
                                        FM10K_ITR_MASK_CLEAR);
        /* Re-enable interrupt from host side */
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       rte_intr_enable(dev->intr_handle);
 }
 
 /**
@@ -2337,12 +2677,15 @@ fm10k_dev_interrupt_handler_pf(
  *  void
  */
 static void
-fm10k_dev_interrupt_handler_vf(
-                       __rte_unused struct rte_intr_handle *handle,
-                       void *param)
+fm10k_dev_interrupt_handler_vf(void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct fm10k_mbx_info *mbx = &hw->mbx;
+       struct fm10k_dev_info *dev_info =
+               FM10K_DEV_PRIVATE_TO_INFO(dev->data->dev_private);
+       const enum fm10k_mbx_state state = mbx->state;
+       int status_mbx;
 
        if (hw->mac.type != fm10k_mac_vf)
                return;
@@ -2352,11 +2695,54 @@ fm10k_dev_interrupt_handler_vf(
        hw->mbx.ops.process(hw, &hw->mbx);
        fm10k_mbx_unlock(hw);
 
+       if (state == FM10K_STATE_OPEN && mbx->state == FM10K_STATE_CONNECT) {
+               PMD_INIT_LOG(INFO, "INT: Switch has gone down");
+
+               fm10k_mbx_lock(hw);
+               hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map,
+                               MAX_LPORT_NUM, 1);
+               fm10k_mbx_unlock(hw);
+
+               /* Setting reset flag */
+               dev_info->sm_down = 1;
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+                               NULL, NULL);
+       }
+
+       if (dev_info->sm_down == 1 &&
+                       hw->mac.dglort_map == FM10K_DGLORTMAP_ZERO) {
+               PMD_INIT_LOG(INFO, "INT: Switch has gone up");
+               fm10k_mbx_lock(hw);
+               status_mbx = hw->mac.ops.update_xcast_mode(hw,
+                               hw->mac.dglort_map, FM10K_XCAST_MODE_NONE);
+               if (status_mbx != FM10K_SUCCESS)
+                       PMD_INIT_LOG(ERR, "Failed to set XCAST mode");
+               fm10k_mbx_unlock(hw);
+
+               /* first clear the internal SW recording structure */
+               fm10k_vlan_filter_set(dev, hw->mac.default_vid, false);
+               fm10k_MAC_filter_set(dev, hw->mac.addr, false,
+                               MAIN_VSI_POOL_NUMBER);
+
+               /*
+                * Add default mac address and vlan for the logical ports that
+                * have been created, leave to the application to fully recover
+                * Rx filtering.
+                */
+               fm10k_MAC_filter_set(dev, hw->mac.addr, true,
+                               MAIN_VSI_POOL_NUMBER);
+               fm10k_vlan_filter_set(dev, hw->mac.default_vid, true);
+
+               dev_info->sm_down = 0;
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+                               NULL, NULL);
+       }
+
        /* Re-enable interrupt from device side */
        FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_AUTOMASK |
                                        FM10K_ITR_MASK_CLEAR);
        /* Re-enable interrupt from host side */
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       rte_intr_enable(dev->intr_handle);
 }
 
 /* Mailbox message handler in VF */
@@ -2367,29 +2753,16 @@ static const struct fm10k_msg_data fm10k_msgdata_vf[] = {
        FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
 };
 
-/* Mailbox message handler in PF */
-static const struct fm10k_msg_data fm10k_msgdata_pf[] = {
-       FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf),
-       FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf),
-       FM10K_PF_MSG_LPORT_MAP_HANDLER(fm10k_msg_lport_map_pf),
-       FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf),
-       FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf),
-       FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_msg_update_pvid_pf),
-       FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
-};
-
 static int
 fm10k_setup_mbx_service(struct fm10k_hw *hw)
 {
-       int err;
+       int err = 0;
 
        /* Initialize mailbox lock */
        fm10k_mbx_initlock(hw);
 
        /* Replace default message handler with new ones */
-       if (hw->mac.type == fm10k_mac_pf)
-               err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_pf);
-       else
+       if (hw->mac.type == fm10k_mac_vf)
                err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_vf);
 
        if (err) {
@@ -2419,10 +2792,12 @@ static const struct eth_dev_ops fm10k_eth_dev_ops = {
        .allmulticast_disable   = fm10k_dev_allmulticast_disable,
        .stats_get              = fm10k_stats_get,
        .xstats_get             = fm10k_xstats_get,
+       .xstats_get_names       = fm10k_xstats_get_names,
        .stats_reset            = fm10k_stats_reset,
        .xstats_reset           = fm10k_stats_reset,
        .link_update            = fm10k_link_update,
        .dev_infos_get          = fm10k_dev_infos_get,
+       .dev_supported_ptypes_get = fm10k_dev_supported_ptypes_get,
        .vlan_filter_set        = fm10k_vlan_filter_set,
        .vlan_offload_set       = fm10k_vlan_offload_set,
        .mac_addr_add           = fm10k_macaddr_add,
@@ -2435,26 +2810,106 @@ static const struct eth_dev_ops fm10k_eth_dev_ops = {
        .rx_queue_release       = fm10k_rx_queue_release,
        .tx_queue_setup         = fm10k_tx_queue_setup,
        .tx_queue_release       = fm10k_tx_queue_release,
+       .rx_descriptor_done     = fm10k_dev_rx_descriptor_done,
+       .rx_queue_intr_enable   = fm10k_dev_rx_queue_intr_enable,
+       .rx_queue_intr_disable  = fm10k_dev_rx_queue_intr_disable,
        .reta_update            = fm10k_reta_update,
        .reta_query             = fm10k_reta_query,
        .rss_hash_update        = fm10k_rss_hash_update,
        .rss_hash_conf_get      = fm10k_rss_hash_conf_get,
 };
 
+/*
+ * kvargs callback that validates the value of one key-value pair.
+ *
+ * key:    unused.
+ * value:  value string of the pair; may be NULL when the key was given
+ *         without a value on kvargs implementations that accept key-only
+ *         arguments.
+ * opaque: unused.
+ *
+ * Returns 0 only when the value is exactly "1", -1 otherwise (including a
+ * missing value).
+ */
+static int ftag_check_handler(__rte_unused const char *key,
+               const char *value, __rte_unused void *opaque)
+{
+       /* a missing value must never reach strcmp() */
+       if (value == NULL || strcmp(value, "1") != 0)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Tell whether FTAG was requested through the device arguments.
+ *
+ * Returns 1 when the "enable_ftag" key is present and ftag_check_handler()
+ * accepts it, 0 in every other case (no devargs, unparsable arguments, key
+ * absent, or value rejected).
+ */
+static int
+fm10k_check_ftag(struct rte_devargs *devargs)
+{
+       const char *ftag_key = "enable_ftag";
+       struct rte_kvargs *kvlist;
+       int enabled = 0;
+
+       if (devargs == NULL)
+               return 0;
+
+       kvlist = rte_kvargs_parse(devargs->args, NULL);
+       if (kvlist == NULL)
+               return 0;
+
+       /* FTAG is enabled when there's key-value pair: enable_ftag=1 */
+       if (rte_kvargs_count(kvlist, ftag_key) != 0 &&
+                       rte_kvargs_process(kvlist, ftag_key,
+                               ftag_check_handler, NULL) >= 0)
+               enabled = 1;
+
+       /* single release point for the parsed list */
+       rte_kvargs_free(kvlist);
+
+       return enabled;
+}
+
+/*
+ * Vector Tx burst entry point.
+ *
+ * Splits the request into chunks of at most txq->rs_thresh packets and
+ * passes each chunk to fm10k_xmit_fixed_burst_vec(). Transmission stops as
+ * soon as a chunk is only partially accepted.
+ *
+ * Returns the number of packets handed over successfully.
+ */
+static uint16_t
+fm10k_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
+                   uint16_t nb_pkts)
+{
+       struct fm10k_tx_queue *txq = (struct fm10k_tx_queue *)tx_queue;
+       uint16_t sent = 0;
+       uint16_t left = nb_pkts;
+
+       while (left != 0) {
+               uint16_t chunk, done;
+
+               chunk = (uint16_t)RTE_MIN(left, txq->rs_thresh);
+               done = fm10k_xmit_fixed_burst_vec(tx_queue, tx_pkts + sent,
+                                                 chunk);
+               sent += done;
+               left -= done;
+
+               /* a short chunk means no more packets are accepted now */
+               if (done < chunk)
+                       break;
+       }
+
+       return sent;
+}
+
 static void __attribute__((cold))
 fm10k_set_tx_function(struct rte_eth_dev *dev)
 {
        struct fm10k_tx_queue *txq;
        int i;
        int use_sse = 1;
+       uint16_t tx_ftag_en = 0;
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               /* primary process has set the ftag flag and txq_flags */
+               txq = dev->data->tx_queues[0];
+               if (fm10k_tx_vec_condition_check(txq)) {
+                       dev->tx_pkt_burst = fm10k_xmit_pkts;
+                       dev->tx_pkt_prepare = fm10k_prep_pkts;
+                       PMD_INIT_LOG(DEBUG, "Use regular Tx func");
+               } else {
+                       PMD_INIT_LOG(DEBUG, "Use vector Tx func");
+                       dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+                       dev->tx_pkt_prepare = NULL;
+               }
+               return;
+       }
+
+       if (fm10k_check_ftag(dev->device->devargs))
+               tx_ftag_en = 1;
 
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txq = dev->data->tx_queues[i];
+               txq->tx_ftag_en = tx_ftag_en;
                /* Check if Vector Tx is satisfied */
-               if (fm10k_tx_vec_condition_check(txq)) {
+               if (fm10k_tx_vec_condition_check(txq))
                        use_sse = 0;
-                       break;
-               }
        }
 
        if (use_sse) {
@@ -2464,8 +2919,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
                        fm10k_txq_vec_setup(txq);
                }
                dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+               dev->tx_pkt_prepare = NULL;
        } else {
                dev->tx_pkt_burst = fm10k_xmit_pkts;
+               dev->tx_pkt_prepare = fm10k_prep_pkts;
                PMD_INIT_LOG(DEBUG, "Use regular Tx func");
        }
 }
@@ -2473,13 +2930,19 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
 static void __attribute__((cold))
 fm10k_set_rx_function(struct rte_eth_dev *dev)
 {
-       struct fm10k_dev_info *dev_info = FM10K_DEV_PRIVATE_TO_INFO(dev);
+       struct fm10k_dev_info *dev_info =
+               FM10K_DEV_PRIVATE_TO_INFO(dev->data->dev_private);
        uint16_t i, rx_using_sse;
+       uint16_t rx_ftag_en = 0;
+
+       if (fm10k_check_ftag(dev->device->devargs))
+               rx_ftag_en = 1;
 
        /* In order to allow Vector Rx there are a few configuration
         * conditions to be met.
         */
-       if (!fm10k_rx_vec_condition_check(dev) && dev_info->rx_vec_allowed) {
+       if (!fm10k_rx_vec_condition_check(dev) &&
+                       dev_info->rx_vec_allowed && !rx_ftag_en) {
                if (dev->data->scattered_rx)
                        dev->rx_pkt_burst = fm10k_recv_scattered_pkts_vec;
                else
@@ -2498,10 +2961,14 @@ fm10k_set_rx_function(struct rte_eth_dev *dev)
        else
                PMD_INIT_LOG(DEBUG, "Use regular Rx func");
 
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return;
+
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct fm10k_rx_queue *rxq = dev->data->rx_queues[i];
 
                rxq->rx_using_sse = rx_using_sse;
+               rxq->rx_ftag_en = rx_ftag_en;
        }
 }
 
@@ -2509,7 +2976,8 @@ static void
 fm10k_params_init(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct fm10k_dev_info *info = FM10K_DEV_PRIVATE_TO_INFO(dev);
+       struct fm10k_dev_info *info =
+               FM10K_DEV_PRIVATE_TO_INFO(dev->data->dev_private);
 
        /* Inialize bus info. Normally we would call fm10k_get_bus_info(), but
         * there is no way to get link status without reading BAR4.  Until this
@@ -2530,7 +2998,9 @@ static int
 eth_fm10k_dev_init(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       int diag;
+       struct rte_pci_device *pdev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pdev->intr_handle;
+       int diag, i;
        struct fm10k_macvlan_filter_info *macvlan;
 
        PMD_INIT_FUNC_TRACE();
@@ -2538,23 +3008,30 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
        dev->dev_ops = &fm10k_eth_dev_ops;
        dev->rx_pkt_burst = &fm10k_recv_pkts;
        dev->tx_pkt_burst = &fm10k_xmit_pkts;
+       dev->tx_pkt_prepare = &fm10k_prep_pkts;
 
-       /* only initialize in the primary process */
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+       /*
+        * Primary process does the whole initialization, for secondary
+        * processes, we just select the same Rx and Tx function as primary.
+        */
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               fm10k_set_rx_function(dev);
+               fm10k_set_tx_function(dev);
                return 0;
+       }
 
-       rte_eth_copy_pci_info(dev, dev->pci_dev);
+       rte_eth_copy_pci_info(dev, pdev);
 
        macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
        memset(macvlan, 0, sizeof(*macvlan));
        /* Vendor and Device ID need to be set before init of shared code */
        memset(hw, 0, sizeof(*hw));
-       hw->device_id = dev->pci_dev->id.device_id;
-       hw->vendor_id = dev->pci_dev->id.vendor_id;
-       hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id;
-       hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id;
+       hw->device_id = pdev->id.device_id;
+       hw->vendor_id = pdev->id.vendor_id;
+       hw->subsystem_device_id = pdev->id.subsystem_device_id;
+       hw->subsystem_vendor_id = pdev->id.subsystem_vendor_id;
        hw->revision_id = 0;
-       hw->hw_addr = (void *)dev->pci_dev->mem_resource[0].addr;
+       hw->hw_addr = (void *)pdev->mem_resource[0].addr;
        if (hw->hw_addr == NULL) {
                PMD_INIT_LOG(ERR, "Bad mem resource."
                        " Try to blacklist unused devices.");
@@ -2624,27 +3101,26 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
        /*PF/VF has different interrupt handling mechanism */
        if (hw->mac.type == fm10k_mac_pf) {
                /* register callback func to eal lib */
-               rte_intr_callback_register(&(dev->pci_dev->intr_handle),
+               rte_intr_callback_register(intr_handle,
                        fm10k_dev_interrupt_handler_pf, (void *)dev);
 
                /* enable MISC interrupt */
                fm10k_dev_enable_intr_pf(dev);
        } else { /* VF */
-               rte_intr_callback_register(&(dev->pci_dev->intr_handle),
+               rte_intr_callback_register(intr_handle,
                        fm10k_dev_interrupt_handler_vf, (void *)dev);
 
                fm10k_dev_enable_intr_vf(dev);
        }
 
-       /* Enable uio intr after callback registered */
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       /* Enable intr after callback registered */
+       rte_intr_enable(intr_handle);
 
        hw->mac.ops.update_int_moderator(hw);
 
        /* Make sure Switch Manager is ready before going forward. */
        if (hw->mac.type == fm10k_mac_pf) {
                int switch_ready = 0;
-               int i;
 
                for (i = 0; i < MAX_QUERY_SWITCH_STATE_TIMES; i++) {
                        fm10k_mbx_lock(hw);
@@ -2671,7 +3147,8 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
         */
        fm10k_mbx_lock(hw);
        /* Enable port first */
-       hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map, 1, 1);
+       hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map,
+                                       MAX_LPORT_NUM, 1);
 
        /* Set unicast mode by default. App can change to other mode in other
         * API func.
@@ -2681,6 +3158,21 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
 
        fm10k_mbx_unlock(hw);
 
+       /* Make sure default VID is ready before going forward. */
+       if (hw->mac.type == fm10k_mac_pf) {
+               for (i = 0; i < MAX_QUERY_SWITCH_STATE_TIMES; i++) {
+                       if (hw->mac.default_vid)
+                               break;
+                       /* Delay some time to acquire async port VLAN info. */
+                       rte_delay_us(WAIT_SWITCH_MSG_US);
+               }
+
+               if (!hw->mac.default_vid) {
+                       PMD_INIT_LOG(ERR, "default VID is not ready");
+                       return -1;
+               }
+       }
+
        /* Add default mac address */
        fm10k_MAC_filter_set(dev, hw->mac.addr, true,
                MAIN_VSI_POOL_NUMBER);
@@ -2692,7 +3184,8 @@ static int
 eth_fm10k_dev_uninit(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-
+       struct rte_pci_device *pdev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pdev->intr_handle;
        PMD_INIT_FUNC_TRACE();
 
        /* only uninitialize in the primary process */
@@ -2707,7 +3200,7 @@ eth_fm10k_dev_uninit(struct rte_eth_dev *dev)
        dev->tx_pkt_burst = NULL;
 
        /* disable uio/vfio intr */
-       rte_intr_disable(&(dev->pci_dev->intr_handle));
+       rte_intr_disable(intr_handle);
 
        /*PF/VF has different interrupt handling mechanism */
        if (hw->mac.type == fm10k_mac_pf) {
@@ -2715,13 +3208,13 @@ eth_fm10k_dev_uninit(struct rte_eth_dev *dev)
                fm10k_dev_disable_intr_pf(dev);
 
                /* unregister callback func to eal lib */
-               rte_intr_callback_unregister(&(dev->pci_dev->intr_handle),
+               rte_intr_callback_unregister(intr_handle,
                        fm10k_dev_interrupt_handler_pf, (void *)dev);
        } else {
                /* disable interrupt */
                fm10k_dev_disable_intr_vf(dev);
 
-               rte_intr_callback_unregister(&(dev->pci_dev->intr_handle),
+               rte_intr_callback_unregister(intr_handle,
                        fm10k_dev_interrupt_handler_vf, (void *)dev);
        }
 
@@ -2736,45 +3229,37 @@ eth_fm10k_dev_uninit(struct rte_eth_dev *dev)
        return 0;
 }
 
+/*
+ * PCI probe callback: delegates to the generic ethdev PCI probe helper,
+ * passing the size of the driver private data (struct fm10k_adapter) and
+ * eth_fm10k_dev_init as the init routine. The PCI driver argument is not
+ * used. Returns whatever the helper returns.
+ */
+static int
+eth_fm10k_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+       struct rte_pci_device *pci_dev)
+{
+       const size_t priv_size = sizeof(struct fm10k_adapter);
+
+       return rte_eth_dev_pci_generic_probe(pci_dev, priv_size,
+               eth_fm10k_dev_init);
+}
+
+/*
+ * PCI remove callback: delegates to the generic ethdev PCI remove helper
+ * with eth_fm10k_dev_uninit as the uninit routine and returns its result.
+ */
+static int
+eth_fm10k_pci_remove(struct rte_pci_device *pci_dev)
+{
+       return rte_eth_dev_pci_generic_remove(pci_dev,
+               eth_fm10k_dev_uninit);
+}
+
 /*
  * The set of PCI devices this driver supports. This driver will enable both PF
  * and SRIOV-VF devices.
  */
 static const struct rte_pci_id pci_id_fm10k_map[] = {
-#define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev) { RTE_PCI_DEVICE(vend, dev) },
-#define RTE_PCI_DEV_ID_DECL_FM10KVF(vend, dev) { RTE_PCI_DEVICE(vend, dev) },
-#include "rte_pci_dev_ids.h"
+       { RTE_PCI_DEVICE(FM10K_INTEL_VENDOR_ID, FM10K_DEV_ID_PF) },
+       { RTE_PCI_DEVICE(FM10K_INTEL_VENDOR_ID, FM10K_DEV_ID_SDI_FM10420_QDA2) },
+       { RTE_PCI_DEVICE(FM10K_INTEL_VENDOR_ID, FM10K_DEV_ID_VF) },
        { .vendor_id = 0, /* sentinel */ },
 };
 
-static struct eth_driver rte_pmd_fm10k = {
-       .pci_drv = {
-               .name = "rte_pmd_fm10k",
-               .id_table = pci_id_fm10k_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE,
-       },
-       .eth_dev_init = eth_fm10k_dev_init,
-       .eth_dev_uninit = eth_fm10k_dev_uninit,
-       .dev_private_size = sizeof(struct fm10k_adapter),
-};
-
-/*
- * Driver initialization routine.
- * Invoked once at EAL init time.
- * Register itself as the [Poll Mode] Driver of PCI FM10K devices.
- */
-static int
-rte_pmd_fm10k_init(__rte_unused const char *name,
-       __rte_unused const char *params)
-{
-       PMD_INIT_FUNC_TRACE();
-       rte_eth_driver_register(&rte_pmd_fm10k);
-       return 0;
-}
-
-static struct rte_driver rte_fm10k_driver = {
-       .type = PMD_PDEV,
-       .init = rte_pmd_fm10k_init,
+static struct rte_pci_driver rte_pmd_fm10k = {
+       .id_table = pci_id_fm10k_map,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+                    RTE_PCI_DRV_IOVA_AS_VA,
+       .probe = eth_fm10k_pci_probe,
+       .remove = eth_fm10k_pci_remove,
 };
 
-PMD_REGISTER_DRIVER(rte_fm10k_driver);
+RTE_PMD_REGISTER_PCI(net_fm10k, rte_pmd_fm10k);
+RTE_PMD_REGISTER_PCI_TABLE(net_fm10k, pci_id_fm10k_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_fm10k, "* igb_uio | uio_pci_generic | vfio-pci");