net: add macro for MAC address print
[dpdk.git] / drivers / net / vmxnet3 / vmxnet3_ethdev.c
index f718b8b..2f40ae9 100644 (file)
@@ -26,8 +26,8 @@
 #include <rte_eal.h>
 #include <rte_alarm.h>
 #include <rte_ether.h>
-#include <rte_ethdev_driver.h>
-#include <rte_ethdev_pci.h>
+#include <ethdev_driver.h>
+#include <ethdev_pci.h>
 #include <rte_string_fns.h>
 #include <rte_malloc.h>
 #include <rte_dev.h>
 #include "vmxnet3_logs.h"
 #include "vmxnet3_ethdev.h"
 
-#define PROCESS_SYS_EVENTS 0
-
 #define        VMXNET3_TX_MAX_SEG      UINT8_MAX
 
 #define VMXNET3_TX_OFFLOAD_CAP         \
        (DEV_TX_OFFLOAD_VLAN_INSERT |   \
-        DEV_TX_OFFLOAD_IPV4_CKSUM |    \
         DEV_TX_OFFLOAD_TCP_CKSUM |     \
         DEV_TX_OFFLOAD_UDP_CKSUM |     \
         DEV_TX_OFFLOAD_TCP_TSO |       \
        (DEV_RX_OFFLOAD_VLAN_STRIP |    \
         DEV_RX_OFFLOAD_VLAN_FILTER |   \
         DEV_RX_OFFLOAD_SCATTER |       \
-        DEV_RX_OFFLOAD_IPV4_CKSUM |    \
         DEV_RX_OFFLOAD_UDP_CKSUM |     \
         DEV_RX_OFFLOAD_TCP_CKSUM |     \
         DEV_RX_OFFLOAD_TCP_LRO |       \
-        DEV_RX_OFFLOAD_JUMBO_FRAME)
+        DEV_RX_OFFLOAD_JUMBO_FRAME |   \
+        DEV_RX_OFFLOAD_RSS_HASH)
+
+int vmxnet3_segs_dynfield_offset = -1;
 
 static int eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev);
 static int vmxnet3_dev_configure(struct rte_eth_dev *dev);
 static int vmxnet3_dev_start(struct rte_eth_dev *dev);
-static void vmxnet3_dev_stop(struct rte_eth_dev *dev);
-static void vmxnet3_dev_close(struct rte_eth_dev *dev);
+static int vmxnet3_dev_stop(struct rte_eth_dev *dev);
+static int vmxnet3_dev_close(struct rte_eth_dev *dev);
+static int vmxnet3_dev_reset(struct rte_eth_dev *dev);
 static void vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set);
-static void vmxnet3_dev_promiscuous_enable(struct rte_eth_dev *dev);
-static void vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev);
-static void vmxnet3_dev_allmulticast_enable(struct rte_eth_dev *dev);
-static void vmxnet3_dev_allmulticast_disable(struct rte_eth_dev *dev);
+static int vmxnet3_dev_promiscuous_enable(struct rte_eth_dev *dev);
+static int vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev);
+static int vmxnet3_dev_allmulticast_enable(struct rte_eth_dev *dev);
+static int vmxnet3_dev_allmulticast_disable(struct rte_eth_dev *dev);
 static int __vmxnet3_dev_link_update(struct rte_eth_dev *dev,
                                     int wait_to_complete);
 static int vmxnet3_dev_link_update(struct rte_eth_dev *dev,
@@ -78,25 +78,28 @@ static int vmxnet3_dev_link_update(struct rte_eth_dev *dev,
 static void vmxnet3_hw_stats_save(struct vmxnet3_hw *hw);
 static int vmxnet3_dev_stats_get(struct rte_eth_dev *dev,
                                  struct rte_eth_stats *stats);
-static void vmxnet3_dev_stats_reset(struct rte_eth_dev *dev);
+static int vmxnet3_dev_stats_reset(struct rte_eth_dev *dev);
 static int vmxnet3_dev_xstats_get_names(struct rte_eth_dev *dev,
                                        struct rte_eth_xstat_name *xstats,
                                        unsigned int n);
 static int vmxnet3_dev_xstats_get(struct rte_eth_dev *dev,
                                  struct rte_eth_xstat *xstats, unsigned int n);
-static void vmxnet3_dev_info_get(struct rte_eth_dev *dev,
-                                struct rte_eth_dev_info *dev_info);
+static int vmxnet3_dev_info_get(struct rte_eth_dev *dev,
+                               struct rte_eth_dev_info *dev_info);
 static const uint32_t *
 vmxnet3_dev_supported_ptypes_get(struct rte_eth_dev *dev);
+static int vmxnet3_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
 static int vmxnet3_dev_vlan_filter_set(struct rte_eth_dev *dev,
                                       uint16_t vid, int on);
 static int vmxnet3_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
 static int vmxnet3_mac_addr_set(struct rte_eth_dev *dev,
                                 struct rte_ether_addr *mac_addr);
+static void vmxnet3_process_events(struct rte_eth_dev *dev);
 static void vmxnet3_interrupt_handler(void *param);
-
-int vmxnet3_logtype_init;
-int vmxnet3_logtype_driver;
+static int vmxnet3_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
+                                               uint16_t queue_id);
+static int vmxnet3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev,
+                                               uint16_t queue_id);
 
 /*
  * The set of PCI devices this driver supports
@@ -113,6 +116,7 @@ static const struct eth_dev_ops vmxnet3_eth_dev_ops = {
        .dev_start            = vmxnet3_dev_start,
        .dev_stop             = vmxnet3_dev_stop,
        .dev_close            = vmxnet3_dev_close,
+       .dev_reset            = vmxnet3_dev_reset,
        .promiscuous_enable   = vmxnet3_dev_promiscuous_enable,
        .promiscuous_disable  = vmxnet3_dev_promiscuous_disable,
        .allmulticast_enable  = vmxnet3_dev_allmulticast_enable,
@@ -125,12 +129,15 @@ static const struct eth_dev_ops vmxnet3_eth_dev_ops = {
        .mac_addr_set         = vmxnet3_mac_addr_set,
        .dev_infos_get        = vmxnet3_dev_info_get,
        .dev_supported_ptypes_get = vmxnet3_dev_supported_ptypes_get,
+       .mtu_set              = vmxnet3_dev_mtu_set,
        .vlan_filter_set      = vmxnet3_dev_vlan_filter_set,
        .vlan_offload_set     = vmxnet3_dev_vlan_offload_set,
        .rx_queue_setup       = vmxnet3_dev_rx_queue_setup,
        .rx_queue_release     = vmxnet3_dev_rx_queue_release,
        .tx_queue_setup       = vmxnet3_dev_tx_queue_setup,
        .tx_queue_release     = vmxnet3_dev_tx_queue_release,
+       .rx_queue_intr_enable = vmxnet3_dev_rx_queue_intr_enable,
+       .rx_queue_intr_disable = vmxnet3_dev_rx_queue_intr_disable,
 };
 
 struct vmxnet3_xstats_name_off {
@@ -181,30 +188,61 @@ gpa_zone_reserve(struct rte_eth_dev *dev, uint32_t size,
 }
 
 /*
- * This function is based on vmxnet3_disable_intr()
+ * Enable the given interrupt
  */
 static void
-vmxnet3_disable_intr(struct vmxnet3_hw *hw)
+vmxnet3_enable_intr(struct vmxnet3_hw *hw, unsigned int intr_idx)
 {
-       int i;
+       PMD_INIT_FUNC_TRACE();
+       VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + intr_idx * 8, 0);
+}
 
+/*
+ * Disable the given interrupt
+ */
+static void
+vmxnet3_disable_intr(struct vmxnet3_hw *hw, unsigned int intr_idx)
+{
        PMD_INIT_FUNC_TRACE();
+       VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + intr_idx * 8, 1);
+}
 
-       hw->shared->devRead.intrConf.intrCtrl |= VMXNET3_IC_DISABLE_ALL;
-       for (i = 0; i < hw->num_intrs; i++)
-               VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + i * 8, 1);
+/*
+ * Enable all intrs used by the device
+ */
+static void
+vmxnet3_enable_all_intrs(struct vmxnet3_hw *hw)
+{
+       Vmxnet3_DSDevRead *devRead = &hw->shared->devRead;
+
+       PMD_INIT_FUNC_TRACE();
+
+       devRead->intrConf.intrCtrl &= rte_cpu_to_le_32(~VMXNET3_IC_DISABLE_ALL);
+
+       if (hw->intr.lsc_only) {
+               vmxnet3_enable_intr(hw, devRead->intrConf.eventIntrIdx);
+       } else {
+               int i;
+
+               for (i = 0; i < hw->intr.num_intrs; i++)
+                       vmxnet3_enable_intr(hw, i);
+       }
 }
 
+/*
+ * Disable all intrs used by the device
+ */
 static void
-vmxnet3_enable_intr(struct vmxnet3_hw *hw)
+vmxnet3_disable_all_intrs(struct vmxnet3_hw *hw)
 {
        int i;
 
        PMD_INIT_FUNC_TRACE();
 
-       hw->shared->devRead.intrConf.intrCtrl &= ~VMXNET3_IC_DISABLE_ALL;
+       hw->shared->devRead.intrConf.intrCtrl |=
+               rte_cpu_to_le_32(VMXNET3_IC_DISABLE_ALL);
        for (i = 0; i < hw->num_intrs; i++)
-               VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + i * 8, 0);
+               vmxnet3_disable_intr(hw, i);
 }
 
 /*
@@ -235,6 +273,11 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
        struct vmxnet3_hw *hw = eth_dev->data->dev_private;
        uint32_t mac_hi, mac_lo, ver;
        struct rte_eth_link link;
+       static const struct rte_mbuf_dynfield vmxnet3_segs_dynfield_desc = {
+               .name = VMXNET3_SEGS_DYNFIELD_NAME,
+               .size = sizeof(vmxnet3_segs_dynfield_t),
+               .align = __alignof__(vmxnet3_segs_dynfield_t),
+       };
 
        PMD_INIT_FUNC_TRACE();
 
@@ -244,6 +287,14 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
        eth_dev->tx_pkt_prepare = vmxnet3_prep_pkts;
        pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 
+       /* extra mbuf field is required to guess MSS */
+       vmxnet3_segs_dynfield_offset =
+               rte_mbuf_dynfield_register(&vmxnet3_segs_dynfield_desc);
+       if (vmxnet3_segs_dynfield_offset < 0) {
+               PMD_INIT_LOG(ERR, "Cannot register mbuf field.");
+               return -rte_errno;
+       }
+
        /*
         * for secondary processes, we don't initialize any further as primary
         * has already done this work.
@@ -252,6 +303,7 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
                return 0;
 
        rte_eth_copy_pci_info(eth_dev, pci_dev);
+       eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
 
        /* Vendor and Device ID need to be set before init of shared code */
        hw->device_id = pci_dev->id.device_id;
@@ -288,7 +340,7 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
                return -EIO;
        }
 
-       PMD_INIT_LOG(DEBUG, "Using device version %d\n", hw->version);
+       PMD_INIT_LOG(INFO, "Using device v%d", hw->version);
 
        /* Check UPT version compatibility with driver. */
        ver = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_UVRS);
@@ -307,25 +359,22 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
        memcpy(hw->perm_addr + 4, &mac_hi, 2);
 
        /* Allocate memory for storing MAC addresses */
-       eth_dev->data->mac_addrs = rte_zmalloc("vmxnet3", ETHER_ADDR_LEN *
+       eth_dev->data->mac_addrs = rte_zmalloc("vmxnet3", RTE_ETHER_ADDR_LEN *
                                               VMXNET3_MAX_MAC_ADDRS, 0);
        if (eth_dev->data->mac_addrs == NULL) {
                PMD_INIT_LOG(ERR,
                             "Failed to allocate %d bytes needed to store MAC addresses",
-                            ETHER_ADDR_LEN * VMXNET3_MAX_MAC_ADDRS);
+                            RTE_ETHER_ADDR_LEN * VMXNET3_MAX_MAC_ADDRS);
                return -ENOMEM;
        }
        /* Copy the permanent MAC address */
        rte_ether_addr_copy((struct rte_ether_addr *)hw->perm_addr,
                        &eth_dev->data->mac_addrs[0]);
 
-       PMD_INIT_LOG(DEBUG, "MAC Address : %02x:%02x:%02x:%02x:%02x:%02x",
+       PMD_INIT_LOG(DEBUG, "MAC Address : " RTE_ETHER_ADDR_PRT_FMT,
                     hw->perm_addr[0], hw->perm_addr[1], hw->perm_addr[2],
                     hw->perm_addr[3], hw->perm_addr[4], hw->perm_addr[5]);
 
-       /* Flag to call rte_eth_dev_release_port() in rte_eth_dev_close(). */
-       eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
-
        /* Put device in Quiesce Mode */
        VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV);
 
@@ -373,11 +422,6 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev)
                return -EBUSY;
        }
 
-       eth_dev->dev_ops = NULL;
-       eth_dev->rx_pkt_burst = NULL;
-       eth_dev->tx_pkt_burst = NULL;
-       eth_dev->tx_pkt_prepare = NULL;
-
        return 0;
 }
 
@@ -400,6 +444,40 @@ static struct rte_pci_driver rte_vmxnet3_pmd = {
        .remove = eth_vmxnet3_pci_remove,
 };
 
+static void
+vmxnet3_alloc_intr_resources(struct rte_eth_dev *dev)
+{
+       struct vmxnet3_hw *hw = dev->data->dev_private;
+       uint32_t cfg;
+       int nvec = 1; /* for link event */
+
+       /* intr settings */
+       VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
+                              VMXNET3_CMD_GET_CONF_INTR);
+       cfg = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
+       hw->intr.type = cfg & 0x3;
+       hw->intr.mask_mode = (cfg >> 2) & 0x3;
+
+       if (hw->intr.type == VMXNET3_IT_AUTO)
+               hw->intr.type = VMXNET3_IT_MSIX;
+
+       if (hw->intr.type == VMXNET3_IT_MSIX) {
+               /* only support shared tx/rx intr */
+               if (hw->num_tx_queues != hw->num_rx_queues)
+                       goto msix_err;
+
+               nvec += hw->num_rx_queues;
+               hw->intr.num_intrs = nvec;
+               return;
+       }
+
+msix_err:
+       /* the tx/rx queue interrupt will be disabled */
+       hw->intr.num_intrs = 2;
+       hw->intr.lsc_only = TRUE;
+       PMD_INIT_LOG(INFO, "Enabled MSI-X with %d vectors", hw->intr.num_intrs);
+}
+
 static int
 vmxnet3_dev_configure(struct rte_eth_dev *dev)
 {
@@ -409,6 +487,9 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev)
 
        PMD_INIT_FUNC_TRACE();
 
+       if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)
+               dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH;
+
        if (dev->data->nb_tx_queues > VMXNET3_MAX_TX_QUEUES ||
            dev->data->nb_rx_queues > VMXNET3_MAX_RX_QUEUES) {
                PMD_INIT_LOG(ERR, "ERROR: Number of queues not supported");
@@ -483,6 +564,8 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev)
                hw->rss_confPA = mz->iova;
        }
 
+       vmxnet3_alloc_intr_resources(dev);
+
        return 0;
 }
 
@@ -492,7 +575,7 @@ vmxnet3_write_mac(struct vmxnet3_hw *hw, const uint8_t *addr)
        uint32_t val;
 
        PMD_INIT_LOG(DEBUG,
-                    "Writing MAC Address : %02x:%02x:%02x:%02x:%02x:%02x",
+                    "Writing MAC Address : " RTE_ETHER_ADDR_PRT_FMT,
                     addr[0], addr[1], addr[2],
                     addr[3], addr[4], addr[5]);
 
@@ -503,6 +586,85 @@ vmxnet3_write_mac(struct vmxnet3_hw *hw, const uint8_t *addr)
        VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_MACH, val);
 }
 
+/*
+ * Configure the hardware to generate MSI-X interrupts.
+ * If setting up MSIx fails, try setting up MSI (only 1 interrupt vector
+ * which will be disabled to allow lsc to work).
+ *
+ * Returns 0 on success and -1 otherwise.
+ */
+static int
+vmxnet3_configure_msix(struct rte_eth_dev *dev)
+{
+       struct vmxnet3_hw *hw = dev->data->dev_private;
+       struct rte_intr_handle *intr_handle = dev->intr_handle;
+       uint16_t intr_vector;
+       int i;
+
+       hw->intr.event_intr_idx = 0;
+
+       /* only vfio-pci driver can support interrupt mode. */
+       if (!rte_intr_cap_multiple(intr_handle) ||
+           dev->data->dev_conf.intr_conf.rxq == 0)
+               return -1;
+
+       intr_vector = dev->data->nb_rx_queues;
+       if (intr_vector > VMXNET3_MAX_RX_QUEUES) {
+               PMD_INIT_LOG(ERR, "At most %d intr queues supported",
+                            VMXNET3_MAX_RX_QUEUES);
+               return -ENOTSUP;
+       }
+
+       if (rte_intr_efd_enable(intr_handle, intr_vector)) {
+               PMD_INIT_LOG(ERR, "Failed to enable fastpath event fd");
+               return -1;
+       }
+
+       if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
+               intr_handle->intr_vec =
+                       rte_zmalloc("intr_vec",
+                                   dev->data->nb_rx_queues * sizeof(int), 0);
+               if (intr_handle->intr_vec == NULL) {
+                       PMD_INIT_LOG(ERR, "Failed to allocate %d Rx queues intr_vec",
+                                       dev->data->nb_rx_queues);
+                       rte_intr_efd_disable(intr_handle);
+                       return -ENOMEM;
+               }
+       }
+
+       if (!rte_intr_allow_others(intr_handle) &&
+           dev->data->dev_conf.intr_conf.lsc != 0) {
+               PMD_INIT_LOG(ERR, "not enough intr vector to support both Rx interrupt and LSC");
+               rte_free(intr_handle->intr_vec);
+               intr_handle->intr_vec = NULL;
+               rte_intr_efd_disable(intr_handle);
+               return -1;
+       }
+
+       /* if we cannot allocate one MSI-X vector per queue, don't enable
+        * interrupt mode.
+        */
+       if (hw->intr.num_intrs != (intr_handle->nb_efd + 1)) {
+               PMD_INIT_LOG(ERR, "Device configured with %d Rx intr vectors, expecting %d",
+                               hw->intr.num_intrs, intr_handle->nb_efd + 1);
+               rte_free(intr_handle->intr_vec);
+               intr_handle->intr_vec = NULL;
+               rte_intr_efd_disable(intr_handle);
+               return -1;
+       }
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++)
+               intr_handle->intr_vec[i] = i + 1;
+
+       for (i = 0; i < hw->intr.num_intrs; i++)
+               hw->intr.mod_levels[i] = UPT1_IML_ADAPTIVE;
+
+       PMD_INIT_LOG(INFO, "intr type %u, mode %u, %u vectors allocated",
+                   hw->intr.type, hw->intr.mask_mode, hw->intr.num_intrs);
+
+       return 0;
+}
+
 static int
 vmxnet3_dev_setup_memreg(struct rte_eth_dev *dev)
 {
@@ -594,6 +756,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
 {
        struct rte_eth_conf port_conf = dev->data->dev_conf;
        struct vmxnet3_hw *hw = dev->data->dev_private;
+       struct rte_intr_handle *intr_handle = dev->intr_handle;
        uint32_t mtu = dev->data->mtu;
        Vmxnet3_DriverShared *shared = hw->shared;
        Vmxnet3_DSDevRead *devRead = &shared->devRead;
@@ -619,15 +782,6 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
        devRead->misc.numTxQueues  = hw->num_tx_queues;
        devRead->misc.numRxQueues  = hw->num_rx_queues;
 
-       /*
-        * Set number of interrupts to 1
-        * PMD by default disables all the interrupts but this is MUST
-        * to activate device. It needs at least one interrupt for
-        * link events to handle
-        */
-       hw->num_intrs = devRead->intrConf.numIntrs = 1;
-       devRead->intrConf.intrCtrl |= VMXNET3_IC_DISABLE_ALL;
-
        for (i = 0; i < hw->num_tx_queues; i++) {
                Vmxnet3_TxQueueDesc *tqd = &hw->tqd_start[i];
                vmxnet3_tx_queue_t *txq  = dev->data->tx_queues[i];
@@ -644,9 +798,13 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
                tqd->conf.compRingSize = txq->comp_ring.size;
                tqd->conf.dataRingSize = txq->data_ring.size;
                tqd->conf.txDataRingDescSize = txq->txdata_desc_size;
-               tqd->conf.intrIdx      = txq->comp_ring.intr_idx;
-               tqd->status.stopped    = TRUE;
-               tqd->status.error      = 0;
+
+               if (hw->intr.lsc_only)
+                       tqd->conf.intrIdx = 1;
+               else
+                       tqd->conf.intrIdx = intr_handle->intr_vec[i];
+               tqd->status.stopped = TRUE;
+               tqd->status.error   = 0;
                memset(&tqd->stats, 0, sizeof(tqd->stats));
        }
 
@@ -663,16 +821,25 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
                rqd->conf.rxRingSize[0]   = rxq->cmd_ring[0].size;
                rqd->conf.rxRingSize[1]   = rxq->cmd_ring[1].size;
                rqd->conf.compRingSize    = rxq->comp_ring.size;
-               rqd->conf.intrIdx         = rxq->comp_ring.intr_idx;
-               if (VMXNET3_VERSION_GE_3(hw)) {
-                       rqd->conf.rxDataRingBasePA = rxq->data_ring.basePA;
-                       rqd->conf.rxDataRingDescSize = rxq->data_desc_size;
-               }
-               rqd->status.stopped       = TRUE;
-               rqd->status.error         = 0;
+
+               if (hw->intr.lsc_only)
+                       rqd->conf.intrIdx = 1;
+               else
+                       rqd->conf.intrIdx = intr_handle->intr_vec[i];
+               rqd->status.stopped = TRUE;
+               rqd->status.error   = 0;
                memset(&rqd->stats, 0, sizeof(rqd->stats));
        }
 
+       /* intr settings */
+       devRead->intrConf.autoMask = hw->intr.mask_mode == VMXNET3_IMM_AUTO;
+       devRead->intrConf.numIntrs = hw->intr.num_intrs;
+       for (i = 0; i < hw->intr.num_intrs; i++)
+               devRead->intrConf.modLevels[i] = hw->intr.mod_levels[i];
+
+       devRead->intrConf.eventIntrIdx = hw->intr.event_intr_idx;
+       devRead->intrConf.intrCtrl |= rte_cpu_to_le_32(VMXNET3_IC_DISABLE_ALL);
+
        /* RxMode set to 0 of VMXNET3_RXM_xxx */
        devRead->rxFilterConf.rxMode = 0;
 
@@ -722,24 +889,18 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
        /* Save stats before it is reset by CMD_ACTIVATE */
        vmxnet3_hw_stats_save(hw);
 
+       /* configure MSI-X */
+       ret = vmxnet3_configure_msix(dev);
+       if (ret < 0) {
+               /* revert to lsc only */
+               hw->intr.num_intrs = 2;
+               hw->intr.lsc_only = TRUE;
+       }
+
        ret = vmxnet3_setup_driver_shared(dev);
        if (ret != VMXNET3_SUCCESS)
                return ret;
 
-       /* check if lsc interrupt feature is enabled */
-       if (dev->data->dev_conf.intr_conf.lsc) {
-               struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
-
-               /* Setup interrupt callback  */
-               rte_intr_callback_register(&pci_dev->intr_handle,
-                                          vmxnet3_interrupt_handler, dev);
-
-               if (rte_intr_enable(&pci_dev->intr_handle) < 0) {
-                       PMD_INIT_LOG(ERR, "interrupt enable failed");
-                       return -EIO;
-               }
-       }
-
        /* Exchange shared data with device */
        VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_DSAL,
                               VMXNET3_GET_ADDR_LO(hw->sharedPA));
@@ -769,7 +930,8 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
                PMD_INIT_LOG(DEBUG, "Failed to setup memory region\n");
        }
 
-       if (VMXNET3_VERSION_GE_4(hw)) {
+       if (VMXNET3_VERSION_GE_4(hw) &&
+           dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) {
                /* Check for additional RSS  */
                ret = vmxnet3_v4_rss_configure(dev);
                if (ret != VMXNET3_SUCCESS) {
@@ -778,9 +940,6 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
                }
        }
 
-       /* Disable interrupts */
-       vmxnet3_disable_intr(hw);
-
        /*
         * Load RX queues with blank mbufs and update next2fill index for device
         * Update RxMode of the device
@@ -796,48 +955,76 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
        /* Setting proper Rx Mode and issue Rx Mode Update command */
        vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_UCAST | VMXNET3_RXM_BCAST, 1);
 
-       if (dev->data->dev_conf.intr_conf.lsc) {
-               vmxnet3_enable_intr(hw);
+       /* Setup interrupt callback  */
+       rte_intr_callback_register(dev->intr_handle,
+                                  vmxnet3_interrupt_handler, dev);
 
-               /*
-                * Update link state from device since this won't be
-                * done upon starting with lsc in use. This is done
-                * only after enabling interrupts to avoid any race
-                * where the link state could change without an
-                * interrupt being fired.
-                */
-               __vmxnet3_dev_link_update(dev, 0);
+       if (rte_intr_enable(dev->intr_handle) < 0) {
+               PMD_INIT_LOG(ERR, "interrupt enable failed");
+               return -EIO;
        }
 
+       /* enable all intrs */
+       vmxnet3_enable_all_intrs(hw);
+
+       vmxnet3_process_events(dev);
+
+       /*
+        * Update link state from device since this won't be
+        * done upon starting with lsc in use. This is done
+        * only after enabling interrupts to avoid any race
+        * where the link state could change without an
+        * interrupt being fired.
+        */
+       __vmxnet3_dev_link_update(dev, 0);
+
        return VMXNET3_SUCCESS;
 }
 
 /*
  * Stop device: disable rx and tx functions to allow for reconfiguring.
  */
-static void
+static int
 vmxnet3_dev_stop(struct rte_eth_dev *dev)
 {
        struct rte_eth_link link;
        struct vmxnet3_hw *hw = dev->data->dev_private;
+       struct rte_intr_handle *intr_handle = dev->intr_handle;
+       int ret;
 
        PMD_INIT_FUNC_TRACE();
 
        if (hw->adapter_stopped == 1) {
                PMD_INIT_LOG(DEBUG, "Device already stopped.");
-               return;
+               return 0;
        }
 
-       /* disable interrupts */
-       vmxnet3_disable_intr(hw);
+       do {
+               /* Unregister has lock to make sure there is no running cb.
+                * This has to happen first since vmxnet3_interrupt_handler
+                * reenables interrupts by calling vmxnet3_enable_intr
+                */
+               ret = rte_intr_callback_unregister(intr_handle,
+                                                  vmxnet3_interrupt_handler,
+                                                  (void *)-1);
+       } while (ret == -EAGAIN);
 
-       if (dev->data->dev_conf.intr_conf.lsc) {
-               struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
+       if (ret < 0)
+               PMD_DRV_LOG(ERR, "Error attempting to unregister intr cb: %d",
+                           ret);
 
-               rte_intr_disable(&pci_dev->intr_handle);
+       PMD_INIT_LOG(DEBUG, "Disabled %d intr callbacks", ret);
 
-               rte_intr_callback_unregister(&pci_dev->intr_handle,
-                                            vmxnet3_interrupt_handler, dev);
+       /* disable interrupts */
+       vmxnet3_disable_all_intrs(hw);
+
+       rte_intr_disable(intr_handle);
+
+       /* Clean datapath event and queue/vector mapping */
+       rte_intr_efd_disable(intr_handle);
+       if (intr_handle->intr_vec != NULL) {
+               rte_free(intr_handle->intr_vec);
+               intr_handle->intr_vec = NULL;
        }
 
        /* quiesce the device first */
@@ -859,6 +1046,9 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
        rte_eth_linkstatus_set(dev, &link);
 
        hw->adapter_stopped = 1;
+       dev->data->dev_started = 0;
+
+       return 0;
 }
 
 static void
@@ -886,13 +1076,30 @@ vmxnet3_free_queues(struct rte_eth_dev *dev)
 /*
  * Reset and stop device.
  */
-static void
+static int
 vmxnet3_dev_close(struct rte_eth_dev *dev)
 {
+       int ret;
        PMD_INIT_FUNC_TRACE();
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
 
-       vmxnet3_dev_stop(dev);
+       ret = vmxnet3_dev_stop(dev);
        vmxnet3_free_queues(dev);
+
+       return ret;
+}
+
+static int
+vmxnet3_dev_reset(struct rte_eth_dev *dev)
+{
+       int ret;
+
+       ret = eth_vmxnet3_dev_uninit(dev);
+       if (ret)
+               return ret;
+       ret = eth_vmxnet3_dev_init(dev);
+       return ret;
 }
 
 static void
@@ -1127,13 +1334,13 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
        return 0;
 }
 
-static void
+static int
 vmxnet3_dev_stats_reset(struct rte_eth_dev *dev)
 {
        unsigned int i;
        struct vmxnet3_hw *hw = dev->data->dev_private;
-       struct UPT1_TxStats txStats;
-       struct UPT1_RxStats rxStats;
+       struct UPT1_TxStats txStats = {0};
+       struct UPT1_RxStats rxStats = {0};
 
        VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS);
 
@@ -1149,10 +1356,12 @@ vmxnet3_dev_stats_reset(struct rte_eth_dev *dev)
                memcpy(&hw->snapshot_rx_stats[i], &rxStats,
                        sizeof(hw->snapshot_rx_stats[0]));
        }
+
+       return 0;
 }
 
-static void
-vmxnet3_dev_info_get(struct rte_eth_dev *dev __rte_unused,
+static int
+vmxnet3_dev_info_get(struct rte_eth_dev *dev,
                     struct rte_eth_dev_info *dev_info)
 {
        struct vmxnet3_hw *hw = dev->data->dev_private;
@@ -1161,6 +1370,8 @@ vmxnet3_dev_info_get(struct rte_eth_dev *dev __rte_unused,
        dev_info->max_tx_queues = VMXNET3_MAX_TX_QUEUES;
        dev_info->min_rx_bufsize = 1518 + RTE_PKTMBUF_HEADROOM;
        dev_info->max_rx_pktlen = 16384; /* includes CRC, cf MAXFRS register */
+       dev_info->min_mtu = VMXNET3_MIN_MTU;
+       dev_info->max_mtu = VMXNET3_MAX_MTU;
        dev_info->speed_capa = ETH_LINK_SPEED_10G;
        dev_info->max_mac_addrs = VMXNET3_MAX_MAC_ADDRS;
 
@@ -1188,6 +1399,8 @@ vmxnet3_dev_info_get(struct rte_eth_dev *dev __rte_unused,
        dev_info->rx_queue_offload_capa = 0;
        dev_info->tx_offload_capa = VMXNET3_TX_OFFLOAD_CAP;
        dev_info->tx_queue_offload_capa = 0;
+
+       return 0;
 }
 
 static const uint32_t *
@@ -1204,6 +1417,18 @@ vmxnet3_dev_supported_ptypes_get(struct rte_eth_dev *dev)
        return NULL;
 }
 
+static int
+vmxnet3_dev_mtu_set(struct rte_eth_dev *dev, __rte_unused uint16_t mtu)
+{
+       if (dev->data->dev_started) {
+               PMD_DRV_LOG(ERR, "Port %d must be stopped to configure MTU",
+                           dev->data->port_id);
+               return -EBUSY;
+       }
+
+       return 0;
+}
+
 static int
 vmxnet3_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
 {
@@ -1262,7 +1487,7 @@ vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set)
 }
 
 /* Promiscuous supported only if Vmxnet3_DriverShared is initialized in adapter */
-static void
+static int
 vmxnet3_dev_promiscuous_enable(struct rte_eth_dev *dev)
 {
        struct vmxnet3_hw *hw = dev->data->dev_private;
@@ -1273,10 +1498,12 @@ vmxnet3_dev_promiscuous_enable(struct rte_eth_dev *dev)
 
        VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
                               VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+
+       return 0;
 }
 
 /* Promiscuous supported only if Vmxnet3_DriverShared is initialized in adapter */
-static void
+static int
 vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev)
 {
        struct vmxnet3_hw *hw = dev->data->dev_private;
@@ -1290,24 +1517,30 @@ vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev)
        vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_PROMISC, 0);
        VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
                               VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+
+       return 0;
 }
 
 /* Allmulticast supported only if Vmxnet3_DriverShared is initialized in adapter */
-static void
+static int
 vmxnet3_dev_allmulticast_enable(struct rte_eth_dev *dev)
 {
        struct vmxnet3_hw *hw = dev->data->dev_private;
 
        vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_ALL_MULTI, 1);
+
+       return 0;
 }
 
 /* Allmulticast supported only if Vmxnet3_DriverShared is initialized in adapter */
-static void
+static int
 vmxnet3_dev_allmulticast_disable(struct rte_eth_dev *dev)
 {
        struct vmxnet3_hw *hw = dev->data->dev_private;
 
        vmxnet3_dev_set_rxmode(hw, VMXNET3_RXM_ALL_MULTI, 0);
+
+       return 0;
 }
 
 /* Enable/disable filter on vlan */
@@ -1389,9 +1622,9 @@ vmxnet3_process_events(struct rte_eth_dev *dev)
        if (events & VMXNET3_ECR_LINK) {
                PMD_DRV_LOG(DEBUG, "Process events: VMXNET3_ECR_LINK event");
                if (vmxnet3_dev_link_update(dev, 0) == 0)
-                       _rte_eth_dev_callback_process(dev,
-                                                     RTE_ETH_EVENT_INTR_LSC,
-                                                     NULL);
+                       rte_eth_dev_callback_process(dev,
+                                                    RTE_ETH_EVENT_INTR_LSC,
+                                                    NULL);
        }
 
        /* Check if there is an error on xmit/recv queues */
@@ -1422,24 +1655,45 @@ static void
 vmxnet3_interrupt_handler(void *param)
 {
        struct rte_eth_dev *dev = param;
-       struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
+       struct vmxnet3_hw *hw = dev->data->dev_private;
+       Vmxnet3_DSDevRead *devRead = &hw->shared->devRead;
+       uint32_t events;
+
+       PMD_INIT_FUNC_TRACE();
+       vmxnet3_disable_intr(hw, devRead->intrConf.eventIntrIdx);
+
+       events = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_ECR);
+       if (events == 0)
+               goto done;
 
+       RTE_LOG(DEBUG, PMD, "Reading events: 0x%X", events);
        vmxnet3_process_events(dev);
+done:
+       vmxnet3_enable_intr(hw, devRead->intrConf.eventIntrIdx);
+}
 
-       if (rte_intr_enable(&pci_dev->intr_handle) < 0)
-               PMD_DRV_LOG(ERR, "interrupt enable failed");
+static int
+vmxnet3_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+       struct vmxnet3_hw *hw = dev->data->dev_private;
+
+       vmxnet3_enable_intr(hw, dev->intr_handle->intr_vec[queue_id]);
+
+       return 0;
+}
+
+static int
+vmxnet3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+       struct vmxnet3_hw *hw = dev->data->dev_private;
+
+       vmxnet3_disable_intr(hw, dev->intr_handle->intr_vec[queue_id]);
+
+       return 0;
 }
 
 RTE_PMD_REGISTER_PCI(net_vmxnet3, rte_vmxnet3_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_vmxnet3, pci_id_vmxnet3_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_vmxnet3, "* igb_uio | uio_pci_generic | vfio-pci");
-
-RTE_INIT(vmxnet3_init_log)
-{
-       vmxnet3_logtype_init = rte_log_register("pmd.net.vmxnet3.init");
-       if (vmxnet3_logtype_init >= 0)
-               rte_log_set_level(vmxnet3_logtype_init, RTE_LOG_NOTICE);
-       vmxnet3_logtype_driver = rte_log_register("pmd.net.vmxnet3.driver");
-       if (vmxnet3_logtype_driver >= 0)
-               rte_log_set_level(vmxnet3_logtype_driver, RTE_LOG_NOTICE);
-}
+RTE_LOG_REGISTER_SUFFIX(vmxnet3_logtype_init, init, NOTICE);
+RTE_LOG_REGISTER_SUFFIX(vmxnet3_logtype_driver, driver, NOTICE);