drivers: advertise kmod dependencies in pmdinfo
[dpdk.git] / drivers / net / e1000 / igb_ethdev.c
index 3ab082e..08f2a68 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  * Default values for port configuration
  */
 #define IGB_DEFAULT_RX_FREE_THRESH  32
-#define IGB_DEFAULT_RX_PTHRESH      8
+
+/*
+ * NOTE: the PTHRESH/WTHRESH defaults below are not plain constants. They
+ * expand to expressions that read hw->mac.type, so a pointer named hw (with
+ * a mac.type member) must be in scope wherever one of them is used.
+ */
+#define IGB_DEFAULT_RX_PTHRESH      ((hw->mac.type == e1000_i354) ? 12 : 8)
 #define IGB_DEFAULT_RX_HTHRESH      8
-#define IGB_DEFAULT_RX_WTHRESH      0
+#define IGB_DEFAULT_RX_WTHRESH      ((hw->mac.type == e1000_82576) ? 1 : 4)
 
-#define IGB_DEFAULT_TX_PTHRESH      32
-#define IGB_DEFAULT_TX_HTHRESH      0
-#define IGB_DEFAULT_TX_WTHRESH      0
+#define IGB_DEFAULT_TX_PTHRESH      ((hw->mac.type == e1000_i354) ? 20 : 8)
+#define IGB_DEFAULT_TX_HTHRESH      1
+#define IGB_DEFAULT_TX_WTHRESH      ((hw->mac.type == e1000_82576) ? 1 : 16)
 
 #define IGB_HKEY_MAX_INDEX 10
 
 #define IGB_8_BIT_MASK   UINT8_MAX
 
 /* Additional timesync values. */
-#define E1000_ETQF_FILTER_1588 3
-#define E1000_TIMINCA_INCVALUE 16000000
-#define E1000_TIMINCA_INIT     ((0x02 << E1000_TIMINCA_16NS_SHIFT) \
-                               | E1000_TIMINCA_INCVALUE)
+#define E1000_CYCLECOUNTER_MASK      0xffffffffffffffffULL
+#define E1000_ETQF_FILTER_1588       3
+#define IGB_82576_TSYNC_SHIFT        16
+#define E1000_INCPERIOD_82576        (1 << E1000_TIMINCA_16NS_SHIFT)
+#define E1000_INCVALUE_82576         (16 << IGB_82576_TSYNC_SHIFT)
 #define E1000_TSAUXC_DISABLE_SYSTIME 0x80000000
 
+/*
+ * Register offset and related field values that igbvf_set_ivar_map() uses
+ * to bind the VF mailbox interrupt cause to an MSI-X vector.
+ */
+#define E1000_VTIVAR_MISC                0x01740
+#define E1000_VTIVAR_MISC_MASK           0xFF
+#define E1000_VTIVAR_VALID               0x80
+#define E1000_VTIVAR_MISC_MAILBOX        0
+#define E1000_VTIVAR_MISC_INTR_MASK      0x3
+
+/* External VLAN Enable bit mask */
+#define E1000_CTRL_EXT_EXT_VLAN      (1 << 26)
+
+/* External VLAN Ether Type bit mask and shift */
+#define E1000_VET_VET_EXT            0xFFFF0000
+#define E1000_VET_VET_EXT_SHIFT      16
+
 static int  eth_igb_configure(struct rte_eth_dev *dev);
 static int  eth_igb_start(struct rte_eth_dev *dev);
 static void eth_igb_stop(struct rte_eth_dev *dev);
+static int  eth_igb_dev_set_link_up(struct rte_eth_dev *dev);
+static int  eth_igb_dev_set_link_down(struct rte_eth_dev *dev);
 static void eth_igb_close(struct rte_eth_dev *dev);
 static void eth_igb_promiscuous_enable(struct rte_eth_dev *dev);
 static void eth_igb_promiscuous_disable(struct rte_eth_dev *dev);
@@ -96,9 +113,16 @@ static int  eth_igb_link_update(struct rte_eth_dev *dev,
                                int wait_to_complete);
 static void eth_igb_stats_get(struct rte_eth_dev *dev,
                                struct rte_eth_stats *rte_stats);
+static int eth_igb_xstats_get(struct rte_eth_dev *dev,
+                             struct rte_eth_xstat *xstats, unsigned n);
+static int eth_igb_xstats_get_names(struct rte_eth_dev *dev,
+                                   struct rte_eth_xstat_name *xstats_names,
+                                   unsigned limit);
 static void eth_igb_stats_reset(struct rte_eth_dev *dev);
+static void eth_igb_xstats_reset(struct rte_eth_dev *dev);
 static void eth_igb_infos_get(struct rte_eth_dev *dev,
                              struct rte_eth_dev_info *dev_info);
+static const uint32_t *eth_igb_supported_ptypes_get(struct rte_eth_dev *dev);
 static void eth_igbvf_infos_get(struct rte_eth_dev *dev,
                                struct rte_eth_dev_info *dev_info);
 static int  eth_igb_flow_ctrl_get(struct rte_eth_dev *dev,
@@ -121,7 +145,9 @@ static int  eth_igb_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
 
 static int eth_igb_vlan_filter_set(struct rte_eth_dev *dev,
                uint16_t vlan_id, int on);
-static void eth_igb_vlan_tpid_set(struct rte_eth_dev *dev, uint16_t tpid_id);
+static int eth_igb_vlan_tpid_set(struct rte_eth_dev *dev,
+                                enum rte_vlan_type vlan_type,
+                                uint16_t tpid_id);
 static void eth_igb_vlan_offload_set(struct rte_eth_dev *dev, int mask);
 
 static void igb_vlan_hw_filter_enable(struct rte_eth_dev *dev);
@@ -148,8 +174,18 @@ static int igbvf_dev_configure(struct rte_eth_dev *dev);
 static int igbvf_dev_start(struct rte_eth_dev *dev);
 static void igbvf_dev_stop(struct rte_eth_dev *dev);
 static void igbvf_dev_close(struct rte_eth_dev *dev);
+static void igbvf_promiscuous_enable(struct rte_eth_dev *dev);
+static void igbvf_promiscuous_disable(struct rte_eth_dev *dev);
+static void igbvf_allmulticast_enable(struct rte_eth_dev *dev);
+static void igbvf_allmulticast_disable(struct rte_eth_dev *dev);
 static int eth_igbvf_link_update(struct e1000_hw *hw);
-static void eth_igbvf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats);
+static void eth_igbvf_stats_get(struct rte_eth_dev *dev,
+                               struct rte_eth_stats *rte_stats);
+static int eth_igbvf_xstats_get(struct rte_eth_dev *dev,
+                               struct rte_eth_xstat *xstats, unsigned n);
+static int eth_igbvf_xstats_get_names(struct rte_eth_dev *dev,
+                                     struct rte_eth_xstat_name *xstats_names,
+                                     unsigned limit);
 static void eth_igbvf_stats_reset(struct rte_eth_dev *dev);
 static int igbvf_vlan_filter_set(struct rte_eth_dev *dev,
                uint16_t vlan_id, int on);
@@ -230,6 +266,11 @@ static int igb_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
                                          uint32_t flags);
 static int igb_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
                                          struct timespec *timestamp);
+static int igb_timesync_adjust_time(struct rte_eth_dev *dev, int64_t delta);
+static int igb_timesync_read_time(struct rte_eth_dev *dev,
+                                 struct timespec *timestamp);
+static int igb_timesync_write_time(struct rte_eth_dev *dev,
+                                  const struct timespec *timestamp);
 static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev,
                                        uint16_t queue_id);
 static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev,
@@ -239,6 +280,9 @@ static void eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
 static void eth_igb_write_ivar(struct e1000_hw *hw, uint8_t msix_vector,
                               uint8_t index, uint8_t offset);
 static void eth_igb_configure_msix_intr(struct rte_eth_dev *dev);
+static void eth_igbvf_interrupt_handler(struct rte_intr_handle *handle,
+                                       void *param);
+static void igbvf_mbx_process(struct rte_eth_dev *dev);
 
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
@@ -262,28 +306,77 @@ static enum e1000_fc_mode igb_fc_setting = e1000_fc_full;
  * The set of PCI devices this driver supports
  */
 static const struct rte_pci_id pci_id_igb_map[] = {
-
-#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
-#include "rte_pci_dev_ids.h"
-
-{0},
+       /* Referenced by rte_igb_pmd through pci_drv.id_table. */
+       /* 82576 */
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD) },
+
+       /* 82575 */
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER) },
+
+       /* 82580 */
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER) },
+
+       /* I350 */
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_DA4) },
+       /* I210 / I211 */
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER) },
+       /* I354 */
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
+       /* DH89XXCC */
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP) },
+       { .vendor_id = 0, /* sentinel */ },
 };
 
 /*
  * The set of PCI devices this driver supports (for 82576&I350 VF)
  */
 static const struct rte_pci_id pci_id_igbvf_map[] = {
+       /* Referenced by rte_igbvf_pmd through pci_drv.id_table. */
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF_HV) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF_HV) },
+       { .vendor_id = 0, /* sentinel */ },
+};
 
-#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
-#include "rte_pci_dev_ids.h"
+/*
+ * RX ring descriptor-count limits: largest and smallest ring size, and the
+ * alignment value for the ring size (expressed via IGB_RXD_ALIGN).
+ */
+static const struct rte_eth_desc_lim rx_desc_lim = {
+       .nb_max = E1000_MAX_RING_DESC,
+       .nb_min = E1000_MIN_RING_DESC,
+       .nb_align = IGB_RXD_ALIGN,
+};
 
-{0},
+/*
+ * TX ring descriptor-count limits: largest and smallest ring size, and the
+ * alignment value for the ring size. The alignment uses the TX descriptor
+ * constant (IGB_TXD_ALIGN) instead of borrowing the RX one.
+ */
+static const struct rte_eth_desc_lim tx_desc_lim = {
+       .nb_max = E1000_MAX_RING_DESC,
+       .nb_min = E1000_MIN_RING_DESC,
+       .nb_align = IGB_TXD_ALIGN,
 };
 
 static const struct eth_dev_ops eth_igb_ops = {
        .dev_configure        = eth_igb_configure,
        .dev_start            = eth_igb_start,
        .dev_stop             = eth_igb_stop,
+       .dev_set_link_up      = eth_igb_dev_set_link_up,
+       .dev_set_link_down    = eth_igb_dev_set_link_down,
        .dev_close            = eth_igb_close,
        .promiscuous_enable   = eth_igb_promiscuous_enable,
        .promiscuous_disable  = eth_igb_promiscuous_disable,
@@ -291,8 +384,12 @@ static const struct eth_dev_ops eth_igb_ops = {
        .allmulticast_disable = eth_igb_allmulticast_disable,
        .link_update          = eth_igb_link_update,
        .stats_get            = eth_igb_stats_get,
+       .xstats_get           = eth_igb_xstats_get,
+       .xstats_get_names     = eth_igb_xstats_get_names,
        .stats_reset          = eth_igb_stats_reset,
+       .xstats_reset         = eth_igb_xstats_reset,
        .dev_infos_get        = eth_igb_infos_get,
+       .dev_supported_ptypes_get = eth_igb_supported_ptypes_get,
        .mtu_set              = eth_igb_mtu_set,
        .vlan_filter_set      = eth_igb_vlan_filter_set,
        .vlan_tpid_set        = eth_igb_vlan_tpid_set,
@@ -318,15 +415,19 @@ static const struct eth_dev_ops eth_igb_ops = {
        .rss_hash_conf_get    = eth_igb_rss_hash_conf_get,
        .filter_ctrl          = eth_igb_filter_ctrl,
        .set_mc_addr_list     = eth_igb_set_mc_addr_list,
+       .rxq_info_get         = igb_rxq_info_get,
+       .txq_info_get         = igb_txq_info_get,
        .timesync_enable      = igb_timesync_enable,
        .timesync_disable     = igb_timesync_disable,
        .timesync_read_rx_timestamp = igb_timesync_read_rx_timestamp,
        .timesync_read_tx_timestamp = igb_timesync_read_tx_timestamp,
-       .get_reg_length       = eth_igb_get_reg_length,
        .get_reg              = eth_igb_get_regs,
        .get_eeprom_length    = eth_igb_get_eeprom_length,
        .get_eeprom           = eth_igb_get_eeprom,
        .set_eeprom           = eth_igb_set_eeprom,
+       .timesync_adjust_time = igb_timesync_adjust_time,
+       .timesync_read_time   = igb_timesync_read_time,
+       .timesync_write_time  = igb_timesync_write_time,
 };
 
 /*
@@ -338,21 +439,111 @@ static const struct eth_dev_ops igbvf_eth_dev_ops = {
        .dev_start            = igbvf_dev_start,
        .dev_stop             = igbvf_dev_stop,
        .dev_close            = igbvf_dev_close,
+       .promiscuous_enable   = igbvf_promiscuous_enable,
+       .promiscuous_disable  = igbvf_promiscuous_disable,
+       .allmulticast_enable  = igbvf_allmulticast_enable,
+       .allmulticast_disable = igbvf_allmulticast_disable,
        .link_update          = eth_igb_link_update,
        .stats_get            = eth_igbvf_stats_get,
+       .xstats_get           = eth_igbvf_xstats_get,
+       .xstats_get_names     = eth_igbvf_xstats_get_names,
        .stats_reset          = eth_igbvf_stats_reset,
+       .xstats_reset         = eth_igbvf_stats_reset,
        .vlan_filter_set      = igbvf_vlan_filter_set,
        .dev_infos_get        = eth_igbvf_infos_get,
+       .dev_supported_ptypes_get = eth_igb_supported_ptypes_get,
        .rx_queue_setup       = eth_igb_rx_queue_setup,
        .rx_queue_release     = eth_igb_rx_queue_release,
        .tx_queue_setup       = eth_igb_tx_queue_setup,
        .tx_queue_release     = eth_igb_tx_queue_release,
        .set_mc_addr_list     = eth_igb_set_mc_addr_list,
+       .rxq_info_get         = igb_rxq_info_get,
+       .txq_info_get         = igb_txq_info_get,
        .mac_addr_set         = igbvf_default_mac_addr_set,
-       .get_reg_length       = igbvf_get_reg_length,
        .get_reg              = igbvf_get_regs,
 };
 
+/*
+ * Pairs an extended-statistic name with the byte offset of its counter
+ * inside the statistics structure the value is read from.
+ */
+struct rte_igb_xstats_name_off {
+       char name[RTE_ETH_XSTATS_NAME_SIZE];
+       unsigned offset;
+};
+
+/*
+ * Extended-statistics table for struct e1000_hw_stats counters: each entry
+ * pairs the reported name with the offset of the backing field.
+ */
+static const struct rte_igb_xstats_name_off rte_igb_stats_strings[] = {
+       {"rx_crc_errors", offsetof(struct e1000_hw_stats, crcerrs)},
+       {"rx_align_errors", offsetof(struct e1000_hw_stats, algnerrc)},
+       {"rx_symbol_errors", offsetof(struct e1000_hw_stats, symerrs)},
+       {"rx_missed_packets", offsetof(struct e1000_hw_stats, mpc)},
+       {"tx_single_collision_packets", offsetof(struct e1000_hw_stats, scc)},
+       {"tx_multiple_collision_packets", offsetof(struct e1000_hw_stats, mcc)},
+       {"tx_excessive_collision_packets", offsetof(struct e1000_hw_stats,
+               ecol)},
+       {"tx_late_collisions", offsetof(struct e1000_hw_stats, latecol)},
+       {"tx_total_collisions", offsetof(struct e1000_hw_stats, colc)},
+       {"tx_deferred_packets", offsetof(struct e1000_hw_stats, dc)},
+       {"tx_no_carrier_sense_packets", offsetof(struct e1000_hw_stats, tncrs)},
+       {"rx_carrier_ext_errors", offsetof(struct e1000_hw_stats, cexterr)},
+       {"rx_length_errors", offsetof(struct e1000_hw_stats, rlec)},
+       {"rx_xon_packets", offsetof(struct e1000_hw_stats, xonrxc)},
+       {"tx_xon_packets", offsetof(struct e1000_hw_stats, xontxc)},
+       {"rx_xoff_packets", offsetof(struct e1000_hw_stats, xoffrxc)},
+       {"tx_xoff_packets", offsetof(struct e1000_hw_stats, xofftxc)},
+       {"rx_flow_control_unsupported_packets", offsetof(struct e1000_hw_stats,
+               fcruc)},
+       {"rx_size_64_packets", offsetof(struct e1000_hw_stats, prc64)},
+       {"rx_size_65_to_127_packets", offsetof(struct e1000_hw_stats, prc127)},
+       {"rx_size_128_to_255_packets", offsetof(struct e1000_hw_stats, prc255)},
+       {"rx_size_256_to_511_packets", offsetof(struct e1000_hw_stats, prc511)},
+       {"rx_size_512_to_1023_packets", offsetof(struct e1000_hw_stats,
+               prc1023)},
+       {"rx_size_1024_to_max_packets", offsetof(struct e1000_hw_stats,
+               prc1522)},
+       {"rx_broadcast_packets", offsetof(struct e1000_hw_stats, bprc)},
+       {"rx_multicast_packets", offsetof(struct e1000_hw_stats, mprc)},
+       {"rx_undersize_errors", offsetof(struct e1000_hw_stats, ruc)},
+       {"rx_fragment_errors", offsetof(struct e1000_hw_stats, rfc)},
+       {"rx_oversize_errors", offsetof(struct e1000_hw_stats, roc)},
+       {"rx_jabber_errors", offsetof(struct e1000_hw_stats, rjc)},
+       {"rx_management_packets", offsetof(struct e1000_hw_stats, mgprc)},
+       {"rx_management_dropped", offsetof(struct e1000_hw_stats, mgpdc)},
+       {"tx_management_packets", offsetof(struct e1000_hw_stats, mgptc)},
+       {"rx_total_packets", offsetof(struct e1000_hw_stats, tpr)},
+       {"tx_total_packets", offsetof(struct e1000_hw_stats, tpt)},
+       {"rx_total_bytes", offsetof(struct e1000_hw_stats, tor)},
+       {"tx_total_bytes", offsetof(struct e1000_hw_stats, tot)},
+       {"tx_size_64_packets", offsetof(struct e1000_hw_stats, ptc64)},
+       {"tx_size_65_to_127_packets", offsetof(struct e1000_hw_stats, ptc127)},
+       {"tx_size_128_to_255_packets", offsetof(struct e1000_hw_stats, ptc255)},
+       {"tx_size_256_to_511_packets", offsetof(struct e1000_hw_stats, ptc511)},
+       {"tx_size_512_to_1023_packets", offsetof(struct e1000_hw_stats,
+               ptc1023)},
+       /* starts at 1024, matching the RX bucket name and the 512-1023 bucket */
+       {"tx_size_1024_to_max_packets", offsetof(struct e1000_hw_stats,
+               ptc1522)},
+       {"tx_multicast_packets", offsetof(struct e1000_hw_stats, mptc)},
+       {"tx_broadcast_packets", offsetof(struct e1000_hw_stats, bptc)},
+       {"tx_tso_packets", offsetof(struct e1000_hw_stats, tsctc)},
+       {"tx_tso_errors", offsetof(struct e1000_hw_stats, tsctfc)},
+       {"rx_sent_to_host_packets", offsetof(struct e1000_hw_stats, rpthc)},
+       {"tx_sent_by_host_packets", offsetof(struct e1000_hw_stats, hgptc)},
+       {"rx_code_violation_packets", offsetof(struct e1000_hw_stats, scvpc)},
+
+       {"interrupt_assert_count", offsetof(struct e1000_hw_stats, iac)},
+};
+
+/* Number of entries in rte_igb_stats_strings. */
+#define IGB_NB_XSTATS (sizeof(rte_igb_stats_strings) / \
+               sizeof(rte_igb_stats_strings[0]))
+
+/*
+ * Extended-statistics table for struct e1000_vf_stats counters: each entry
+ * pairs the reported name with the offset of the backing field.
+ */
+static const struct rte_igb_xstats_name_off rte_igbvf_stats_strings[] = {
+       {"rx_multicast_packets", offsetof(struct e1000_vf_stats, mprc)},
+       {"rx_good_loopback_packets", offsetof(struct e1000_vf_stats, gprlbc)},
+       {"tx_good_loopback_packets", offsetof(struct e1000_vf_stats, gptlbc)},
+       {"rx_good_loopback_bytes", offsetof(struct e1000_vf_stats, gorlbc)},
+       {"tx_good_loopback_bytes", offsetof(struct e1000_vf_stats, gotlbc)},
+};
+
+/* Number of entries in rte_igbvf_stats_strings. */
+#define IGBVF_NB_XSTATS (sizeof(rte_igbvf_stats_strings) / \
+               sizeof(rte_igbvf_stats_strings[0]))
+
 /**
  * Atomically reads the link status information from global
  * structure rte_eth_dev.
@@ -424,6 +615,41 @@ igb_intr_disable(struct e1000_hw *hw)
        E1000_WRITE_FLUSH(hw);
 }
 
+/*
+ * Enable the VF mailbox interrupt cause.
+ *
+ * Writes the mailbox bit to EIAM, EIAC and EIMS, in that order, and then
+ * issues E1000_WRITE_FLUSH. No other interrupt cause bit is written.
+ */
+static inline void
+igbvf_intr_enable(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       const uint32_t mbx_bit = 1 << E1000_VTIVAR_MISC_MAILBOX;
+
+       E1000_WRITE_REG(hw, E1000_EIAM, mbx_bit);
+       E1000_WRITE_REG(hw, E1000_EIAC, mbx_bit);
+       E1000_WRITE_REG(hw, E1000_EIMS, mbx_bit);
+       E1000_WRITE_FLUSH(hw);
+}
+
+/*
+ * Write the VTIVAR_MISC register so the mailbox cause uses msix_vector.
+ * Only the mailbox entry is handled; RX/TX causes would need this function
+ * to be extended.
+ */
+static void
+igbvf_set_ivar_map(struct e1000_hw *hw, uint8_t msix_vector)
+{
+       /* mailbox entry: vector number masked to its field, plus valid flag */
+       const uint32_t ivar =
+               (msix_vector & E1000_VTIVAR_MISC_INTR_MASK) | E1000_VTIVAR_VALID;
+
+       E1000_WRITE_REG(hw, E1000_VTIVAR_MISC, ivar);
+}
+
+/*
+ * Configure the VF "other cause" IVAR entry, passing
+ * E1000_VTIVAR_MISC_MAILBOX as the vector to igbvf_set_ivar_map().
+ */
+static void
+eth_igbvf_configure_msix_intr(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *vf_hw;
+
+       vf_hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       igbvf_set_ivar_map(vf_hw, E1000_VTIVAR_MISC_MAILBOX);
+}
+
 static inline int32_t
 igb_pf_reset_hw(struct e1000_hw *hw)
 {
@@ -530,6 +756,7 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
        uint32_t ctrl_ext;
 
        pci_dev = eth_dev->pci_dev;
+
        eth_dev->dev_ops = &eth_igb_ops;
        eth_dev->rx_pkt_burst = &eth_igb_recv_pkts;
        eth_dev->tx_pkt_burst = &eth_igb_xmit_pkts;
@@ -543,6 +770,8 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
                return 0;
        }
 
+       rte_eth_copy_pci_info(eth_dev, pci_dev);
+
        hw->hw_addr= (void *)pci_dev->mem_resource[0].addr;
 
        igb_identify_hardware(eth_dev);
@@ -650,6 +879,13 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
                     eth_dev->data->port_id, pci_dev->id.vendor_id,
                     pci_dev->id.device_id);
 
+       rte_intr_callback_register(&pci_dev->intr_handle,
+                                  eth_igb_interrupt_handler,
+                                  (void *)eth_dev);
+
+       /* enable uio/vfio intr/eventfd mapping */
+       rte_intr_enable(&pci_dev->intr_handle);
+
        /* enable support intr */
        igb_intr_enable(eth_dev);
 
@@ -665,7 +901,7 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
 err_late:
        igb_hw_control_release(hw);
 
-       return (error);
+       return error;
 }
 
 static int
@@ -720,6 +956,7 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        int diag;
+       struct ether_addr *perm_addr = (struct ether_addr *)hw->mac.perm_addr;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -738,6 +975,8 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
 
        pci_dev = eth_dev->pci_dev;
 
+       rte_eth_copy_pci_info(eth_dev, pci_dev);
+
        hw->device_id = pci_dev->id.device_id;
        hw->vendor_id = pci_dev->id.vendor_id;
        hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
@@ -770,6 +1009,26 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
                return -ENOMEM;
        }
 
+       /* Generate a random MAC address, if none was assigned by PF. */
+       if (is_zero_ether_addr(perm_addr)) {
+               eth_random_addr(perm_addr->addr_bytes);
+               diag = e1000_rar_set(hw, perm_addr->addr_bytes, 0);
+               if (diag) {
+                       rte_free(eth_dev->data->mac_addrs);
+                       eth_dev->data->mac_addrs = NULL;
+                       return diag;
+               }
+               PMD_INIT_LOG(INFO, "\tVF MAC address not assigned by Host PF");
+               PMD_INIT_LOG(INFO, "\tAssign randomly generated MAC address "
+                            "%02x:%02x:%02x:%02x:%02x:%02x",
+                            perm_addr->addr_bytes[0],
+                            perm_addr->addr_bytes[1],
+                            perm_addr->addr_bytes[2],
+                            perm_addr->addr_bytes[3],
+                            perm_addr->addr_bytes[4],
+                            perm_addr->addr_bytes[5]);
+       }
+
        /* Copy the permanent MAC address */
        ether_addr_copy((struct ether_addr *) hw->mac.perm_addr,
                        &eth_dev->data->mac_addrs[0]);
@@ -779,6 +1038,10 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
                     eth_dev->data->port_id, pci_dev->id.vendor_id,
                     pci_dev->id.device_id, "igb_mac_82576_vf");
 
+       rte_intr_callback_register(&pci_dev->intr_handle,
+                                  eth_igbvf_interrupt_handler,
+                                  (void *)eth_dev);
+
        return 0;
 }
 
@@ -787,6 +1050,7 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev)
 {
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(eth_dev->data->dev_private);
+       struct rte_pci_device *pci_dev = eth_dev->pci_dev;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -803,15 +1067,22 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev)
        rte_free(eth_dev->data->mac_addrs);
        eth_dev->data->mac_addrs = NULL;
 
+       /* disable uio intr before callback unregister */
+       rte_intr_disable(&pci_dev->intr_handle);
+       rte_intr_callback_unregister(&pci_dev->intr_handle,
+                                    eth_igbvf_interrupt_handler,
+                                    (void *)eth_dev);
+
        return 0;
 }
 
 static struct eth_driver rte_igb_pmd = {
        .pci_drv = {
-               .name = "rte_igb_pmd",
                .id_table = pci_id_igb_map,
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
                        RTE_PCI_DRV_DETACHABLE,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_igb_dev_init,
        .eth_dev_uninit = eth_igb_dev_uninit,
@@ -823,22 +1094,16 @@ static struct eth_driver rte_igb_pmd = {
  */
 static struct eth_driver rte_igbvf_pmd = {
        .pci_drv = {
-               .name = "rte_igbvf_pmd",
                .id_table = pci_id_igbvf_map,
+               /* unlike rte_igb_pmd, RTE_PCI_DRV_INTR_LSC is not set here */
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE,
+               /* generic ethdev PCI probe/remove entry points */
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
+       /* per-port init/uninit callbacks and size of the private data area */
        .eth_dev_init = eth_igbvf_dev_init,
        .eth_dev_uninit = eth_igbvf_dev_uninit,
        .dev_private_size = sizeof(struct e1000_adapter),
 };
 
-static int
-rte_igb_pmd_init(const char *name __rte_unused, const char *params __rte_unused)
-{
-       rte_eth_driver_register(&rte_igb_pmd);
-       return 0;
-}
-
 static void
 igb_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev)
 {
@@ -850,18 +1115,76 @@ igb_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev)
        E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 }
 
-/*
- * VF Driver initialization routine.
- * Invoked one at EAL init time.
- * Register itself as the [Virtual Poll Mode] Driver of PCI IGB devices.
- */
+/*
+ * Validate the multi-queue configuration requested in dev->data->dev_conf.
+ *
+ * DCB is rejected in every case. With SR-IOV active, only ETH_MQ_RX_NONE and
+ * ETH_MQ_RX_VMDQ_ONLY are accepted (the RX mode is then forced to
+ * ETH_MQ_RX_VMDQ_ONLY with one queue per pool) and at most one RX and one TX
+ * queue may be configured. Without SR-IOV, the RX mode must be NONE,
+ * VMDQ_ONLY or RSS. An unsupported TX mode only produces a warning.
+ *
+ * Returns 0 when the configuration is usable, -EINVAL otherwise.
+ */
 static int
-rte_igbvf_pmd_init(const char *name __rte_unused, const char *params __rte_unused)
+igb_check_mq_mode(struct rte_eth_dev *dev)
 {
-       PMD_INIT_FUNC_TRACE();
+       enum rte_eth_rx_mq_mode rx_mq_mode = dev->data->dev_conf.rxmode.mq_mode;
+       enum rte_eth_tx_mq_mode tx_mq_mode = dev->data->dev_conf.txmode.mq_mode;
+       uint16_t nb_rx_q = dev->data->nb_rx_queues;
+       /* must come from the TX count, or the TX check below is a no-op */
+       uint16_t nb_tx_q = dev->data->nb_tx_queues;
+
+       if ((rx_mq_mode & ETH_MQ_RX_DCB_FLAG) ||
+           tx_mq_mode == ETH_MQ_TX_DCB ||
+           tx_mq_mode == ETH_MQ_TX_VMDQ_DCB) {
+               PMD_INIT_LOG(ERR, "DCB mode is not supported.");
+               return -EINVAL;
+       }
+       if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
+               /* Check multi-queue mode.
+                * To not break software we accept ETH_MQ_RX_NONE as this might
+                * be used to turn off VLAN filter.
+                */
 
-       rte_eth_driver_register(&rte_igbvf_pmd);
-       return (0);
+               if (rx_mq_mode == ETH_MQ_RX_NONE ||
+                   rx_mq_mode == ETH_MQ_RX_VMDQ_ONLY) {
+                       dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
+                       RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1;
+               } else {
+                       /* Only support one queue on VFs.
+                        * RSS together with SRIOV is not supported.
+                        */
+                       PMD_INIT_LOG(ERR, "SRIOV is active,"
+                                       " wrong mq_mode rx %d.",
+                                       rx_mq_mode);
+                       return -EINVAL;
+               }
+               /* TX mode is not used here, so mode might be ignored.*/
+               if (tx_mq_mode != ETH_MQ_TX_VMDQ_ONLY) {
+                       /* SRIOV only works in VMDq enable mode */
+                       PMD_INIT_LOG(WARNING, "SRIOV is active,"
+                                       " TX mode %d is not supported. "
+                                       " Driver will behave as %d mode.",
+                                       tx_mq_mode, ETH_MQ_TX_VMDQ_ONLY);
+               }
+
+               /* check valid queue number */
+               if ((nb_rx_q > 1) || (nb_tx_q > 1)) {
+                       PMD_INIT_LOG(ERR, "SRIOV is active,"
+                                       " only support one queue on VFs.");
+                       return -EINVAL;
+               }
+       } else {
+               /* Unsupported RX modes are rejected; an unsupported TX mode
+                * only triggers a warning so existing software keeps working.
+                */
+               if (rx_mq_mode != ETH_MQ_RX_NONE &&
+                   rx_mq_mode != ETH_MQ_RX_VMDQ_ONLY &&
+                   rx_mq_mode != ETH_MQ_RX_RSS) {
+                       /* RSS together with VMDq not supported*/
+                       PMD_INIT_LOG(ERR, "RX mode %d is not supported.",
+                                    rx_mq_mode);
+                       return -EINVAL;
+               }
+
+               if (tx_mq_mode != ETH_MQ_TX_NONE &&
+                   tx_mq_mode != ETH_MQ_TX_VMDQ_ONLY) {
+                       PMD_INIT_LOG(WARNING, "TX mode %d is not supported."
+                                       " Due to txmode is meaningless in this"
+                                       " driver, just ignore.",
+                                       tx_mq_mode);
+               }
+       }
+       return 0;
 }
 
 static int
@@ -869,12 +1192,22 @@ eth_igb_configure(struct rte_eth_dev *dev)
 {
        struct e1000_interrupt *intr =
                E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+       int ret;
 
        PMD_INIT_FUNC_TRACE();
+
+       /* multiple queue mode checking */
+       ret  = igb_check_mq_mode(dev);
+       if (ret != 0) {
+               PMD_DRV_LOG(ERR, "igb_check_mq_mode fails with %d.",
+                           ret);
+               return ret;
+       }
+
        intr->flags |= E1000_FLAG_NEED_LINK_UPDATE;
        PMD_INIT_FUNC_TRACE();
 
-       return (0);
+       return 0;
 }
 
 static int
@@ -888,11 +1221,17 @@ eth_igb_start(struct rte_eth_dev *dev)
        int ret, mask;
        uint32_t intr_vector = 0;
        uint32_t ctrl_ext;
+       uint32_t *speeds;
+       int num_speeds;
+       bool autoneg;
 
        PMD_INIT_FUNC_TRACE();
 
+       /* disable uio/vfio intr/eventfd mapping */
+       rte_intr_disable(intr_handle);
+
        /* Power up the phy. Needed to make the link go Up */
-       e1000_power_up_phy(hw);
+       eth_igb_dev_set_link_up(dev);
 
        /*
         * Packet Buffer Allocation (PBA)
@@ -912,7 +1251,7 @@ eth_igb_start(struct rte_eth_dev *dev)
        /* Initialize the hardware */
        if (igb_hardware_init(hw)) {
                PMD_INIT_LOG(ERR, "Unable to initialize the hardware");
-               return (-EIO);
+               return -EIO;
        }
        adapter->stopped = 0;
 
@@ -928,13 +1267,15 @@ eth_igb_start(struct rte_eth_dev *dev)
        igb_pf_host_configure(dev);
 
        /* check and configure queue intr-vector mapping */
-       if (dev->data->dev_conf.intr_conf.rxq != 0)
+       if ((rte_intr_cap_multiple(intr_handle) ||
+            !RTE_ETH_DEV_SRIOV(dev).active) &&
+           dev->data->dev_conf.intr_conf.rxq != 0) {
                intr_vector = dev->data->nb_rx_queues;
+               if (rte_intr_efd_enable(intr_handle, intr_vector))
+                       return -1;
+       }
 
-       if (rte_intr_efd_enable(intr_handle, intr_vector))
-               return -1;
-
-       if (rte_intr_dp_is_en(intr_handle)) {
+       if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
                intr_handle->intr_vec =
                        rte_zmalloc("intr_vec",
                                    dev->data->nb_rx_queues * sizeof(int), 0);
@@ -983,64 +1324,76 @@ eth_igb_start(struct rte_eth_dev *dev)
        }
 
        /* Setup link speed and duplex */
-       switch (dev->data->dev_conf.link_speed) {
-       case ETH_LINK_SPEED_AUTONEG:
-               if (dev->data->dev_conf.link_duplex == ETH_LINK_AUTONEG_DUPLEX)
-                       hw->phy.autoneg_advertised = E1000_ALL_SPEED_DUPLEX;
-               else if (dev->data->dev_conf.link_duplex == ETH_LINK_HALF_DUPLEX)
-                       hw->phy.autoneg_advertised = E1000_ALL_HALF_DUPLEX;
-               else if (dev->data->dev_conf.link_duplex == ETH_LINK_FULL_DUPLEX)
-                       hw->phy.autoneg_advertised = E1000_ALL_FULL_DUPLEX;
-               else
-                       goto error_invalid_config;
-               break;
-       case ETH_LINK_SPEED_10:
-               if (dev->data->dev_conf.link_duplex == ETH_LINK_AUTONEG_DUPLEX)
-                       hw->phy.autoneg_advertised = E1000_ALL_10_SPEED;
-               else if (dev->data->dev_conf.link_duplex == ETH_LINK_HALF_DUPLEX)
-                       hw->phy.autoneg_advertised = ADVERTISE_10_HALF;
-               else if (dev->data->dev_conf.link_duplex == ETH_LINK_FULL_DUPLEX)
-                       hw->phy.autoneg_advertised = ADVERTISE_10_FULL;
-               else
-                       goto error_invalid_config;
-               break;
-       case ETH_LINK_SPEED_100:
-               if (dev->data->dev_conf.link_duplex == ETH_LINK_AUTONEG_DUPLEX)
-                       hw->phy.autoneg_advertised = E1000_ALL_100_SPEED;
-               else if (dev->data->dev_conf.link_duplex == ETH_LINK_HALF_DUPLEX)
-                       hw->phy.autoneg_advertised = ADVERTISE_100_HALF;
-               else if (dev->data->dev_conf.link_duplex == ETH_LINK_FULL_DUPLEX)
-                       hw->phy.autoneg_advertised = ADVERTISE_100_FULL;
-               else
+       speeds = &dev->data->dev_conf.link_speeds;
+       if (*speeds == ETH_LINK_SPEED_AUTONEG) {
+               hw->phy.autoneg_advertised = E1000_ALL_SPEED_DUPLEX;
+               hw->mac.autoneg = 1;
+       } else {
+               num_speeds = 0;
+               autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0;
+
+               /* Reset */
+               hw->phy.autoneg_advertised = 0;
+
+               if (*speeds & ~(ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
+                               ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
+                               ETH_LINK_SPEED_1G | ETH_LINK_SPEED_FIXED)) {
+                       num_speeds = -1;
                        goto error_invalid_config;
-               break;
-       case ETH_LINK_SPEED_1000:
-               if ((dev->data->dev_conf.link_duplex == ETH_LINK_AUTONEG_DUPLEX) ||
-                               (dev->data->dev_conf.link_duplex == ETH_LINK_FULL_DUPLEX))
-                       hw->phy.autoneg_advertised = ADVERTISE_1000_FULL;
-               else
+               }
+               if (*speeds & ETH_LINK_SPEED_10M_HD) {
+                       hw->phy.autoneg_advertised |= ADVERTISE_10_HALF;
+                       num_speeds++;
+               }
+               if (*speeds & ETH_LINK_SPEED_10M) {
+                       hw->phy.autoneg_advertised |= ADVERTISE_10_FULL;
+                       num_speeds++;
+               }
+               if (*speeds & ETH_LINK_SPEED_100M_HD) {
+                       hw->phy.autoneg_advertised |= ADVERTISE_100_HALF;
+                       num_speeds++;
+               }
+               if (*speeds & ETH_LINK_SPEED_100M) {
+                       hw->phy.autoneg_advertised |= ADVERTISE_100_FULL;
+                       num_speeds++;
+               }
+               if (*speeds & ETH_LINK_SPEED_1G) {
+                       hw->phy.autoneg_advertised |= ADVERTISE_1000_FULL;
+                       num_speeds++;
+               }
+               if (num_speeds == 0 || (!autoneg && (num_speeds > 1)))
                        goto error_invalid_config;
-               break;
-       case ETH_LINK_SPEED_10000:
-       default:
-               goto error_invalid_config;
+
+               /* Set/reset the mac.autoneg based on the link speed,
+                * fixed or not
+                */
+               if (!autoneg) {
+                       hw->mac.autoneg = 0;
+                       hw->mac.forced_speed_duplex =
+                                       hw->phy.autoneg_advertised;
+               } else {
+                       hw->mac.autoneg = 1;
+               }
        }
+
        e1000_setup_link(hw);
 
-       /* check if lsc interrupt feature is enabled */
-       if (dev->data->dev_conf.intr_conf.lsc != 0) {
-               if (rte_intr_allow_others(intr_handle)) {
-                       rte_intr_callback_register(intr_handle,
-                                                  eth_igb_interrupt_handler,
-                                                  (void *)dev);
+       if (rte_intr_allow_others(intr_handle)) {
+               /* check if lsc interrupt is enabled */
+               if (dev->data->dev_conf.intr_conf.lsc != 0)
                        eth_igb_lsc_interrupt_setup(dev);
-               } else
+       } else {
+               rte_intr_callback_unregister(intr_handle,
+                                            eth_igb_interrupt_handler,
+                                            (void *)dev);
+               if (dev->data->dev_conf.intr_conf.lsc != 0)
                        PMD_INIT_LOG(INFO, "lsc won't enable because of"
                                     " no intr multiplex\n");
        }
 
        /* check if rxq interrupt is enabled */
-       if (dev->data->dev_conf.intr_conf.rxq != 0)
+       if (dev->data->dev_conf.intr_conf.rxq != 0 &&
+           rte_intr_dp_is_en(intr_handle))
                eth_igb_rxq_interrupt_setup(dev);
 
        /* enable uio/vfio intr/eventfd mapping */
@@ -1051,14 +1404,13 @@ eth_igb_start(struct rte_eth_dev *dev)
 
        PMD_INIT_LOG(DEBUG, "<<");
 
-       return (0);
+       return 0;
 
 error_invalid_config:
-       PMD_INIT_LOG(ERR, "Invalid link_speed/link_duplex (%u/%u) for port %u",
-                    dev->data->dev_conf.link_speed,
-                    dev->data->dev_conf.link_duplex, dev->data->port_id);
+       PMD_INIT_LOG(ERR, "Invalid advertised speeds (%u) for port %u",
+                    dev->data->dev_conf.link_speeds, dev->data->port_id);
        igb_dev_clear_queues(dev);
-       return (-EINVAL);
+       return -EINVAL;
 }
 
 /*********************************************************************
@@ -1097,10 +1449,7 @@ eth_igb_stop(struct rte_eth_dev *dev)
        }
 
        /* Power down the phy. Needed to make the link go Down */
-       if (hw->phy.media_type == e1000_media_type_copper)
-               e1000_power_down_phy(hw);
-       else
-               e1000_shutdown_fiber_serdes_link(hw);
+       eth_igb_dev_set_link_down(dev);
 
        igb_dev_clear_queues(dev);
 
@@ -1133,6 +1482,12 @@ eth_igb_stop(struct rte_eth_dev *dev)
        }
        filter_info->twotuple_mask = 0;
 
+       if (!rte_intr_allow_others(intr_handle))
+               /* restore the default handler */
+               rte_intr_callback_register(intr_handle,
+                                          eth_igb_interrupt_handler,
+                                          (void *)dev);
+
        /* Clean datapath event and queue/vec mapping */
        rte_intr_efd_disable(intr_handle);
        if (intr_handle->intr_vec != NULL) {
@@ -1141,6 +1496,32 @@ eth_igb_stop(struct rte_eth_dev *dev)
        }
 }
 
+static int
+eth_igb_dev_set_link_up(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (hw->phy.media_type == e1000_media_type_copper)
+               e1000_power_up_phy(hw);
+       else
+               e1000_power_up_fiber_serdes_link(hw);
+
+       return 0;
+}
+
+static int
+eth_igb_dev_set_link_down(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (hw->phy.media_type == e1000_media_type_copper)
+               e1000_power_down_phy(hw);
+       else
+               e1000_shutdown_fiber_serdes_link(hw);
+
+       return 0;
+}
+
 static void
 eth_igb_close(struct rte_eth_dev *dev)
 {
@@ -1245,24 +1626,28 @@ igb_hardware_init(struct e1000_hw *hw)
 
        diag = e1000_init_hw(hw);
        if (diag < 0)
-               return (diag);
+               return diag;
 
        E1000_WRITE_REG(hw, E1000_VET, ETHER_TYPE_VLAN << 16 | ETHER_TYPE_VLAN);
        e1000_get_phy_info(hw);
        e1000_check_for_link(hw);
 
-       return (0);
+       return 0;
 }
 
 /* This function is based on igb_update_stats_counters() in igb/if_igb.c */
 static void
-eth_igb_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
+igb_read_stats_registers(struct e1000_hw *hw, struct e1000_hw_stats *stats)
 {
-       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct e1000_hw_stats *stats =
-                       E1000_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
        int pause_frames;
 
+       uint64_t old_gprc  = stats->gprc;
+       uint64_t old_gptc  = stats->gptc;
+       uint64_t old_tpr   = stats->tpr;
+       uint64_t old_tpt   = stats->tpt;
+       uint64_t old_rpthc = stats->rpthc;
+       uint64_t old_hgptc = stats->hgptc;
+
        if(hw->phy.media_type == e1000_media_type_copper ||
            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
                stats->symerrs +=
@@ -1304,10 +1689,13 @@ eth_igb_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
        /* For the 64-bit byte counters the low dword must be read first. */
        /* Both registers clear on the read of the high dword */
 
+       /* Workaround: size includes CRC bytes, subtract 4 bytes/packet */
        stats->gorc += E1000_READ_REG(hw, E1000_GORCL);
        stats->gorc += ((uint64_t)E1000_READ_REG(hw, E1000_GORCH) << 32);
+       stats->gorc -= (stats->gprc - old_gprc) * ETHER_CRC_LEN;
        stats->gotc += E1000_READ_REG(hw, E1000_GOTCL);
        stats->gotc += ((uint64_t)E1000_READ_REG(hw, E1000_GOTCH) << 32);
+       stats->gotc -= (stats->gptc - old_gptc) * ETHER_CRC_LEN;
 
        stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
        stats->ruc += E1000_READ_REG(hw, E1000_RUC);
@@ -1315,13 +1703,16 @@ eth_igb_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
        stats->roc += E1000_READ_REG(hw, E1000_ROC);
        stats->rjc += E1000_READ_REG(hw, E1000_RJC);
 
+       stats->tpr += E1000_READ_REG(hw, E1000_TPR);
+       stats->tpt += E1000_READ_REG(hw, E1000_TPT);
+
        stats->tor += E1000_READ_REG(hw, E1000_TORL);
        stats->tor += ((uint64_t)E1000_READ_REG(hw, E1000_TORH) << 32);
+       stats->tor -= (stats->tpr - old_tpr) * ETHER_CRC_LEN;
        stats->tot += E1000_READ_REG(hw, E1000_TOTL);
        stats->tot += ((uint64_t)E1000_READ_REG(hw, E1000_TOTH) << 32);
+       stats->tot -= (stats->tpt - old_tpt) * ETHER_CRC_LEN;
 
-       stats->tpr += E1000_READ_REG(hw, E1000_TPR);
-       stats->tpt += E1000_READ_REG(hw, E1000_TPT);
        stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
        stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
        stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
@@ -1354,8 +1745,10 @@ eth_igb_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
        stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
        stats->hgorc += E1000_READ_REG(hw, E1000_HGORCL);
        stats->hgorc += ((uint64_t)E1000_READ_REG(hw, E1000_HGORCH) << 32);
+       stats->hgorc -= (stats->rpthc - old_rpthc) * ETHER_CRC_LEN;
        stats->hgotc += E1000_READ_REG(hw, E1000_HGOTCL);
        stats->hgotc += ((uint64_t)E1000_READ_REG(hw, E1000_HGOTCH) << 32);
+       stats->hgotc -= (stats->hgptc - old_hgptc) * ETHER_CRC_LEN;
        stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
        stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
        stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
@@ -1366,28 +1759,29 @@ eth_igb_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
        stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
        stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
        stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
+}
+
+static void
+eth_igb_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct e1000_hw_stats *stats =
+                       E1000_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+
+       igb_read_stats_registers(hw, stats);
 
        if (rte_stats == NULL)
                return;
 
        /* Rx Errors */
-       rte_stats->ibadcrc = stats->crcerrs;
-       rte_stats->ibadlen = stats->rlec + stats->ruc + stats->roc;
        rte_stats->imissed = stats->mpc;
-       rte_stats->ierrors = rte_stats->ibadcrc +
-                            rte_stats->ibadlen +
-                            rte_stats->imissed +
+       rte_stats->ierrors = stats->crcerrs +
+                            stats->rlec + stats->ruc + stats->roc +
                             stats->rxerrc + stats->algnerrc + stats->cexterr;
 
        /* Tx Errors */
        rte_stats->oerrors = stats->ecol + stats->latecol;
 
-       /* XON/XOFF pause frames */
-       rte_stats->tx_pause_xon  = stats->xontxc;
-       rte_stats->rx_pause_xon  = stats->xonrxc;
-       rte_stats->tx_pause_xoff = stats->xofftxc;
-       rte_stats->rx_pause_xoff = stats->xoffrxc;
-
        rte_stats->ipackets = stats->gprc;
        rte_stats->opackets = stats->gptc;
        rte_stats->ibytes   = stats->gorc;
@@ -1408,12 +1802,70 @@ eth_igb_stats_reset(struct rte_eth_dev *dev)
 }
 
 static void
-eth_igbvf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
+eth_igb_xstats_reset(struct rte_eth_dev *dev)
+{
+       struct e1000_hw_stats *stats =
+                       E1000_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+
+       /* HW registers are cleared on read */
+       eth_igb_xstats_get(dev, NULL, IGB_NB_XSTATS);
+
+       /* Reset software totals */
+       memset(stats, 0, sizeof(*stats));
+}
+
+static int eth_igb_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
+       struct rte_eth_xstat_name *xstats_names,
+       __rte_unused unsigned limit)
+{
+       unsigned i;
+
+       if (xstats_names == NULL)
+               return IGB_NB_XSTATS;
+
+       /* Note: limit checked in rte_eth_xstats_names() */
+
+       for (i = 0; i < IGB_NB_XSTATS; i++) {
+               snprintf(xstats_names[i].name, sizeof(xstats_names[i].name),
+                        "%s", rte_igb_stats_strings[i].name);
+       }
+
+       return IGB_NB_XSTATS;
+}
+
+static int
+eth_igb_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
+                  unsigned n)
 {
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct e1000_vf_stats *hw_stats = (struct e1000_vf_stats*)
-                         E1000_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+       struct e1000_hw_stats *hw_stats =
+                       E1000_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+       unsigned i;
+
+       if (n < IGB_NB_XSTATS)
+               return IGB_NB_XSTATS;
+
+       igb_read_stats_registers(hw, hw_stats);
+
+       /* If this is a reset, xstats is NULL and we have already cleared
+        * the registers by reading them.
+        */
+       if (!xstats)
+               return 0;
+
+       /* Extended stats */
+       for (i = 0; i < IGB_NB_XSTATS; i++) {
+               xstats[i].id = i;
+               xstats[i].value = *(uint64_t *)(((char *)hw_stats) +
+                       rte_igb_stats_strings[i].offset);
+       }
+
+       return IGB_NB_XSTATS;
+}
 
+static void
+igbvf_read_stats_registers(struct e1000_hw *hw, struct e1000_vf_stats *hw_stats)
+{
        /* Good Rx packets, include VF loopback */
        UPDATE_VF_STAT(E1000_VFGPRC,
            hw_stats->last_gprc, hw_stats->gprc);
@@ -1449,6 +1901,57 @@ eth_igbvf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
        /* Good Tx loopback octets */
        UPDATE_VF_STAT(E1000_VFGOTLBC,
            hw_stats->last_gotlbc, hw_stats->gotlbc);
+}
+
+static int eth_igbvf_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
+                                    struct rte_eth_xstat_name *xstats_names,
+                                    __rte_unused unsigned limit)
+{
+       unsigned i;
+
+       if (xstats_names != NULL)
+               for (i = 0; i < IGBVF_NB_XSTATS; i++) {
+                       snprintf(xstats_names[i].name,
+                               sizeof(xstats_names[i].name), "%s",
+                               rte_igbvf_stats_strings[i].name);
+               }
+       return IGBVF_NB_XSTATS;
+}
+
+static int
+eth_igbvf_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
+                    unsigned n)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct e1000_vf_stats *hw_stats = (struct e1000_vf_stats *)
+                       E1000_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+       unsigned i;
+
+       if (n < IGBVF_NB_XSTATS)
+               return IGBVF_NB_XSTATS;
+
+       igbvf_read_stats_registers(hw, hw_stats);
+
+       if (!xstats)
+               return 0;
+
+       for (i = 0; i < IGBVF_NB_XSTATS; i++) {
+               xstats[i].id = i;
+               xstats[i].value = *(uint64_t *)(((char *)hw_stats) +
+                       rte_igbvf_stats_strings[i].offset);
+       }
+
+       return IGBVF_NB_XSTATS;
+}
+
+static void
+eth_igbvf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct e1000_vf_stats *hw_stats = (struct e1000_vf_stats *)
+                         E1000_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+
+       igbvf_read_stats_registers(hw, hw_stats);
 
        if (rte_stats == NULL)
                return;
@@ -1457,12 +1960,6 @@ eth_igbvf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
        rte_stats->ibytes = hw_stats->gorc;
        rte_stats->opackets = hw_stats->gptc;
        rte_stats->obytes = hw_stats->gotc;
-       rte_stats->imcasts = hw_stats->mprc;
-       rte_stats->ilbpackets = hw_stats->gprlbc;
-       rte_stats->ilbbytes = hw_stats->gorlbc;
-       rte_stats->olbpackets = hw_stats->gptlbc;
-       rte_stats->olbbytes = hw_stats->gotlbc;
-
 }
 
 static void
@@ -1477,7 +1974,6 @@ eth_igbvf_stats_reset(struct rte_eth_dev *dev)
        /* reset HW current stats*/
        memset(&hw_stats->gprc, 0, sizeof(*hw_stats) -
               offsetof(struct e1000_vf_stats, gprc));
-
 }
 
 static void
@@ -1572,6 +2068,40 @@ eth_igb_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                },
                .txq_flags = 0,
        };
+
+       dev_info->rx_desc_lim = rx_desc_lim;
+       dev_info->tx_desc_lim = tx_desc_lim;
+
+       dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
+                       ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
+                       ETH_LINK_SPEED_1G;
+}
+
+static const uint32_t *
+eth_igb_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+       static const uint32_t ptypes[] = {
+               /* refers to igb_rxd_pkt_info_to_pkt_type() */
+               RTE_PTYPE_L2_ETHER,
+               RTE_PTYPE_L3_IPV4,
+               RTE_PTYPE_L3_IPV4_EXT,
+               RTE_PTYPE_L3_IPV6,
+               RTE_PTYPE_L3_IPV6_EXT,
+               RTE_PTYPE_L4_TCP,
+               RTE_PTYPE_L4_UDP,
+               RTE_PTYPE_L4_SCTP,
+               RTE_PTYPE_TUNNEL_IP,
+               RTE_PTYPE_INNER_L3_IPV6,
+               RTE_PTYPE_INNER_L3_IPV6_EXT,
+               RTE_PTYPE_INNER_L4_TCP,
+               RTE_PTYPE_INNER_L4_UDP,
+               RTE_PTYPE_UNKNOWN
+       };
+
+       if (dev->rx_pkt_burst == eth_igb_recv_pkts ||
+           dev->rx_pkt_burst == eth_igb_recv_scattered_pkts)
+               return ptypes;
+       return NULL;
 }
 
 static void
@@ -1624,6 +2154,9 @@ eth_igbvf_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                },
                .txq_flags = 0,
        };
+
+       dev_info->rx_desc_lim = rx_desc_lim;
+       dev_info->tx_desc_lim = tx_desc_lim;
 }
 
 /* return 0 means link status changed, -1 means not changed */
@@ -1678,13 +2211,20 @@ eth_igb_link_update(struct rte_eth_dev *dev, int wait_to_complete)
 
        /* Now we check if a transition has happened */
        if (link_check) {
-               hw->mac.ops.get_link_up_info(hw, &link.link_speed,
-                                         &link.link_duplex);
-               link.link_status = 1;
+               uint16_t duplex, speed;
+               hw->mac.ops.get_link_up_info(hw, &speed, &duplex);
+               link.link_duplex = (duplex == FULL_DUPLEX) ?
+                               ETH_LINK_FULL_DUPLEX :
+                               ETH_LINK_HALF_DUPLEX;
+               link.link_speed = speed;
+               link.link_status = ETH_LINK_UP;
+               link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+                               ETH_LINK_SPEED_FIXED);
        } else if (!link_check) {
                link.link_speed = 0;
-               link.link_duplex = 0;
-               link.link_status = 0;
+               link.link_duplex = ETH_LINK_HALF_DUPLEX;
+               link.link_status = ETH_LINK_DOWN;
+               link.link_autoneg = ETH_LINK_SPEED_FIXED;
        }
        rte_igb_dev_atomic_write_link_status(dev, &link);
 
@@ -1845,15 +2385,32 @@ eth_igb_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
        return 0;
 }
 
-static void
-eth_igb_vlan_tpid_set(struct rte_eth_dev *dev, uint16_t tpid)
+static int
+eth_igb_vlan_tpid_set(struct rte_eth_dev *dev,
+                     enum rte_vlan_type vlan_type,
+                     uint16_t tpid)
 {
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       uint32_t reg = ETHER_TYPE_VLAN ;
+       uint32_t reg, qinq;
+
+       qinq = E1000_READ_REG(hw, E1000_CTRL_EXT);
+       qinq &= E1000_CTRL_EXT_EXT_VLAN;
+
+       /* only the outer TPID of a double VLAN can be configured */
+       if (qinq && vlan_type == ETH_VLAN_TYPE_OUTER) {
+               reg = E1000_READ_REG(hw, E1000_VET);
+               reg = (reg & (~E1000_VET_VET_EXT)) |
+                       ((uint32_t)tpid << E1000_VET_VET_EXT_SHIFT);
+               E1000_WRITE_REG(hw, E1000_VET, reg);
 
-       reg |= (tpid << 16);
-       E1000_WRITE_REG(hw, E1000_VET, reg);
+               return 0;
+       }
+
+       /* all other TPID values are read-only */
+       PMD_DRV_LOG(ERR, "Not supported");
+
+       return -ENOTSUP;
 }
 
 static void
@@ -2138,7 +2695,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev)
                E1000_WRITE_REG(hw, E1000_TCTL, tctl);
                E1000_WRITE_REG(hw, E1000_RCTL, rctl);
                E1000_WRITE_FLUSH(hw);
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
        }
 
        return 0;
@@ -2165,13 +2722,76 @@ eth_igb_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
        eth_igb_interrupt_action(dev);
 }
 
+static int
+eth_igbvf_interrupt_get_status(struct rte_eth_dev *dev)
+{
+       uint32_t eicr;
+       struct e1000_hw *hw =
+               E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct e1000_interrupt *intr =
+               E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+       igbvf_intr_disable(hw);
+
+       /* read the clear-on-read NIC registers here */
+       eicr = E1000_READ_REG(hw, E1000_EICR);
+       intr->flags = 0;
+
+       if (eicr == E1000_VTIVAR_MISC_MAILBOX)
+               intr->flags |= E1000_FLAG_MAILBOX;
+
+       return 0;
+}
+
+void igbvf_mbx_process(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw =
+               E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct e1000_mbx_info *mbx = &hw->mbx;
+       u32 in_msg = 0;
+
+       if (mbx->ops.read(hw, &in_msg, 1, 0))
+               return;
+
+       /* PF reset VF event */
+       if (in_msg == E1000_PF_CONTROL_MSG)
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, NULL);
+}
+
+static int
+eth_igbvf_interrupt_action(struct rte_eth_dev *dev)
+{
+       struct e1000_interrupt *intr =
+               E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+       if (intr->flags & E1000_FLAG_MAILBOX) {
+               igbvf_mbx_process(dev);
+               intr->flags &= ~E1000_FLAG_MAILBOX;
+       }
+
+       igbvf_intr_enable(dev);
+       rte_intr_enable(&dev->pci_dev->intr_handle);
+
+       return 0;
+}
+
+static void
+eth_igbvf_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
+                           void *param)
+{
+       struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+
+       eth_igbvf_interrupt_get_status(dev);
+       eth_igbvf_interrupt_action(dev);
+}
+
 static int
 eth_igb_led_on(struct rte_eth_dev *dev)
 {
        struct e1000_hw *hw;
 
        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       return (e1000_led_on(hw) == E1000_SUCCESS ? 0 : -ENOTSUP);
+       return e1000_led_on(hw) == E1000_SUCCESS ? 0 : -ENOTSUP;
 }
 
 static int
@@ -2180,7 +2800,7 @@ eth_igb_led_off(struct rte_eth_dev *dev)
        struct e1000_hw *hw;
 
        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       return (e1000_led_off(hw) == E1000_SUCCESS ? 0 : -ENOTSUP);
+       return e1000_led_off(hw) == E1000_SUCCESS ? 0 : -ENOTSUP;
 }
 
 static int
@@ -2252,7 +2872,7 @@ eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
            (fc_conf->high_water < fc_conf->low_water)) {
                PMD_INIT_LOG(ERR, "e1000 incorrect high/low water value");
                PMD_INIT_LOG(ERR, "high water must <=  0x%x", max_high_water);
-               return (-EINVAL);
+               return -EINVAL;
        }
 
        hw->fc.requested_mode = rte_fcmode_2_e1000_fcmode[fc_conf->mode];
@@ -2282,7 +2902,7 @@ eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
        }
 
        PMD_INIT_LOG(ERR, "e1000_setup_link_generic = 0x%x", err);
-       return (-EIO);
+       return -EIO;
 }
 
 #define E1000_RAH_POOLSEL_SHIFT      (18)
@@ -2436,6 +3056,8 @@ igbvf_dev_start(struct rte_eth_dev *dev)
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(dev->data->dev_private);
        int ret;
+       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       uint32_t intr_vector = 0;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -2455,12 +3077,41 @@ igbvf_dev_start(struct rte_eth_dev *dev)
                return ret;
        }
 
+       /* check and configure queue intr-vector mapping */
+       if (dev->data->dev_conf.intr_conf.rxq != 0) {
+               intr_vector = dev->data->nb_rx_queues;
+               ret = rte_intr_efd_enable(intr_handle, intr_vector);
+               if (ret)
+                       return ret;
+       }
+
+       if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
+               intr_handle->intr_vec =
+                       rte_zmalloc("intr_vec",
+                                   dev->data->nb_rx_queues * sizeof(int), 0);
+               if (!intr_handle->intr_vec) {
+                       PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
+                                    " intr_vec\n", dev->data->nb_rx_queues);
+                       return -ENOMEM;
+               }
+       }
+
+       eth_igbvf_configure_msix_intr(dev);
+
+       /* enable uio/vfio intr/eventfd mapping */
+       rte_intr_enable(intr_handle);
+
+       /* re-enable interrupts, as the hw has been reset */
+       igbvf_intr_enable(dev);
+
        return 0;
 }
 
 static void
 igbvf_dev_stop(struct rte_eth_dev *dev)
 {
+       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+
        PMD_INIT_FUNC_TRACE();
 
        igbvf_stop_adapter(dev);
@@ -2472,6 +3123,16 @@ igbvf_dev_stop(struct rte_eth_dev *dev)
        igbvf_set_vfta_all(dev,0);
 
        igb_dev_clear_queues(dev);
+
+       /* disable intr eventfd mapping */
+       rte_intr_disable(intr_handle);
+
+       /* Clean datapath event and queue/vec mapping */
+       rte_intr_efd_disable(intr_handle);
+       if (intr_handle->intr_vec) {
+               rte_free(intr_handle->intr_vec);
+               intr_handle->intr_vec = NULL;
+       }
 }
 
 static void
@@ -2480,6 +3141,7 @@ igbvf_dev_close(struct rte_eth_dev *dev)
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(dev->data->dev_private);
+       struct ether_addr addr;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -2488,12 +3150,63 @@ igbvf_dev_close(struct rte_eth_dev *dev)
        igbvf_dev_stop(dev);
        adapter->stopped = 1;
        igb_dev_free_queues(dev);
+
+       /**
+        * reprogram the RAR with a zero mac address,
+        * to ensure that the VF traffic goes to the PF
+        * after stop, close and detach of the VF.
+        **/
+
+       memset(&addr, 0, sizeof(addr));
+       igbvf_default_mac_addr_set(dev, &addr);
+}
+
+static void
+igbvf_promiscuous_enable(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       /* Set both unicast and multicast promisc */
+       e1000_promisc_set_vf(hw, e1000_promisc_enabled);
+}
+
+static void
+igbvf_promiscuous_disable(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       /* If in allmulticast mode leave multicast promisc */
+       if (dev->data->all_multicast == 1)
+               e1000_promisc_set_vf(hw, e1000_promisc_multicast);
+       else
+               e1000_promisc_set_vf(hw, e1000_promisc_disabled);
+}
+
+static void
+igbvf_allmulticast_enable(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       /* In promiscuous mode multicast promisc already set */
+       if (dev->data->promiscuous == 0)
+               e1000_promisc_set_vf(hw, e1000_promisc_multicast);
+}
+
+static void
+igbvf_allmulticast_disable(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       /* In promiscuous mode leave multicast promisc enabled */
+       if (dev->data->promiscuous == 0)
+               e1000_promisc_set_vf(hw, e1000_promisc_disabled);
 }
 
 static int igbvf_set_vfta(struct e1000_hw *hw, uint16_t vid, bool on)
 {
        struct e1000_mbx_info *mbx = &hw->mbx;
        uint32_t msgbuf[2];
+       s32 err;
 
        /* After set vlan, vlan strip will also be enabled in igb driver*/
        msgbuf[0] = E1000_VF_SET_VLAN;
@@ -2502,7 +3215,20 @@ static int igbvf_set_vfta(struct e1000_hw *hw, uint16_t vid, bool on)
        if (on)
                msgbuf[0] |= E1000_VF_SET_VLAN_ADD;
 
-       return (mbx->ops.write_posted(hw, msgbuf, 2, 0));
+       err = mbx->ops.write_posted(hw, msgbuf, 2, 0);
+       if (err)
+               goto mbx_err;
+
+       err = mbx->ops.read_posted(hw, msgbuf, 2, 0);
+       if (err)
+               goto mbx_err;
+
+       msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS;
+       if (msgbuf[0] == (E1000_VF_SET_VLAN | E1000_VT_MSGTYPE_NACK))
+               err = -EINVAL;
+
+mbx_err:
+       return err;
 }
 
 static void igbvf_set_vfta_all(struct rte_eth_dev *dev, bool on)
@@ -3883,6 +4609,209 @@ eth_igb_set_mc_addr_list(struct rte_eth_dev *dev,
        return 0;
 }
 
+static uint64_t
+igb_read_systime_cyclecounter(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint64_t systime_cycles;
+
+       switch (hw->mac.type) {
+       case e1000_i210:
+       case e1000_i211:
+               /*
+                * Need to read System Time Residue Register to be able
+                * to read the other two registers.
+                */
+               E1000_READ_REG(hw, E1000_SYSTIMR);
+               /* SYSTIML stores ns and SYSTIMH stores seconds. */
+               systime_cycles = (uint64_t)E1000_READ_REG(hw, E1000_SYSTIML);
+               systime_cycles += (uint64_t)E1000_READ_REG(hw, E1000_SYSTIMH)
+                               * NSEC_PER_SEC;
+               break;
+       case e1000_82580:
+       case e1000_i350:
+       case e1000_i354:
+               /*
+                * Need to read System Time Residue Register to be able
+                * to read the other two registers.
+                */
+               E1000_READ_REG(hw, E1000_SYSTIMR);
+               systime_cycles = (uint64_t)E1000_READ_REG(hw, E1000_SYSTIML);
+               /* Only the 8 LSBs of SYSTIMH are valid. */
+               systime_cycles |= (uint64_t)(E1000_READ_REG(hw, E1000_SYSTIMH)
+                               & 0xff) << 32;
+               break;
+       default:
+               systime_cycles = (uint64_t)E1000_READ_REG(hw, E1000_SYSTIML);
+               systime_cycles |= (uint64_t)E1000_READ_REG(hw, E1000_SYSTIMH)
+                               << 32;
+               break;
+       }
+
+       return systime_cycles;
+}
+
+static uint64_t
+igb_read_rx_tstamp_cyclecounter(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint64_t rx_tstamp_cycles;
+
+       switch (hw->mac.type) {
+       case e1000_i210:
+       case e1000_i211:
+               /* RXSTMPL stores ns and RXSTMPH stores seconds. */
+               rx_tstamp_cycles = (uint64_t)E1000_READ_REG(hw, E1000_RXSTMPL);
+               rx_tstamp_cycles += (uint64_t)E1000_READ_REG(hw, E1000_RXSTMPH)
+                               * NSEC_PER_SEC;
+               break;
+       case e1000_82580:
+       case e1000_i350:
+       case e1000_i354:
+               rx_tstamp_cycles = (uint64_t)E1000_READ_REG(hw, E1000_RXSTMPL);
+               /* Only the 8 LSBs of RXSTMPH are valid. */
+               rx_tstamp_cycles |= (uint64_t)(E1000_READ_REG(hw, E1000_RXSTMPH)
+                               & 0xff) << 32;
+               break;
+       default:
+               rx_tstamp_cycles = (uint64_t)E1000_READ_REG(hw, E1000_RXSTMPL);
+               rx_tstamp_cycles |= (uint64_t)E1000_READ_REG(hw, E1000_RXSTMPH)
+                               << 32;
+               break;
+       }
+
+       return rx_tstamp_cycles;
+}
+
+static uint64_t
+igb_read_tx_tstamp_cyclecounter(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint64_t tx_tstamp_cycles;
+
+       switch (hw->mac.type) {
+       case e1000_i210:
+       case e1000_i211:
+               /* TXSTMPL stores ns and TXSTMPH stores seconds. */
+               tx_tstamp_cycles = (uint64_t)E1000_READ_REG(hw, E1000_TXSTMPL);
+               tx_tstamp_cycles += (uint64_t)E1000_READ_REG(hw, E1000_TXSTMPH)
+                               * NSEC_PER_SEC;
+               break;
+       case e1000_82580:
+       case e1000_i350:
+       case e1000_i354:
+               tx_tstamp_cycles = (uint64_t)E1000_READ_REG(hw, E1000_TXSTMPL);
+               /* Only the 8 LSBs of TXSTMPH are valid. */
+               tx_tstamp_cycles |= (uint64_t)(E1000_READ_REG(hw, E1000_TXSTMPH)
+                               & 0xff) << 32;
+               break;
+       default:
+               tx_tstamp_cycles = (uint64_t)E1000_READ_REG(hw, E1000_TXSTMPL);
+               tx_tstamp_cycles |= (uint64_t)E1000_READ_REG(hw, E1000_TXSTMPH)
+                               << 32;
+               break;
+       }
+
+       return tx_tstamp_cycles;
+}
+
+static void
+igb_start_timecounters(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct e1000_adapter *adapter =
+               (struct e1000_adapter *)dev->data->dev_private;
+       uint32_t incval = 1;
+       uint32_t shift = 0;
+       uint64_t mask = E1000_CYCLECOUNTER_MASK;
+
+       switch (hw->mac.type) {
+       case e1000_82580:
+       case e1000_i350:
+       case e1000_i354:
+               /* 32 low-register bits + 8 valid high-register bits = 40 */
+               mask = (1ULL << 40) - 1;
+               /* fall-through */
+       case e1000_i210:
+       case e1000_i211:
+               /*
+                * Start incrementing the register
+                * used to timestamp PTP packets.
+                */
+               E1000_WRITE_REG(hw, E1000_TIMINCA, incval);
+               break;
+       case e1000_82576:
+               incval = E1000_INCVALUE_82576;
+               shift = IGB_82576_TSYNC_SHIFT;
+               E1000_WRITE_REG(hw, E1000_TIMINCA,
+                               E1000_INCPERIOD_82576 | incval);
+               break;
+       default:
+               /* Not supported: leave the timecounters untouched. */
+               return;
+       }
+
+       memset(&adapter->systime_tc, 0, sizeof(struct rte_timecounter));
+       memset(&adapter->rx_tstamp_tc, 0, sizeof(struct rte_timecounter));
+       memset(&adapter->tx_tstamp_tc, 0, sizeof(struct rte_timecounter));
+
+       adapter->systime_tc.cc_mask = mask;
+       adapter->systime_tc.cc_shift = shift;
+       adapter->systime_tc.nsec_mask = (1ULL << shift) - 1;
+
+       adapter->rx_tstamp_tc.cc_mask = mask;
+       adapter->rx_tstamp_tc.cc_shift = shift;
+       adapter->rx_tstamp_tc.nsec_mask = (1ULL << shift) - 1;
+
+       adapter->tx_tstamp_tc.cc_mask = mask;
+       adapter->tx_tstamp_tc.cc_shift = shift;
+       adapter->tx_tstamp_tc.nsec_mask = (1ULL << shift) - 1;
+}
+
+static int
+igb_timesync_adjust_time(struct rte_eth_dev *dev, int64_t delta)
+{
+       struct e1000_adapter *adapter =
+                       (struct e1000_adapter *)dev->data->dev_private;
+
+       adapter->systime_tc.nsec += delta;
+       adapter->rx_tstamp_tc.nsec += delta;
+       adapter->tx_tstamp_tc.nsec += delta;
+
+       return 0;
+}
+
+static int
+igb_timesync_write_time(struct rte_eth_dev *dev, const struct timespec *ts)
+{
+       uint64_t ns;
+       struct e1000_adapter *adapter =
+                       (struct e1000_adapter *)dev->data->dev_private;
+
+       ns = rte_timespec_to_ns(ts);
+
+       /* Overwrite the nsec value of all three timecounters with ts. */
+       adapter->systime_tc.nsec = ns;
+       adapter->rx_tstamp_tc.nsec = ns;
+       adapter->tx_tstamp_tc.nsec = ns;
+
+       return 0;
+}
+
+static int
+igb_timesync_read_time(struct rte_eth_dev *dev, struct timespec *ts)
+{
+       uint64_t ns, systime_cycles;
+       struct e1000_adapter *adapter =
+                       (struct e1000_adapter *)dev->data->dev_private;
+
+       systime_cycles = igb_read_systime_cyclecounter(dev);
+       ns = rte_timecounter_update(&adapter->systime_tc, systime_cycles);
+       *ts = rte_ns_to_timespec(ns);
+
+       return 0;
+}
+
 static int
 igb_timesync_enable(struct rte_eth_dev *dev)
 {
@@ -3890,13 +4819,32 @@ igb_timesync_enable(struct rte_eth_dev *dev)
        uint32_t tsync_ctl;
        uint32_t tsauxc;
 
+       /* Stop the timesync system time. */
+       E1000_WRITE_REG(hw, E1000_TIMINCA, 0x0);
+       /* Reset the timesync system time value. */
+       switch (hw->mac.type) {
+       case e1000_82580:
+       case e1000_i350:
+       case e1000_i354:
+       case e1000_i210:
+       case e1000_i211:
+               E1000_WRITE_REG(hw, E1000_SYSTIMR, 0x0);
+               /* fall-through */
+       case e1000_82576:
+               E1000_WRITE_REG(hw, E1000_SYSTIML, 0x0);
+               E1000_WRITE_REG(hw, E1000_SYSTIMH, 0x0);
+               break;
+       default:
+               /* Not supported. */
+               return -ENOTSUP;
+       }
+
        /* Enable system time for it isn't on by default. */
        tsauxc = E1000_READ_REG(hw, E1000_TSAUXC);
        tsauxc &= ~E1000_TSAUXC_DISABLE_SYSTIME;
        E1000_WRITE_REG(hw, E1000_TSAUXC, tsauxc);
 
-       /* Start incrementing the register used to timestamp PTP packets. */
-       E1000_WRITE_REG(hw, E1000_TIMINCA, E1000_TIMINCA_INIT);
+       igb_start_timecounters(dev);
 
        /* Enable L2 filtering of IEEE1588/802.1AS Ethernet frame types. */
        E1000_WRITE_REG(hw, E1000_ETQF(E1000_ETQF_FILTER_1588),
@@ -3948,19 +4896,19 @@ igb_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
                               uint32_t flags __rte_unused)
 {
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct e1000_adapter *adapter =
+                       (struct e1000_adapter *)dev->data->dev_private;
        uint32_t tsync_rxctl;
-       uint32_t rx_stmpl;
-       uint32_t rx_stmph;
+       uint64_t rx_tstamp_cycles;
+       uint64_t ns;
 
        tsync_rxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
        if ((tsync_rxctl & E1000_TSYNCRXCTL_VALID) == 0)
                return -EINVAL;
 
-       rx_stmpl = E1000_READ_REG(hw, E1000_RXSTMPL);
-       rx_stmph = E1000_READ_REG(hw, E1000_RXSTMPH);
-
-       timestamp->tv_sec = (uint64_t)(((uint64_t)rx_stmph << 32) | rx_stmpl);
-       timestamp->tv_nsec = 0;
+       rx_tstamp_cycles = igb_read_rx_tstamp_cyclecounter(dev);
+       ns = rte_timecounter_update(&adapter->rx_tstamp_tc, rx_tstamp_cycles);
+       *timestamp = rte_ns_to_timespec(ns);
 
        return  0;
 }
@@ -3970,19 +4918,19 @@ igb_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
                               struct timespec *timestamp)
 {
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct e1000_adapter *adapter =
+                       (struct e1000_adapter *)dev->data->dev_private;
        uint32_t tsync_txctl;
-       uint32_t tx_stmpl;
-       uint32_t tx_stmph;
+       uint64_t tx_tstamp_cycles;
+       uint64_t ns;
 
        tsync_txctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
        if ((tsync_txctl & E1000_TSYNCTXCTL_VALID) == 0)
                return -EINVAL;
 
-       tx_stmpl = E1000_READ_REG(hw, E1000_TXSTMPL);
-       tx_stmph = E1000_READ_REG(hw, E1000_TXSTMPH);
-
-       timestamp->tv_sec = (uint64_t)(((uint64_t)tx_stmph << 32) | tx_stmpl);
-       timestamp->tv_nsec = 0;
+       tx_tstamp_cycles = igb_read_tx_tstamp_cyclecounter(dev);
+       ns = rte_timecounter_update(&adapter->tx_tstamp_tc, tx_tstamp_cycles);
+       *timestamp = rte_ns_to_timespec(ns);
 
        return  0;
 }
@@ -4023,6 +4971,12 @@ eth_igb_get_regs(struct rte_eth_dev *dev,
        int count = 0;
        const struct reg_info *reg_group;
 
+       if (data == NULL) {
+               regs->length = eth_igb_get_reg_length(dev);
+               regs->width = sizeof(uint32_t);
+               return 0;
+       }
+
        /* Support only full register dump */
        if ((regs->length == 0) ||
            (regs->length == (uint32_t)eth_igb_get_reg_length(dev))) {
@@ -4047,6 +5001,12 @@ igbvf_get_regs(struct rte_eth_dev *dev,
        int count = 0;
        const struct reg_info *reg_group;
 
+       if (data == NULL) {
+               regs->length = igbvf_get_reg_length(dev);
+               regs->width = sizeof(uint32_t);
+               return 0;
+       }
+
        /* Support only full register dump */
        if ((regs->length == 0) ||
            (regs->length == (uint32_t)igbvf_get_reg_length(dev))) {
@@ -4117,16 +5077,6 @@ eth_igb_set_eeprom(struct rte_eth_dev *dev,
        return nvm->ops.write(hw,  first, length, data);
 }
 
-static struct rte_driver pmd_igb_drv = {
-       .type = PMD_PDEV,
-       .init = rte_igb_pmd_init,
-};
-
-static struct rte_driver pmd_igbvf_drv = {
-       .type = PMD_PDEV,
-       .init = rte_igbvf_pmd_init,
-};
-
 static int
 eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
@@ -4213,7 +5163,10 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
        uint32_t tmpval, regval, intr_mask;
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       uint32_t vec = 0;
+       uint32_t vec = E1000_MISC_VEC_ID;
+       uint32_t base = E1000_MISC_VEC_ID;
+       uint32_t misc_shift = 0;
+
        struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
 
        /* won't configure msix register if no mapping is done
@@ -4222,6 +5175,11 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
        if (!rte_intr_dp_is_en(intr_handle))
                return;
 
+       if (rte_intr_allow_others(intr_handle)) {
+               vec = base = E1000_RX_VEC_START;
+               misc_shift = 1;
+       }
+
        /* set interrupt vector for other causes */
        if (hw->mac.type == e1000_82575) {
                tmpval = E1000_READ_REG(hw, E1000_CTRL_EXT);
@@ -4250,8 +5208,8 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
                E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                        E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                        E1000_GPIE_NSICR);
-
-               intr_mask = (1 << intr_handle->max_intr) - 1;
+               intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) <<
+                       misc_shift;
                regval = E1000_READ_REG(hw, E1000_EIAC);
                E1000_WRITE_REG(hw, E1000_EIAC, regval | intr_mask);
 
@@ -4265,19 +5223,24 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
        /* use EIAM to auto-mask when MSI-X interrupt
         * is asserted, this saves a register write for every interrupt
         */
-       intr_mask = (1 << intr_handle->nb_efd) - 1;
+       intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) <<
+               misc_shift;
        regval = E1000_READ_REG(hw, E1000_EIAM);
        E1000_WRITE_REG(hw, E1000_EIAM, regval | intr_mask);
 
        for (queue_id = 0; queue_id < dev->data->nb_rx_queues; queue_id++) {
                eth_igb_assign_msix_vector(hw, 0, queue_id, vec);
                intr_handle->intr_vec[queue_id] = vec;
-               if (vec < intr_handle->nb_efd - 1)
+               if (vec < base + intr_handle->nb_efd - 1)
                        vec++;
        }
 
        E1000_WRITE_FLUSH(hw);
 }
 
-PMD_REGISTER_DRIVER(pmd_igb_drv);
-PMD_REGISTER_DRIVER(pmd_igbvf_drv);
+RTE_PMD_REGISTER_PCI(net_e1000_igb, rte_igb_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_e1000_igb, pci_id_igb_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_e1000_igb, "* igb_uio | uio_pci_generic | vfio");
+RTE_PMD_REGISTER_PCI(net_e1000_igb_vf, rte_igbvf_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_e1000_igb_vf, pci_id_igbvf_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_e1000_igb_vf, "* igb_uio | vfio");