update Intel copyright years to 2014
diff --git a/lib/librte_pmd_e1000/igb_ethdev.c b/lib/librte_pmd_e1000/igb_ethdev.c
index f0071bf..77244e6 100644
--- a/lib/librte_pmd_e1000/igb_ethdev.c
+++ b/lib/librte_pmd_e1000/igb_ethdev.c
@@ -1,35 +1,34 @@
 /*-
  *   BSD LICENSE
  * 
- *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
  *   All rights reserved.
  * 
- *   Redistribution and use in source and binary forms, with or without 
- *   modification, are permitted provided that the following conditions 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
  *   are met:
  * 
- *     * Redistributions of source code must retain the above copyright 
+ *     * Redistributions of source code must retain the above copyright
  *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright 
- *       notice, this list of conditions and the following disclaimer in 
- *       the documentation and/or other materials provided with the 
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
  *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its 
- *       contributors may be used to endorse or promote products derived 
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
  *       from this software without specific prior written permission.
  * 
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
  */
 
 #include <sys/queue.h>
@@ -74,7 +73,7 @@ static void eth_igb_infos_get(struct rte_eth_dev *dev,
                                struct rte_eth_dev_info *dev_info);
 static int  eth_igb_flow_ctrl_set(struct rte_eth_dev *dev,
                                struct rte_eth_fc_conf *fc_conf);
-static int eth_igb_interrupt_setup(struct rte_eth_dev *dev);
+static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev);
 static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev);
 static int eth_igb_interrupt_action(struct rte_eth_dev *dev);
 static void eth_igb_interrupt_handler(struct rte_intr_handle *handle,
@@ -119,6 +118,10 @@ static int igbvf_vlan_filter_set(struct rte_eth_dev *dev,
                uint16_t vlan_id, int on);
 static int igbvf_set_vfta(struct e1000_hw *hw, uint16_t vid, bool on);
 static void igbvf_set_vfta_all(struct rte_eth_dev *dev, bool on);
+static int eth_igb_rss_reta_update(struct rte_eth_dev *dev,
+                struct rte_eth_rss_reta *reta_conf);
+static int eth_igb_rss_reta_query(struct rte_eth_dev *dev,
+               struct rte_eth_rss_reta *reta_conf);
 
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
@@ -135,6 +138,8 @@ static void igbvf_set_vfta_all(struct rte_eth_dev *dev, bool on);
 #define IGB_LINK_UPDATE_CHECK_TIMEOUT  90  /* 9s */
 #define IGB_LINK_UPDATE_CHECK_INTERVAL 100 /* ms */
 
+#define IGBVF_PMD_NAME "rte_igbvf_pmd"     /* PMD name */
+
 static enum e1000_fc_mode igb_fc_setting = e1000_fc_full;
 
 /*
@@ -177,6 +182,8 @@ static struct eth_dev_ops eth_igb_ops = {
        .vlan_offload_set     = eth_igb_vlan_offload_set,
        .rx_queue_setup       = eth_igb_rx_queue_setup,
        .rx_queue_release     = eth_igb_rx_queue_release,
+       .rx_queue_count       = eth_igb_rx_queue_count,
+       .rx_descriptor_done   = eth_igb_rx_descriptor_done,
        .tx_queue_setup       = eth_igb_tx_queue_setup,
        .tx_queue_release     = eth_igb_tx_queue_release,
        .dev_led_on           = eth_igb_led_on,
@@ -184,6 +191,8 @@ static struct eth_dev_ops eth_igb_ops = {
        .flow_ctrl_set        = eth_igb_flow_ctrl_set,
        .mac_addr_add         = eth_igb_rar_set,
        .mac_addr_remove      = eth_igb_rar_clear,
+       .reta_update          = eth_igb_rss_reta_update,
+       .reta_query           = eth_igb_rss_reta_query,
 };
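The ops-table additions above expose two Rx status hooks and the RSS indirection-table hooks through the generic ethdev layer. The sketch below is illustrative only; it assumes the rte_eth_rx_queue_count()/rte_eth_rx_descriptor_done() inline wrappers of this DPDK generation dispatch to the eth_igb_* implementations registered here, and the helper name and output format are invented for the example.

#include <stdio.h>
#include <rte_ethdev.h>

/* Hypothetical monitoring helper: report how far the hardware has
 * progressed on one Rx queue of an igb port. */
static void
igb_rx_queue_progress(uint8_t port_id, uint16_t queue_id)
{
	/* Descriptors the NIC has filled but the application has not yet
	 * drained with rte_eth_rx_burst(); served by eth_igb_rx_queue_count. */
	uint32_t used = rte_eth_rx_queue_count(port_id, queue_id);

	/* Non-zero if the descriptor at the given offset from the next read
	 * position is already done; served by eth_igb_rx_descriptor_done. */
	int done = rte_eth_rx_descriptor_done(port_id, queue_id, 0);

	printf("port %u queue %u: %u used descriptors, next done=%d\n",
	       (unsigned)port_id, (unsigned)queue_id, (unsigned)used, done);
}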
 
 /*
@@ -258,6 +267,42 @@ rte_igb_dev_atomic_write_link_status(struct rte_eth_dev *dev,
        return 0;
 }
 
+static inline void
+igb_intr_enable(struct rte_eth_dev *dev)
+{
+       struct e1000_interrupt *intr =
+               E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+       struct e1000_hw *hw =
+               E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       E1000_WRITE_REG(hw, E1000_IMS, intr->mask);
+       E1000_WRITE_FLUSH(hw);
+}
+
+static void
+igb_intr_disable(struct e1000_hw *hw)
+{
+       E1000_WRITE_REG(hw, E1000_IMC, ~0);
+       E1000_WRITE_FLUSH(hw);
+}
+
+static inline int32_t
+igb_pf_reset_hw(struct e1000_hw *hw)
+{
+       uint32_t ctrl_ext;
+       int32_t status;
+       status = e1000_reset_hw(hw);
+       ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+       /* Set PF Reset Done bit so PF/VF Mail Ops can work */
+       ctrl_ext |= E1000_CTRL_EXT_PFRSTD;
+       E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+       E1000_WRITE_FLUSH(hw);
+       return status;
+}
+
 static void
 igb_identify_hardware(struct rte_eth_dev *dev)
 {
@@ -284,6 +329,7 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
                E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct e1000_vfta * shadow_vfta =
                        E1000_DEV_PRIVATE_TO_VFTA(eth_dev->data->dev_private);
+       uint32_t ctrl_ext;
 
        pci_dev = eth_dev->pci_dev;
        eth_dev->dev_ops = &eth_igb_ops;
@@ -299,10 +345,9 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
                return 0;
        }
 
-       hw->hw_addr= (void *)pci_dev->mem_resource.addr;
+       hw->hw_addr= (void *)pci_dev->mem_resource[0].addr;
 
        igb_identify_hardware(eth_dev);
-
        if (e1000_setup_init_funcs(hw, TRUE) != E1000_SUCCESS) {
                error = -EIO;
                goto err_late;
@@ -325,7 +370,7 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
         * Start from a known state, this is important in reading the nvm
         * and mac from that.
         */
-       e1000_reset_hw(hw);
+       igb_pf_reset_hw(hw);
 
        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
@@ -381,6 +426,15 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
                                        "SOL/IDER session");
        }
 
+       /* initialize PF if max_vfs not zero */
+       igb_pf_host_init(eth_dev);
+       ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+       /* Set PF Reset Done bit so PF/VF Mail Ops can work */
+       ctrl_ext |= E1000_CTRL_EXT_PFRSTD;
+       E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+       E1000_WRITE_FLUSH(hw);
+
        PMD_INIT_LOG(INFO, "port_id %d vendorID=0x%x deviceID=0x%x\n",
                     eth_dev->data->port_id, pci_dev->id.vendor_id,
                     pci_dev->id.device_id);
@@ -388,6 +442,12 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
        rte_intr_callback_register(&(pci_dev->intr_handle),
                eth_igb_interrupt_handler, (void *)eth_dev);
 
+       /* enable uio intr after callback register */
+       rte_intr_enable(&(pci_dev->intr_handle));
+
+       /* enable support intr */
+       igb_intr_enable(eth_dev);
+
        return 0;
 
 err_late:
@@ -411,11 +471,23 @@ eth_igbvf_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
        PMD_INIT_LOG(DEBUG, "eth_igbvf_dev_init");
 
        eth_dev->dev_ops = &igbvf_eth_dev_ops;
+       eth_dev->rx_pkt_burst = &eth_igb_recv_pkts;
+       eth_dev->tx_pkt_burst = &eth_igb_xmit_pkts;
+
+       /* For secondary processes, we don't initialise any further as the
+        * primary has already done this work. Only check whether we need a
+        * different RX function. */
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               if (eth_dev->data->scattered_rx)
+                       eth_dev->rx_pkt_burst = &eth_igb_recv_scattered_pkts;
+               return 0;
+       }
+
        pci_dev = eth_dev->pci_dev;
 
        hw->device_id = pci_dev->id.device_id;
        hw->vendor_id = pci_dev->id.vendor_id;
-       hw->hw_addr = (void *)pci_dev->mem_resource.addr;
+       hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
 
        /* Initialize the shared code */
        diag = e1000_setup_init_funcs(hw, TRUE);
@@ -430,7 +502,7 @@ eth_igbvf_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
 
        /* Disable the interrupts for VF */
        igbvf_intr_disable(hw);
 
        diag = hw->mac.ops.reset_hw(hw);
 
        /* Allocate memory for storing MAC addresses */
@@ -443,6 +515,7 @@ eth_igbvf_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
                        ETHER_ADDR_LEN * hw->mac.rar_entry_count);
                return -ENOMEM;
        }
+
        /* Copy the permanent MAC address */
        ether_addr_copy((struct ether_addr *) hw->mac.perm_addr,
                        &eth_dev->data->mac_addrs[0]);
@@ -460,7 +533,9 @@ static struct eth_driver rte_igb_pmd = {
        {
                .name = "rte_igb_pmd",
                .id_table = pci_id_igb_map,
+#ifdef RTE_EAL_UNBIND_PORTS
                .drv_flags = RTE_PCI_DRV_NEED_IGB_UIO,
+#endif
        },
        .eth_dev_init = eth_igb_dev_init,
        .dev_private_size = sizeof(struct e1000_adapter),
@@ -473,7 +548,9 @@ static struct eth_driver rte_igbvf_pmd = {
        {
                .name = "rte_igbvf_pmd",
                .id_table = pci_id_igbvf_map,
+#ifdef RTE_EAL_UNBIND_PORTS
                .drv_flags = RTE_PCI_DRV_NEED_IGB_UIO,
+#endif
        },
        .eth_dev_init = eth_igbvf_dev_init,
        .dev_private_size = sizeof(struct e1000_adapter),
@@ -486,6 +563,17 @@ rte_igb_pmd_init(void)
        return 0;
 }
 
+static void
+igb_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev)
+{
+       struct e1000_hw *hw =
+               E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       /* RCTL: enable VLAN filter since VMDq always uses VLAN filtering */
+       uint32_t rctl = E1000_READ_REG(hw, E1000_RCTL);
+       rctl |= E1000_RCTL_VFE;
+       E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+}
+
 /*
  * VF Driver initialization routine.
  * Invoked one at EAL init time.
@@ -521,11 +609,10 @@ eth_igb_start(struct rte_eth_dev *dev)
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int ret, i, mask;
+       uint32_t ctrl_ext;
 
        PMD_INIT_LOG(DEBUG, ">>");
 
-       igb_intr_disable(hw);
-
        /* Power up the phy. Needed to make the link go Up */
        e1000_power_up_phy(hw);
 
@@ -552,6 +639,15 @@ eth_igb_start(struct rte_eth_dev *dev)
 
        E1000_WRITE_REG(hw, E1000_VET, ETHER_TYPE_VLAN);
 
+       ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+       /* Set PF Reset Done bit so PF/VF Mail Ops can work */
+       ctrl_ext |= E1000_CTRL_EXT_PFRSTD;
+       E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+       E1000_WRITE_FLUSH(hw);
+
+       /* configure PF module if SRIOV enabled */
+       igb_pf_host_configure(dev);
+
        /* Configure for OS presence */
        igb_init_manageability(hw);
 
@@ -574,6 +670,11 @@ eth_igb_start(struct rte_eth_dev *dev)
                        ETH_VLAN_EXTEND_MASK;
        eth_igb_vlan_offload_set(dev, mask);
 
+       if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_ONLY) {
+               /* Enable VLAN filter since VMDq always uses VLAN filtering */
+               igb_vmdq_vlan_hw_filter_enable(dev);
+       }
+
        /*
         * Configure the Interrupt Moderation register (EITR) with the maximum
         * possible value (0xFFFF) to minimize "System Partial Write" issued by
@@ -650,14 +751,11 @@ eth_igb_start(struct rte_eth_dev *dev)
        e1000_setup_link(hw);
 
        /* check if lsc interrupt feature is enabled */
-       if (dev->data->dev_conf.intr_conf.lsc != 0) {
-               ret = eth_igb_interrupt_setup(dev);
-               if (ret) {
-                       PMD_INIT_LOG(ERR, "Unable to setup interrupts");
-                       igb_dev_clear_queues(dev);
-                       return ret;
-               }
-       }
+       if (dev->data->dev_conf.intr_conf.lsc != 0)
+               ret = eth_igb_lsc_interrupt_setup(dev);
+
+       /* resume enabled intr since hw reset */
+       igb_intr_enable(dev);
 
        PMD_INIT_LOG(DEBUG, "<<");
 
@@ -684,9 +782,18 @@ eth_igb_stop(struct rte_eth_dev *dev)
        struct rte_eth_link link;
 
        igb_intr_disable(hw);
-       e1000_reset_hw(hw);
+       igb_pf_reset_hw(hw);
        E1000_WRITE_REG(hw, E1000_WUC, 0);
 
+       /* Set bit for Go Link disconnect */
+       if (hw->mac.type >= e1000_82580) {
+               uint32_t phpm_reg;
+
+               phpm_reg = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
+               phpm_reg |= E1000_82580_PM_GO_LINKD;
+               E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, phpm_reg);
+       }
+
        /* Power down the phy. Needed to make the link go Down */
        e1000_power_down_phy(hw);
 
@@ -708,6 +815,15 @@ eth_igb_close(struct rte_eth_dev *dev)
        igb_release_manageability(hw);
        igb_hw_control_release(hw);
 
+       /* Clear bit for Go Link disconnect */
+       if (hw->mac.type >= e1000_82580) {
+               uint32_t phpm_reg;
+
+               phpm_reg = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
+               phpm_reg &= ~E1000_82580_PM_GO_LINKD;
+               E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, phpm_reg);
+       }
+
        igb_dev_clear_queues(dev);
 
        memset(&link, 0, sizeof(link));
@@ -776,7 +892,7 @@ igb_hardware_init(struct e1000_hw *hw)
                hw->fc.requested_mode = e1000_fc_none;
 
        /* Issue a global reset */
-       e1000_reset_hw(hw);
+       igb_pf_reset_hw(hw);
        E1000_WRITE_REG(hw, E1000_WUC, 0);
 
        diag = e1000_init_hw(hw);
@@ -1018,19 +1134,28 @@ eth_igb_infos_get(struct rte_eth_dev *dev,
        case e1000_82575:
                dev_info->max_rx_queues = 4;
                dev_info->max_tx_queues = 4;
+               dev_info->max_vmdq_pools = 0;
                break;
 
        case e1000_82576:
                dev_info->max_rx_queues = 16;
                dev_info->max_tx_queues = 16;
+               dev_info->max_vmdq_pools = ETH_8_POOLS;
                break;
 
        case e1000_82580:
                dev_info->max_rx_queues = 8;
                dev_info->max_tx_queues = 8;
+               dev_info->max_vmdq_pools = ETH_8_POOLS;
                break;
 
        case e1000_i350:
+               dev_info->max_rx_queues = 8;
+               dev_info->max_tx_queues = 8;
+               dev_info->max_vmdq_pools = ETH_8_POOLS;
+               break;
+
+       case e1000_i354:
                dev_info->max_rx_queues = 8;
                dev_info->max_tx_queues = 8;
                break;
@@ -1038,22 +1163,26 @@ eth_igb_infos_get(struct rte_eth_dev *dev,
        case e1000_i210:
                dev_info->max_rx_queues = 4;
                dev_info->max_tx_queues = 4;
+               dev_info->max_vmdq_pools = 0;
                break;
 
        case e1000_vfadapt:
                dev_info->max_rx_queues = 2;
                dev_info->max_tx_queues = 2;
+               dev_info->max_vmdq_pools = 0;
                break;
 
        case e1000_vfadapt_i350:
                dev_info->max_rx_queues = 1;
                dev_info->max_tx_queues = 1;
+               dev_info->max_vmdq_pools = 0;
                break;
 
        default:
                /* Should not happen */
                dev_info->max_rx_queues = 0;
                dev_info->max_tx_queues = 0;
+               dev_info->max_vmdq_pools = 0;
        }
 }
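Because eth_igb_infos_get() now reports max_vmdq_pools, an application can check the pool capacity before selecting ETH_MQ_RX_VMDQ_ONLY (which, in the start-up path above, also turns on hardware VLAN filtering). A minimal sketch, assuming the standard rte_eth_dev_info_get() wrapper; the helper itself is hypothetical.

#include <rte_ethdev.h>

/* Illustrative capability check: can this port provide the requested
 * number of VMDq pools? */
static int
igb_vmdq_pools_supported(uint8_t port_id, uint16_t wanted_pools)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);

	/* 82576/82580/I350 report ETH_8_POOLS here; 82575, I210 and the
	 * VF adapters report 0, i.e. no VMDq support. */
	return dev_info.max_vmdq_pools >= wanted_pools;
}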
 
@@ -1333,10 +1462,6 @@ igb_vlan_hw_strip_disable(struct rte_eth_dev *dev)
        reg = E1000_READ_REG(hw, E1000_CTRL);
        reg &= ~E1000_CTRL_VME;
        E1000_WRITE_REG(hw, E1000_CTRL, reg);
-
-       /* Update maximum frame size */
-       E1000_WRITE_REG(hw, E1000_RLPML,
-               dev->data->dev_conf.rxmode.max_rx_pkt_len + VLAN_TAG_SIZE);
 }
 
 static void
@@ -1350,11 +1475,6 @@ igb_vlan_hw_strip_enable(struct rte_eth_dev *dev)
        reg = E1000_READ_REG(hw, E1000_CTRL);
        reg |= E1000_CTRL_VME;
        E1000_WRITE_REG(hw, E1000_CTRL, reg);
-
-       /* Update maximum frame size */
-       E1000_WRITE_REG(hw, E1000_RLPML,
-               dev->data->dev_conf.rxmode.max_rx_pkt_len);
-
 }
 
 static void
@@ -1369,6 +1489,11 @@ igb_vlan_hw_extend_disable(struct rte_eth_dev *dev)
        reg &= ~E1000_CTRL_EXT_EXTEND_VLAN;
        E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
 
+       /* Update maximum packet length */
+       if (dev->data->dev_conf.rxmode.jumbo_frame == 1)
+               E1000_WRITE_REG(hw, E1000_RLPML,
+                       dev->data->dev_conf.rxmode.max_rx_pkt_len +
+                                               VLAN_TAG_SIZE);
 }
 
 static void
@@ -1382,6 +1507,12 @@ igb_vlan_hw_extend_enable(struct rte_eth_dev *dev)
        reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
        reg |= E1000_CTRL_EXT_EXTEND_VLAN;
        E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
+
+       /* Update maximum packet length */
+       if (dev->data->dev_conf.rxmode.jumbo_frame == 1)
+               E1000_WRITE_REG(hw, E1000_RLPML,
+                       dev->data->dev_conf.rxmode.max_rx_pkt_len +
+                                               2 * VLAN_TAG_SIZE);
 }
 
 static void
@@ -1409,12 +1540,6 @@ eth_igb_vlan_offload_set(struct rte_eth_dev *dev, int mask)
        }
 }
 
-static void
-igb_intr_disable(struct e1000_hw *hw)
-{
-       E1000_WRITE_REG(hw, E1000_IMC, ~0);
-       E1000_WRITE_FLUSH(hw);
-}
 
 /**
  * It enables the interrupt mask and then enable the interrupt.
@@ -1427,14 +1552,12 @@ igb_intr_disable(struct e1000_hw *hw)
  *  - On failure, a negative value.
  */
 static int
-eth_igb_interrupt_setup(struct rte_eth_dev *dev)
+eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev)
 {
-       struct e1000_hw *hw =
-               E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct e1000_interrupt *intr =
+               E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
 
-       E1000_WRITE_REG(hw, E1000_IMS, E1000_ICR_LSC);
-       E1000_WRITE_FLUSH(hw);
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       intr->mask |= E1000_ICR_LSC;
 
        return 0;
 }
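eth_igb_lsc_interrupt_setup() now only accumulates E1000_ICR_LSC into intr->mask; the mask is written to IMS later by igb_intr_enable(), and the handler path ends in _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC). A hedged sketch of the application side, assuming the usual rte_eth_dev_callback_register() and rte_eth_link_get_nowait() APIs; the callback body is illustrative.

#include <stdio.h>
#include <rte_ethdev.h>

/* Hypothetical LSC callback, invoked from eth_igb_interrupt_action()
 * via _rte_eth_dev_callback_process(). */
static void
lsc_event_cb(uint8_t port_id, enum rte_eth_event_type type, void *arg)
{
	struct rte_eth_link link;

	(void)arg;
	if (type != RTE_ETH_EVENT_INTR_LSC)
		return;

	rte_eth_link_get_nowait(port_id, &link);
	printf("port %u link is %s\n", (unsigned)port_id,
	       link.link_status ? "up" : "down");
}

/* At init time the application would set port_conf.intr_conf.lsc = 1,
 * so that eth_igb_start() calls eth_igb_lsc_interrupt_setup(), and
 * register the callback:
 *
 *   rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *                                 lsc_event_cb, NULL);
 */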
@@ -1459,12 +1582,19 @@ eth_igb_interrupt_get_status(struct rte_eth_dev *dev)
        struct e1000_interrupt *intr =
                E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
 
+       igb_intr_disable(hw);
+
        /* read-on-clear nic registers here */
        icr = E1000_READ_REG(hw, E1000_ICR);
+
+       intr->flags = 0;
        if (icr & E1000_ICR_LSC) {
                intr->flags |= E1000_FLAG_NEED_LINK_UPDATE;
        }
 
+       if (icr & E1000_ICR_VMMB)
+               intr->flags |= E1000_FLAG_MAILBOX;
+
        return 0;
 }
 
@@ -1489,51 +1619,58 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev)
        struct rte_eth_link link;
        int ret;
 
-       if (!(intr->flags & E1000_FLAG_NEED_LINK_UPDATE))
-               return -1;
+       if (intr->flags & E1000_FLAG_MAILBOX) {
+               igb_pf_mbx_process(dev);
+               intr->flags &= ~E1000_FLAG_MAILBOX;
+       }
 
-       intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE;
+       igb_intr_enable(dev);
        rte_intr_enable(&(dev->pci_dev->intr_handle));
 
-       /* set get_link_status to check register later */
-       hw->mac.get_link_status = 1;
-       ret = eth_igb_link_update(dev, 0);
-
-       /* check if link has changed */
-       if (ret < 0)
-               return 0;
-
-       memset(&link, 0, sizeof(link));
-       rte_igb_dev_atomic_read_link_status(dev, &link);
-       if (link.link_status) {
-               PMD_INIT_LOG(INFO,
-                       " Port %d: Link Up - speed %u Mbps - %s\n",
-                       dev->data->port_id, (unsigned)link.link_speed,
-                       link.link_duplex == ETH_LINK_FULL_DUPLEX ?
-                               "full-duplex" : "half-duplex");
-       } else {
-               PMD_INIT_LOG(INFO, " Port %d: Link Down\n",
-                                       dev->data->port_id);
-       }
-       PMD_INIT_LOG(INFO, "PCI Address: %04d:%02d:%02d:%d",
-                               dev->pci_dev->addr.domain,
-                               dev->pci_dev->addr.bus,
-                               dev->pci_dev->addr.devid,
-                               dev->pci_dev->addr.function);
-       tctl = E1000_READ_REG(hw, E1000_TCTL);
-       rctl = E1000_READ_REG(hw, E1000_RCTL);
-       if (link.link_status) {
-               /* enable Tx/Rx */
-               tctl |= E1000_TCTL_EN;
-               rctl |= E1000_RCTL_EN;
-       } else {
-               /* disable Tx/Rx */
-               tctl &= ~E1000_TCTL_EN;
-               rctl &= ~E1000_RCTL_EN;
+       if (intr->flags & E1000_FLAG_NEED_LINK_UPDATE) {
+               intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE;
+
+               /* set get_link_status to check register later */
+               hw->mac.get_link_status = 1;
+               ret = eth_igb_link_update(dev, 0);
+
+               /* check if link has changed */
+               if (ret < 0)
+                       return 0;
+
+               memset(&link, 0, sizeof(link));
+               rte_igb_dev_atomic_read_link_status(dev, &link);
+               if (link.link_status) {
+                       PMD_INIT_LOG(INFO,
+                               " Port %d: Link Up - speed %u Mbps - %s\n",
+                               dev->data->port_id, (unsigned)link.link_speed,
+                               link.link_duplex == ETH_LINK_FULL_DUPLEX ?
+                                       "full-duplex" : "half-duplex");
+               } else {
+                       PMD_INIT_LOG(INFO, " Port %d: Link Down\n",
+                                               dev->data->port_id);
+               }
+               PMD_INIT_LOG(INFO, "PCI Address: %04d:%02d:%02d:%d",
+                                       dev->pci_dev->addr.domain,
+                                       dev->pci_dev->addr.bus,
+                                       dev->pci_dev->addr.devid,
+                                       dev->pci_dev->addr.function);
+               tctl = E1000_READ_REG(hw, E1000_TCTL);
+               rctl = E1000_READ_REG(hw, E1000_RCTL);
+               if (link.link_status) {
+                       /* enable Tx/Rx */
+                       tctl |= E1000_TCTL_EN;
+                       rctl |= E1000_RCTL_EN;
+               } else {
+                       /* disable Tx/Rx */
+                       tctl &= ~E1000_TCTL_EN;
+                       rctl &= ~E1000_RCTL_EN;
+               }
+               E1000_WRITE_REG(hw, E1000_TCTL, tctl);
+               E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+               E1000_WRITE_FLUSH(hw);
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
        }
-       E1000_WRITE_REG(hw, E1000_TCTL, tctl);
-       E1000_WRITE_REG(hw, E1000_RCTL, rctl);
-       E1000_WRITE_FLUSH(hw);
 
        return 0;
 }
@@ -1557,7 +1694,6 @@ eth_igb_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 
        eth_igb_interrupt_get_status(dev);
        eth_igb_interrupt_action(dev);
-       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
 }
 
 static int
@@ -1591,6 +1727,7 @@ eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
        };
        uint32_t rx_buf_size;
        uint32_t max_high_water;
+       uint32_t rctl;
 
        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        rx_buf_size = igb_get_rx_buffer_size(hw);
@@ -1613,6 +1750,21 @@ eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
 
        err = e1000_setup_link_generic(hw);
        if (err == E1000_SUCCESS) {
+
+               /* check if we want to forward MAC frames - driver doesn't have native
+                * capability to do that, so we'll write the registers ourselves */
+
+               rctl = E1000_READ_REG(hw, E1000_RCTL);
+
+               /* set or clear the RCTL.PMCF bit depending on configuration */
+               if (fc_conf->mac_ctrl_frame_fwd != 0)
+                       rctl |= E1000_RCTL_PMCF;
+               else
+                       rctl &= ~E1000_RCTL_PMCF;
+
+               E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+               E1000_WRITE_FLUSH(hw);
+
                return 0;
        }
 
@@ -1620,13 +1772,18 @@ eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
        return (-EIO);
 }
 
+#define E1000_RAH_POOLSEL_SHIFT      (18)
 static void
 eth_igb_rar_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
                uint32_t index, __rte_unused uint32_t pool)
 {
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint32_t rah;
 
        e1000_rar_set(hw, mac_addr->addr_bytes, index);
+       rah = E1000_READ_REG(hw, E1000_RAH(index));
+       rah |= (0x1 << (E1000_RAH_POOLSEL_SHIFT + pool));
+       E1000_WRITE_REG(hw, E1000_RAH(index), rah);
 }
 
 static void
@@ -1649,7 +1806,7 @@ igbvf_intr_disable(struct e1000_hw *hw)
        PMD_INIT_LOG(DEBUG, "igbvf_intr_disable");
 
        /* Clear interrupt mask to stop from interrupts being generated */
-       E1000_WRITE_REG(hw, E1000_EIMC, ~0);
+       E1000_WRITE_REG(hw, E1000_EIMC, 0xFFFF);
 
        E1000_WRITE_FLUSH(hw);
 }
@@ -1666,7 +1823,7 @@ igbvf_stop_adapter(struct rte_eth_dev *dev)
        eth_igb_infos_get(dev, &dev_info);
 
        /* Clear interrupt mask to stop from interrupts being generated */
-       E1000_WRITE_REG(hw, E1000_EIMC, ~0);
+       igbvf_intr_disable(hw);
 
        /* Clear any pending interrupts, flush previous writes */
        E1000_READ_REG(hw, E1000_EICR);
@@ -1753,10 +1910,14 @@ igbvf_dev_configure(struct rte_eth_dev *dev)
 static int
 igbvf_dev_start(struct rte_eth_dev *dev)
 {
+       struct e1000_hw *hw =
+               E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int ret;
 
        PMD_INIT_LOG(DEBUG, "igbvf_dev_start");
 
+       hw->mac.ops.reset_hw(hw);
+
        /* Set all vfta */
        igbvf_set_vfta_all(dev,1);
        
@@ -1870,3 +2031,74 @@ igbvf_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
        return 0;
 }
 
+static int
+eth_igb_rss_reta_update(struct rte_eth_dev *dev,
+                                struct rte_eth_rss_reta *reta_conf)
+{
+       uint8_t i, j, mask;
+       uint32_t reta;
+       struct e1000_hw *hw =
+                       E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       /*
+        * Update Redirection Table RETA[n], n = 0..31. The redirection table
+        * holds 128 entries packed into 32 registers, 4 entries per register.
+        */
+       for (i = 0; i < ETH_RSS_RETA_NUM_ENTRIES; i += 4) {
+               if (i < ETH_RSS_RETA_NUM_ENTRIES/2)
+                       mask = (uint8_t)((reta_conf->mask_lo >> i) & 0xF);
+               else
+                       mask = (uint8_t)((reta_conf->mask_hi >>
+                               (i - ETH_RSS_RETA_NUM_ENTRIES/2)) & 0xF);
+               if (mask != 0) {
+                       reta = 0;
+                       /* If all 4 entries are being updated, the old RETA
+                        * register value need not be read back first. */
+                       if (mask != 0xF)
+                               reta = E1000_READ_REG(hw, E1000_RETA(i >> 2));
+
+                       for (j = 0; j < 4; j++) {
+                               if (mask & (0x1 << j)) {
+                                       if (mask != 0xF)
+                                               reta &= ~(0xFF << 8 * j);
+                                       reta |= reta_conf->reta[i + j] << 8 * j;
+                               }
+                       }
+                       E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
+               }
+       }
+
+       return 0;
+}
+
+static int
+eth_igb_rss_reta_query(struct rte_eth_dev *dev,
+                                struct rte_eth_rss_reta *reta_conf)
+{
+       uint8_t i, j, mask;
+       uint32_t reta;
+       struct e1000_hw *hw =
+                       E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       /*
+        * Read Redirection Table RETA[n], n = 0..31. The redirection table
+        * holds 128 entries packed into 32 registers, 4 entries per register.
+        */
+       for (i = 0; i < ETH_RSS_RETA_NUM_ENTRIES; i += 4) {
+               if (i < ETH_RSS_RETA_NUM_ENTRIES/2)
+                       mask = (uint8_t)((reta_conf->mask_lo >> i) & 0xF);
+               else
+                       mask = (uint8_t)((reta_conf->mask_hi >>
+                               (i - ETH_RSS_RETA_NUM_ENTRIES/2)) & 0xF);
+
+               if (mask != 0) {
+                       reta = E1000_READ_REG(hw, E1000_RETA(i >> 2));
+                       for (j = 0; j < 4; j++) {
+                               if (mask & (0x1 << j))
+                                       reta_conf->reta[i + j] =
+                                               (uint8_t)((reta >> 8 * j) & 0xFF);
+                       }
+               }
+       }
+       return 0;
+}
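For reference, the register arithmetic used by both functions above: entry k of the 128-entry table lives in RETA register k >> 2, byte k & 3, and entries 0..63 / 64..127 are selected through mask_lo / mask_hi. Below is a minimal usage sketch from the application side, assuming the rte_eth_dev_rss_reta_update() wrapper dispatches to the reta_update op registered earlier; the even/odd queue split is only an example.

#include <string.h>
#include <rte_ethdev.h>

/* Illustrative RETA setup: spread the 128 indirection entries across
 * Rx queues 0 and 1 in round-robin fashion. */
static int
igb_spread_reta_over_two_queues(uint8_t port_id)
{
	struct rte_eth_rss_reta reta_conf;
	unsigned k;

	memset(&reta_conf, 0, sizeof(reta_conf));

	/* Select all 128 entries: 64 through mask_lo, 64 through mask_hi. */
	reta_conf.mask_lo = ~0ULL;
	reta_conf.mask_hi = ~0ULL;

	for (k = 0; k < ETH_RSS_RETA_NUM_ENTRIES; k++)
		reta_conf.reta[k] = k & 1;	/* queue 0 or queue 1 */

	/* Ends up in eth_igb_rss_reta_update(), which packs 4 entries
	 * per 32-bit RETA register (entry k -> register k >> 2). */
	return rte_eth_dev_rss_reta_update(port_id, &reta_conf);
}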