i40e: fix build with icc
lib/librte_pmd_i40e/i40e_ethdev.c
index c788fd8..5074262 100644
@@ -47,6 +47,7 @@
 #include <rte_memzone.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
+#include <rte_alarm.h>
 #include <rte_dev.h>
 #include <rte_eth_ctrl.h>
 
@@ -161,7 +162,8 @@ static int i40e_dev_rss_reta_query(struct rte_eth_dev *dev,
 static int i40e_get_cap(struct i40e_hw *hw);
 static int i40e_pf_parameter_init(struct rte_eth_dev *dev);
 static int i40e_pf_setup(struct i40e_pf *pf);
-static int i40e_vsi_init(struct i40e_vsi *vsi);
+static int i40e_dev_rxtx_init(struct i40e_pf *pf);
+static int i40e_vmdq_setup(struct rte_eth_dev *dev);
 static void i40e_stat_update_32(struct i40e_hw *hw, uint32_t reg,
                bool offset_loaded, uint64_t *offset, uint64_t *stat);
 static void i40e_stat_update_48(struct i40e_hw *hw,
@@ -267,28 +269,18 @@ static struct eth_driver rte_i40e_pmd = {
        {
                .name = "rte_i40e_pmd",
                .id_table = pci_id_i40e_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
        },
        .eth_dev_init = eth_i40e_dev_init,
        .dev_private_size = sizeof(struct i40e_adapter),
 };
 
 static inline int
-i40e_prev_power_of_2(int n)
+i40e_align_floor(int n)
 {
-       int p = n;
-
-       --p;
-       p |= p >> 1;
-       p |= p >> 2;
-       p |= p >> 4;
-       p |= p >> 8;
-       p |= p >> 16;
-       if (p == (n - 1))
-               return n;
-       p >>= 1;
-
-       return ++p;
+       if (n == 0)
+               return 0;
+       return (1 << (sizeof(n) * CHAR_BIT - 1 - __builtin_clz(n)));
 }
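
For reference, a standalone sketch of the new helper (assuming a compiler that
provides __builtin_clz, as gcc, clang and icc all do): it returns the largest
power of two that is not greater than n, which is the same floor the removed
loop computed.

    #include <limits.h>
    #include <stdio.h>

    /* same logic as i40e_align_floor(): largest power of two <= n */
    static inline int
    align_floor(int n)
    {
            if (n == 0)
                    return 0;
            return 1 << (sizeof(n) * CHAR_BIT - 1 - __builtin_clz(n));
    }

    int
    main(void)
    {
            /* prints "4 8 1 0" */
            printf("%d %d %d %d\n", align_floor(5), align_floor(8),
                   align_floor(1), align_floor(0));
            return 0;
    }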
 
 static inline int
@@ -505,7 +497,7 @@ eth_i40e_dev_init(__rte_unused struct eth_driver *eth_drv,
        if (!dev->data->mac_addrs) {
                PMD_INIT_LOG(ERR, "Failed to allocate memory "
                                        "for storing mac address");
-               goto err_get_mac_addr;
+               goto err_mac_alloc;
        }
        ether_addr_copy((struct ether_addr *)hw->mac.perm_addr,
                                        &dev->data->mac_addrs[0]);
@@ -526,8 +518,9 @@ eth_i40e_dev_init(__rte_unused struct eth_driver *eth_drv,
 
        return 0;
 
+err_mac_alloc:
+       i40e_vsi_release(pf->main_vsi);
 err_setup_pf_switch:
-       rte_free(pf->main_vsi);
 err_get_mac_addr:
 err_configure_lan_hmc:
        (void)i40e_shutdown_lan_hmc(hw);
@@ -546,6 +539,27 @@ err_get_capabilities:
 static int
 i40e_dev_configure(struct rte_eth_dev *dev)
 {
+       int ret;
+       enum rte_eth_rx_mq_mode mq_mode = dev->data->dev_conf.rxmode.mq_mode;
+
+       /* VMDQ setup.
+        *  VMDQ setting needs to move out of i40e_pf_config_mq_rx(), as
+        *  VMDQ and RSS settings have different requirements.
+        *  The general PMD driver call sequence is NIC init, configure,
+        *  rx/tx_queue_setup and dev_start. rx/tx_queue_setup() will try to
+        *  look up the VSI that a specific queue belongs to if VMDQ is
+        *  applicable, so VMDQ setting has to be done before
+        *  rx/tx_queue_setup(); this function is a good place for vmdq_setup.
+        *  RSS setting needs the actual number of configured RX queues,
+        *  which is only available after rx_queue_setup(), so dev_start()
+        *  is a good place for RSS setup.
+        */
+       if (mq_mode & ETH_MQ_RX_VMDQ_FLAG) {
+               ret = i40e_vmdq_setup(dev);
+               if (ret)
+                       return ret;
+       }
+
        return i40e_dev_init_vlan(dev);
 }
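
The ordering constraint described in the comment above is visible from the
application side. A minimal, hypothetical bring-up sketch (port_id, descriptor
counts and socket id are illustrative, not taken from the patch):

    #include <rte_ethdev.h>

    static int
    port_bringup(uint8_t port_id, uint16_t nb_rxq, uint16_t nb_txq,
                 struct rte_mempool *mb_pool)
    {
            struct rte_eth_conf conf = {
                    .rxmode = { .mq_mode = ETH_MQ_RX_VMDQ_RSS },
            };
            uint16_t q;

            /* configure: i40e_vmdq_setup() runs here, before queue setup */
            if (rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf) < 0)
                    return -1;
            for (q = 0; q < nb_rxq; q++)
                    if (rte_eth_rx_queue_setup(port_id, q, 128, 0,
                                               NULL, mb_pool) < 0)
                            return -1;
            for (q = 0; q < nb_txq; q++)
                    if (rte_eth_tx_queue_setup(port_id, q, 512, 0, NULL) < 0)
                            return -1;
            /* start: i40e_pf_config_rss() runs here, once the configured
             * queue count is known */
            return rte_eth_dev_start(port_id);
    }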
 
@@ -769,8 +783,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct i40e_vsi *vsi = pf->main_vsi;
-       int ret;
+       struct i40e_vsi *main_vsi = pf->main_vsi;
+       int ret, i;
 
        if ((dev->data->dev_conf.link_duplex != ETH_LINK_AUTONEG_DUPLEX) &&
                (dev->data->dev_conf.link_duplex != ETH_LINK_FULL_DUPLEX)) {
@@ -781,26 +795,37 @@ i40e_dev_start(struct rte_eth_dev *dev)
        }
 
        /* Initialize VSI */
-       ret = i40e_vsi_init(vsi);
+       ret = i40e_dev_rxtx_init(pf);
        if (ret != I40E_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Failed to init VSI");
+               PMD_DRV_LOG(ERR, "Failed to init rx/tx queues");
                goto err_up;
        }
 
        /* Map queues with MSIX interrupt */
-       i40e_vsi_queues_bind_intr(vsi);
-       i40e_vsi_enable_queues_intr(vsi);
+       i40e_vsi_queues_bind_intr(main_vsi);
+       i40e_vsi_enable_queues_intr(main_vsi);
+
+       /* Map VMDQ VSI queues with MSIX interrupt */
+       for (i = 0; i < pf->nb_cfg_vmdq_vsi; i++) {
+               i40e_vsi_queues_bind_intr(pf->vmdq[i].vsi);
+               i40e_vsi_enable_queues_intr(pf->vmdq[i].vsi);
+       }
 
        /* Enable all queues which have been configured */
-       ret = i40e_vsi_switch_queues(vsi, TRUE);
+       ret = i40e_dev_switch_queues(pf, TRUE);
        if (ret != I40E_SUCCESS) {
                PMD_DRV_LOG(ERR, "Failed to enable VSI");
                goto err_up;
        }
 
        /* Enable receiving broadcast packets */
-       if ((vsi->type == I40E_VSI_MAIN) || (vsi->type == I40E_VSI_VMDQ2)) {
-               ret = i40e_aq_set_vsi_broadcast(hw, vsi->seid, true, NULL);
+       ret = i40e_aq_set_vsi_broadcast(hw, main_vsi->seid, true, NULL);
+       if (ret != I40E_SUCCESS)
+               PMD_DRV_LOG(INFO, "failed to set vsi broadcast");
+
+       for (i = 0; i < pf->nb_cfg_vmdq_vsi; i++) {
+               ret = i40e_aq_set_vsi_broadcast(hw, pf->vmdq[i].vsi->seid,
+                                               true, NULL);
                if (ret != I40E_SUCCESS)
                        PMD_DRV_LOG(INFO, "failed to set vsi broadcast");
        }
@@ -815,7 +840,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
        return I40E_SUCCESS;
 
 err_up:
-       i40e_vsi_switch_queues(vsi, FALSE);
+       i40e_dev_switch_queues(pf, FALSE);
+       i40e_dev_clear_queues(dev);
 
        return ret;
 }
@@ -824,17 +850,26 @@ static void
 i40e_dev_stop(struct rte_eth_dev *dev)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
-       struct i40e_vsi *vsi = pf->main_vsi;
+       struct i40e_vsi *main_vsi = pf->main_vsi;
+       int i;
 
        /* Disable all queues */
-       i40e_vsi_switch_queues(vsi, FALSE);
+       i40e_dev_switch_queues(pf, FALSE);
+
+       /* un-map queues with interrupt registers */
+       i40e_vsi_disable_queues_intr(main_vsi);
+       i40e_vsi_queues_unbind_intr(main_vsi);
+
+       for (i = 0; i < pf->nb_cfg_vmdq_vsi; i++) {
+               i40e_vsi_disable_queues_intr(pf->vmdq[i].vsi);
+               i40e_vsi_queues_unbind_intr(pf->vmdq[i].vsi);
+       }
+
+       /* Clear all queues and release memory */
+       i40e_dev_clear_queues(dev);
 
        /* Set link down */
        i40e_dev_set_link_down(dev);
-
-       /* un-map queues with interrupt registers */
-       i40e_vsi_disable_queues_intr(vsi);
-       i40e_vsi_queues_unbind_intr(vsi);
 }
 
 static void
@@ -1430,6 +1465,15 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS,
        };
 
+       if (pf->flags & I40E_FLAG_VMDQ) {
+               dev_info->max_vmdq_pools = pf->max_nb_vmdq_vsi;
+               dev_info->vmdq_queue_base = dev_info->max_rx_queues;
+               dev_info->vmdq_queue_num = pf->vmdq_nb_qps *
+                                               pf->max_nb_vmdq_vsi;
+               dev_info->vmdq_pool_base = I40E_VMDQ_POOL_BASE;
+               dev_info->max_rx_queues += dev_info->vmdq_queue_num;
+               dev_info->max_tx_queues += dev_info->vmdq_queue_num;
+       }
 }
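
To illustrate the layout this exports (numbers assumed, not from the patch):
with 64 LAN queues, vmdq_nb_qps = 4 and max_nb_vmdq_vsi = 63, an application
reading dev_info would see vmdq_queue_base = 64, vmdq_queue_num = 252 and
max_rx_queues = 316. A hypothetical reader:

    #include <stdio.h>
    #include <rte_ethdev.h>

    static void
    show_vmdq_layout(uint8_t port_id)
    {
            struct rte_eth_dev_info info;

            rte_eth_dev_info_get(port_id, &info);
            printf("pools=%u pool_base=%u queue_base=%u queue_num=%u\n",
                   info.max_vmdq_pools, info.vmdq_pool_base,
                   info.vmdq_queue_base, info.vmdq_queue_num);
    }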
 
 static int
@@ -1550,48 +1594,41 @@ i40e_priority_flow_ctrl_set(__rte_unused struct rte_eth_dev *dev,
 static void
 i40e_macaddr_add(struct rte_eth_dev *dev,
                 struct ether_addr *mac_addr,
-                __attribute__((unused)) uint32_t index,
-                __attribute__((unused)) uint32_t pool)
+                __rte_unused uint32_t index,
+                uint32_t pool)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
-       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct i40e_mac_filter_info mac_filter;
-       struct i40e_vsi *vsi = pf->main_vsi;
-       struct ether_addr old_mac;
+       struct i40e_vsi *vsi;
        int ret;
 
-       if (!is_valid_assigned_ether_addr(mac_addr)) {
-               PMD_DRV_LOG(ERR, "Invalid ethernet address");
+       /* If VMDQ not enabled or configured, return */
+       if (pool != 0 && (!(pf->flags & I40E_FLAG_VMDQ) || !pf->nb_cfg_vmdq_vsi)) {
+               PMD_DRV_LOG(ERR, "VMDQ not %s, can't set mac to pool %u",
+                       pf->flags & I40E_FLAG_VMDQ ? "configured" : "enabled",
+                       pool);
                return;
        }
 
-       if (is_same_ether_addr(mac_addr, &(pf->dev_addr))) {
-               PMD_DRV_LOG(INFO, "Ignore adding permanent mac address");
+       if (pool > pf->nb_cfg_vmdq_vsi) {
+               PMD_DRV_LOG(ERR, "Pool number %u invalid. Max pool is %u",
+                               pool, pf->nb_cfg_vmdq_vsi);
                return;
        }
 
-       /* Write mac address */
-       ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY,
-                                       mac_addr->addr_bytes, NULL);
-       if (ret != I40E_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Failed to write mac address");
-               return;
-       }
-
-       (void)rte_memcpy(&old_mac, hw->mac.addr, ETHER_ADDR_LEN);
-       (void)rte_memcpy(hw->mac.addr, mac_addr->addr_bytes,
-                       ETHER_ADDR_LEN);
        (void)rte_memcpy(&mac_filter.mac_addr, mac_addr, ETHER_ADDR_LEN);
        mac_filter.filter_type = RTE_MACVLAN_PERFECT_MATCH;
 
+       if (pool == 0)
+               vsi = pf->main_vsi;
+       else
+               vsi = pf->vmdq[pool - 1].vsi;
+
        ret = i40e_vsi_add_mac(vsi, &mac_filter);
        if (ret != I40E_SUCCESS) {
                PMD_DRV_LOG(ERR, "Failed to add MACVLAN filter");
                return;
        }
-
-       ether_addr_copy(mac_addr, &pf->dev_addr);
-       i40e_vsi_delete_mac(vsi, &old_mac);
 }
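
The pool argument reaches this callback through rte_eth_dev_mac_addr_add();
pool 0 targets the main VSI and pool N (N >= 1) targets pf->vmdq[N - 1].vsi,
per the mapping above. A hypothetical application-side call (the MAC value is
illustrative):

    #include <rte_ethdev.h>
    #include <rte_ether.h>

    static int
    add_mac_to_pool(uint8_t port_id, uint32_t pool)
    {
            struct ether_addr mac = {
                    .addr_bytes = { 0x52, 0x54, 0x00, 0x00, 0x00, 0x01 },
            };

            /* also records the pool in dev->data->mac_pool_sel[index],
             * which i40e_macaddr_remove() walks below */
            return rte_eth_dev_mac_addr_add(port_id, &mac, pool);
    }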
 
 /* Remove a MAC address, and update filters */
@@ -1599,36 +1636,39 @@ static void
 i40e_macaddr_remove(struct rte_eth_dev *dev, uint32_t index)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
-       struct i40e_vsi *vsi = pf->main_vsi;
-       struct rte_eth_dev_data *data = I40E_VSI_TO_DEV_DATA(vsi);
+       struct i40e_vsi *vsi;
+       struct rte_eth_dev_data *data = dev->data;
        struct ether_addr *macaddr;
        int ret;
-       struct i40e_hw *hw =
-               I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-
-       if (index >= vsi->max_macaddrs)
-               return;
+       uint32_t i;
+       uint64_t pool_sel;
 
        macaddr = &(data->mac_addrs[index]);
-       if (!is_valid_assigned_ether_addr(macaddr))
-               return;
-
-       ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY,
-                                       hw->mac.perm_addr, NULL);
-       if (ret != I40E_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Failed to write mac address");
-               return;
-       }
 
-       (void)rte_memcpy(hw->mac.addr, hw->mac.perm_addr, ETHER_ADDR_LEN);
-
-       ret = i40e_vsi_delete_mac(vsi, macaddr);
-       if (ret != I40E_SUCCESS)
-               return;
+       pool_sel = dev->data->mac_pool_sel[index];
+
+       for (i = 0; i < sizeof(pool_sel) * CHAR_BIT; i++) {
+               if (pool_sel & (1ULL << i)) {
+                       if (i == 0)
+                               vsi = pf->main_vsi;
+                       else {
+                               /* No VMDQ pool enabled or configured */
+                               if (!(pf->flags & I40E_FLAG_VMDQ) ||
+                                       (i > pf->nb_cfg_vmdq_vsi)) {
+                                       PMD_DRV_LOG(ERR, "No VMDQ pool enabled"
+                                                       "/configured");
+                                       return;
+                               }
+                               vsi = pf->vmdq[i - 1].vsi;
+                       }
+                       ret = i40e_vsi_delete_mac(vsi, macaddr);
 
-       /* Clear device address as it has been removed */
-       if (is_same_ether_addr(&(pf->dev_addr), macaddr))
-               memset(&pf->dev_addr, 0, sizeof(struct ether_addr));
+                       if (ret) {
+                               PMD_DRV_LOG(ERR, "Failed to remove MACVLAN filter");
+                               return;
+                       }
+               }
+       }
 }
 
 /* Set perfect match or hash match of MAC and VLAN for a VF */
@@ -1720,7 +1760,7 @@ i40e_mac_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
        filter = (struct rte_eth_mac_filter *)(arg);
 
        switch (filter_op) {
-       case RTE_ETH_FILTER_NONE:
+       case RTE_ETH_FILTER_NOP:
                ret = I40E_SUCCESS;
                break;
        case RTE_ETH_FILTER_ADD:
@@ -1971,7 +2011,7 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_PF_TO_HW(pf);
-       uint16_t sum_queues = 0, sum_vsis;
+       uint16_t sum_queues = 0, sum_vsis, left_queues;
 
        /* First check if FW supports SRIOV */
        if (dev->pci_dev->max_vfs && !hw->func_caps.sr_iov_1_1) {
@@ -1987,7 +2027,7 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev)
                pf->flags |= I40E_FLAG_RSS;
                pf->lan_nb_qps = RTE_MIN(hw->func_caps.num_tx_qp,
                        (uint32_t)(1 << hw->func_caps.rss_table_entry_width));
-               pf->lan_nb_qps = i40e_prev_power_of_2(pf->lan_nb_qps);
+               pf->lan_nb_qps = i40e_align_floor(pf->lan_nb_qps);
        } else
                pf->lan_nb_qps = 1;
        sum_queues = pf->lan_nb_qps;
@@ -2021,11 +2061,19 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev)
 
        if (hw->func_caps.vmdq) {
                pf->flags |= I40E_FLAG_VMDQ;
-               pf->vmdq_nb_qps = I40E_DEFAULT_QP_NUM_VMDQ;
-               sum_queues += pf->vmdq_nb_qps;
-               sum_vsis += 1;
-               PMD_INIT_LOG(INFO, "VMDQ queue pairs:%u", pf->vmdq_nb_qps);
+               pf->vmdq_nb_qps = RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM;
+               pf->max_nb_vmdq_vsi = 1;
+               /*
+                * If VMDQ is available, assume a single VSI can be created.
+                * This will be adjusted later.
+                */
+               sum_queues += pf->vmdq_nb_qps * pf->max_nb_vmdq_vsi;
+               sum_vsis += pf->max_nb_vmdq_vsi;
+       } else {
+               pf->vmdq_nb_qps = 0;
+               pf->max_nb_vmdq_vsi = 0;
        }
+       pf->nb_cfg_vmdq_vsi = 0;
 
        if (hw->func_caps.fd) {
                pf->flags |= I40E_FLAG_FDIR;
@@ -2046,6 +2094,22 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev)
                return -EINVAL;
        }
 
+       /* Adjust VMDQ setting to support as many VMs as possible */
+       if (pf->flags & I40E_FLAG_VMDQ) {
+               left_queues = hw->func_caps.num_rx_qp - sum_queues;
+
+               pf->max_nb_vmdq_vsi += RTE_MIN(left_queues / pf->vmdq_nb_qps,
+                                       pf->max_num_vsi - sum_vsis);
+
+               /* Limit the max VMDQ number to what rte_ether can support */
+               pf->max_nb_vmdq_vsi = RTE_MIN(pf->max_nb_vmdq_vsi,
+                                       ETH_64_POOLS - 1);
+
+               PMD_INIT_LOG(INFO, "Max VMDQ VSI num:%u",
+                               pf->max_nb_vmdq_vsi);
+               PMD_INIT_LOG(INFO, "VMDQ queue pairs:%u", pf->vmdq_nb_qps);
+       }
+
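
A worked example of this adjustment, with assumed capability numbers (not from
any particular NIC):

    /* num_rx_qp = 128, sum_queues = 68 after LAN/VF/FDIR accounting,
     * vmdq_nb_qps = 4, max_num_vsi - sum_vsis = 30:
     *
     *   left_queues     = 128 - 68 = 60
     *   max_nb_vmdq_vsi = 1 + RTE_MIN(60 / 4, 30) = 16
     *   RTE_MIN(16, ETH_64_POOLS - 1 = 63)        = 16
     *
     * so 16 VMDQ pools of 4 queue pairs each would be advertised. */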
        /* Each VSI occupies at least 1 MSIX interrupt, plus IRQ0 for misc intr
         * cause */
        if (sum_vsis > hw->func_caps.num_msix_vectors - 1) {
@@ -2438,7 +2502,7 @@ i40e_vsi_config_tc_queue_mapping(struct i40e_vsi *vsi,
        vsi->enabled_tc = enabled_tcmap;
 
        /* Number of queues per enabled TC */
-       qpnum_per_tc = i40e_prev_power_of_2(vsi->nb_qps / total_tc);
+       qpnum_per_tc = i40e_align_floor(vsi->nb_qps / total_tc);
        qpnum_per_tc = RTE_MIN(qpnum_per_tc, I40E_MAX_Q_PER_TC);
        bsf = rte_bsf32(qpnum_per_tc);
 
@@ -2751,6 +2815,9 @@ i40e_vsi_setup(struct i40e_pf *pf,
        case I40E_VSI_SRIOV :
                vsi->nb_qps = pf->vf_nb_qps;
                break;
+       case I40E_VSI_VMDQ2:
+               vsi->nb_qps = pf->vmdq_nb_qps;
+               break;
        default:
                goto fail_mem;
        }
@@ -2892,8 +2959,44 @@ i40e_vsi_setup(struct i40e_pf *pf,
                 * Since the VSI is not created yet, only configure the
                 * parameters; the VSI will be added below.
                 */
-       }
-       else {
+       } else if (type == I40E_VSI_VMDQ2) {
+               memset(&ctxt, 0, sizeof(ctxt));
+               /*
+                * For other VSIs, the uplink_seid equals the uplink VSI's
+                * uplink_seid, since they share the same VEB.
+                */
+               vsi->uplink_seid = uplink_vsi->uplink_seid;
+               ctxt.pf_num = hw->pf_id;
+               ctxt.vf_num = 0;
+               ctxt.uplink_seid = vsi->uplink_seid;
+               ctxt.connection_type = 0x1;
+               ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
+
+               ctxt.info.valid_sections |=
+                               rte_cpu_to_le_16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+               /* user_param carries flag to enable loop back */
+               if (user_param) {
+                       ctxt.info.switch_id =
+                       rte_cpu_to_le_16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
+                       ctxt.info.switch_id |=
+                       rte_cpu_to_le_16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+               }
+
+               /* Configure port/vlan */
+               ctxt.info.valid_sections |=
+                       rte_cpu_to_le_16(I40E_AQ_VSI_PROP_VLAN_VALID);
+               ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL;
+               ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info,
+                                               I40E_DEFAULT_TCMAP);
+               if (ret != I40E_SUCCESS) {
+                       PMD_DRV_LOG(ERR, "Failed to configure "
+                                       "TC queue mapping");
+                       goto fail_msix_alloc;
+               }
+               ctxt.info.up_enable_bits = I40E_DEFAULT_TCMAP;
+               ctxt.info.valid_sections |=
+                       rte_cpu_to_le_16(I40E_AQ_VSI_PROP_SCHED_VALID);
+       } else {
                PMD_DRV_LOG(ERR, "VSI: Other VSI types are not supported yet");
                goto fail_msix_alloc;
        }
@@ -3068,7 +3171,6 @@ i40e_pf_setup(struct i40e_pf *pf)
 {
        struct i40e_hw *hw = I40E_PF_TO_HW(pf);
        struct i40e_filter_control_settings settings;
-       struct rte_eth_dev_data *dev_data = pf->dev_data;
        struct i40e_vsi *vsi;
        int ret;
 
@@ -3090,8 +3192,6 @@ i40e_pf_setup(struct i40e_pf *pf)
                return I40E_ERR_NOT_READY;
        }
        pf->main_vsi = vsi;
-       dev_data->nb_rx_queues = vsi->nb_qps;
-       dev_data->nb_tx_queues = vsi->nb_qps;
 
        /* Configure filter control */
        memset(&settings, 0, sizeof(settings));
@@ -3172,11 +3272,11 @@ i40e_switch_tx_queue(struct i40e_hw *hw, uint16_t q_idx, bool on)
 
 /* Switch on or off the tx queues */
 static int
-i40e_vsi_switch_tx_queues(struct i40e_vsi *vsi, bool on)
+i40e_dev_switch_tx_queues(struct i40e_pf *pf, bool on)
 {
-       struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(vsi);
+       struct rte_eth_dev_data *dev_data = pf->dev_data;
        struct i40e_tx_queue *txq;
-       struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(vsi);
+       struct rte_eth_dev *dev = pf->adapter->eth_dev;
        uint16_t i;
        int ret;
 
@@ -3184,7 +3284,7 @@ i40e_vsi_switch_tx_queues(struct i40e_vsi *vsi, bool on)
                txq = dev_data->tx_queues[i];
                /* Don't operate the queue if not configured or
                 * if starting only per queue */
-               if (!txq->q_set || (on && txq->tx_deferred_start))
+               if (!txq || !txq->q_set || (on && txq->tx_deferred_start))
                        continue;
                if (on)
                        ret = i40e_dev_tx_queue_start(dev, i);
@@ -3250,11 +3350,11 @@ i40e_switch_rx_queue(struct i40e_hw *hw, uint16_t q_idx, bool on)
 }
 /* Switch on or off the rx queues */
 static int
-i40e_vsi_switch_rx_queues(struct i40e_vsi *vsi, bool on)
+i40e_dev_switch_rx_queues(struct i40e_pf *pf, bool on)
 {
-       struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(vsi);
+       struct rte_eth_dev_data *dev_data = pf->dev_data;
        struct i40e_rx_queue *rxq;
-       struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(vsi);
+       struct rte_eth_dev *dev = pf->adapter->eth_dev;
        uint16_t i;
        int ret;
 
@@ -3262,7 +3362,7 @@ i40e_vsi_switch_rx_queues(struct i40e_vsi *vsi, bool on)
                rxq = dev_data->rx_queues[i];
                /* Don't operate the queue if not configured or
                 * if starting only per queue */
-               if (!rxq->q_set || (on && rxq->rx_deferred_start))
+               if (!rxq || !rxq->q_set || (on && rxq->rx_deferred_start))
                        continue;
                if (on)
                        ret = i40e_dev_rx_queue_start(dev, i);
@@ -3277,26 +3377,26 @@ i40e_vsi_switch_rx_queues(struct i40e_vsi *vsi, bool on)
 
 /* Switch on or off all the rx/tx queues */
 int
-i40e_vsi_switch_queues(struct i40e_vsi *vsi, bool on)
+i40e_dev_switch_queues(struct i40e_pf *pf, bool on)
 {
        int ret;
 
        if (on) {
                /* enable rx queues before enabling tx queues */
-               ret = i40e_vsi_switch_rx_queues(vsi, on);
+               ret = i40e_dev_switch_rx_queues(pf, on);
                if (ret) {
                        PMD_DRV_LOG(ERR, "Failed to switch rx queues");
                        return ret;
                }
-               ret = i40e_vsi_switch_tx_queues(vsi, on);
+               ret = i40e_dev_switch_tx_queues(pf, on);
        } else {
                /* Stop tx queues before stopping rx queues */
-               ret = i40e_vsi_switch_tx_queues(vsi, on);
+               ret = i40e_dev_switch_tx_queues(pf, on);
                if (ret) {
                        PMD_DRV_LOG(ERR, "Failed to switch tx queues");
                        return ret;
                }
-               ret = i40e_vsi_switch_rx_queues(vsi, on);
+               ret = i40e_dev_switch_rx_queues(pf, on);
        }
 
        return ret;
@@ -3304,15 +3404,18 @@ i40e_vsi_switch_queues(struct i40e_vsi *vsi, bool on)
 
 /* Initialize VSI for TX */
 static int
-i40e_vsi_tx_init(struct i40e_vsi *vsi)
+i40e_dev_tx_init(struct i40e_pf *pf)
 {
-       struct i40e_pf *pf = I40E_VSI_TO_PF(vsi);
        struct rte_eth_dev_data *data = pf->dev_data;
        uint16_t i;
        uint32_t ret = I40E_SUCCESS;
+       struct i40e_tx_queue *txq;
 
        for (i = 0; i < data->nb_tx_queues; i++) {
-               ret = i40e_tx_queue_init(data->tx_queues[i]);
+               txq = data->tx_queues[i];
+               if (!txq || !txq->q_set)
+                       continue;
+               ret = i40e_tx_queue_init(txq);
                if (ret != I40E_SUCCESS)
                        break;
        }
@@ -3322,16 +3425,20 @@ i40e_vsi_tx_init(struct i40e_vsi *vsi)
 
 /* Initialize VSI for RX */
 static int
-i40e_vsi_rx_init(struct i40e_vsi *vsi)
+i40e_dev_rx_init(struct i40e_pf *pf)
 {
-       struct i40e_pf *pf = I40E_VSI_TO_PF(vsi);
        struct rte_eth_dev_data *data = pf->dev_data;
        int ret = I40E_SUCCESS;
        uint16_t i;
+       struct i40e_rx_queue *rxq;
 
        i40e_pf_config_mq_rx(pf);
        for (i = 0; i < data->nb_rx_queues; i++) {
-               ret = i40e_rx_queue_init(data->rx_queues[i]);
+               rxq = data->rx_queues[i];
+               if (!rxq || !rxq->q_set)
+                       continue;
+
+               ret = i40e_rx_queue_init(rxq);
                if (ret != I40E_SUCCESS) {
                        PMD_DRV_LOG(ERR, "Failed to do RX queue "
                                    "initialization");
@@ -3342,26 +3449,121 @@ i40e_vsi_rx_init(struct i40e_vsi *vsi)
        return ret;
 }
 
-/* Initialize VSI */
 static int
-i40e_vsi_init(struct i40e_vsi *vsi)
+i40e_dev_rxtx_init(struct i40e_pf *pf)
 {
        int err;
 
-       err = i40e_vsi_tx_init(vsi);
+       err = i40e_dev_tx_init(pf);
        if (err) {
-               PMD_DRV_LOG(ERR, "Failed to do vsi TX initialization");
+               PMD_DRV_LOG(ERR, "Failed to do TX initialization");
                return err;
        }
-       err = i40e_vsi_rx_init(vsi);
+       err = i40e_dev_rx_init(pf);
        if (err) {
-               PMD_DRV_LOG(ERR, "Failed to do vsi RX initialization");
+               PMD_DRV_LOG(ERR, "Failed to do RX initialization");
                return err;
        }
 
        return err;
 }
 
+static int
+i40e_vmdq_setup(struct rte_eth_dev *dev)
+{
+       struct rte_eth_conf *conf = &dev->data->dev_conf;
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       int i, err, conf_vsis, j, loop;
+       struct i40e_vsi *vsi;
+       struct i40e_vmdq_info *vmdq_info;
+       struct rte_eth_vmdq_rx_conf *vmdq_conf;
+       struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+
+       /*
+        * Disable interrupts to avoid messages from VFs. Furthermore, this
+        * avoids a race condition in VSI creation/destruction.
+        */
+       i40e_pf_disable_irq0(hw);
+
+       if ((pf->flags & I40E_FLAG_VMDQ) == 0) {
+               PMD_INIT_LOG(ERR, "FW doesn't support VMDQ");
+               return -ENOTSUP;
+       }
+
+       conf_vsis = conf->rx_adv_conf.vmdq_rx_conf.nb_queue_pools;
+       if (conf_vsis > pf->max_nb_vmdq_vsi) {
+               PMD_INIT_LOG(ERR, "VMDQ config: %u, max support:%u",
+                       conf->rx_adv_conf.vmdq_rx_conf.nb_queue_pools,
+                       pf->max_nb_vmdq_vsi);
+               return -ENOTSUP;
+       }
+
+       if (pf->vmdq != NULL) {
+               PMD_INIT_LOG(INFO, "VMDQ already configured");
+               return 0;
+       }
+
+       pf->vmdq = rte_zmalloc("vmdq_info_struct",
+                               sizeof(*vmdq_info) * conf_vsis, 0);
+
+       if (pf->vmdq == NULL) {
+               PMD_INIT_LOG(ERR, "Failed to allocate memory");
+               return -ENOMEM;
+       }
+
+       vmdq_conf = &conf->rx_adv_conf.vmdq_rx_conf;
+
+       /* Create VMDQ VSI */
+       for (i = 0; i < conf_vsis; i++) {
+               vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, pf->main_vsi,
+                               vmdq_conf->enable_loop_back);
+               if (vsi == NULL) {
+                       PMD_INIT_LOG(ERR, "Failed to create VMDQ VSI");
+                       err = -1;
+                       goto err_vsi_setup;
+               }
+               vmdq_info = &pf->vmdq[i];
+               vmdq_info->pf = pf;
+               vmdq_info->vsi = vsi;
+       }
+       pf->nb_cfg_vmdq_vsi = conf_vsis;
+
+       /* Configure Vlan */
+       loop = sizeof(vmdq_conf->pool_map[0].pools) * CHAR_BIT;
+       for (i = 0; i < vmdq_conf->nb_pool_maps; i++) {
+               for (j = 0; j < loop && j < pf->nb_cfg_vmdq_vsi; j++) {
+                       if (vmdq_conf->pool_map[i].pools & (1UL << j)) {
+                               PMD_INIT_LOG(INFO, "Add vlan %u to vmdq pool %u",
+                                       vmdq_conf->pool_map[i].vlan_id, j);
+
+                               err = i40e_vsi_add_vlan(pf->vmdq[j].vsi,
+                                               vmdq_conf->pool_map[i].vlan_id);
+                               if (err) {
+                                       PMD_INIT_LOG(ERR, "Failed to add vlan");
+                                       err = -1;
+                                       goto err_vsi_setup;
+                               }
+                       }
+               }
+       }
+
+       i40e_pf_enable_irq0(hw);
+
+       return 0;
+
+err_vsi_setup:
+       for (i = 0; i < conf_vsis; i++)
+               if (pf->vmdq[i].vsi == NULL)
+                       break;
+               else
+                       i40e_vsi_release(pf->vmdq[i].vsi);
+
+       rte_free(pf->vmdq);
+       pf->vmdq = NULL;
+       i40e_pf_enable_irq0(hw);
+       return err;
+}
+
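
A hypothetical application configuration that would drive this function (pool
count, VLAN ids and mappings are illustrative):

    #include <rte_ethdev.h>

    static void
    fill_vmdq_conf(struct rte_eth_conf *conf)
    {
            struct rte_eth_vmdq_rx_conf *vmdq =
                    &conf->rx_adv_conf.vmdq_rx_conf;

            conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
            vmdq->nb_queue_pools = ETH_8_POOLS;  /* becomes conf_vsis above */
            vmdq->enable_loop_back = 0;
            vmdq->nb_pool_maps = 2;
            /* steer VLAN 100 to pool 0 and VLAN 101 to pool 1 */
            vmdq->pool_map[0].vlan_id = 100;
            vmdq->pool_map[0].pools = 1ULL << 0;
            vmdq->pool_map[1].vlan_id = 101;
            vmdq->pool_map[1].pools = 1ULL << 1;
    }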
 static void
 i40e_stat_update_32(struct i40e_hw *hw,
                   uint32_t reg,
@@ -3526,6 +3728,57 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev)
        rte_free(info.msg_buf);
 }
 
+/*
+ * This interrupt handler is registered as the alarm callback for handling
+ * the LSC interrupt after a fixed delay, in order to wait for the NIC to
+ * reach a stable state. Currently i40e waits 1 sec for the link up
+ * interrupt; no wait is needed for the link down interrupt.
+ */
+static void
+i40e_dev_interrupt_delayed_handler(void *param)
+{
+       struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint32_t icr0;
+
+       /* read interrupt causes again */
+       icr0 = I40E_READ_REG(hw, I40E_PFINT_ICR0);
+
+#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER
+       if (icr0 & I40E_PFINT_ICR0_ECC_ERR_MASK)
+               PMD_DRV_LOG(ERR, "ICR0: unrecoverable ECC error\n");
+       if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK)
+               PMD_DRV_LOG(ERR, "ICR0: malicious programming detected\n");
+       if (icr0 & I40E_PFINT_ICR0_GRST_MASK)
+               PMD_DRV_LOG(INFO, "ICR0: global reset requested\n");
+       if (icr0 & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK)
+               PMD_DRV_LOG(INFO, "ICR0: PCI exception activated\n");
+       if (icr0 & I40E_PFINT_ICR0_STORM_DETECT_MASK)
+               PMD_DRV_LOG(INFO, "ICR0: a change in the storm control "
+                                                               "state\n");
+       if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK)
+               PMD_DRV_LOG(ERR, "ICR0: HMC error\n");
+       if (icr0 & I40E_PFINT_ICR0_PE_CRITERR_MASK)
+               PMD_DRV_LOG(ERR, "ICR0: protocol engine critical error\n");
+#endif /* RTE_LIBRTE_I40E_DEBUG_DRIVER */
+
+       if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) {
+               PMD_DRV_LOG(INFO, "INT:VF reset detected\n");
+               i40e_dev_handle_vfr_event(dev);
+       }
+       if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
+               PMD_DRV_LOG(INFO, "INT:ADMINQ event\n");
+               i40e_dev_handle_aq_msg(dev);
+       }
+
+       /* handle the link up interrupt in an alarm callback */
+       i40e_dev_link_update(dev, 0);
+       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+
+       i40e_pf_enable_irq0(hw);
+       rte_intr_enable(&(dev->pci_dev->intr_handle));
+}
+
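
As a usage sketch, a hypothetical application would arm this path by
configuring the port with dev_conf.intr_conf.lsc = 1 (the new
RTE_PCI_DRV_INTR_LSC flag above advertises support) and registering an LSC
callback; the names below are illustrative:

    #include <stdio.h>
    #include <rte_ethdev.h>

    /* fired via _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC) */
    static void
    lsc_event_cb(uint8_t port_id, enum rte_eth_event_type type, void *arg)
    {
            struct rte_eth_link link;

            (void)arg;
            if (type != RTE_ETH_EVENT_INTR_LSC)
                    return;
            rte_eth_link_get_nowait(port_id, &link);
            printf("port %u link is %s\n", port_id,
                   link.link_status ? "up" : "down");
    }

    static void
    register_lsc_cb(uint8_t port_id)
    {
            rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
                                          lsc_event_cb, NULL);
    }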
 /**
  * Interrupt handler triggered by NIC for handling
  * specific interrupt.
@@ -3544,20 +3797,20 @@ i40e_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       uint32_t icr0, icr0_ena;
+       uint32_t icr0;
 
+       /* Disable interrupt */
        i40e_pf_disable_irq0(hw);
 
+       /* read out interrupt causes */
        icr0 = I40E_READ_REG(hw, I40E_PFINT_ICR0);
-       icr0_ena = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA);
 
        /* No interrupt event indicated */
        if (!(icr0 & I40E_PFINT_ICR0_INTEVENT_MASK)) {
                PMD_DRV_LOG(INFO, "No interrupt event");
                goto done;
        }
-
-+#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER
+#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER
        if (icr0 & I40E_PFINT_ICR0_ECC_ERR_MASK)
                PMD_DRV_LOG(ERR, "ICR0: unrecoverable ECC error");
        if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK)
@@ -3583,16 +3836,33 @@ i40e_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
                i40e_dev_handle_aq_msg(dev);
        }
 
+       /* Link Status Change interrupt */
        if (icr0 & I40E_PFINT_ICR0_LINK_STAT_CHANGE_MASK) {
-               PMD_DRV_LOG(INFO, "INT:Link status changed");
+#define I40E_US_PER_SECOND 1000000
+               struct rte_eth_link link;
+
+               PMD_DRV_LOG(INFO, "ICR0: link status changed\n");
+               memset(&link, 0, sizeof(link));
+               rte_i40e_dev_atomic_read_link_status(dev, &link);
                i40e_dev_link_update(dev, 0);
+
+               /*
+                * For a link up interrupt, wait 1 second to let the hardware
+                * reach a stable state; otherwise several consecutive
+                * interrupts can be observed.
+                * For a link down interrupt, there is no need to wait.
+                */
+               if (!link.link_status && rte_eal_alarm_set(I40E_US_PER_SECOND,
+                       i40e_dev_interrupt_delayed_handler, (void *)dev) >= 0)
+                       return;
+               else
+                       _rte_eth_dev_callback_process(dev,
+                               RTE_ETH_EVENT_INTR_LSC);
        }
 
 done:
-       I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, icr0_ena);
-       /* Re-enable interrupt from device side */
+       /* Enable interrupt */
        i40e_pf_enable_irq0(hw);
-       /* Re-enable interrupt from host side */
        rte_intr_enable(&(dev->pci_dev->intr_handle));
 }
 
@@ -4452,8 +4722,8 @@ i40e_add_vxlan_port(struct i40e_pf *pf, uint16_t port)
                return -1;
        }
 
-       PMD_DRV_LOG(INFO, "Added %s port %d with AQ command with index %d",
-                        port,  filter_index);
+       PMD_DRV_LOG(INFO, "Added port %d with AQ command with index %d",
+                        port,  filter_idx);
 
        /* New port: add it and mark its index in the bitmap */
        pf->vxlan_ports[idx] = port;
@@ -4560,6 +4830,26 @@ i40e_dev_udp_tunnel_del(struct rte_eth_dev *dev,
        return ret;
 }
 
+/* Calculate the maximum number of contiguous PF queues that are configured */
+static int
+i40e_pf_calc_configured_queues_num(struct i40e_pf *pf)
+{
+       struct rte_eth_dev_data *data = pf->dev_data;
+       int i, num;
+       struct i40e_rx_queue *rxq;
+
+       num = 0;
+       for (i = 0; i < pf->lan_nb_qps; i++) {
+               rxq = data->rx_queues[i];
+               if (rxq && rxq->q_set)
+                       num++;
+               else
+                       break;
+       }
+
+       return num;
+}
+
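
A small worked example (queue layout assumed): with lan_nb_qps = 8 and only RX
queues 0..3 and 6..7 configured by the application, the scan stops at the
first hole and the function returns 4; i40e_align_floor(4) then keeps RSS on
queues 0..3, and queues 6..7 are simply left out of the RSS lookup table.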
 /* Configure RSS */
 static int
 i40e_pf_config_rss(struct i40e_pf *pf)
@@ -4567,7 +4857,25 @@ i40e_pf_config_rss(struct i40e_pf *pf)
        struct i40e_hw *hw = I40E_PF_TO_HW(pf);
        struct rte_eth_rss_conf rss_conf;
        uint32_t i, lut = 0;
-       uint16_t j, num = i40e_prev_power_of_2(pf->dev_data->nb_rx_queues);
+       uint16_t j, num;
+
+       /*
+        * If both VMDQ and RSS are enabled, not all PF queues are configured.
+        * It's necessary to calculate the actual number of configured PF queues.
+        */
+       if (pf->dev_data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_VMDQ_FLAG) {
+               num = i40e_pf_calc_configured_queues_num(pf);
+               num = i40e_align_floor(num);
+       } else
+               num = i40e_align_floor(pf->dev_data->nb_rx_queues);
+
+       PMD_INIT_LOG(INFO, "A maximum of %u contiguous PF queues are configured",
+                       num);
+
+       if (num == 0) {
+               PMD_INIT_LOG(ERR, "No PF queues are configured to enable RSS");
+               return -ENOTSUP;
+       }
 
        for (i = 0, j = 0; i < hw->func_caps.rss_table_size; i++, j++) {
                if (j == num)
@@ -4665,18 +4973,21 @@ i40e_tunnel_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
 static int
 i40e_pf_config_mq_rx(struct i40e_pf *pf)
 {
-       if (!pf->dev_data->sriov.active) {
-               switch (pf->dev_data->dev_conf.rxmode.mq_mode) {
-               case ETH_MQ_RX_RSS:
-                       i40e_pf_config_rss(pf);
-                       break;
-               default:
-                       i40e_pf_disable_rss(pf);
-                       break;
-               }
+       int ret = 0;
+       enum rte_eth_rx_mq_mode mq_mode = pf->dev_data->dev_conf.rxmode.mq_mode;
+
+       if (mq_mode & ETH_MQ_RX_DCB_FLAG) {
+               PMD_INIT_LOG(ERR, "i40e doesn't support DCB yet");
+               return -ENOTSUP;
        }
 
-       return 0;
+       /* RSS setup */
+       if (mq_mode & ETH_MQ_RX_RSS_FLAG)
+               ret = i40e_pf_config_rss(pf);
+       else
+               i40e_pf_disable_rss(pf);
+
+       return ret;
 }
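
Note that the checks above are flag tests rather than equality tests, so
combined modes behave as expected. A sketch of the relationships (values
assumed from the flag definitions in rte_ethdev.h):

    /* ETH_MQ_RX_RSS       == ETH_MQ_RX_RSS_FLAG
     * ETH_MQ_RX_VMDQ_ONLY == ETH_MQ_RX_VMDQ_FLAG
     * ETH_MQ_RX_VMDQ_RSS  == ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_VMDQ_FLAG
     *
     * With ETH_MQ_RX_VMDQ_RSS, i40e_vmdq_setup() runs in dev_configure()
     * and i40e_pf_config_rss() runs here, which is why the contiguous
     * configured-queue count above matters. */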
 
 static int