fm10k: get descriptor limits
[dpdk.git] / drivers / net / fm10k / fm10k_ethdev.c
index 1da4ab7..caa1272 100644 (file)
 #include "fm10k.h"
 #include "base/fm10k_api.h"
 
-#define FM10K_RX_BUFF_ALIGN 512
 /* Default delay to acquire mailbox lock */
 #define FM10K_MBXLOCK_DELAY_US 20
 #define UINT64_LOWER_32BITS_MASK 0x00000000ffffffffULL
 
+#define MAIN_VSI_POOL_NUMBER 0
+
+/* Max try times to acquire switch status */
+#define MAX_QUERY_SWITCH_STATE_TIMES 10
+/* Wait interval to get switch status */
+#define WAIT_SWITCH_MSG_US    100000
 /* Number of chars per uint32 type */
 #define CHARS_PER_UINT32 (sizeof(uint32_t))
 #define BIT_MASK_PER_UINT32 ((1 << CHARS_PER_UINT32) - 1)
@@ -56,6 +61,12 @@ static void fm10k_dev_promiscuous_disable(struct rte_eth_dev *dev);
 static void fm10k_dev_allmulticast_enable(struct rte_eth_dev *dev);
 static void fm10k_dev_allmulticast_disable(struct rte_eth_dev *dev);
 static inline int fm10k_glort_valid(struct fm10k_hw *hw);
+static int
+fm10k_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on);
+static void fm10k_MAC_filter_set(struct rte_eth_dev *dev,
+       const u8 *mac, bool add, uint32_t pool);
+static void fm10k_tx_queue_release(void *queue);
+static void fm10k_rx_queue_release(void *queue);
 
 static void
 fm10k_mbx_initlock(struct fm10k_hw *hw)
@@ -171,7 +182,7 @@ rx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
        /* Wait 100us at most */
        for (i = 0; i < FM10K_QUEUE_DISABLE_TIMEOUT; i++) {
                rte_delay_us(1);
-               reg = FM10K_READ_REG(hw, FM10K_RXQCTL(i));
+               reg = FM10K_READ_REG(hw, FM10K_RXQCTL(qnum));
                if (!(reg & FM10K_RXQCTL_ENABLE))
                        break;
        }
@@ -194,7 +205,6 @@ tx_queue_reset(struct fm10k_tx_queue *q)
        q->next_free = 0;
        q->nb_used = 0;
        q->nb_free = q->nb_desc - 1;
-       q->free_trigger = q->nb_free - q->free_thresh;
        fifo_reset(&q->rs_tracker, (q->nb_desc + 1) / q->rs_thresh);
        FM10K_PCI_REG_WRITE(q->tail_ptr, 0);
 }
@@ -261,7 +271,7 @@ tx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
        /* Wait 100us at most */
        for (i = 0; i < FM10K_QUEUE_DISABLE_TIMEOUT; i++) {
                rte_delay_us(1);
-               reg = FM10K_READ_REG(hw, FM10K_TXDCTL(i));
+               reg = FM10K_READ_REG(hw, FM10K_TXDCTL(qnum));
                if (!(reg & FM10K_TXDCTL_ENABLE))
                        break;
        }
@@ -272,19 +282,96 @@ tx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
        return 0;
 }
 
+/*
+ * Check that the RX multi-queue mode in the device configuration can be
+ * honoured: DCB is rejected, VMDQ is rejected on a VF, and the VMDQ pool
+ * count may exceed neither the bit width of a pool map nor the number of
+ * RX queues. Returns 0 on success and -EINVAL otherwise.
+ */
+static int
+fm10k_check_mq_mode(struct rte_eth_dev *dev)
+{
+       struct rte_eth_conf *conf = &dev->data->dev_conf;
+       struct rte_eth_vmdq_rx_conf *vmdq = &conf->rx_adv_conf.vmdq_rx_conf;
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       enum rte_eth_rx_mq_mode mode = conf->rxmode.mq_mode;
+       uint16_t rxq_count = dev->data->nb_rx_queues;
+
+       if (mode & ETH_MQ_RX_DCB_FLAG) {
+               PMD_INIT_LOG(ERR, "DCB mode is not supported.");
+               return -EINVAL;
+       }
+
+       /* Without VMDQ there is nothing else to validate */
+       if ((mode & ETH_MQ_RX_VMDQ_FLAG) == 0)
+               return 0;
+
+       if (hw->mac.type == fm10k_mac_vf) {
+               PMD_INIT_LOG(ERR, "VMDQ mode is not supported in VF.");
+               return -EINVAL;
+       }
+
+       /* One bit per pool in the map, and no more pools than RX queues */
+       if (vmdq->nb_queue_pools > rxq_count ||
+                       vmdq->nb_queue_pools >
+                       sizeof(vmdq->pool_map[0].pools) * CHAR_BIT) {
+               PMD_INIT_LOG(ERR, "Too many of queue pools: %d",
+                       vmdq->nb_queue_pools);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int
 fm10k_dev_configure(struct rte_eth_dev *dev)
 {
+       int ret;
+
        PMD_INIT_FUNC_TRACE();
 
        if (dev->data->dev_conf.rxmode.hw_strip_crc == 0)
                PMD_INIT_LOG(WARNING, "fm10k always strip CRC");
+       /* Validate the requested multiple queue mode; fail configure on error */
+       ret  = fm10k_check_mq_mode(dev);
+       if (ret != 0) {
+               PMD_DRV_LOG(ERR, "fm10k_check_mq_mode fails with %d.",
+                           ret);
+               return ret;
+       }
 
        return 0;
 }
 
+/* fls = find last set bit = 32 minus the number of leading zeros */
+#ifndef fls
+#define fls(x) (((x) == 0) ? 0 : (32 - __builtin_clz((x))))
+#endif
+
 static void
-fm10k_dev_mq_rx_configure(struct rte_eth_dev *dev)
+fm10k_dev_vmdq_rx_configure(struct rte_eth_dev *dev)
+{
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_eth_vmdq_rx_conf *vmdq_conf;
+       uint32_t i;
+
+       vmdq_conf = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
+
+       for (i = 0; i < vmdq_conf->nb_pool_maps; i++) {
+               if (!vmdq_conf->pool_map[i].pools)
+                       continue;
+               fm10k_mbx_lock(hw);
+               fm10k_update_vlan(hw, vmdq_conf->pool_map[i].vlan_id, 0, true);
+               fm10k_mbx_unlock(hw);
+       }
+}
+
+/* Add the default MAC address (hw->mac.addr) to the main VSI pool */
+static void
+fm10k_dev_pf_main_vsi_reset(struct rte_eth_dev *dev)
+{
+       struct fm10k_hw *hw;
+
+       hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       fm10k_MAC_filter_set(dev, hw->mac.addr, true, MAIN_VSI_POOL_NUMBER);
+}
+
+static void
+fm10k_dev_rss_configure(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
@@ -321,7 +408,7 @@ fm10k_dev_mq_rx_configure(struct rte_eth_dev *dev)
         * little-endian order.
         */
        reta = 0;
-       for (i = 0, j = 0; i < FM10K_RETA_SIZE; i++, j++) {
+       for (i = 0, j = 0; i < FM10K_MAX_RSS_INDICES; i++, j++) {
                if (j == dev->data->nb_rx_queues)
                        j = 0;
                reta = (reta << CHAR_BIT) | j;
@@ -355,6 +442,78 @@ fm10k_dev_mq_rx_configure(struct rte_eth_dev *dev)
        FM10K_WRITE_REG(hw, FM10K_MRQC(0), mrqc);
 }
 
+/*
+ * Switch the enabled logical ports from nb_lport_old to nb_lport_new (both
+ * counted from hw->mac.dglort_map), then set every newly enabled port to
+ * FM10K_XCAST_MODE_NONE. Each hardware update is wrapped in
+ * fm10k_mbx_lock()/fm10k_mbx_unlock().
+ */
+static void
+fm10k_dev_logic_port_update(struct rte_eth_dev *dev,
+       uint16_t nb_lport_old, uint16_t nb_lport_new)
+{
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint32_t lport = 0;
+
+       fm10k_mbx_lock(hw);
+       /* Turn off whatever was enabled before, if anything */
+       if (nb_lport_old != 0)
+               hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map,
+                       nb_lport_old, false);
+       /* Turn on the requested number of logical ports */
+       hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map,
+               nb_lport_new, true);
+       fm10k_mbx_unlock(hw);
+
+       /* Default xcast mode for each port; the application can change
+        * it later through other API functions.
+        */
+       while (lport < nb_lport_new) {
+               fm10k_mbx_lock(hw);
+               hw->mac.ops.update_xcast_mode(hw, hw->mac.dglort_map + lport,
+                       FM10K_XCAST_MODE_NONE);
+               fm10k_mbx_unlock(hw);
+               lport++;
+       }
+}
+
+/*
+ * Configure RSS and, on a PF only, bring the logical ports and the MAC/VLAN
+ * filter bookkeeping in line with the configured number of VMDQ pools.
+ * Nothing beyond RSS is touched when the pool count is unchanged.
+ */
+static void
+fm10k_dev_mq_rx_configure(struct rte_eth_dev *dev)
+{
+       struct rte_eth_conf *conf = &dev->data->dev_conf;
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct fm10k_macvlan_filter_info *filters =
+               FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
+       uint16_t pools_wanted = 0; /* pool number in configuration */
+       uint16_t lports_old, lports_new;
+
+       fm10k_dev_rss_configure(dev);
+
+       /* only PF supports VMDQ */
+       if (hw->mac.type != fm10k_mac_pf)
+               return;
+
+       if (conf->rxmode.mq_mode & ETH_MQ_RX_VMDQ_FLAG)
+               pools_wanted = conf->rx_adv_conf.vmdq_rx_conf.nb_queue_pools;
+
+       /* Same pool count as before: logical ports and filters stay as is */
+       if (filters->nb_queue_pools == pools_wanted)
+               return;
+
+       /* A pool count of zero still maps to one logical port */
+       lports_old = filters->nb_queue_pools ? filters->nb_queue_pools : 1;
+       lports_new = pools_wanted ? pools_wanted : 1;
+       fm10k_dev_logic_port_update(dev, lports_old, lports_new);
+
+       /* MAC/VLAN state depends on VMDQ vs. PF main VSI, so start over
+        * with only the default MAC address in slot 0.
+        */
+       memset(dev->data->mac_addrs, 0,
+               ETHER_ADDR_LEN * FM10K_MAX_MACADDR_NUM);
+       ether_addr_copy((const struct ether_addr *)hw->mac.addr,
+               &dev->data->mac_addrs[0]);
+       memset(filters, 0, sizeof(*filters));
+       filters->nb_queue_pools = pools_wanted;
+
+       if (pools_wanted == 0)
+               fm10k_dev_pf_main_vsi_reset(dev);
+       else
+               fm10k_dev_vmdq_rx_configure(dev);
+}
+
 static int
 fm10k_dev_tx_init(struct rte_eth_dev *dev)
 {
@@ -431,14 +590,28 @@ fm10k_dev_rx_init(struct rte_eth_dev *dev)
                /* Configure the Rx buffer size for one buff without split */
                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
                        RTE_PKTMBUF_HEADROOM);
+               /* As RX buffer is aligned to 512B within mbuf, some bytes are
+                * reserved for this purpose, and the worst case could be 511B.
+                * But SRR reg assumes all buffers have the same size. In order
+                * to fill the gap, we'll have to consider the worst case and
+                * assume 512B is reserved. If we don't do so, it's possible
+                * for HW to overwrite data to next mbuf.
+                */
+               buf_size -= FM10K_RX_DATABUF_ALIGN;
+
                FM10K_WRITE_REG(hw, FM10K_SRRCTL(i),
                                buf_size >> FM10K_SRRCTL_BSIZEPKT_SHIFT);
 
                /* It adds dual VLAN length for supporting dual VLAN */
                if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
-                               2 * FM10K_VLAN_TAG_SIZE) > buf_size){
+                               2 * FM10K_VLAN_TAG_SIZE) > buf_size ||
+                       dev->data->dev_conf.rxmode.enable_scatter) {
+                       uint32_t reg;
                        dev->data->scattered_rx = 1;
                        dev->rx_pkt_burst = fm10k_recv_scattered_pkts;
+                       reg = FM10K_READ_REG(hw, FM10K_SRRCTL(i));
+                       reg |= FM10K_SRRCTL_BUFFER_CHAINING_EN;
+                       FM10K_WRITE_REG(hw, FM10K_SRRCTL(i), reg);
                }
 
                /* Enable drop on empty, it's RO for VF */
@@ -449,12 +622,7 @@ fm10k_dev_rx_init(struct rte_eth_dev *dev)
                FM10K_WRITE_FLUSH(hw);
        }
 
-       if (dev->data->dev_conf.rxmode.enable_scatter) {
-               dev->rx_pkt_burst = fm10k_recv_scattered_pkts;
-               dev->data->scattered_rx = 1;
-       }
-
-       /* Configure RSS if applicable */
+       /* Configure VMDQ/RSS if applicable */
        fm10k_dev_mq_rx_configure(dev);
        return 0;
 }
@@ -684,10 +852,32 @@ fm10k_dev_allmulticast_disable(struct rte_eth_dev *dev)
                PMD_INIT_LOG(ERR, "Failed to disable allmulticast mode");
 }
 
-/* fls = find last set bit = 32 minus the number of leading zeros */
-#ifndef fls
-#define fls(x) (((x) == 0) ? 0 : (32 - __builtin_clz((x))))
-#endif
+/*
+ * Program the DGLORT map/decoder registers for the current VMDQ/RSS layout.
+ *
+ * Entry 0 is written with FM10K_DGLORTMAP_ANY and its decoder receives the
+ * pool index width in the low bits plus the RSS index width shifted by
+ * FM10K_DGLORTDEC_RSSLENGTH_SHIFT; entries 1..FM10K_DGLORT_COUNT-1 are
+ * written with FM10K_DGLORTMAP_NONE.
+ */
+static void
+fm10k_dev_dglort_map_configure(struct rte_eth_dev *dev)
+{
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct fm10k_macvlan_filter_info *filters =
+               FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
+       uint16_t pools = filters->nb_queue_pools;
+       uint32_t pool_bits = 0;
+       uint32_t rss_bits, decoder, entry;
+
+       /* Bits needed to index a pool; stays zero with no pools configured */
+       if (pools)
+               pool_bits = fls(pools - 1);
+       /* RSS width is what remains of the queue index width */
+       rss_bits = fls(dev->data->nb_rx_queues - 1) - pool_bits;
+       decoder = pool_bits | (rss_bits << FM10K_DGLORTDEC_RSSLENGTH_SHIFT);
+
+       /* Establish only MAP 0 as valid */
+       FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(0), FM10K_DGLORTMAP_ANY);
+
+       /* Configure VMDQ/RSS DGlort Decoder */
+       FM10K_WRITE_REG(hw, FM10K_DGLORTDEC(0), decoder);
+
+       /* Invalidate all other GLORT entries */
+       for (entry = 1; entry < FM10K_DGLORT_COUNT; entry++)
+               FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(entry),
+                               FM10K_DGLORTMAP_NONE);
+}
+
 #define BSIZEPKT_ROUNDUP ((1 << FM10K_SRRCTL_BSIZEPKT_SHIFT) - 1)
 static int
 fm10k_dev_start(struct rte_eth_dev *dev)
@@ -728,20 +918,8 @@ fm10k_dev_start(struct rte_eth_dev *dev)
                return diag;
        }
 
-       if (hw->mac.type == fm10k_mac_pf) {
-               /* Establish only VSI 0 as valid */
-               FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(0), FM10K_DGLORTMAP_ANY);
-
-               /* Configure RSS bits used in RETA table */
-               FM10K_WRITE_REG(hw, FM10K_DGLORTDEC(0),
-                               fls(dev->data->nb_rx_queues - 1) <<
-                               FM10K_DGLORTDEC_RSSLENGTH_SHIFT);
-
-               /* Invalidate all other GLORT entries */
-               for (i = 1; i < FM10K_DGLORT_COUNT; i++)
-                       FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(i),
-                                       FM10K_DGLORTMAP_NONE);
-       }
+       if (hw->mac.type == fm10k_mac_pf)
+               fm10k_dev_dglort_map_configure(dev);
 
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct fm10k_rx_queue *rxq;
@@ -767,12 +945,18 @@ fm10k_dev_start(struct rte_eth_dev *dev)
                diag = fm10k_dev_tx_queue_start(dev, i);
                if (diag != 0) {
                        int j;
+                       for (j = 0; j < i; ++j)
+                               tx_queue_clean(dev->data->tx_queues[j]);
                        for (j = 0; j < dev->data->nb_rx_queues; ++j)
                                rx_queue_clean(dev->data->rx_queues[j]);
                        return diag;
                }
        }
 
+       /* Update default vlan when not in VMDQ mode */
+       if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_VMDQ_FLAG))
+               fm10k_vlan_filter_set(dev, hw->mac.default_vid, true);
+
        return 0;
 }
 
@@ -783,23 +967,53 @@ fm10k_dev_stop(struct rte_eth_dev *dev)
 
        PMD_INIT_FUNC_TRACE();
 
-       for (i = 0; i < dev->data->nb_tx_queues; i++)
-               fm10k_dev_tx_queue_stop(dev, i);
+       if (dev->data->tx_queues)
+               for (i = 0; i < dev->data->nb_tx_queues; i++)
+                       fm10k_dev_tx_queue_stop(dev, i);
 
-       for (i = 0; i < dev->data->nb_rx_queues; i++)
-               fm10k_dev_rx_queue_stop(dev, i);
+       if (dev->data->rx_queues)
+               for (i = 0; i < dev->data->nb_rx_queues; i++)
+                       fm10k_dev_rx_queue_stop(dev, i);
+}
+
+/*
+ * Hand every configured TX and RX queue of the device to its release
+ * function. A queue array that was never allocated is skipped.
+ */
+static void
+fm10k_dev_queue_release(struct rte_eth_dev *dev)
+{
+       struct rte_eth_dev_data *data = dev->data;
+       int q;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (data->tx_queues)
+               for (q = 0; q < data->nb_tx_queues; q++)
+                       fm10k_tx_queue_release(data->tx_queues[q]);
+
+       if (data->rx_queues)
+               for (q = 0; q < data->nb_rx_queues; q++)
+                       fm10k_rx_queue_release(data->rx_queues[q]);
+}
 
 static void
 fm10k_dev_close(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint16_t nb_lport; /* logical ports to disable */
+       struct fm10k_macvlan_filter_info *macvlan;
 
        PMD_INIT_FUNC_TRACE();
 
+       macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
+       nb_lport = macvlan->nb_queue_pools ? macvlan->nb_queue_pools : 1;
+       fm10k_mbx_lock(hw); /* lport update is done under the mbx lock */
+       hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map,
+               nb_lport, false);
+       fm10k_mbx_unlock(hw);
+       /* Logical ports are now disabled: one per VMDQ pool, else one */
        /* Stop mailbox service first */
        fm10k_close_mbx_service(hw);
        fm10k_dev_stop(dev);
+       fm10k_dev_queue_release(dev); /* release all TX and RX queues */
        fm10k_stop_hw(hw);
 }
 
@@ -876,15 +1090,26 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
        dev_info->max_rx_pktlen      = FM10K_MAX_PKT_SIZE;
        dev_info->max_rx_queues      = hw->mac.max_queues;
        dev_info->max_tx_queues      = hw->mac.max_queues;
-       dev_info->max_mac_addrs      = 1;
+       dev_info->max_mac_addrs      = FM10K_MAX_MACADDR_NUM;
        dev_info->max_hash_mac_addrs = 0;
        dev_info->max_vfs            = dev->pci_dev->max_vfs;
-       dev_info->max_vmdq_pools     = ETH_64_POOLS;
+       dev_info->vmdq_pool_base     = 0;
+       dev_info->vmdq_queue_base    = 0;
+       dev_info->max_vmdq_pools     = ETH_32_POOLS;
+       dev_info->vmdq_queue_num     = FM10K_MAX_QUEUES_PF;
        dev_info->rx_offload_capa =
+               DEV_RX_OFFLOAD_VLAN_STRIP |
                DEV_RX_OFFLOAD_IPV4_CKSUM |
                DEV_RX_OFFLOAD_UDP_CKSUM  |
                DEV_RX_OFFLOAD_TCP_CKSUM;
-       dev_info->tx_offload_capa    = 0;
+       dev_info->tx_offload_capa =
+               DEV_TX_OFFLOAD_VLAN_INSERT |
+               DEV_TX_OFFLOAD_IPV4_CKSUM  |
+               DEV_TX_OFFLOAD_UDP_CKSUM   |
+               DEV_TX_OFFLOAD_TCP_CKSUM   |
+               DEV_TX_OFFLOAD_TCP_TSO;
+
+       dev_info->hash_key_size = FM10K_RSSRK_SIZE * sizeof(uint32_t);
        dev_info->reta_size = FM10K_MAX_RSS_INDICES;
 
        dev_info->default_rxconf = (struct rte_eth_rxconf) {
@@ -909,20 +1134,220 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
                                ETH_TXQ_FLAGS_NOOFFLOADS,
        };
 
+       dev_info->rx_desc_lim = (struct rte_eth_desc_lim) {
+               .nb_max = FM10K_MAX_RX_DESC,
+               .nb_min = FM10K_MIN_RX_DESC,
+               .nb_align = FM10K_MULT_RX_DESC,
+       };
+
+       dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
+               .nb_max = FM10K_MAX_TX_DESC,
+               .nb_min = FM10K_MIN_TX_DESC,
+               .nb_align = FM10K_MULT_TX_DESC,
+       };
 }
 
 static int
 fm10k_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+       s32 result;
+       uint16_t mac_num = 0;
+       uint32_t vid_idx, vid_bit, mac_index;
+       struct fm10k_hw *hw;
+       struct fm10k_macvlan_filter_info *macvlan;
+       struct rte_eth_dev_data *data = dev->data;
+
+       hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
+
+       if (macvlan->nb_queue_pools > 0) { /* VMDQ mode */
+               PMD_INIT_LOG(ERR, "Cannot change VLAN filter in VMDQ mode");
+               return (-EINVAL);
+       }
+
+       if (vlan_id > ETH_VLAN_ID_MAX) {
+               PMD_INIT_LOG(ERR, "Invalid vlan_id: must be < 4096");
+               return (-EINVAL);
+       }
+
+       vid_idx = FM10K_VFTA_IDX(vlan_id);
+       vid_bit = FM10K_VFTA_BIT(vlan_id);
+       /* this VLAN ID is already in the VLAN filter table, return SUCCESS */
+       if (on && (macvlan->vfta[vid_idx] & vid_bit))
+               return 0;
+       /* this VLAN ID is NOT in the VLAN filter table, cannot remove */
+       if (!on && !(macvlan->vfta[vid_idx] & vid_bit)) {
+               PMD_INIT_LOG(ERR, "Invalid vlan_id: not existing "
+                       "in the VLAN filter table");
+               return (-EINVAL);
+       }
+
+       fm10k_mbx_lock(hw);
+       result = fm10k_update_vlan(hw, vlan_id, 0, on);
+       fm10k_mbx_unlock(hw);
+       if (result != FM10K_SUCCESS) {
+               PMD_INIT_LOG(ERR, "VLAN update failed: %d", result);
+               return (-EIO);
+       }
+
+       for (mac_index = 0; (mac_index < FM10K_MAX_MACADDR_NUM) &&
+                       (result == FM10K_SUCCESS); mac_index++) {
+               if (is_zero_ether_addr(&data->mac_addrs[mac_index]))
+                       continue;
+               if (mac_num > macvlan->mac_num - 1) {
+                       PMD_INIT_LOG(ERR, "MAC address number "
+                                       "not match");
+                       break;
+               }
+               fm10k_mbx_lock(hw);
+               result = fm10k_update_uc_addr(hw, hw->mac.dglort_map,
+                       data->mac_addrs[mac_index].addr_bytes,
+                       vlan_id, on, 0);
+               fm10k_mbx_unlock(hw);
+               mac_num++;
+       }
+       if (result != FM10K_SUCCESS) {
+               PMD_INIT_LOG(ERR, "MAC address update failed: %d", result);
+               return (-EIO);
+       }
+
+       if (on) {
+               macvlan->vlan_num++;
+               macvlan->vfta[vid_idx] |= vid_bit;
+       } else {
+               macvlan->vlan_num--;
+               macvlan->vfta[vid_idx] &= ~vid_bit;
+       }
+       return 0;
+}
+
+/*
+ * VLAN offload configuration hook.
+ *
+ * No hardware register is written here. For each offload selected in
+ * `mask` the function only logs an error when dev_conf.rxmode asks for a
+ * setting this driver reports it cannot honour: stripping disabled, QinQ
+ * enabled, or filtering disabled.
+ *
+ * `dev` is read below, so it must not carry the __rte_unused attribute.
+ */
+static void
+fm10k_vlan_offload_set(struct rte_eth_dev *dev, int mask)
+{
+       if (mask & ETH_VLAN_STRIP_MASK) {
+               if (!dev->data->dev_conf.rxmode.hw_vlan_strip)
+                       PMD_INIT_LOG(ERR, "VLAN stripping is "
+                                       "always on in fm10k");
+       }
+
+       if (mask & ETH_VLAN_EXTEND_MASK) {
+               if (dev->data->dev_conf.rxmode.hw_vlan_extend)
+                       PMD_INIT_LOG(ERR, "VLAN QinQ is not "
+                                       "supported in fm10k");
+       }
+
+       if (mask & ETH_VLAN_FILTER_MASK) {
+               if (!dev->data->dev_conf.rxmode.hw_vlan_filter)
+                       PMD_INIT_LOG(ERR, "VLAN filter is always on in fm10k");
+       }
+}
+
+/*
+ * Add/Remove a MAC address, and update filters to main VSI.
+ *
+ * The address is added to (add == true) or removed from (add == false)
+ * every VLAN whose bit is set in the driver's VLAN filter table, using the
+ * glort hw->mac.dglort_map. Only MAIN_VSI_POOL_NUMBER is accepted as pool;
+ * any other value is logged and ignored.
+ */
+static void fm10k_MAC_filter_set_main_vsi(struct rte_eth_dev *dev,
+               const u8 *mac, bool add, uint32_t pool)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct fm10k_macvlan_filter_info *macvlan;
+       uint32_t i, j, k;
 
-       PMD_INIT_FUNC_TRACE();
+       macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
 
-       /* @todo - add support for the VF */
-       if (hw->mac.type != fm10k_mac_pf)
-               return -ENOTSUP;
+       if (pool != MAIN_VSI_POOL_NUMBER) {
+               PMD_DRV_LOG(ERR, "VMDQ not enabled, can't set "
+                       "mac to pool %u", pool);
+               return;
+       }
+       /* i counts the VLANs handled so far; j/k walk the table bit by bit */
+       for (i = 0, j = 0; j < FM10K_VFTA_SIZE; j++) {
+               if (!macvlan->vfta[j])
+                       continue;
+               for (k = 0; k < FM10K_UINT32_BIT_SIZE; k++) {
+                       /* Unsigned constant: k may reach the top bit of the
+                        * word, where shifting a signed 1 is undefined.
+                        */
+                       if (!(macvlan->vfta[j] & (1U << k)))
+                               continue;
+                       /* more bits set than the driver has counted */
+                       if (i + 1 > macvlan->vlan_num) {
+                               PMD_INIT_LOG(ERR, "vlan number not match");
+                               return;
+                       }
+                       fm10k_mbx_lock(hw);
+                       fm10k_update_uc_addr(hw, hw->mac.dglort_map, mac,
+                               j * FM10K_UINT32_BIT_SIZE + k, add, 0);
+                       fm10k_mbx_unlock(hw);
+                       i++;
+               }
+       }
+}
 
-       return fm10k_update_vlan(hw, vlan_id, 0, on);
+/*
+ * Add/Remove a MAC address, and update filters to VMDQ.
+ *
+ * The address is added to (add == true) or removed from (add == false)
+ * the VLAN of every configured pool map whose pool mask contains `pool`,
+ * using the glort hw->mac.dglort_map + pool.
+ *
+ * Valid pool indices are 0 .. nb_queue_pools - 1, matching the logical
+ * ports enabled from hw->mac.dglort_map; anything else is logged and
+ * ignored.
+ */
+static void fm10k_MAC_filter_set_vmdq(struct rte_eth_dev *dev,
+               const u8 *mac, bool add, uint32_t pool)
+{
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct fm10k_macvlan_filter_info *macvlan;
+       struct rte_eth_vmdq_rx_conf *vmdq_conf;
+       uint32_t i;
+
+       macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
+       vmdq_conf = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
+
+       /* pool == nb_queue_pools is already one past the last valid pool */
+       if (pool >= macvlan->nb_queue_pools) {
+               PMD_DRV_LOG(ERR, "Pool number %u invalid."
+                       " Max pool is %u",
+                       pool, macvlan->nb_queue_pools);
+               return;
+       }
+       for (i = 0; i < vmdq_conf->nb_pool_maps; i++) {
+               /* 64-bit constant so the shift stays defined for pools
+                * at or above bit 32 when unsigned long is 32 bits wide.
+                */
+               if (!(vmdq_conf->pool_map[i].pools & (1ULL << pool)))
+                       continue;
+               fm10k_mbx_lock(hw);
+               fm10k_update_uc_addr(hw, hw->mac.dglort_map + pool, mac,
+                       vmdq_conf->pool_map[i].vlan_id, add, 0);
+               fm10k_mbx_unlock(hw);
+       }
+}
+
+/*
+ * Add/Remove a MAC address, and update filters.
+ *
+ * Dispatches to the VMDQ variant when pools are configured and to the
+ * main VSI variant otherwise, then adjusts the driver's MAC address count.
+ */
+static void fm10k_MAC_filter_set(struct rte_eth_dev *dev,
+               const u8 *mac, bool add, uint32_t pool)
+{
+       struct fm10k_macvlan_filter_info *filters =
+               FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
+       bool vmdq_mode = filters->nb_queue_pools > 0;
+
+       if (!vmdq_mode)
+               fm10k_MAC_filter_set_main_vsi(dev, mac, add, pool);
+       else
+               fm10k_MAC_filter_set_vmdq(dev, mac, add, pool);
+
+       if (!add)
+               filters->mac_num--;
+       else
+               filters->mac_num++;
+}
+
+/*
+ * Add a MAC address, and update filters.
+ *
+ * The filters are set for `pool`, and the pool is then remembered for
+ * address slot `index` so that it can be found again on removal.
+ */
+static void
+fm10k_macaddr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+               uint32_t index, uint32_t pool)
+{
+       struct fm10k_macvlan_filter_info *filters =
+               FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
+
+       fm10k_MAC_filter_set(dev, mac_addr->addr_bytes, TRUE, pool);
+       filters->mac_vmdq_id[index] = pool;
+}
+
+/*
+ * Remove a MAC address, and update filters.
+ *
+ * The address stored in slot `index` is removed from the pool recorded
+ * for that slot, and the recorded pool is then reset to 0.
+ */
+static void
+fm10k_macaddr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+       struct fm10k_macvlan_filter_info *filters =
+               FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
+       const u8 *addr = dev->data->mac_addrs[index].addr_bytes;
+
+       fm10k_MAC_filter_set(dev, addr, FALSE, filters->mac_vmdq_id[index]);
+       filters->mac_vmdq_id[index] = 0;
 }
 
 static inline int
@@ -1025,7 +1450,7 @@ mempool_element_size_valid(struct rte_mempool *mp)
                        RTE_PKTMBUF_HEADROOM;
 
        /* account for up to 512B of alignment */
-       min_size -= FM10K_RX_BUFF_ALIGN;
+       min_size -= FM10K_RX_DATABUF_ALIGN;
 
        /* sanity check for overflow */
        if (min_size > mp->elt_size)
@@ -1117,7 +1542,11 @@ fm10k_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
                return (-ENOMEM);
        }
        q->hw_ring = mz->addr;
+#ifdef RTE_LIBRTE_XEN_DOM0
+       q->hw_ring_phys_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
+#else
        q->hw_ring_phys_addr = mz->phys_addr;
+#endif
 
        dev->data->rx_queues[queue_id] = q;
        return 0;
@@ -1263,7 +1692,11 @@ fm10k_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
                return (-ENOMEM);
        }
        q->hw_ring = mz->addr;
+#ifdef RTE_LIBRTE_XEN_DOM0
+       q->hw_ring_phys_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
+#else
        q->hw_ring_phys_addr = mz->phys_addr;
+#endif
 
        /*
         * allocate memory for the RS bit tracker. Enough slots to hold the
@@ -1495,6 +1928,36 @@ fm10k_dev_enable_intr_pf(struct rte_eth_dev *dev)
        FM10K_WRITE_FLUSH(hw);
 }
 
+/*
+ * Disable the PF interrupts: write FM10K_INT_MAP_DISABLE to the map
+ * register of each interrupt source listed below, write the DISABLE value
+ * of every misc cause to EIMR, set the mask on ITR 0 and flush the writes.
+ */
+static void
+fm10k_dev_disable_intr_pf(struct rte_eth_dev *dev)
+{
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       const uint32_t map_off = FM10K_INT_MAP_DISABLE;
+
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_Mailbox), map_off);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_PCIeFault), map_off);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SwitchUpDown), map_off);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SwitchEvent), map_off);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SRAM), map_off);
+       FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_VFLR), map_off);
+
+       /* Disable misc causes */
+       FM10K_WRITE_REG(hw, FM10K_EIMR,
+                       FM10K_EIMR_DISABLE(PCA_FAULT) |
+                       FM10K_EIMR_DISABLE(THI_FAULT) |
+                       FM10K_EIMR_DISABLE(FUM_FAULT) |
+                       FM10K_EIMR_DISABLE(MAILBOX) |
+                       FM10K_EIMR_DISABLE(SWITCHREADY) |
+                       FM10K_EIMR_DISABLE(SWITCHNOTREADY) |
+                       FM10K_EIMR_DISABLE(SRAMERROR) |
+                       FM10K_EIMR_DISABLE(VFLR));
+
+       /* Disable ITR 0 */
+       FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_MASK_SET);
+       FM10K_WRITE_FLUSH(hw);
+}
+
 static void
 fm10k_dev_enable_intr_vf(struct rte_eth_dev *dev)
 {
@@ -1513,6 +1976,22 @@ fm10k_dev_enable_intr_vf(struct rte_eth_dev *dev)
        FM10K_WRITE_FLUSH(hw);
 }
 
+/*
+ * Disable the VF interrupt: write FM10K_INT_MAP_DISABLE to the VF
+ * interrupt map, set the mask on VF ITR 0 and flush the writes.
+ */
+static void
+fm10k_dev_disable_intr_vf(struct rte_eth_dev *dev)
+{
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       const uint32_t map_off = FM10K_INT_MAP_DISABLE;
+
+       /* Only INT 0 available, other 15 are reserved. */
+       FM10K_WRITE_REG(hw, FM10K_VFINT_MAP, map_off);
+
+       /* Disable ITR 0 */
+       FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_MASK_SET);
+       FM10K_WRITE_FLUSH(hw);
+}
+
 static int
 fm10k_dev_handle_fault(struct fm10k_hw *hw, uint32_t eicr)
 {
@@ -1521,7 +2000,7 @@ fm10k_dev_handle_fault(struct fm10k_hw *hw, uint32_t eicr)
        const char *estr = "Unknown error";
 
        /* Process PCA fault */
-       if (eicr & FM10K_EIMR_PCA_FAULT) {
+       if (eicr & FM10K_EICR_PCA_FAULT) {
                err = fm10k_get_fault(hw, FM10K_PCA_FAULT, &fault);
                if (err)
                        goto error;
@@ -1549,7 +2028,7 @@ fm10k_dev_handle_fault(struct fm10k_hw *hw, uint32_t eicr)
        }
 
        /* Process THI fault */
-       if (eicr & FM10K_EIMR_THI_FAULT) {
+       if (eicr & FM10K_EICR_THI_FAULT) {
                err = fm10k_get_fault(hw, FM10K_THI_FAULT, &fault);
                if (err)
                        goto error;
@@ -1567,7 +2046,7 @@ fm10k_dev_handle_fault(struct fm10k_hw *hw, uint32_t eicr)
        }
 
        /* Process FUM fault */
-       if (eicr & FM10K_EIMR_FUM_FAULT) {
+       if (eicr & FM10K_EICR_FUM_FAULT) {
                err = fm10k_get_fault(hw, FM10K_FUM_FAULT, &fault);
                if (err)
                        goto error;
@@ -1604,8 +2083,6 @@ fm10k_dev_handle_fault(struct fm10k_hw *hw, uint32_t eicr)
                        fault.address, fault.specinfo);
        }
 
-       if (estr)
-               return 0;
        return 0;
 error:
        PMD_INIT_LOG(ERR, "Failed to handle fault event.");
@@ -1776,6 +2253,9 @@ static const struct eth_dev_ops fm10k_eth_dev_ops = {
        .link_update            = fm10k_link_update,
        .dev_infos_get          = fm10k_dev_infos_get,
        .vlan_filter_set        = fm10k_vlan_filter_set,
+       .vlan_offload_set       = fm10k_vlan_offload_set,
+       .mac_addr_add           = fm10k_macaddr_add,
+       .mac_addr_remove        = fm10k_macaddr_remove,
        .rx_queue_start         = fm10k_dev_rx_queue_start,
        .rx_queue_stop          = fm10k_dev_rx_queue_stop,
        .tx_queue_start         = fm10k_dev_tx_queue_start,
@@ -1795,6 +2275,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int diag;
+       struct fm10k_macvlan_filter_info *macvlan;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1809,6 +2290,8 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;
 
+       macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
+       memset(macvlan, 0, sizeof(*macvlan));
        /* Vendor and Device ID need to be set before init of shared code */
        memset(hw, 0, sizeof(*hw));
        hw->device_id = dev->pci_dev->id.device_id;
@@ -1854,31 +2337,28 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
        }
 
        /* Initialize MAC address(es) */
-       dev->data->mac_addrs = rte_zmalloc("fm10k", ETHER_ADDR_LEN, 0);
+       dev->data->mac_addrs = rte_zmalloc("fm10k",
+                       ETHER_ADDR_LEN * FM10K_MAX_MACADDR_NUM, 0);
        if (dev->data->mac_addrs == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate memory for MAC addresses");
                return -ENOMEM;
        }
 
        diag = fm10k_read_mac_addr(hw);
-       if (diag != FM10K_SUCCESS) {
-               /*
-                * TODO: remove special handling on VF. Need shared code to
-                * fix first.
-                */
-               if (hw->mac.type == fm10k_mac_pf) {
-                       PMD_INIT_LOG(ERR, "Read MAC addr failed: %d", diag);
-                       return -EIO;
-               } else {
-                       /* Generate a random addr */
-                       eth_random_addr(hw->mac.addr);
-                       memcpy(hw->mac.perm_addr, hw->mac.addr, ETH_ALEN);
-               }
-       }
 
        ether_addr_copy((const struct ether_addr *)hw->mac.addr,
                        &dev->data->mac_addrs[0]);
 
+       if (diag != FM10K_SUCCESS ||
+               !is_valid_assigned_ether_addr(dev->data->mac_addrs)) {
+
+               /* Generate a random addr */
+               eth_random_addr(hw->mac.addr);
+               memcpy(hw->mac.perm_addr, hw->mac.addr, ETH_ALEN);
+               ether_addr_copy((const struct ether_addr *)hw->mac.addr,
+               &dev->data->mac_addrs[0]);
+       }
+
        /* Reset the hw statistics */
        fm10k_stats_reset(dev);
 
@@ -1911,6 +2391,32 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
                fm10k_dev_enable_intr_vf(dev);
        }
 
+       /* Enable uio intr after callback registered */
+       rte_intr_enable(&(dev->pci_dev->intr_handle));
+
+       hw->mac.ops.update_int_moderator(hw);
+
+       /* Make sure Switch Manager is ready before going forward. */
+       if (hw->mac.type == fm10k_mac_pf) {
+               int switch_ready = 0;
+               int i;
+
+               for (i = 0; i < MAX_QUERY_SWITCH_STATE_TIMES; i++) {
+                       fm10k_mbx_lock(hw);
+                       hw->mac.ops.get_host_state(hw, &switch_ready);
+                       fm10k_mbx_unlock(hw);
+                       if (switch_ready)
+                               break;
+                       /* Delay some time to acquire async LPORT_MAP info. */
+                       rte_delay_us(WAIT_SWITCH_MSG_US);
+               }
+
+               if (switch_ready == 0) {
+                       PMD_INIT_LOG(ERR, "switch is not ready");
+                       return -1;
+               }
+       }
+
        /*
         * Below function will trigger operations on mailbox, acquire lock to
         * avoid race condition from interrupt handler. Operations on mailbox
@@ -1920,17 +2426,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
         */
        fm10k_mbx_lock(hw);
        /* Enable port first */
-       hw->mac.ops.update_lport_state(hw, 0, 0, 1);
-
-       /* Update default vlan */
-       hw->mac.ops.update_vlan(hw, hw->mac.default_vid, 0, true);
-
-       /*
-        * Add default mac/vlan filter. glort is assigned by SM for PF, while is
-        * unused for VF. PF will assign correct glort for VF.
-        */
-       hw->mac.ops.update_uc_addr(hw, hw->mac.dglort_map, hw->mac.addr,
-                             hw->mac.default_vid, 1, 0);
+       hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map, 1, 1);
 
        /* Set unicast mode by default. App can change to other mode in other
         * API func.
@@ -1940,8 +2436,57 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
 
        fm10k_mbx_unlock(hw);
 
-       /* enable uio intr after callback registered */
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       /* Add default mac address */
+       fm10k_MAC_filter_set(dev, hw->mac.addr, true,
+               MAIN_VSI_POOL_NUMBER);
+
+       return 0;
+}
+
+static int
+eth_fm10k_dev_uninit(struct rte_eth_dev *dev)
+{
+       struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* only uninitialize in the primary process */
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
+       /* safe to close dev here */
+       fm10k_dev_close(dev);
+
+       dev->dev_ops = NULL;
+       dev->rx_pkt_burst = NULL;
+       dev->tx_pkt_burst = NULL;
+
+       /* disable uio/vfio intr */
+       rte_intr_disable(&(dev->pci_dev->intr_handle));
+
+       /* PF and VF have different interrupt handling mechanisms */
+       if (hw->mac.type == fm10k_mac_pf) {
+               /* disable interrupt */
+               fm10k_dev_disable_intr_pf(dev);
+
+               /* unregister callback func from eal lib */
+               rte_intr_callback_unregister(&(dev->pci_dev->intr_handle),
+                       fm10k_dev_interrupt_handler_pf, (void *)dev);
+       } else {
+               /* disable interrupt */
+               fm10k_dev_disable_intr_vf(dev);
+
+               rte_intr_callback_unregister(&(dev->pci_dev->intr_handle),
+                       fm10k_dev_interrupt_handler_vf, (void *)dev);
+       }
+
+       /* free mac memory */
+       if (dev->data->mac_addrs) {
+               rte_free(dev->data->mac_addrs);
+               dev->data->mac_addrs = NULL;
+       }
+
+       memset(hw, 0, sizeof(*hw));
 
        return 0;
 }
@@ -1961,9 +2506,10 @@ static struct eth_driver rte_pmd_fm10k = {
        .pci_drv = {
                .name = "rte_pmd_fm10k",
                .id_table = pci_id_fm10k_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE,
        },
        .eth_dev_init = eth_fm10k_dev_init,
+       .eth_dev_uninit = eth_fm10k_dev_uninit,
        .dev_private_size = sizeof(struct fm10k_adapter),
 };