net/iavf: fix overflow in maximum packet length config
[dpdk.git] / drivers / net / iavf / iavf_ethdev.c
index eebc49a..dc5cbc2 100644 (file)
 #include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
-#include <rte_ethdev_driver.h>
-#include <rte_ethdev_pci.h>
+#include <ethdev_driver.h>
+#include <ethdev_pci.h>
 #include <rte_malloc.h>
 #include <rte_memzone.h>
 #include <rte_dev.h>
 
-#include "iavf_log.h"
-#include "base/iavf_prototype.h"
-#include "base/iavf_adminq_cmd.h"
-#include "base/iavf_type.h"
-
 #include "iavf.h"
 #include "iavf_rxtx.h"
+#include "iavf_generic_flow.h"
+#include "rte_pmd_iavf.h"
+
+/* devargs */
+#define IAVF_PROTO_XTR_ARG         "proto_xtr"
+
+static const char * const iavf_valid_args[] = {
+       IAVF_PROTO_XTR_ARG,
+       NULL
+};
+
+static const struct rte_mbuf_dynfield iavf_proto_xtr_metadata_param = {
+       .name = "intel_pmd_dynfield_proto_xtr_metadata",
+       .size = sizeof(uint32_t),
+       .align = __alignof__(uint32_t),
+       .flags = 0,
+};
+
+struct iavf_proto_xtr_ol {
+       const struct rte_mbuf_dynflag param;
+       uint64_t *ol_flag;
+       bool required;
+};
+
+static struct iavf_proto_xtr_ol iavf_proto_xtr_params[] = {
+       [IAVF_PROTO_XTR_VLAN] = {
+               .param = { .name = "intel_pmd_dynflag_proto_xtr_vlan" },
+               .ol_flag = &rte_pmd_ifd_dynflag_proto_xtr_vlan_mask },
+       [IAVF_PROTO_XTR_IPV4] = {
+               .param = { .name = "intel_pmd_dynflag_proto_xtr_ipv4" },
+               .ol_flag = &rte_pmd_ifd_dynflag_proto_xtr_ipv4_mask },
+       [IAVF_PROTO_XTR_IPV6] = {
+               .param = { .name = "intel_pmd_dynflag_proto_xtr_ipv6" },
+               .ol_flag = &rte_pmd_ifd_dynflag_proto_xtr_ipv6_mask },
+       [IAVF_PROTO_XTR_IPV6_FLOW] = {
+               .param = { .name = "intel_pmd_dynflag_proto_xtr_ipv6_flow" },
+               .ol_flag = &rte_pmd_ifd_dynflag_proto_xtr_ipv6_flow_mask },
+       [IAVF_PROTO_XTR_TCP] = {
+               .param = { .name = "intel_pmd_dynflag_proto_xtr_tcp" },
+               .ol_flag = &rte_pmd_ifd_dynflag_proto_xtr_tcp_mask },
+       [IAVF_PROTO_XTR_IP_OFFSET] = {
+               .param = { .name = "intel_pmd_dynflag_proto_xtr_ip_offset" },
+               .ol_flag = &rte_pmd_ifd_dynflag_proto_xtr_ip_offset_mask },
+};
 
 static int iavf_dev_configure(struct rte_eth_dev *dev);
 static int iavf_dev_start(struct rte_eth_dev *dev);
-static void iavf_dev_stop(struct rte_eth_dev *dev);
-static void iavf_dev_close(struct rte_eth_dev *dev);
+static int iavf_dev_stop(struct rte_eth_dev *dev);
+static int iavf_dev_close(struct rte_eth_dev *dev);
+static int iavf_dev_reset(struct rte_eth_dev *dev);
 static int iavf_dev_info_get(struct rte_eth_dev *dev,
                             struct rte_eth_dev_info *dev_info);
 static const uint32_t *iavf_dev_supported_ptypes_get(struct rte_eth_dev *dev);
 static int iavf_dev_stats_get(struct rte_eth_dev *dev,
                             struct rte_eth_stats *stats);
 static int iavf_dev_stats_reset(struct rte_eth_dev *dev);
+static int iavf_dev_xstats_get(struct rte_eth_dev *dev,
+                                struct rte_eth_xstat *xstats, unsigned int n);
+static int iavf_dev_xstats_get_names(struct rte_eth_dev *dev,
+                                      struct rte_eth_xstat_name *xstats_names,
+                                      unsigned int limit);
 static int iavf_dev_promiscuous_enable(struct rte_eth_dev *dev);
 static int iavf_dev_promiscuous_disable(struct rte_eth_dev *dev);
 static int iavf_dev_allmulticast_enable(struct rte_eth_dev *dev);
@@ -72,41 +117,67 @@ static int iavf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
                                        uint16_t queue_id);
 static int iavf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev,
                                         uint16_t queue_id);
-
-int iavf_logtype_init;
-int iavf_logtype_driver;
-
-#ifdef RTE_LIBRTE_IAVF_DEBUG_RX
-int iavf_logtype_rx;
-#endif
-#ifdef RTE_LIBRTE_IAVF_DEBUG_TX
-int iavf_logtype_tx;
-#endif
-#ifdef RTE_LIBRTE_IAVF_DEBUG_TX_FREE
-int iavf_logtype_tx_free;
-#endif
+static int iavf_dev_flow_ops_get(struct rte_eth_dev *dev,
+                                const struct rte_flow_ops **ops);
+static int iavf_set_mc_addr_list(struct rte_eth_dev *dev,
+                       struct rte_ether_addr *mc_addrs,
+                       uint32_t mc_addrs_num);
+static int iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg);
 
 static const struct rte_pci_id pci_id_iavf_map[] = {
        { RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_ADAPTIVE_VF) },
+       { RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_VF) },
+       { RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_VF_HV) },
+       { RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_X722_VF) },
+       { RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_X722_A0_VF) },
        { .vendor_id = 0, /* sentinel */ },
 };
 
+struct rte_iavf_xstats_name_off {
+       char name[RTE_ETH_XSTATS_NAME_SIZE];
+       unsigned int offset;
+};
+
+static const struct rte_iavf_xstats_name_off rte_iavf_stats_strings[] = {
+       {"rx_bytes", offsetof(struct iavf_eth_stats, rx_bytes)},
+       {"rx_unicast_packets", offsetof(struct iavf_eth_stats, rx_unicast)},
+       {"rx_multicast_packets", offsetof(struct iavf_eth_stats, rx_multicast)},
+       {"rx_broadcast_packets", offsetof(struct iavf_eth_stats, rx_broadcast)},
+       {"rx_dropped_packets", offsetof(struct iavf_eth_stats, rx_discards)},
+       {"rx_unknown_protocol_packets", offsetof(struct iavf_eth_stats,
+               rx_unknown_protocol)},
+       {"tx_bytes", offsetof(struct iavf_eth_stats, tx_bytes)},
+       {"tx_unicast_packets", offsetof(struct iavf_eth_stats, tx_unicast)},
+       {"tx_multicast_packets", offsetof(struct iavf_eth_stats, tx_multicast)},
+       {"tx_broadcast_packets", offsetof(struct iavf_eth_stats, tx_broadcast)},
+       {"tx_dropped_packets", offsetof(struct iavf_eth_stats, tx_discards)},
+       {"tx_error_packets", offsetof(struct iavf_eth_stats, tx_errors)},
+};
+
+#define IAVF_NB_XSTATS (sizeof(rte_iavf_stats_strings) / \
+               sizeof(rte_iavf_stats_strings[0]))
+
 static const struct eth_dev_ops iavf_eth_dev_ops = {
        .dev_configure              = iavf_dev_configure,
        .dev_start                  = iavf_dev_start,
        .dev_stop                   = iavf_dev_stop,
        .dev_close                  = iavf_dev_close,
+       .dev_reset                  = iavf_dev_reset,
        .dev_infos_get              = iavf_dev_info_get,
        .dev_supported_ptypes_get   = iavf_dev_supported_ptypes_get,
        .link_update                = iavf_dev_link_update,
        .stats_get                  = iavf_dev_stats_get,
        .stats_reset                = iavf_dev_stats_reset,
+       .xstats_get                 = iavf_dev_xstats_get,
+       .xstats_get_names           = iavf_dev_xstats_get_names,
+       .xstats_reset               = iavf_dev_stats_reset,
        .promiscuous_enable         = iavf_dev_promiscuous_enable,
        .promiscuous_disable        = iavf_dev_promiscuous_disable,
        .allmulticast_enable        = iavf_dev_allmulticast_enable,
        .allmulticast_disable       = iavf_dev_allmulticast_disable,
        .mac_addr_add               = iavf_dev_add_mac_addr,
        .mac_addr_remove            = iavf_dev_del_mac_addr,
+       .set_mc_addr_list                       = iavf_set_mc_addr_list,
        .vlan_filter_set            = iavf_dev_vlan_filter_set,
        .vlan_offload_set           = iavf_dev_vlan_offload_set,
        .rx_queue_start             = iavf_dev_rx_queue_start,
@@ -124,37 +195,183 @@ static const struct eth_dev_ops iavf_eth_dev_ops = {
        .rss_hash_conf_get          = iavf_dev_rss_hash_conf_get,
        .rxq_info_get               = iavf_dev_rxq_info_get,
        .txq_info_get               = iavf_dev_txq_info_get,
-       .rx_queue_count             = iavf_dev_rxq_count,
-       .rx_descriptor_status       = iavf_dev_rx_desc_status,
-       .tx_descriptor_status       = iavf_dev_tx_desc_status,
        .mtu_set                    = iavf_dev_mtu_set,
        .rx_queue_intr_enable       = iavf_dev_rx_queue_intr_enable,
        .rx_queue_intr_disable      = iavf_dev_rx_queue_intr_disable,
+       .flow_ops_get               = iavf_dev_flow_ops_get,
+       .tx_done_cleanup            = iavf_dev_tx_done_cleanup,
+       .get_monitor_addr           = iavf_get_monitor_addr,
+       .tm_ops_get                 = iavf_tm_ops_get,
 };
 
 static int
-iavf_dev_configure(struct rte_eth_dev *dev)
+iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+                       void *arg)
 {
-       struct iavf_adapter *ad =
+       if (!arg)
+               return -EINVAL;
+
+       *(const void **)arg = &iavf_tm_ops;
+
+       return 0;
+}
+
+static int
+iavf_set_mc_addr_list(struct rte_eth_dev *dev,
+                       struct rte_ether_addr *mc_addrs,
+                       uint32_t mc_addrs_num)
+{
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+       struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
-       struct iavf_info *vf =  IAVF_DEV_PRIVATE_TO_VF(ad);
-       struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
+       int err, ret;
 
-       ad->rx_bulk_alloc_allowed = true;
-       /* Initialize to TRUE. If any of Rx queues doesn't meet the
-        * vector Rx/Tx preconditions, it will be reset.
+       if (mc_addrs_num > IAVF_NUM_MACADDR_MAX) {
+               PMD_DRV_LOG(ERR,
+                           "can't add more than a limited number (%u) of addresses.",
+                           (uint32_t)IAVF_NUM_MACADDR_MAX);
+               return -EINVAL;
+       }
+
+       /* flush previous addresses */
+       err = iavf_add_del_mc_addr_list(adapter, vf->mc_addrs, vf->mc_addrs_num,
+                                       false);
+       if (err)
+               return err;
+
+       /* add new ones */
+       err = iavf_add_del_mc_addr_list(adapter, mc_addrs, mc_addrs_num, true);
+
+       if (err) {
+               /* if adding mac address list fails, should add the previous
+                * addresses back.
+                */
+               ret = iavf_add_del_mc_addr_list(adapter, vf->mc_addrs,
+                                               vf->mc_addrs_num, true);
+               if (ret)
+                       return ret;
+       } else {
+               vf->mc_addrs_num = mc_addrs_num;
+               memcpy(vf->mc_addrs,
+                      mc_addrs, mc_addrs_num * sizeof(*mc_addrs));
+       }
+
+       return err;
+}
+
+static void
+iavf_config_rss_hf(struct iavf_adapter *adapter, uint64_t rss_hf)
+{
+       static const uint64_t map_hena_rss[] = {
+               /* IPv4 */
+               [IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] =
+                               ETH_RSS_NONFRAG_IPV4_UDP,
+               [IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] =
+                               ETH_RSS_NONFRAG_IPV4_UDP,
+               [IAVF_FILTER_PCTYPE_NONF_IPV4_UDP] =
+                               ETH_RSS_NONFRAG_IPV4_UDP,
+               [IAVF_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] =
+                               ETH_RSS_NONFRAG_IPV4_TCP,
+               [IAVF_FILTER_PCTYPE_NONF_IPV4_TCP] =
+                               ETH_RSS_NONFRAG_IPV4_TCP,
+               [IAVF_FILTER_PCTYPE_NONF_IPV4_SCTP] =
+                               ETH_RSS_NONFRAG_IPV4_SCTP,
+               [IAVF_FILTER_PCTYPE_NONF_IPV4_OTHER] =
+                               ETH_RSS_NONFRAG_IPV4_OTHER,
+               [IAVF_FILTER_PCTYPE_FRAG_IPV4] = ETH_RSS_FRAG_IPV4,
+
+               /* IPv6 */
+               [IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] =
+                               ETH_RSS_NONFRAG_IPV6_UDP,
+               [IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] =
+                               ETH_RSS_NONFRAG_IPV6_UDP,
+               [IAVF_FILTER_PCTYPE_NONF_IPV6_UDP] =
+                               ETH_RSS_NONFRAG_IPV6_UDP,
+               [IAVF_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] =
+                               ETH_RSS_NONFRAG_IPV6_TCP,
+               [IAVF_FILTER_PCTYPE_NONF_IPV6_TCP] =
+                               ETH_RSS_NONFRAG_IPV6_TCP,
+               [IAVF_FILTER_PCTYPE_NONF_IPV6_SCTP] =
+                               ETH_RSS_NONFRAG_IPV6_SCTP,
+               [IAVF_FILTER_PCTYPE_NONF_IPV6_OTHER] =
+                               ETH_RSS_NONFRAG_IPV6_OTHER,
+               [IAVF_FILTER_PCTYPE_FRAG_IPV6] = ETH_RSS_FRAG_IPV6,
+
+               /* L2 Payload */
+               [IAVF_FILTER_PCTYPE_L2_PAYLOAD] = ETH_RSS_L2_PAYLOAD
+       };
+
+       const uint64_t ipv4_rss = ETH_RSS_NONFRAG_IPV4_UDP |
+                                 ETH_RSS_NONFRAG_IPV4_TCP |
+                                 ETH_RSS_NONFRAG_IPV4_SCTP |
+                                 ETH_RSS_NONFRAG_IPV4_OTHER |
+                                 ETH_RSS_FRAG_IPV4;
+
+       const uint64_t ipv6_rss = ETH_RSS_NONFRAG_IPV6_UDP |
+                                 ETH_RSS_NONFRAG_IPV6_TCP |
+                                 ETH_RSS_NONFRAG_IPV6_SCTP |
+                                 ETH_RSS_NONFRAG_IPV6_OTHER |
+                                 ETH_RSS_FRAG_IPV6;
+
+       struct iavf_info *vf =  IAVF_DEV_PRIVATE_TO_VF(adapter);
+       uint64_t caps = 0, hena = 0, valid_rss_hf = 0;
+       uint32_t i;
+       int ret;
+
+       ret = iavf_get_hena_caps(adapter, &caps);
+       if (ret) {
+               /**
+                * RSS offload type configuration is not a necessary feature
+                * for VF, so here just print a warning and return.
+                */
+               PMD_DRV_LOG(WARNING,
+                           "fail to get RSS offload type caps, ret: %d", ret);
+               return;
+       }
+
+       /**
+        * ETH_RSS_IPV4 and ETH_RSS_IPV6 can be considered as 2
+        * generalizations of all other IPv4 and IPv6 RSS types.
         */
-       ad->rx_vec_allowed = true;
-       ad->tx_vec_allowed = true;
+       if (rss_hf & ETH_RSS_IPV4)
+               rss_hf |= ipv4_rss;
 
-       /* Vlan stripping setting */
-       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) {
-               if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
-                       iavf_enable_vlan_strip(ad);
-               else
-                       iavf_disable_vlan_strip(ad);
+       if (rss_hf & ETH_RSS_IPV6)
+               rss_hf |= ipv6_rss;
+
+       RTE_BUILD_BUG_ON(RTE_DIM(map_hena_rss) > sizeof(uint64_t) * CHAR_BIT);
+
+       for (i = 0; i < RTE_DIM(map_hena_rss); i++) {
+               uint64_t bit = BIT_ULL(i);
+
+               if ((caps & bit) && (map_hena_rss[i] & rss_hf)) {
+                       valid_rss_hf |= map_hena_rss[i];
+                       hena |= bit;
+               }
        }
-       return 0;
+
+       ret = iavf_set_hena(adapter, hena);
+       if (ret) {
+               /**
+                * RSS offload type configuration is not a necessary feature
+                * for VF, so here just print a warning and return.
+                */
+               PMD_DRV_LOG(WARNING,
+                           "fail to set RSS offload types, ret: %d", ret);
+               return;
+       }
+
+       if (valid_rss_hf & ipv4_rss)
+               valid_rss_hf |= rss_hf & ETH_RSS_IPV4;
+
+       if (valid_rss_hf & ipv6_rss)
+               valid_rss_hf |= rss_hf & ETH_RSS_IPV6;
+
+       if (rss_hf & ~valid_rss_hf)
+               PMD_DRV_LOG(WARNING, "Unsupported rss_hf 0x%" PRIx64,
+                           rss_hf & ~valid_rss_hf);
+
+       vf->rss_hf = valid_rss_hf;
 }
 
 static int
@@ -162,34 +379,22 @@ iavf_init_rss(struct iavf_adapter *adapter)
 {
        struct iavf_info *vf =  IAVF_DEV_PRIVATE_TO_VF(adapter);
        struct rte_eth_rss_conf *rss_conf;
-       uint8_t i, j, nb_q;
+       uint16_t i, j, nb_q;
        int ret;
 
        rss_conf = &adapter->eth_dev->data->dev_conf.rx_adv_conf.rss_conf;
        nb_q = RTE_MIN(adapter->eth_dev->data->nb_rx_queues,
-                      IAVF_MAX_NUM_QUEUES);
+                      vf->max_rss_qregion);
 
        if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF)) {
                PMD_DRV_LOG(DEBUG, "RSS is not supported");
                return -ENOTSUP;
        }
-       if (adapter->eth_dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) {
-               PMD_DRV_LOG(WARNING, "RSS is enabled by PF by default");
-               /* set all lut items to default queue */
-               for (i = 0; i < vf->vf_res->rss_lut_size; i++)
-                       vf->rss_lut[i] = 0;
-               ret = iavf_configure_rss_lut(adapter);
-               return ret;
-       }
-
-       /* In IAVF, RSS enablement is set by PF driver. It is not supported
-        * to set based on rss_conf->rss_hf.
-        */
 
        /* configure RSS key */
        if (!rss_conf->rss_key) {
                /* Calculate the default hash key */
-               for (i = 0; i <= vf->vf_res->rss_key_size; i++)
+               for (i = 0; i < vf->vf_res->rss_key_size; i++)
                        vf->rss_key[i] = (uint8_t)rte_rand();
        } else
                rte_memcpy(vf->rss_key, rss_conf->rss_key,
@@ -210,6 +415,157 @@ iavf_init_rss(struct iavf_adapter *adapter)
        if (ret)
                return ret;
 
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF) {
+               /* Set RSS hash configuration based on rss_conf->rss_hf. */
+               ret = iavf_rss_hash_set(adapter, rss_conf->rss_hf, true);
+               if (ret) {
+                       PMD_DRV_LOG(ERR, "fail to set default RSS");
+                       return ret;
+               }
+       } else {
+               iavf_config_rss_hf(adapter, rss_conf->rss_hf);
+       }
+
+       return 0;
+}
+
+static int
+iavf_queues_req_reset(struct rte_eth_dev *dev, uint16_t num)
+{
+       struct iavf_adapter *ad =
+               IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf =  IAVF_DEV_PRIVATE_TO_VF(ad);
+       int ret;
+
+       ret = iavf_request_queues(ad, num);
+       if (ret) {
+               PMD_DRV_LOG(ERR, "request queues from PF failed");
+               return ret;
+       }
+       PMD_DRV_LOG(INFO, "change queue pairs from %u to %u",
+                       vf->vsi_res->num_queue_pairs, num);
+
+       ret = iavf_dev_reset(dev);
+       if (ret) {
+               PMD_DRV_LOG(ERR, "vf reset failed");
+               return ret;
+       }
+
+       return 0;
+}
+
+static int
+iavf_dev_vlan_insert_set(struct rte_eth_dev *dev)
+{
+       struct iavf_adapter *adapter =
+               IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+       bool enable;
+
+       if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2))
+               return 0;
+
+       enable = !!(dev->data->dev_conf.txmode.offloads &
+                   DEV_TX_OFFLOAD_VLAN_INSERT);
+       iavf_config_vlan_insert_v2(adapter, enable);
+
+       return 0;
+}
+
+static int
+iavf_dev_init_vlan(struct rte_eth_dev *dev)
+{
+       int err;
+
+       err = iavf_dev_vlan_offload_set(dev,
+                                       ETH_VLAN_STRIP_MASK |
+                                       ETH_QINQ_STRIP_MASK |
+                                       ETH_VLAN_FILTER_MASK |
+                                       ETH_VLAN_EXTEND_MASK);
+       if (err) {
+               PMD_DRV_LOG(ERR, "Failed to update vlan offload");
+               return err;
+       }
+
+       err = iavf_dev_vlan_insert_set(dev);
+       if (err)
+               PMD_DRV_LOG(ERR, "Failed to update vlan insertion");
+
+       return err;
+}
+
+static int
+iavf_dev_configure(struct rte_eth_dev *dev)
+{
+       struct iavf_adapter *ad =
+               IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf =  IAVF_DEV_PRIVATE_TO_VF(ad);
+       uint16_t num_queue_pairs = RTE_MAX(dev->data->nb_rx_queues,
+               dev->data->nb_tx_queues);
+       int ret;
+
+       ad->rx_bulk_alloc_allowed = true;
+       /* Initialize to TRUE. If any of Rx queues doesn't meet the
+        * vector Rx/Tx preconditions, it will be reset.
+        */
+       ad->rx_vec_allowed = true;
+       ad->tx_vec_allowed = true;
+
+       if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)
+               dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH;
+
+       /* Large VF setting */
+       if (num_queue_pairs > IAVF_MAX_NUM_QUEUES_DFLT) {
+               if (!(vf->vf_res->vf_cap_flags &
+                               VIRTCHNL_VF_LARGE_NUM_QPAIRS)) {
+                       PMD_DRV_LOG(ERR, "large VF is not supported");
+                       return -1;
+               }
+
+               if (num_queue_pairs > IAVF_MAX_NUM_QUEUES_LV) {
+                       PMD_DRV_LOG(ERR, "queue pairs number cannot be larger than %u",
+                               IAVF_MAX_NUM_QUEUES_LV);
+                       return -1;
+               }
+
+               ret = iavf_queues_req_reset(dev, num_queue_pairs);
+               if (ret)
+                       return ret;
+
+               ret = iavf_get_max_rss_queue_region(ad);
+               if (ret) {
+                       PMD_INIT_LOG(ERR, "get max rss queue region failed");
+                       return ret;
+               }
+
+               vf->lv_enabled = true;
+       } else {
+               /* Check if large VF is already enabled. If so, disable and
+                * release redundant queue resource.
+                * Or check if enough queue pairs. If not, request them from PF.
+                */
+               if (vf->lv_enabled ||
+                   num_queue_pairs > vf->vsi_res->num_queue_pairs) {
+                       ret = iavf_queues_req_reset(dev, num_queue_pairs);
+                       if (ret)
+                               return ret;
+
+                       vf->lv_enabled = false;
+               }
+               /* if large VF is not required, use default rss queue region */
+               vf->max_rss_qregion = IAVF_MAX_NUM_QUEUES_DFLT;
+       }
+
+       ret = iavf_dev_init_vlan(dev);
+       if (ret)
+               PMD_DRV_LOG(ERR, "configure VLAN failed: %d", ret);
+
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+               if (iavf_init_rss(ad) != 0) {
+                       PMD_DRV_LOG(ERR, "configure rss failed");
+                       return -1;
+               }
+       }
        return 0;
 }
 
@@ -218,42 +574,43 @@ iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq)
 {
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct rte_eth_dev_data *dev_data = dev->data;
-       uint16_t buf_size, max_pkt_len, len;
+       uint16_t buf_size, max_pkt_len;
 
        buf_size = rte_pktmbuf_data_room_size(rxq->mp) - RTE_PKTMBUF_HEADROOM;
 
        /* Calculate the maximum packet length allowed */
-       len = rxq->rx_buf_len * IAVF_MAX_CHAINED_RX_BUFFERS;
-       max_pkt_len = RTE_MIN(len, dev->data->dev_conf.rxmode.max_rx_pkt_len);
+       max_pkt_len = RTE_MIN((uint32_t)
+                       rxq->rx_buf_len * IAVF_MAX_CHAINED_RX_BUFFERS,
+                       dev->data->dev_conf.rxmode.max_rx_pkt_len);
 
        /* Check if the jumbo frame and maximum packet length are set
         * correctly.
         */
        if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
-               if (max_pkt_len <= RTE_ETHER_MAX_LEN ||
+               if (max_pkt_len <= IAVF_ETH_MAX_LEN ||
                    max_pkt_len > IAVF_FRAME_SIZE_MAX) {
                        PMD_DRV_LOG(ERR, "maximum packet length must be "
                                    "larger than %u and smaller than %u, "
                                    "as jumbo frame is enabled",
-                                   (uint32_t)RTE_ETHER_MAX_LEN,
+                                   (uint32_t)IAVF_ETH_MAX_LEN,
                                    (uint32_t)IAVF_FRAME_SIZE_MAX);
                        return -EINVAL;
                }
        } else {
                if (max_pkt_len < RTE_ETHER_MIN_LEN ||
-                   max_pkt_len > RTE_ETHER_MAX_LEN) {
+                   max_pkt_len > IAVF_ETH_MAX_LEN) {
                        PMD_DRV_LOG(ERR, "maximum packet length must be "
                                    "larger than %u and smaller than %u, "
                                    "as jumbo frame is disabled",
                                    (uint32_t)RTE_ETHER_MIN_LEN,
-                                   (uint32_t)RTE_ETHER_MAX_LEN);
+                                   (uint32_t)IAVF_ETH_MAX_LEN);
                        return -EINVAL;
                }
        }
 
        rxq->max_pkt_len = max_pkt_len;
        if ((dev_data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) ||
-           (rxq->max_pkt_len + 2 * IAVF_VLAN_TAG_SIZE) > buf_size) {
+           rxq->max_pkt_len > buf_size) {
                dev_data->scattered_rx = 1;
        }
        IAVF_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
@@ -292,6 +649,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev,
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(adapter);
+       struct iavf_qv_map *qv_map;
        uint16_t interval, i;
        int vec;
 
@@ -312,6 +670,14 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev,
                }
        }
 
+       qv_map = rte_zmalloc("qv_map",
+               dev->data->nb_rx_queues * sizeof(struct iavf_qv_map), 0);
+       if (!qv_map) {
+               PMD_DRV_LOG(ERR, "Failed to allocate %d queue-vector map",
+                               dev->data->nb_rx_queues);
+               return -1;
+       }
+
        if (!dev->data->dev_conf.intr_conf.rxq ||
            !rte_intr_dp_is_en(intr_handle)) {
                /* Rx interrupt disabled, Map interrupt only for writeback */
@@ -320,9 +686,19 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev,
                    VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) {
                        /* If WB_ON_ITR supports, enable it */
                        vf->msix_base = IAVF_RX_VEC_START;
-                       IAVF_WRITE_REG(hw, IAVFINT_DYN_CTLN1(vf->msix_base - 1),
-                                     IAVFINT_DYN_CTLN1_ITR_INDX_MASK |
-                                     IAVFINT_DYN_CTLN1_WB_ON_ITR_MASK);
+                       /* Set the ITR for index zero, to 2us to make sure that
+                        * we leave time for aggregation to occur, but don't
+                        * increase latency dramatically.
+                        */
+                       IAVF_WRITE_REG(hw,
+                                      IAVF_VFINT_DYN_CTLN1(vf->msix_base - 1),
+                                      (0 << IAVF_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
+                                      IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK |
+                                      (2UL << IAVF_VFINT_DYN_CTLN1_INTERVAL_SHIFT));
+                       /* debug - check for success! the return value
+                        * should be 2, offset is 0x2800
+                        */
+                       /* IAVF_READ_REG(hw, IAVF_VFINT_ITRN1(0, 0)); */
                } else {
                        /* If no WB_ON_ITR offload flags, need to set
                         * interrupt for descriptor write back.
@@ -332,25 +708,30 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev,
                        /* set ITR to max */
                        interval = iavf_calc_itr_interval(
                                        IAVF_QUEUE_ITR_INTERVAL_MAX);
-                       IAVF_WRITE_REG(hw, IAVFINT_DYN_CTL01,
-                                     IAVFINT_DYN_CTL01_INTENA_MASK |
-                                     (IAVF_ITR_INDEX_DEFAULT <<
-                                      IAVFINT_DYN_CTL01_ITR_INDX_SHIFT) |
-                                     (interval <<
-                                      IAVFINT_DYN_CTL01_INTERVAL_SHIFT));
+                       IAVF_WRITE_REG(hw, IAVF_VFINT_DYN_CTL01,
+                                      IAVF_VFINT_DYN_CTL01_INTENA_MASK |
+                                      (IAVF_ITR_INDEX_DEFAULT <<
+                                       IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT) |
+                                      (interval <<
+                                       IAVF_VFINT_DYN_CTL01_INTERVAL_SHIFT));
                }
                IAVF_WRITE_FLUSH(hw);
                /* map all queues to the same interrupt */
-               for (i = 0; i < dev->data->nb_rx_queues; i++)
-                       vf->rxq_map[vf->msix_base] |= 1 << i;
+               for (i = 0; i < dev->data->nb_rx_queues; i++) {
+                       qv_map[i].queue_id = i;
+                       qv_map[i].vector_id = vf->msix_base;
+               }
+               vf->qv_map = qv_map;
        } else {
                if (!rte_intr_allow_others(intr_handle)) {
                        vf->nb_msix = 1;
                        vf->msix_base = IAVF_MISC_VEC_ID;
                        for (i = 0; i < dev->data->nb_rx_queues; i++) {
-                               vf->rxq_map[vf->msix_base] |= 1 << i;
+                               qv_map[i].queue_id = i;
+                               qv_map[i].vector_id = vf->msix_base;
                                intr_handle->intr_vec[i] = IAVF_MISC_VEC_ID;
                        }
+                       vf->qv_map = qv_map;
                        PMD_DRV_LOG(DEBUG,
                                    "vector %u are mapping to all Rx queues",
                                    vf->msix_base);
@@ -358,25 +739,47 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev,
                        /* If Rx interrupt is reuquired, and we can use
                         * multi interrupts, then the vec is from 1
                         */
-                       vf->nb_msix = RTE_MIN(vf->vf_res->max_vectors,
-                                             intr_handle->nb_efd);
+                       vf->nb_msix = RTE_MIN(intr_handle->nb_efd,
+                                (uint16_t)(vf->vf_res->max_vectors - 1));
                        vf->msix_base = IAVF_RX_VEC_START;
                        vec = IAVF_RX_VEC_START;
                        for (i = 0; i < dev->data->nb_rx_queues; i++) {
-                               vf->rxq_map[vec] |= 1 << i;
+                               qv_map[i].queue_id = i;
+                               qv_map[i].vector_id = vec;
                                intr_handle->intr_vec[i] = vec++;
-                               if (vec >= vf->nb_msix)
+                               if (vec >= vf->nb_msix + IAVF_RX_VEC_START)
                                        vec = IAVF_RX_VEC_START;
                        }
+                       vf->qv_map = qv_map;
                        PMD_DRV_LOG(DEBUG,
                                    "%u vectors are mapping to %u Rx queues",
                                    vf->nb_msix, dev->data->nb_rx_queues);
                }
        }
 
-       if (iavf_config_irq_map(adapter)) {
-               PMD_DRV_LOG(ERR, "config interrupt mapping failed");
-               return -1;
+       if (!vf->lv_enabled) {
+               if (iavf_config_irq_map(adapter)) {
+                       PMD_DRV_LOG(ERR, "config interrupt mapping failed");
+                       return -1;
+               }
+       } else {
+               uint16_t num_qv_maps = dev->data->nb_rx_queues;
+               uint16_t index = 0;
+
+               while (num_qv_maps > IAVF_IRQ_MAP_NUM_PER_BUF) {
+                       if (iavf_config_irq_map_lv(adapter,
+                                       IAVF_IRQ_MAP_NUM_PER_BUF, index)) {
+                               PMD_DRV_LOG(ERR, "config interrupt mapping for large VF failed");
+                               return -1;
+                       }
+                       num_qv_maps -= IAVF_IRQ_MAP_NUM_PER_BUF;
+                       index += IAVF_IRQ_MAP_NUM_PER_BUF;
+               }
+
+               if (iavf_config_irq_map_lv(adapter, num_qv_maps, index)) {
+                       PMD_DRV_LOG(ERR, "config interrupt mapping for large VF failed");
+                       return -1;
+               }
        }
        return 0;
 }
@@ -417,30 +820,44 @@ iavf_dev_start(struct rte_eth_dev *dev)
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-       struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct rte_intr_handle *intr_handle = dev->intr_handle;
+       uint16_t num_queue_pairs;
+       uint16_t index = 0;
 
        PMD_INIT_FUNC_TRACE();
 
-       hw->adapter_stopped = 0;
+       adapter->stopped = 0;
 
        vf->max_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
        vf->num_queue_pairs = RTE_MAX(dev->data->nb_rx_queues,
                                      dev->data->nb_tx_queues);
+       num_queue_pairs = vf->num_queue_pairs;
+
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+               if (iavf_get_qos_cap(adapter)) {
+                       PMD_INIT_LOG(ERR, "Failed to get qos capability");
+                       return -1;
+               }
 
        if (iavf_init_queues(dev) != 0) {
                PMD_DRV_LOG(ERR, "failed to do Queue init");
                return -1;
        }
 
-       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
-               if (iavf_init_rss(adapter) != 0) {
-                       PMD_DRV_LOG(ERR, "configure rss failed");
-                       goto err_rss;
+       /* If needed, send configure queues msg multiple times to make the
+        * adminq buffer length smaller than the 4K limitation.
+        */
+       while (num_queue_pairs > IAVF_CFG_Q_NUM_PER_BUF) {
+               if (iavf_configure_queues(adapter,
+                               IAVF_CFG_Q_NUM_PER_BUF, index) != 0) {
+                       PMD_DRV_LOG(ERR, "configure queues failed");
+                       goto err_queue;
                }
+               num_queue_pairs -= IAVF_CFG_Q_NUM_PER_BUF;
+               index += IAVF_CFG_Q_NUM_PER_BUF;
        }
 
-       if (iavf_configure_queues(adapter) != 0) {
+       if (iavf_configure_queues(adapter, num_queue_pairs, index) != 0) {
                PMD_DRV_LOG(ERR, "configure queues failed");
                goto err_queue;
        }
@@ -456,7 +873,11 @@ iavf_dev_start(struct rte_eth_dev *dev)
        }
 
        /* Set all mac addrs */
-       iavf_add_del_all_mac_addr(adapter, TRUE);
+       iavf_add_del_all_mac_addr(adapter, true);
+
+       /* Set all multicast addresses */
+       iavf_add_del_mc_addr_list(adapter, vf->mc_addrs, vf->mc_addrs_num,
+                                 true);
 
        if (iavf_start_queues(dev) != 0) {
                PMD_DRV_LOG(ERR, "enable queues failed");
@@ -466,24 +887,23 @@ iavf_dev_start(struct rte_eth_dev *dev)
        return 0;
 
 err_mac:
-       iavf_add_del_all_mac_addr(adapter, FALSE);
+       iavf_add_del_all_mac_addr(adapter, false);
 err_queue:
-err_rss:
        return -1;
 }
 
-static void
+static int
 iavf_dev_stop(struct rte_eth_dev *dev)
 {
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
-       struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct rte_intr_handle *intr_handle = dev->intr_handle;
 
        PMD_INIT_FUNC_TRACE();
 
-       if (hw->adapter_stopped == 1)
-               return;
+       if (adapter->stopped == 1)
+               return 0;
 
        iavf_stop_queues(dev);
 
@@ -496,8 +916,16 @@ iavf_dev_stop(struct rte_eth_dev *dev)
        }
 
        /* remove all mac addrs */
-       iavf_add_del_all_mac_addr(adapter, FALSE);
-       hw->adapter_stopped = 1;
+       iavf_add_del_all_mac_addr(adapter, false);
+
+       /* remove all multicast addresses */
+       iavf_add_del_mc_addr_list(adapter, vf->mc_addrs, vf->mc_addrs_num,
+                                 false);
+
+       adapter->stopped = 1;
+       dev->data->dev_started = 0;
+
+       return 0;
 }
 
 static int
@@ -505,10 +933,12 @@ iavf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
-       dev_info->max_rx_queues = vf->vsi_res->num_queue_pairs;
-       dev_info->max_tx_queues = vf->vsi_res->num_queue_pairs;
+       dev_info->max_rx_queues = IAVF_MAX_NUM_QUEUES_LV;
+       dev_info->max_tx_queues = IAVF_MAX_NUM_QUEUES_LV;
        dev_info->min_rx_bufsize = IAVF_BUF_SIZE_MIN;
        dev_info->max_rx_pktlen = IAVF_FRAME_SIZE_MAX;
+       dev_info->max_mtu = dev_info->max_rx_pktlen - IAVF_ETH_OVERHEAD;
+       dev_info->min_mtu = RTE_ETHER_MIN_MTU;
        dev_info->hash_key_size = vf->vf_res->rss_key_size;
        dev_info->reta_size = vf->vf_res->rss_lut_size;
        dev_info->flow_type_rss_offloads = IAVF_RSS_OFFLOAD_ALL;
@@ -522,7 +952,9 @@ iavf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
                DEV_RX_OFFLOAD_SCATTER |
                DEV_RX_OFFLOAD_JUMBO_FRAME |
-               DEV_RX_OFFLOAD_VLAN_FILTER;
+               DEV_RX_OFFLOAD_VLAN_FILTER |
+               DEV_RX_OFFLOAD_RSS_HASH;
+
        dev_info->tx_offload_capa =
                DEV_TX_OFFLOAD_VLAN_INSERT |
                DEV_TX_OFFLOAD_QINQ_INSERT |
@@ -536,7 +968,11 @@ iavf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                DEV_TX_OFFLOAD_GRE_TNL_TSO |
                DEV_TX_OFFLOAD_IPIP_TNL_TSO |
                DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
-               DEV_TX_OFFLOAD_MULTI_SEGS;
+               DEV_TX_OFFLOAD_MULTI_SEGS |
+               DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_CRC)
+               dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_KEEP_CRC;
 
        dev_info->default_rxconf = (struct rte_eth_rxconf) {
                .rx_free_thresh = IAVF_DEFAULT_RX_FREE_THRESH,
@@ -589,6 +1025,8 @@ iavf_dev_link_update(struct rte_eth_dev *dev,
        struct rte_eth_link new_link;
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
+       memset(&new_link, 0, sizeof(new_link));
+
        /* Only read status info stored in VF, and the info is updated
         *  when receive LINK_CHANGE evnet from PF by Virtchnnl.
         */
@@ -631,12 +1069,7 @@ iavf_dev_link_update(struct rte_eth_dev *dev,
        new_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
                                ETH_LINK_SPEED_FIXED);
 
-       if (rte_atomic64_cmpset((uint64_t *)&dev->data->dev_link,
-                               *(uint64_t *)&dev->data->dev_link,
-                               *(uint64_t *)&new_link) == 0)
-               return -1;
-
-       return 0;
+       return rte_eth_linkstatus_set(dev, &new_link);
 }
 
 static int
@@ -645,18 +1078,9 @@ iavf_dev_promiscuous_enable(struct rte_eth_dev *dev)
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
-       int ret;
-
-       if (vf->promisc_unicast_enabled)
-               return 0;
 
-       ret = iavf_config_promisc(adapter, TRUE, vf->promisc_multicast_enabled);
-       if (!ret)
-               vf->promisc_unicast_enabled = TRUE;
-       else
-               ret = -EAGAIN;
-
-       return ret;
+       return iavf_config_promisc(adapter,
+                                 true, vf->promisc_multicast_enabled);
 }
 
 static int
@@ -665,18 +1089,9 @@ iavf_dev_promiscuous_disable(struct rte_eth_dev *dev)
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
-       int ret;
-
-       if (!vf->promisc_unicast_enabled)
-               return 0;
-
-       ret = iavf_config_promisc(adapter, FALSE, vf->promisc_multicast_enabled);
-       if (!ret)
-               vf->promisc_unicast_enabled = FALSE;
-       else
-               ret = -EAGAIN;
 
-       return ret;
+       return iavf_config_promisc(adapter,
+                                 false, vf->promisc_multicast_enabled);
 }
 
 static int
@@ -685,18 +1100,9 @@ iavf_dev_allmulticast_enable(struct rte_eth_dev *dev)
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
-       int ret;
-
-       if (vf->promisc_multicast_enabled)
-               return 0;
 
-       ret = iavf_config_promisc(adapter, vf->promisc_unicast_enabled, TRUE);
-       if (!ret)
-               vf->promisc_multicast_enabled = TRUE;
-       else
-               ret = -EAGAIN;
-
-       return ret;
+       return iavf_config_promisc(adapter,
+                                 vf->promisc_unicast_enabled, true);
 }
 
 static int
@@ -705,18 +1111,9 @@ iavf_dev_allmulticast_disable(struct rte_eth_dev *dev)
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
-       int ret;
 
-       if (!vf->promisc_multicast_enabled)
-               return 0;
-
-       ret = iavf_config_promisc(adapter, vf->promisc_unicast_enabled, FALSE);
-       if (!ret)
-               vf->promisc_multicast_enabled = FALSE;
-       else
-               ret = -EAGAIN;
-
-       return ret;
+       return iavf_config_promisc(adapter,
+                                 vf->promisc_unicast_enabled, false);
 }
 
 static int
@@ -734,7 +1131,7 @@ iavf_dev_add_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *addr,
                return -EINVAL;
        }
 
-       err = iavf_add_del_eth_addr(adapter, addr, TRUE);
+       err = iavf_add_del_eth_addr(adapter, addr, true, VIRTCHNL_ETHER_ADDR_EXTRA);
        if (err) {
                PMD_DRV_LOG(ERR, "fail to add MAC address");
                return -EIO;
@@ -756,7 +1153,7 @@ iavf_dev_del_mac_addr(struct rte_eth_dev *dev, uint32_t index)
 
        addr = &dev->data->mac_addrs[index];
 
-       err = iavf_add_del_eth_addr(adapter, addr, FALSE);
+       err = iavf_add_del_eth_addr(adapter, addr, false, VIRTCHNL_ETHER_ADDR_EXTRA);
        if (err)
                PMD_DRV_LOG(ERR, "fail to delete MAC address");
 
@@ -771,6 +1168,13 @@ iavf_dev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
        int err;
 
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) {
+               err = iavf_add_del_vlan_v2(adapter, vlan_id, on);
+               if (err)
+                       return -EIO;
+               return 0;
+       }
+
        if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
                return -ENOTSUP;
 
@@ -780,34 +1184,88 @@ iavf_dev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
        return 0;
 }
 
-static int
-iavf_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
+static void
+iavf_iterate_vlan_filters_v2(struct rte_eth_dev *dev, bool enable)
 {
+       struct rte_vlan_filter_conf *vfc = &dev->data->vlan_filter_conf;
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
-       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
-       struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
-       int err;
+       uint32_t i, j;
+       uint64_t ids;
 
-       if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
-               return -ENOTSUP;
-
-       /* Vlan stripping setting */
-       if (mask & ETH_VLAN_STRIP_MASK) {
-               /* Enable or disable VLAN stripping */
-               if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
-                       err = iavf_enable_vlan_strip(adapter);
-               else
-                       err = iavf_disable_vlan_strip(adapter);
+       for (i = 0; i < RTE_DIM(vfc->ids); i++) {
+               if (vfc->ids[i] == 0)
+                       continue;
 
-               if (err)
-                       return -EIO;
+               ids = vfc->ids[i];
+               for (j = 0; ids != 0 && j < 64; j++, ids >>= 1) {
+                       if (ids & 1)
+                               iavf_add_del_vlan_v2(adapter,
+                                                    64 * i + j, enable);
+               }
        }
-       return 0;
 }
 
 static int
-iavf_dev_rss_reta_update(struct rte_eth_dev *dev,
+iavf_dev_vlan_offload_set_v2(struct rte_eth_dev *dev, int mask)
+{
+       struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+       struct iavf_adapter *adapter =
+               IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       bool enable;
+       int err;
+
+       if (mask & ETH_VLAN_FILTER_MASK) {
+               enable = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_FILTER);
+
+               iavf_iterate_vlan_filters_v2(dev, enable);
+       }
+
+       if (mask & ETH_VLAN_STRIP_MASK) {
+               enable = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
+
+               err = iavf_config_vlan_strip_v2(adapter, enable);
+               /* If not support, the stripping is already disabled by PF */
+               if (err == -ENOTSUP && !enable)
+                       err = 0;
+               if (err)
+                       return -EIO;
+       }
+
+       return 0;
+}
+
+static int
+iavf_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
+{
+       struct iavf_adapter *adapter =
+               IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+       struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
+       int err;
+
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2)
+               return iavf_dev_vlan_offload_set_v2(dev, mask);
+
+       if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
+               return -ENOTSUP;
+
+       /* Vlan stripping setting */
+       if (mask & ETH_VLAN_STRIP_MASK) {
+               /* Enable or disable VLAN stripping */
+               if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
+                       err = iavf_enable_vlan_strip(adapter);
+               else
+                       err = iavf_disable_vlan_strip(adapter);
+
+               if (err)
+                       return -EIO;
+       }
+       return 0;
+}
+
+static int
+iavf_dev_rss_reta_update(struct rte_eth_dev *dev,
                        struct rte_eth_rss_reta_entry64 *reta_conf,
                        uint16_t reta_size)
 {
@@ -884,33 +1342,83 @@ iavf_dev_rss_reta_query(struct rte_eth_dev *dev,
 }
 
 static int
-iavf_dev_rss_hash_update(struct rte_eth_dev *dev,
-                       struct rte_eth_rss_conf *rss_conf)
+iavf_set_rss_key(struct iavf_adapter *adapter, uint8_t *key, uint8_t key_len)
 {
-       struct iavf_adapter *adapter =
-               IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 
-       if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
-               return -ENOTSUP;
-
        /* HENA setting, it is enabled by default, no change */
-       if (!rss_conf->rss_key || rss_conf->rss_key_len == 0) {
+       if (!key || key_len == 0) {
                PMD_DRV_LOG(DEBUG, "No key to be configured");
                return 0;
-       } else if (rss_conf->rss_key_len != vf->vf_res->rss_key_size) {
+       } else if (key_len != vf->vf_res->rss_key_size) {
                PMD_DRV_LOG(ERR, "The size of hash key configured "
                        "(%d) doesn't match the size of hardware can "
-                       "support (%d)", rss_conf->rss_key_len,
+                       "support (%d)", key_len,
                        vf->vf_res->rss_key_size);
                return -EINVAL;
        }
 
-       rte_memcpy(vf->rss_key, rss_conf->rss_key, rss_conf->rss_key_len);
+       rte_memcpy(vf->rss_key, key, key_len);
 
        return iavf_configure_rss_key(adapter);
 }
 
+static int
+iavf_dev_rss_hash_update(struct rte_eth_dev *dev,
+                       struct rte_eth_rss_conf *rss_conf)
+{
+       struct iavf_adapter *adapter =
+               IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+       int ret;
+
+       adapter->eth_dev->data->dev_conf.rx_adv_conf.rss_conf = *rss_conf;
+
+       if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
+               return -ENOTSUP;
+
+       /* Set hash key. */
+       ret = iavf_set_rss_key(adapter, rss_conf->rss_key,
+                              rss_conf->rss_key_len);
+       if (ret)
+               return ret;
+
+       if (rss_conf->rss_hf == 0) {
+               vf->rss_hf = 0;
+               ret = iavf_set_hena(adapter, 0);
+
+               /* It is a workaround, temporarily allow error to be returned
+                * due to possible lack of PF handling for hena = 0.
+                */
+               if (ret)
+                       PMD_DRV_LOG(WARNING, "fail to clean existing RSS, lack PF support");
+               return 0;
+       }
+
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF) {
+               /* Clear existing RSS. */
+               ret = iavf_set_hena(adapter, 0);
+
+               /* It is a workaround, temporarily allow error to be returned
+                * due to possible lack of PF handling for hena = 0.
+                */
+               if (ret)
+                       PMD_DRV_LOG(WARNING, "fail to clean existing RSS,"
+                                   "lack PF support");
+
+               /* Set new RSS configuration. */
+               ret = iavf_rss_hash_set(adapter, rss_conf->rss_hf, true);
+               if (ret) {
+                       PMD_DRV_LOG(ERR, "fail to set new RSS");
+                       return ret;
+               }
+       } else {
+               iavf_config_rss_hf(adapter, rss_conf->rss_hf);
+       }
+
+       return 0;
+}
+
 static int
 iavf_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
                          struct rte_eth_rss_conf *rss_conf)
@@ -922,8 +1430,7 @@ iavf_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
        if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
                return -ENOTSUP;
 
-        /* Just set it to default value now. */
-       rss_conf->rss_hf = IAVF_RSS_OFFLOAD_ALL;
+       rss_conf->rss_hf = vf->rss_hf;
 
        if (!rss_conf->rss_key)
                return 0;
@@ -949,7 +1456,7 @@ iavf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
                return -EBUSY;
        }
 
-       if (frame_size > RTE_ETHER_MAX_LEN)
+       if (frame_size > IAVF_ETH_MAX_LEN)
                dev->data->dev_conf.rxmode.offloads |=
                                DEV_RX_OFFLOAD_JUMBO_FRAME;
        else
@@ -968,20 +1475,15 @@ iavf_dev_set_default_mac_addr(struct rte_eth_dev *dev,
        struct iavf_adapter *adapter =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(adapter);
-       struct rte_ether_addr *perm_addr, *old_addr;
+       struct rte_ether_addr *old_addr;
        int ret;
 
        old_addr = (struct rte_ether_addr *)hw->mac.addr;
-       perm_addr = (struct rte_ether_addr *)hw->mac.perm_addr;
 
-       if (rte_is_same_ether_addr(mac_addr, old_addr))
+       if (rte_is_same_ether_addr(old_addr, mac_addr))
                return 0;
 
-       /* If the MAC address is configured by host, skip the setting */
-       if (rte_is_valid_assigned_ether_addr(perm_addr))
-               return -EPERM;
-
-       ret = iavf_add_del_eth_addr(adapter, old_addr, FALSE);
+       ret = iavf_add_del_eth_addr(adapter, old_addr, false, VIRTCHNL_ETHER_ADDR_PRIMARY);
        if (ret)
                PMD_DRV_LOG(ERR, "Fail to delete old MAC:"
                            " %02X:%02X:%02X:%02X:%02X:%02X",
@@ -992,7 +1494,7 @@ iavf_dev_set_default_mac_addr(struct rte_eth_dev *dev,
                            old_addr->addr_bytes[4],
                            old_addr->addr_bytes[5]);
 
-       ret = iavf_add_del_eth_addr(adapter, mac_addr, TRUE);
+       ret = iavf_add_del_eth_addr(adapter, mac_addr, true, VIRTCHNL_ETHER_ADDR_PRIMARY);
        if (ret)
                PMD_DRV_LOG(ERR, "Fail to add new MAC:"
                            " %02X:%02X:%02X:%02X:%02X:%02X",
@@ -1062,20 +1564,23 @@ iavf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
        ret = iavf_query_stats(adapter, &pstats);
        if (ret == 0) {
+               uint8_t crc_stats_len = (dev->data->dev_conf.rxmode.offloads &
+                                        DEV_RX_OFFLOAD_KEEP_CRC) ? 0 :
+                                        RTE_ETHER_CRC_LEN;
                iavf_update_stats(vsi, pstats);
                stats->ipackets = pstats->rx_unicast + pstats->rx_multicast +
-                                               pstats->rx_broadcast;
+                               pstats->rx_broadcast - pstats->rx_discards;
                stats->opackets = pstats->tx_broadcast + pstats->tx_multicast +
                                                pstats->tx_unicast;
                stats->imissed = pstats->rx_discards;
                stats->oerrors = pstats->tx_errors + pstats->tx_discards;
                stats->ibytes = pstats->rx_bytes;
-               stats->ibytes -= stats->ipackets * RTE_ETHER_CRC_LEN;
+               stats->ibytes -= stats->ipackets * crc_stats_len;
                stats->obytes = pstats->tx_bytes;
        } else {
                PMD_DRV_LOG(ERR, "Get statistics failed");
        }
-       return -EIO;
+       return ret;
 }
 
 static int
@@ -1099,6 +1604,55 @@ iavf_dev_stats_reset(struct rte_eth_dev *dev)
        return 0;
 }
 
+static int iavf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
+                                     struct rte_eth_xstat_name *xstats_names,
+                                     __rte_unused unsigned int limit)
+{
+       unsigned int i;
+
+       if (xstats_names != NULL)
+               for (i = 0; i < IAVF_NB_XSTATS; i++) {
+                       snprintf(xstats_names[i].name,
+                               sizeof(xstats_names[i].name),
+                               "%s", rte_iavf_stats_strings[i].name);
+               }
+       return IAVF_NB_XSTATS;
+}
+
+static int iavf_dev_xstats_get(struct rte_eth_dev *dev,
+                                struct rte_eth_xstat *xstats, unsigned int n)
+{
+       int ret;
+       unsigned int i;
+       struct iavf_adapter *adapter =
+               IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+       struct iavf_vsi *vsi = &vf->vsi;
+       struct virtchnl_eth_stats *pstats = NULL;
+
+       if (n < IAVF_NB_XSTATS)
+               return IAVF_NB_XSTATS;
+
+       ret = iavf_query_stats(adapter, &pstats);
+       if (ret != 0)
+               return 0;
+
+       if (!xstats)
+               return 0;
+
+       iavf_update_stats(vsi, pstats);
+
+       /* loop over xstats array and values from pstats */
+       for (i = 0; i < IAVF_NB_XSTATS; i++) {
+               xstats[i].id = i;
+               xstats[i].value = *(uint64_t *)(((char *)pstats) +
+                       rte_iavf_stats_strings[i].offset);
+       }
+
+       return IAVF_NB_XSTATS;
+}
+
+
 static int
 iavf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
@@ -1111,16 +1665,17 @@ iavf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
        msix_intr = pci_dev->intr_handle.intr_vec[queue_id];
        if (msix_intr == IAVF_MISC_VEC_ID) {
                PMD_DRV_LOG(INFO, "MISC is also enabled for control");
-               IAVF_WRITE_REG(hw, IAVFINT_DYN_CTL01,
-                             IAVFINT_DYN_CTL01_INTENA_MASK |
-                             IAVFINT_DYN_CTL01_CLEARPBA_MASK |
-                             IAVFINT_DYN_CTL01_ITR_INDX_MASK);
+               IAVF_WRITE_REG(hw, IAVF_VFINT_DYN_CTL01,
+                              IAVF_VFINT_DYN_CTL01_INTENA_MASK |
+                              IAVF_VFINT_DYN_CTL01_CLEARPBA_MASK |
+                              IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK);
        } else {
                IAVF_WRITE_REG(hw,
-                             IAVFINT_DYN_CTLN1(msix_intr - IAVF_RX_VEC_START),
-                             IAVFINT_DYN_CTLN1_INTENA_MASK |
-                             IAVFINT_DYN_CTL01_CLEARPBA_MASK |
-                             IAVFINT_DYN_CTLN1_ITR_INDX_MASK);
+                              IAVF_VFINT_DYN_CTLN1
+                               (msix_intr - IAVF_RX_VEC_START),
+                              IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+                              IAVF_VFINT_DYN_CTL01_CLEARPBA_MASK |
+                              IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK);
        }
 
        IAVF_WRITE_FLUSH(hw);
@@ -1144,7 +1699,7 @@ iavf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
        }
 
        IAVF_WRITE_REG(hw,
-                     IAVFINT_DYN_CTLN1(msix_intr - IAVF_RX_VEC_START),
+                     IAVF_VFINT_DYN_CTLN1(msix_intr - IAVF_RX_VEC_START),
                      0);
 
        IAVF_WRITE_FLUSH(hw);
@@ -1157,9 +1712,9 @@ iavf_check_vf_reset_done(struct iavf_hw *hw)
        int i, reset;
 
        for (i = 0; i < IAVF_RESET_WAIT_CNT; i++) {
-               reset = IAVF_READ_REG(hw, IAVFGEN_RSTAT) &
-                       IAVFGEN_RSTAT_VFR_STATE_MASK;
-               reset = reset >> IAVFGEN_RSTAT_VFR_STATE_SHIFT;
+               reset = IAVF_READ_REG(hw, IAVF_VFGEN_RSTAT) &
+                       IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+               reset = reset >> IAVF_VFGEN_RSTAT_VFR_STATE_SHIFT;
                if (reset == VIRTCHNL_VFR_VFACTIVE ||
                    reset == VIRTCHNL_VFR_COMPLETED)
                        break;
@@ -1172,6 +1727,349 @@ iavf_check_vf_reset_done(struct iavf_hw *hw)
        return 0;
 }
 
+static int
+iavf_lookup_proto_xtr_type(const char *flex_name)
+{
+       static struct {
+               const char *name;
+               enum iavf_proto_xtr_type type;
+       } xtr_type_map[] = {
+               { "vlan",      IAVF_PROTO_XTR_VLAN      },
+               { "ipv4",      IAVF_PROTO_XTR_IPV4      },
+               { "ipv6",      IAVF_PROTO_XTR_IPV6      },
+               { "ipv6_flow", IAVF_PROTO_XTR_IPV6_FLOW },
+               { "tcp",       IAVF_PROTO_XTR_TCP       },
+               { "ip_offset", IAVF_PROTO_XTR_IP_OFFSET },
+       };
+       uint32_t i;
+
+       for (i = 0; i < RTE_DIM(xtr_type_map); i++) {
+               if (strcmp(flex_name, xtr_type_map[i].name) == 0)
+                       return xtr_type_map[i].type;
+       }
+
+       PMD_DRV_LOG(ERR, "wrong proto_xtr type, "
+                   "it should be: vlan|ipv4|ipv6|ipv6_flow|tcp|ip_offset");
+
+       return -1;
+}
+
+/**
+ * Parse elem, the elem could be single number/range or '(' ')' group
+ * 1) A single number elem, it's just a simple digit. e.g. 9
+ * 2) A single range elem, two digits with a '-' between. e.g. 2-6
+ * 3) A group elem, combines multiple 1) or 2) with '( )'. e.g (0,2-4,6)
+ *    Within group elem, '-' used for a range separator;
+ *                       ',' used for a single number.
+ */
+static int
+iavf_parse_queue_set(const char *input, int xtr_type,
+                    struct iavf_devargs *devargs)
+{
+       const char *str = input;
+       char *end = NULL;
+       uint32_t min, max;
+       uint32_t idx;
+
+       while (isblank(*str))
+               str++;
+
+       if (!isdigit(*str) && *str != '(')
+               return -1;
+
+       /* process single number or single range of number */
+       if (*str != '(') {
+               errno = 0;
+               idx = strtoul(str, &end, 10);
+               if (errno || !end || idx >= IAVF_MAX_QUEUE_NUM)
+                       return -1;
+
+               while (isblank(*end))
+                       end++;
+
+               min = idx;
+               max = idx;
+
+               /* process single <number>-<number> */
+               if (*end == '-') {
+                       end++;
+                       while (isblank(*end))
+                               end++;
+                       if (!isdigit(*end))
+                               return -1;
+
+                       errno = 0;
+                       idx = strtoul(end, &end, 10);
+                       if (errno || !end || idx >= IAVF_MAX_QUEUE_NUM)
+                               return -1;
+
+                       max = idx;
+                       while (isblank(*end))
+                               end++;
+               }
+
+               if (*end != ':')
+                       return -1;
+
+               for (idx = RTE_MIN(min, max);
+                    idx <= RTE_MAX(min, max); idx++)
+                       devargs->proto_xtr[idx] = xtr_type;
+
+               return 0;
+       }
+
+       /* process set within bracket */
+       str++;
+       while (isblank(*str))
+               str++;
+       if (*str == '\0')
+               return -1;
+
+       min = IAVF_MAX_QUEUE_NUM;
+       do {
+               /* go ahead to the first digit */
+               while (isblank(*str))
+                       str++;
+               if (!isdigit(*str))
+                       return -1;
+
+               /* get the digit value */
+               errno = 0;
+               idx = strtoul(str, &end, 10);
+               if (errno || !end || idx >= IAVF_MAX_QUEUE_NUM)
+                       return -1;
+
+               /* go ahead to separator '-',',' and ')' */
+               while (isblank(*end))
+                       end++;
+               if (*end == '-') {
+                       if (min == IAVF_MAX_QUEUE_NUM)
+                               min = idx;
+                       else /* avoid continuous '-' */
+                               return -1;
+               } else if (*end == ',' || *end == ')') {
+                       max = idx;
+                       if (min == IAVF_MAX_QUEUE_NUM)
+                               min = idx;
+
+                       for (idx = RTE_MIN(min, max);
+                            idx <= RTE_MAX(min, max); idx++)
+                               devargs->proto_xtr[idx] = xtr_type;
+
+                       min = IAVF_MAX_QUEUE_NUM;
+               } else {
+                       return -1;
+               }
+
+               str = end + 1;
+       } while (*end != ')' && *end != '\0');
+
+       return 0;
+}
+
+static int
+iavf_parse_queue_proto_xtr(const char *queues, struct iavf_devargs *devargs)
+{
+       const char *queue_start;
+       uint32_t idx;
+       int xtr_type;
+       char flex_name[32];
+
+       while (isblank(*queues))
+               queues++;
+
+       if (*queues != '[') {
+               xtr_type = iavf_lookup_proto_xtr_type(queues);
+               if (xtr_type < 0)
+                       return -1;
+
+               devargs->proto_xtr_dflt = xtr_type;
+
+               return 0;
+       }
+
+       queues++;
+       do {
+               while (isblank(*queues))
+                       queues++;
+               if (*queues == '\0')
+                       return -1;
+
+               queue_start = queues;
+
+               /* go across a complete bracket */
+               if (*queue_start == '(') {
+                       queues += strcspn(queues, ")");
+                       if (*queues != ')')
+                               return -1;
+               }
+
+               /* scan the separator ':' */
+               queues += strcspn(queues, ":");
+               if (*queues++ != ':')
+                       return -1;
+               while (isblank(*queues))
+                       queues++;
+
+               for (idx = 0; ; idx++) {
+                       if (isblank(queues[idx]) ||
+                           queues[idx] == ',' ||
+                           queues[idx] == ']' ||
+                           queues[idx] == '\0')
+                               break;
+
+                       if (idx > sizeof(flex_name) - 2)
+                               return -1;
+
+                       flex_name[idx] = queues[idx];
+               }
+               flex_name[idx] = '\0';
+               xtr_type = iavf_lookup_proto_xtr_type(flex_name);
+               if (xtr_type < 0)
+                       return -1;
+
+               queues += idx;
+
+               while (isblank(*queues) || *queues == ',' || *queues == ']')
+                       queues++;
+
+               if (iavf_parse_queue_set(queue_start, xtr_type, devargs) < 0)
+                       return -1;
+       } while (*queues != '\0');
+
+       return 0;
+}
+
+static int
+iavf_handle_proto_xtr_arg(__rte_unused const char *key, const char *value,
+                         void *extra_args)
+{
+       struct iavf_devargs *devargs = extra_args;
+
+       if (!value || !extra_args)
+               return -EINVAL;
+
+       if (iavf_parse_queue_proto_xtr(value, devargs) < 0) {
+               PMD_DRV_LOG(ERR, "the proto_xtr's parameter is wrong : '%s'",
+                           value);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int iavf_parse_devargs(struct rte_eth_dev *dev)
+{
+       struct iavf_adapter *ad =
+               IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct rte_devargs *devargs = dev->device->devargs;
+       struct rte_kvargs *kvlist;
+       int ret;
+
+       if (!devargs)
+               return 0;
+
+       kvlist = rte_kvargs_parse(devargs->args, iavf_valid_args);
+       if (!kvlist) {
+               PMD_INIT_LOG(ERR, "invalid kvargs key\n");
+               return -EINVAL;
+       }
+
+       ad->devargs.proto_xtr_dflt = IAVF_PROTO_XTR_NONE;
+       memset(ad->devargs.proto_xtr, IAVF_PROTO_XTR_NONE,
+              sizeof(ad->devargs.proto_xtr));
+
+       ret = rte_kvargs_process(kvlist, IAVF_PROTO_XTR_ARG,
+                                &iavf_handle_proto_xtr_arg, &ad->devargs);
+       if (ret)
+               goto bail;
+
+bail:
+       rte_kvargs_free(kvlist);
+       return ret;
+}
+
+static void
+iavf_init_proto_xtr(struct rte_eth_dev *dev)
+{
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+       struct iavf_adapter *ad =
+                       IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       const struct iavf_proto_xtr_ol *xtr_ol;
+       bool proto_xtr_enable = false;
+       int offset;
+       uint16_t i;
+
+       vf->proto_xtr = rte_zmalloc("vf proto xtr",
+                                   vf->vsi_res->num_queue_pairs, 0);
+       if (unlikely(!(vf->proto_xtr))) {
+               PMD_DRV_LOG(ERR, "no memory for setting up proto_xtr's table");
+               return;
+       }
+
+       for (i = 0; i < vf->vsi_res->num_queue_pairs; i++) {
+               vf->proto_xtr[i] = ad->devargs.proto_xtr[i] !=
+                                       IAVF_PROTO_XTR_NONE ?
+                                       ad->devargs.proto_xtr[i] :
+                                       ad->devargs.proto_xtr_dflt;
+
+               if (vf->proto_xtr[i] != IAVF_PROTO_XTR_NONE) {
+                       uint8_t type = vf->proto_xtr[i];
+
+                       iavf_proto_xtr_params[type].required = true;
+                       proto_xtr_enable = true;
+               }
+       }
+
+       if (likely(!proto_xtr_enable))
+               return;
+
+       offset = rte_mbuf_dynfield_register(&iavf_proto_xtr_metadata_param);
+       if (unlikely(offset == -1)) {
+               PMD_DRV_LOG(ERR,
+                           "failed to extract protocol metadata, error %d",
+                           -rte_errno);
+               return;
+       }
+
+       PMD_DRV_LOG(DEBUG,
+                   "proto_xtr metadata offset in mbuf is : %d",
+                   offset);
+       rte_pmd_ifd_dynfield_proto_xtr_metadata_offs = offset;
+
+       for (i = 0; i < RTE_DIM(iavf_proto_xtr_params); i++) {
+               xtr_ol = &iavf_proto_xtr_params[i];
+
+               uint8_t rxdid = iavf_proto_xtr_type_to_rxdid((uint8_t)i);
+
+               if (!xtr_ol->required)
+                       continue;
+
+               if (!(vf->supported_rxdid & BIT(rxdid))) {
+                       PMD_DRV_LOG(ERR,
+                                   "rxdid[%u] is not supported in hardware",
+                                   rxdid);
+                       rte_pmd_ifd_dynfield_proto_xtr_metadata_offs = -1;
+                       break;
+               }
+
+               offset = rte_mbuf_dynflag_register(&xtr_ol->param);
+               if (unlikely(offset == -1)) {
+                       PMD_DRV_LOG(ERR,
+                                   "failed to register proto_xtr offload '%s', error %d",
+                                   xtr_ol->param.name, -rte_errno);
+
+                       rte_pmd_ifd_dynfield_proto_xtr_metadata_offs = -1;
+                       break;
+               }
+
+               PMD_DRV_LOG(DEBUG,
+                           "proto_xtr offload '%s' offset in mbuf is : %d",
+                           xtr_ol->param.name, offset);
+               *xtr_ol->ol_flag = 1ULL << offset;
+       }
+}
+
 static int
 iavf_init_vf(struct rte_eth_dev *dev)
 {
@@ -1181,6 +2079,12 @@ iavf_init_vf(struct rte_eth_dev *dev)
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
+       err = iavf_parse_devargs(dev);
+       if (err) {
+               PMD_INIT_LOG(ERR, "Failed to parse devargs");
+               goto err;
+       }
+
        err = iavf_set_mac_type(hw);
        if (err) {
                PMD_INIT_LOG(ERR, "set_mac_type failed: %d", err);
@@ -1217,6 +2121,7 @@ iavf_init_vf(struct rte_eth_dev *dev)
                PMD_INIT_LOG(ERR, "unable to allocate vf_res memory");
                goto err_api;
        }
+
        if (iavf_get_vf_resource(adapter) != 0) {
                PMD_INIT_LOG(ERR, "iavf_get_vf_config failed");
                goto err_alloc;
@@ -1236,11 +2141,41 @@ iavf_init_vf(struct rte_eth_dev *dev)
                        goto err_rss;
                }
        }
+
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+               if (iavf_get_supported_rxdid(adapter) != 0) {
+                       PMD_INIT_LOG(ERR, "failed to do get supported rxdid");
+                       goto err_rss;
+               }
+       }
+
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) {
+               if (iavf_get_vlan_offload_caps_v2(adapter) != 0) {
+                       PMD_INIT_LOG(ERR, "failed to do get VLAN offload v2 capabilities");
+                       goto err_rss;
+               }
+       }
+
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS) {
+               bufsz = sizeof(struct virtchnl_qos_cap_list) +
+                       IAVF_MAX_TRAFFIC_CLASS *
+                       sizeof(struct virtchnl_qos_cap_elem);
+               vf->qos_cap = rte_zmalloc("qos_cap", bufsz, 0);
+               if (!vf->qos_cap) {
+                       PMD_INIT_LOG(ERR, "unable to allocate qos_cap memory");
+                       goto err_rss;
+               }
+               iavf_tm_conf_init(dev);
+       }
+
+       iavf_init_proto_xtr(dev);
+
        return 0;
 err_rss:
        rte_free(vf->rss_key);
        rte_free(vf->rss_lut);
 err_alloc:
+       rte_free(vf->qos_cap);
        rte_free(vf->vf_res);
        vf->vsi_res = NULL;
 err_api:
@@ -1256,10 +2191,13 @@ static inline void
 iavf_enable_irq0(struct iavf_hw *hw)
 {
        /* Enable admin queue interrupt trigger */
-       IAVF_WRITE_REG(hw, IAVFINT_ICR0_ENA1, IAVFINT_ICR0_ENA1_ADMINQ_MASK);
+       IAVF_WRITE_REG(hw, IAVF_VFINT_ICR0_ENA1,
+                      IAVF_VFINT_ICR0_ENA1_ADMINQ_MASK);
 
-       IAVF_WRITE_REG(hw, IAVFINT_DYN_CTL01, IAVFINT_DYN_CTL01_INTENA_MASK |
-               IAVFINT_DYN_CTL01_CLEARPBA_MASK | IAVFINT_DYN_CTL01_ITR_INDX_MASK);
+       IAVF_WRITE_REG(hw, IAVF_VFINT_DYN_CTL01,
+                      IAVF_VFINT_DYN_CTL01_INTENA_MASK |
+                      IAVF_VFINT_DYN_CTL01_CLEARPBA_MASK |
+                      IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK);
 
        IAVF_WRITE_FLUSH(hw);
 }
@@ -1268,9 +2206,9 @@ static inline void
 iavf_disable_irq0(struct iavf_hw *hw)
 {
        /* Disable all interrupt types */
-       IAVF_WRITE_REG(hw, IAVFINT_ICR0_ENA1, 0);
-       IAVF_WRITE_REG(hw, IAVFINT_DYN_CTL01,
-                     IAVFINT_DYN_CTL01_ITR_INDX_MASK);
+       IAVF_WRITE_REG(hw, IAVF_VFINT_ICR0_ENA1, 0);
+       IAVF_WRITE_REG(hw, IAVF_VFINT_DYN_CTL01,
+                      IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK);
        IAVF_WRITE_FLUSH(hw);
 }
 
@@ -1287,6 +2225,36 @@ iavf_dev_interrupt_handler(void *param)
        iavf_enable_irq0(hw);
 }
 
+static int
+iavf_dev_flow_ops_get(struct rte_eth_dev *dev,
+                     const struct rte_flow_ops **ops)
+{
+       if (!dev)
+               return -EINVAL;
+
+       *ops = &iavf_flow_ops;
+       return 0;
+}
+
+static void
+iavf_default_rss_disable(struct iavf_adapter *adapter)
+{
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+       int ret = 0;
+
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+               /* Set hena = 0 to ask PF to cleanup all existing RSS. */
+               ret = iavf_set_hena(adapter, 0);
+               if (ret)
+                       /* It is a workaround, temporarily allow error to be
+                        * returned due to possible lack of PF handling for
+                        * hena = 0.
+                        */
+                       PMD_INIT_LOG(WARNING, "fail to disable default RSS,"
+                                   "lack PF support");
+       }
+}
+
 static int
 iavf_dev_init(struct rte_eth_dev *eth_dev)
 {
@@ -1294,11 +2262,15 @@ iavf_dev_init(struct rte_eth_dev *eth_dev)
                IAVF_DEV_PRIVATE_TO_ADAPTER(eth_dev->data->dev_private);
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(adapter);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+       int ret = 0;
 
        PMD_INIT_FUNC_TRACE();
 
        /* assign ops func pointer */
        eth_dev->dev_ops = &iavf_eth_dev_ops;
+       eth_dev->rx_queue_count = iavf_dev_rxq_count;
+       eth_dev->rx_descriptor_status = iavf_dev_rx_desc_status;
+       eth_dev->tx_descriptor_status = iavf_dev_tx_desc_status;
        eth_dev->rx_pkt_burst = &iavf_recv_pkts;
        eth_dev->tx_pkt_burst = &iavf_xmit_pkts;
        eth_dev->tx_pkt_prepare = &iavf_prep_pkts;
@@ -1313,6 +2285,7 @@ iavf_dev_init(struct rte_eth_dev *eth_dev)
                return 0;
        }
        rte_eth_copy_pci_info(eth_dev, pci_dev);
+       eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
 
        hw->vendor_id = pci_dev->id.vendor_id;
        hw->device_id = pci_dev->id.device_id;
@@ -1324,12 +2297,16 @@ iavf_dev_init(struct rte_eth_dev *eth_dev)
        hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
        hw->back = IAVF_DEV_PRIVATE_TO_ADAPTER(eth_dev->data->dev_private);
        adapter->eth_dev = eth_dev;
+       adapter->stopped = 1;
 
        if (iavf_init_vf(eth_dev) != 0) {
                PMD_INIT_LOG(ERR, "Init vf failed");
                return -1;
        }
 
+       /* set default ptype table */
+       adapter->ptype_tbl = iavf_get_default_ptype_table();
+
        /* copy mac addr */
        eth_dev->data->mac_addrs = rte_zmalloc(
                "iavf_mac", RTE_ETHER_ADDR_LEN * IAVF_NUM_MACADDR_MAX, 0);
@@ -1359,17 +2336,44 @@ iavf_dev_init(struct rte_eth_dev *eth_dev)
        /* configure and enable device interrupt */
        iavf_enable_irq0(hw);
 
+       ret = iavf_flow_init(adapter);
+       if (ret) {
+               PMD_INIT_LOG(ERR, "Failed to initialize flow");
+               return ret;
+       }
+
+       iavf_default_rss_disable(adapter);
+
        return 0;
 }
 
-static void
+static int
 iavf_dev_close(struct rte_eth_dev *dev)
 {
        struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+       struct iavf_adapter *adapter =
+               IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+       int ret;
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
+       ret = iavf_dev_stop(dev);
+
+       iavf_flow_flush(dev, NULL);
+       iavf_flow_uninit(adapter);
+
+       /*
+        * disable promiscuous mode before reset vf
+        * it is a workaround solution when work with kernel driver
+        * and it is not the normal way
+        */
+       if (vf->promisc_unicast_enabled || vf->promisc_multicast_enabled)
+               iavf_config_promisc(adapter, false, false);
 
-       iavf_dev_stop(dev);
        iavf_shutdown_adminq(hw);
        /* disable uio intr before callback unregister */
        rte_intr_disable(intr_handle);
@@ -1378,22 +2382,20 @@ iavf_dev_close(struct rte_eth_dev *dev)
        rte_intr_callback_unregister(intr_handle,
                                     iavf_dev_interrupt_handler, dev);
        iavf_disable_irq0(hw);
-}
-
-static int
-iavf_dev_uninit(struct rte_eth_dev *dev)
-{
-       struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-       struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-               return -EPERM;
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+               iavf_tm_conf_uninit(dev);
 
-       dev->dev_ops = NULL;
-       dev->rx_pkt_burst = NULL;
-       dev->tx_pkt_burst = NULL;
-       if (hw->adapter_stopped == 0)
-               iavf_dev_close(dev);
+       if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+               if (vf->rss_lut) {
+                       rte_free(vf->rss_lut);
+                       vf->rss_lut = NULL;
+               }
+               if (vf->rss_key) {
+                       rte_free(vf->rss_key);
+                       vf->rss_key = NULL;
+               }
+       }
 
        rte_free(vf->vf_res);
        vf->vsi_res = NULL;
@@ -1402,143 +2404,164 @@ iavf_dev_uninit(struct rte_eth_dev *dev)
        rte_free(vf->aq_resp);
        vf->aq_resp = NULL;
 
-       if (vf->rss_lut) {
-               rte_free(vf->rss_lut);
-               vf->rss_lut = NULL;
-       }
-       if (vf->rss_key) {
-               rte_free(vf->rss_key);
-               vf->rss_key = NULL;
-       }
+       /*
+        * If the VF is reset via VFLR, the device will be knocked out of bus
+        * master mode, and the driver will fail to recover from the reset. Fix
+        * this by enabling bus mastering after every reset. In a non-VFLR case,
+        * the bus master bit will not be disabled, and this call will have no
+        * effect.
+        */
+       if (vf->vf_reset && !rte_pci_set_bus_master(pci_dev, true))
+               vf->vf_reset = false;
 
-       return 0;
+       return ret;
 }
 
-static int eth_iavf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
-                            struct rte_pci_device *pci_dev)
+static int
+iavf_dev_uninit(struct rte_eth_dev *dev)
 {
-       return rte_eth_dev_pci_generic_probe(pci_dev,
-               sizeof(struct iavf_adapter), iavf_dev_init);
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return -EPERM;
+
+       iavf_dev_close(dev);
+
+       return 0;
 }
 
-static int eth_iavf_pci_remove(struct rte_pci_device *pci_dev)
+/*
+ * Reset VF device only to re-initialize resources in PMD layer
+ */
+static int
+iavf_dev_reset(struct rte_eth_dev *dev)
 {
-       return rte_eth_dev_pci_generic_remove(pci_dev, iavf_dev_uninit);
-}
+       int ret;
 
-/* Adaptive virtual function driver struct */
-static struct rte_pci_driver rte_iavf_pmd = {
-       .id_table = pci_id_iavf_map,
-       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
-       .probe = eth_iavf_pci_probe,
-       .remove = eth_iavf_pci_remove,
-};
+       ret = iavf_dev_uninit(dev);
+       if (ret)
+               return ret;
 
-RTE_PMD_REGISTER_PCI(net_iavf, rte_iavf_pmd);
-RTE_PMD_REGISTER_PCI_TABLE(net_iavf, pci_id_iavf_map);
-RTE_PMD_REGISTER_KMOD_DEP(net_iavf, "* igb_uio | vfio-pci");
-RTE_INIT(iavf_init_log)
-{
-       iavf_logtype_init = rte_log_register("pmd.net.iavf.init");
-       if (iavf_logtype_init >= 0)
-               rte_log_set_level(iavf_logtype_init, RTE_LOG_NOTICE);
-       iavf_logtype_driver = rte_log_register("pmd.net.iavf.driver");
-       if (iavf_logtype_driver >= 0)
-               rte_log_set_level(iavf_logtype_driver, RTE_LOG_NOTICE);
-
-#ifdef RTE_LIBRTE_IAVF_DEBUG_RX
-       iavf_logtype_rx = rte_log_register("pmd.net.iavf.rx");
-       if (iavf_logtype_rx >= 0)
-               rte_log_set_level(iavf_logtype_rx, RTE_LOG_DEBUG);
-#endif
+       return iavf_dev_init(dev);
+}
 
-#ifdef RTE_LIBRTE_IAVF_DEBUG_TX
-       iavf_logtype_tx = rte_log_register("pmd.net.iavf.tx");
-       if (iavf_logtype_tx >= 0)
-               rte_log_set_level(iavf_logtype_tx, RTE_LOG_DEBUG);
-#endif
+static int
+iavf_dcf_cap_check_handler(__rte_unused const char *key,
+                          const char *value, __rte_unused void *opaque)
+{
+       if (strcmp(value, "dcf"))
+               return -1;
 
-#ifdef RTE_LIBRTE_IAVF_DEBUG_TX_FREE
-       iavf_logtype_tx_free = rte_log_register("pmd.net.iavf.tx_free");
-       if (iavf_logtype_tx_free >= 0)
-               rte_log_set_level(iavf_logtype_tx_free, RTE_LOG_DEBUG);
-#endif
+       return 0;
 }
 
-/* memory func for base code */
-enum iavf_status_code
-iavf_allocate_dma_mem_d(__rte_unused struct iavf_hw *hw,
-                      struct iavf_dma_mem *mem,
-                      u64 size,
-                      u32 alignment)
+static int
+iavf_dcf_cap_selected(struct rte_devargs *devargs)
 {
-       const struct rte_memzone *mz = NULL;
-       char z_name[RTE_MEMZONE_NAMESIZE];
+       struct rte_kvargs *kvlist;
+       const char *key = "cap";
+       int ret = 0;
 
-       if (!mem)
-               return IAVF_ERR_PARAM;
+       if (devargs == NULL)
+               return 0;
 
-       snprintf(z_name, sizeof(z_name), "iavf_dma_%"PRIu64, rte_rand());
-       mz = rte_memzone_reserve_bounded(z_name, size, SOCKET_ID_ANY,
-                       RTE_MEMZONE_IOVA_CONTIG, alignment, RTE_PGSIZE_2M);
-       if (!mz)
-               return IAVF_ERR_NO_MEMORY;
+       kvlist = rte_kvargs_parse(devargs->args, NULL);
+       if (kvlist == NULL)
+               return 0;
 
-       mem->size = size;
-       mem->va = mz->addr;
-       mem->pa = mz->phys_addr;
-       mem->zone = (const void *)mz;
-       PMD_DRV_LOG(DEBUG,
-                   "memzone %s allocated with physical address: %"PRIu64,
-                   mz->name, mem->pa);
+       if (!rte_kvargs_count(kvlist, key))
+               goto exit;
 
-       return IAVF_SUCCESS;
+       /* dcf capability selected when there's a key-value pair: cap=dcf */
+       if (rte_kvargs_process(kvlist, key,
+                              iavf_dcf_cap_check_handler, NULL) < 0)
+               goto exit;
+
+       ret = 1;
+
+exit:
+       rte_kvargs_free(kvlist);
+       return ret;
 }
 
-enum iavf_status_code
-iavf_free_dma_mem_d(__rte_unused struct iavf_hw *hw,
-                  struct iavf_dma_mem *mem)
+static int
+iavf_drv_i40evf_check_handler(__rte_unused const char *key,
+                             const char *value, __rte_unused void *opaque)
 {
-       if (!mem)
-               return IAVF_ERR_PARAM;
-
-       PMD_DRV_LOG(DEBUG,
-                   "memzone %s to be freed with physical address: %"PRIu64,
-                   ((const struct rte_memzone *)mem->zone)->name, mem->pa);
-       rte_memzone_free((const struct rte_memzone *)mem->zone);
-       mem->zone = NULL;
-       mem->va = NULL;
-       mem->pa = (u64)0;
+       if (strcmp(value, "i40evf"))
+               return -1;
 
-       return IAVF_SUCCESS;
+       return 0;
 }
 
-enum iavf_status_code
-iavf_allocate_virt_mem_d(__rte_unused struct iavf_hw *hw,
-                       struct iavf_virt_mem *mem,
-                       u32 size)
+static int
+iavf_drv_i40evf_selected(struct rte_devargs *devargs, uint16_t device_id)
 {
-       if (!mem)
-               return IAVF_ERR_PARAM;
+       struct rte_kvargs *kvlist;
+       int ret = 0;
 
-       mem->size = size;
-       mem->va = rte_zmalloc("iavf", size, 0);
+       if (device_id != IAVF_DEV_ID_VF &&
+           device_id != IAVF_DEV_ID_VF_HV &&
+           device_id != IAVF_DEV_ID_X722_VF &&
+           device_id != IAVF_DEV_ID_X722_A0_VF)
+               return 0;
 
-       if (mem->va)
-               return IAVF_SUCCESS;
-       else
-               return IAVF_ERR_NO_MEMORY;
+       if (devargs == NULL)
+               return 0;
+
+       kvlist = rte_kvargs_parse(devargs->args, NULL);
+       if (kvlist == NULL)
+               return 0;
+
+       if (!rte_kvargs_count(kvlist, RTE_DEVARGS_KEY_DRIVER))
+               goto exit;
+
+       /* i40evf driver selected when there's a key-value pair:
+        * driver=i40evf
+        */
+       if (rte_kvargs_process(kvlist, RTE_DEVARGS_KEY_DRIVER,
+                              iavf_drv_i40evf_check_handler, NULL) < 0)
+               goto exit;
+
+       ret = 1;
+
+exit:
+       rte_kvargs_free(kvlist);
+       return ret;
 }
 
-enum iavf_status_code
-iavf_free_virt_mem_d(__rte_unused struct iavf_hw *hw,
-                   struct iavf_virt_mem *mem)
+static int eth_iavf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+                            struct rte_pci_device *pci_dev)
 {
-       if (!mem)
-               return IAVF_ERR_PARAM;
+       if (iavf_dcf_cap_selected(pci_dev->device.devargs) ||
+           iavf_drv_i40evf_selected(pci_dev->device.devargs,
+                                    pci_dev->id.device_id))
+               return 1;
 
-       rte_free(mem->va);
-       mem->va = NULL;
+       return rte_eth_dev_pci_generic_probe(pci_dev,
+               sizeof(struct iavf_adapter), iavf_dev_init);
+}
 
-       return IAVF_SUCCESS;
+static int eth_iavf_pci_remove(struct rte_pci_device *pci_dev)
+{
+       return rte_eth_dev_pci_generic_remove(pci_dev, iavf_dev_uninit);
 }
+
+/* Adaptive virtual function driver struct */
+static struct rte_pci_driver rte_iavf_pmd = {
+       .id_table = pci_id_iavf_map,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+       .probe = eth_iavf_pci_probe,
+       .remove = eth_iavf_pci_remove,
+};
+
+RTE_PMD_REGISTER_PCI(net_iavf, rte_iavf_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(net_iavf, pci_id_iavf_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_iavf, "* igb_uio | vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(net_iavf, "cap=dcf driver=i40evf");
+RTE_LOG_REGISTER_SUFFIX(iavf_logtype_init, init, NOTICE);
+RTE_LOG_REGISTER_SUFFIX(iavf_logtype_driver, driver, NOTICE);
+#ifdef RTE_ETHDEV_DEBUG_RX
+RTE_LOG_REGISTER_SUFFIX(iavf_logtype_rx, rx, DEBUG);
+#endif
+#ifdef RTE_ETHDEV_DEBUG_TX
+RTE_LOG_REGISTER_SUFFIX(iavf_logtype_tx, tx, DEBUG);
+#endif