net/bnxt: support for QinQ insertion and stripping
[dpdk.git] / drivers / net / bnxt / bnxt_ethdev.c
index 41771d8..b3a37e1 100644 (file)
@@ -11,6 +11,7 @@
 #include <rte_ethdev_pci.h>
 #include <rte_malloc.h>
 #include <rte_cycles.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
@@ -71,6 +72,11 @@ int bnxt_logtype_driver;
 #define BROADCOM_DEV_ID_57407_MF 0x16ea
 #define BROADCOM_DEV_ID_57414_MF 0x16ec
 #define BROADCOM_DEV_ID_57416_MF 0x16ee
+#define BROADCOM_DEV_ID_57508 0x1750
+#define BROADCOM_DEV_ID_57504 0x1751
+#define BROADCOM_DEV_ID_57502 0x1752
+#define BROADCOM_DEV_ID_57500_VF1 0x1806
+#define BROADCOM_DEV_ID_57500_VF2 0x1807
 #define BROADCOM_DEV_ID_58802 0xd802
 #define BROADCOM_DEV_ID_58804 0xd804
 #define BROADCOM_DEV_ID_58808 0x16f0
@@ -119,6 +125,11 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
        { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_58804) },
        { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_58808) },
        { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_58802_VF) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57508) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57504) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57502) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57500_VF1) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57500_VF2) },
        { .vendor_id = 0, /* sentinel */ },
 };
 
@@ -140,6 +151,7 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
                                     DEV_TX_OFFLOAD_GRE_TNL_TSO | \
                                     DEV_TX_OFFLOAD_IPIP_TNL_TSO | \
                                     DEV_TX_OFFLOAD_GENEVE_TNL_TSO | \
+                                    DEV_TX_OFFLOAD_QINQ_INSERT | \
                                     DEV_TX_OFFLOAD_MULTI_SEGS)
 
 #define BNXT_DEV_RX_OFFLOAD_SUPPORT (DEV_RX_OFFLOAD_VLAN_FILTER | \
@@ -150,12 +162,26 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
                                     DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | \
                                     DEV_RX_OFFLOAD_JUMBO_FRAME | \
                                     DEV_RX_OFFLOAD_KEEP_CRC | \
+                                    DEV_RX_OFFLOAD_VLAN_EXTEND | \
                                     DEV_RX_OFFLOAD_TCP_LRO)
 
 static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
 static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev);
+static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev);
+static void bnxt_cancel_fw_health_check(struct bnxt *bp);
+
+/*
+ * Report whether the device is currently unusable.
+ *
+ * Returns -EIO when BNXT_FLAG_FATAL_ERROR is set in bp->flags, -EBUSY when
+ * BNXT_FLAG_FW_RESET is set, and 0 otherwise. The fatal-error flag takes
+ * precedence when both are set.
+ */
+int is_bnxt_in_error(struct bnxt *bp)
+{
+       int rc = 0;
+
+       if (bp->flags & BNXT_FLAG_FATAL_ERROR)
+               rc = -EIO;
+       else if (bp->flags & BNXT_FLAG_FW_RESET)
+               rc = -EBUSY;
+
+       return rc;
+}
 
 /***********************/
 
@@ -163,21 +189,54 @@ static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
  * High level utility functions
  */
 
-static void bnxt_free_mem(struct bnxt *bp)
+/*
+ * Number of RSS contexts used per VNIC.
+ *
+ * Always 1 unless BNXT_CHIP_THOR(bp) is true. On THOR the rx ring count is
+ * aligned up to a multiple of BNXT_RSS_ENTRIES_PER_CTX_THOR and divided by
+ * that same constant.
+ */
+static uint16_t bnxt_rss_ctxts(const struct bnxt *bp)
+{
+       uint16_t nr_ctxs = 1;
+
+       if (BNXT_CHIP_THOR(bp))
+               nr_ctxs = RTE_ALIGN_MUL_CEIL(bp->rx_nr_rings,
+                                            BNXT_RSS_ENTRIES_PER_CTX_THOR) /
+                         BNXT_RSS_ENTRIES_PER_CTX_THOR;
+
+       return nr_ctxs;
+}
+
+/*
+ * Size of the RSS redirection table exposed to the application.
+ *
+ * HW_HASH_INDEX_SIZE unless BNXT_CHIP_THOR(bp) is true; on THOR it is the
+ * number of RSS contexts times BNXT_RSS_ENTRIES_PER_CTX_THOR.
+ */
+static uint16_t bnxt_rss_hash_tbl_size(const struct bnxt *bp)
+{
+       uint16_t tbl_size = HW_HASH_INDEX_SIZE;
+
+       if (BNXT_CHIP_THOR(bp))
+               tbl_size = bnxt_rss_ctxts(bp) * BNXT_RSS_ENTRIES_PER_CTX_THOR;
+
+       return tbl_size;
+}
+
+/*
+ * Release memory held by the driver for this port.
+ *
+ * Filter memory, VNIC attributes, VNIC memory and the async completion
+ * ring are always freed. Stats and the tx/rx rings are freed only when
+ * reconfig is false.
+ */
+static void bnxt_free_mem(struct bnxt *bp, bool reconfig)
 {
        bnxt_free_filter_mem(bp);
        bnxt_free_vnic_attributes(bp);
        bnxt_free_vnic_mem(bp);
 
-       bnxt_free_stats(bp);
-       bnxt_free_tx_rings(bp);
-       bnxt_free_rx_rings(bp);
+       /* tx/rx rings are configured as part of *_queue_setup callbacks.
+        * If the number of rings change across fw update,
+        * we don't have much choice except to warn the user.
+        */
+       if (!reconfig) {
+               bnxt_free_stats(bp);
+               bnxt_free_tx_rings(bp);
+               bnxt_free_rx_rings(bp);
+       }
+       bnxt_free_async_cp_ring(bp);
 }
 
-static int bnxt_alloc_mem(struct bnxt *bp)
+static int bnxt_alloc_mem(struct bnxt *bp, bool reconfig)
 {
        int rc;
 
+       rc = bnxt_alloc_ring_grps(bp);
+       if (rc)
+               goto alloc_mem_err;
+
+       rc = bnxt_alloc_async_ring_struct(bp);
+       if (rc)
+               goto alloc_mem_err;
+
        rc = bnxt_alloc_vnic_mem(bp);
        if (rc)
                goto alloc_mem_err;
@@ -190,10 +249,14 @@ static int bnxt_alloc_mem(struct bnxt *bp)
        if (rc)
                goto alloc_mem_err;
 
+       rc = bnxt_alloc_async_cp_ring(bp);
+       if (rc)
+               goto alloc_mem_err;
+
        return 0;
 
 alloc_mem_err:
-       bnxt_free_mem(bp);
+       bnxt_free_mem(bp, reconfig);
        return rc;
 }
 
@@ -211,9 +274,6 @@ static int bnxt_init_chip(struct bnxt *bp)
        unsigned int i, j;
        int rc;
 
-       /* disable uio/vfio intr/eventfd mapping */
-       rte_intr_disable(intr_handle);
-
        if (bp->eth_dev->data->mtu > RTE_ETHER_MTU) {
                bp->eth_dev->data->dev_conf.rxmode.offloads |=
                        DEV_RX_OFFLOAD_JUMBO_FRAME;
@@ -224,6 +284,12 @@ static int bnxt_init_chip(struct bnxt *bp)
                bp->flags &= ~BNXT_FLAG_JUMBO;
        }
 
+       /* THOR does not support ring groups.
+        * But we will use the array to save RSS context IDs.
+        */
+       if (BNXT_CHIP_THOR(bp))
+               bp->max_ring_grps = BNXT_MAX_RSS_CTXTS_THOR;
+
        rc = bnxt_alloc_all_hwrm_stat_ctxs(bp);
        if (rc) {
                PMD_DRV_LOG(ERR, "HWRM stat ctx alloc failure rc: %x\n", rc);
@@ -276,13 +342,21 @@ static int bnxt_init_chip(struct bnxt *bp)
 
                /* Alloc RSS context only if RSS mode is enabled */
                if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS) {
-                       rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic);
+                       int j, nr_ctxs = bnxt_rss_ctxts(bp);
+
+                       rc = 0;
+                       for (j = 0; j < nr_ctxs; j++) {
+                               rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, j);
+                               if (rc)
+                                       break;
+                       }
                        if (rc) {
                                PMD_DRV_LOG(ERR,
-                                       "HWRM vnic %d ctx alloc failure rc: %x\n",
-                                       i, rc);
+                                 "HWRM vnic %d ctx %d alloc failure rc: %x\n",
+                                 i, j, rc);
                                goto err_out;
                        }
+                       vnic->num_lb_ctxts = nr_ctxs;
                }
 
                /*
@@ -317,7 +391,7 @@ static int bnxt_init_chip(struct bnxt *bp)
                                    "rxq[%d]->vnic=%p vnic->fw_grp_ids=%p\n",
                                    j, rxq->vnic, rxq->vnic->fw_grp_ids);
 
-                       if (rxq->rx_deferred_start)
+                       if (BNXT_HAS_RING_GRPS(bp) && rxq->rx_deferred_start)
                                rxq->vnic->fw_grp_ids[j] = INVALID_HW_RING_ID;
                }
 
@@ -354,8 +428,9 @@ static int bnxt_init_chip(struct bnxt *bp)
                                        bp->rx_cp_nr_rings);
                        return -ENOTSUP;
                }
-               if (rte_intr_efd_enable(intr_handle, intr_vector))
-                       return -1;
+               rc = rte_intr_efd_enable(intr_handle, intr_vector);
+               if (rc)
+                       return rc;
        }
 
        if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
@@ -366,28 +441,31 @@ static int bnxt_init_chip(struct bnxt *bp)
                if (intr_handle->intr_vec == NULL) {
                        PMD_DRV_LOG(ERR, "Failed to allocate %d rx_queues"
                                " intr_vec", bp->eth_dev->data->nb_rx_queues);
-                       return -ENOMEM;
+                       rc = -ENOMEM;
+                       goto err_disable;
                }
                PMD_DRV_LOG(DEBUG, "intr_handle->intr_vec = %p "
                        "intr_handle->nb_efd = %d intr_handle->max_intr = %d\n",
                         intr_handle->intr_vec, intr_handle->nb_efd,
                        intr_handle->max_intr);
-       }
-
-       for (queue_id = 0; queue_id < bp->eth_dev->data->nb_rx_queues;
-            queue_id++) {
-               intr_handle->intr_vec[queue_id] = vec;
-               if (vec < base + intr_handle->nb_efd - 1)
-                       vec++;
+               for (queue_id = 0; queue_id < bp->eth_dev->data->nb_rx_queues;
+                    queue_id++) {
+                       intr_handle->intr_vec[queue_id] =
+                                                       vec + BNXT_RX_VEC_START;
+                       if (vec < base + intr_handle->nb_efd - 1)
+                               vec++;
+               }
        }
 
        /* enable uio/vfio intr/eventfd mapping */
-       rte_intr_enable(intr_handle);
+       rc = rte_intr_enable(intr_handle);
+       if (rc)
+               goto err_free;
 
        rc = bnxt_get_hwrm_link_config(bp, &new);
        if (rc) {
                PMD_DRV_LOG(ERR, "HWRM Get link config failure rc: %x\n", rc);
-               goto err_out;
+               goto err_free;
        }
 
        if (!bp->link_info.link_up) {
@@ -395,16 +473,18 @@ static int bnxt_init_chip(struct bnxt *bp)
                if (rc) {
                        PMD_DRV_LOG(ERR,
                                "HWRM link config failure rc: %x\n", rc);
-                       goto err_out;
+                       goto err_free;
                }
        }
        bnxt_print_link_info(bp->eth_dev);
 
        return 0;
 
+err_free:
+       rte_free(intr_handle->intr_vec);
+err_disable:
+       rte_intr_efd_disable(intr_handle);
 err_out:
-       bnxt_free_all_hwrm_resources(bp);
-
        /* Some of the error status returned by FW may not be from errno.h */
        if (rc > 0)
                rc = -EIO;
@@ -424,9 +504,11 @@ static int bnxt_init_nic(struct bnxt *bp)
 {
        int rc;
 
-       rc = bnxt_init_ring_grps(bp);
-       if (rc)
-               return rc;
+       if (BNXT_HAS_RING_GRPS(bp)) {
+               rc = bnxt_init_ring_grps(bp);
+               if (rc)
+                       return rc;
+       }
 
        bnxt_init_vnics(bp);
        bnxt_init_filters(bp);
@@ -438,12 +520,18 @@ static int bnxt_init_nic(struct bnxt *bp)
  * Device configuration and status function
  */
 
-static void bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
-                                 struct rte_eth_dev_info *dev_info)
+static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
+                               struct rte_eth_dev_info *dev_info)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(eth_dev->device);
+       struct bnxt *bp = eth_dev->data->dev_private;
        uint16_t max_vnics, i, j, vpool, vrxq;
        unsigned int max_rx_rings;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        /* MAC Specifics */
        dev_info->max_mac_addrs = bp->max_l2_ctx;
@@ -451,19 +539,23 @@ static void bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
 
        /* PF/VF specifics */
        if (BNXT_PF(bp))
-               dev_info->max_vfs = bp->pdev->max_vfs;
-       max_rx_rings = RTE_MIN(bp->max_vnics, bp->max_stat_ctx);
+               dev_info->max_vfs = pdev->max_vfs;
+
+       max_rx_rings = RTE_MIN(bp->max_rx_rings, bp->max_stat_ctx);
        /* For the sake of symmetry, max_rx_queues = max_tx_queues */
        dev_info->max_rx_queues = max_rx_rings;
        dev_info->max_tx_queues = max_rx_rings;
-       dev_info->reta_size = HW_HASH_INDEX_SIZE;
+       dev_info->reta_size = bnxt_rss_hash_tbl_size(bp);
        dev_info->hash_key_size = 40;
        max_vnics = bp->max_vnics;
 
+       /* MTU specifics */
+       dev_info->min_mtu = RTE_ETHER_MIN_MTU;
+       dev_info->max_mtu = BNXT_MAX_MTU;
+
        /* Fast path specifics */
        dev_info->min_rx_bufsize = 1;
-       dev_info->max_rx_pktlen = BNXT_MAX_MTU + RTE_ETHER_HDR_LEN +
-               RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE * 2;
+       dev_info->max_rx_pktlen = BNXT_MAX_PKT_LEN;
 
        dev_info->rx_offload_capa = BNXT_DEV_RX_OFFLOAD_SUPPORT;
        if (bp->flags & BNXT_FLAG_PTP_SUPPORTED)
@@ -532,12 +624,14 @@ found:
 
        dev_info->vmdq_pool_base = 0;
        dev_info->vmdq_queue_base = 0;
+
+       return 0;
 }
 
 /* Configure the device based on the configuration provided */
 static int bnxt_dev_configure_op(struct rte_eth_dev *eth_dev)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        uint64_t rx_offloads = eth_dev->data->dev_conf.rxmode.offloads;
        int rc;
 
@@ -546,6 +640,10 @@ static int bnxt_dev_configure_op(struct rte_eth_dev *eth_dev)
        bp->tx_nr_rings = eth_dev->data->nb_tx_queues;
        bp->rx_nr_rings = eth_dev->data->nb_rx_queues;
 
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
        if (BNXT_VF(bp) && (bp->flags & BNXT_FLAG_NEW_RM)) {
                rc = bnxt_hwrm_check_vf_rings(bp);
                if (rc) {
@@ -570,25 +668,19 @@ static int bnxt_dev_configure_op(struct rte_eth_dev *eth_dev)
        /* Inherit new configurations */
        if (eth_dev->data->nb_rx_queues > bp->max_rx_rings ||
            eth_dev->data->nb_tx_queues > bp->max_tx_rings ||
+           eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues
+               + BNXT_NUM_ASYNC_CPR(bp) > bp->max_cp_rings ||
            eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues >
-           bp->max_cp_rings ||
-           eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues >
-           bp->max_stat_ctx ||
-           (uint32_t)(eth_dev->data->nb_rx_queues) > bp->max_ring_grps ||
-           (!(eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) &&
-            bp->max_vnics < eth_dev->data->nb_rx_queues)) {
-               PMD_DRV_LOG(ERR,
-                       "Insufficient resources to support requested config\n");
-               PMD_DRV_LOG(ERR,
-                       "Num Queues Requested: Tx %d, Rx %d\n",
-                       eth_dev->data->nb_tx_queues,
-                       eth_dev->data->nb_rx_queues);
-               PMD_DRV_LOG(ERR,
-                       "MAX: TxQ %d, RxQ %d, CQ %d Stat %d, Grp %d, Vnic %d\n",
-                       bp->max_tx_rings, bp->max_rx_rings, bp->max_cp_rings,
-                       bp->max_stat_ctx, bp->max_ring_grps, bp->max_vnics);
-               return -ENOSPC;
-       }
+           bp->max_stat_ctx)
+               goto resource_error;
+
+       if (BNXT_HAS_RING_GRPS(bp) &&
+           (uint32_t)(eth_dev->data->nb_rx_queues) > bp->max_ring_grps)
+               goto resource_error;
+
+       if (!(eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) &&
+           bp->max_vnics < eth_dev->data->nb_rx_queues)
+               goto resource_error;
 
        bp->rx_cp_nr_rings = bp->rx_nr_rings;
        bp->tx_cp_nr_rings = bp->tx_nr_rings;
@@ -601,6 +693,19 @@ static int bnxt_dev_configure_op(struct rte_eth_dev *eth_dev)
                bnxt_mtu_set_op(eth_dev, eth_dev->data->mtu);
        }
        return 0;
+
+resource_error:
+       PMD_DRV_LOG(ERR,
+                   "Insufficient resources to support requested config\n");
+       PMD_DRV_LOG(ERR,
+                   "Num Queues Requested: Tx %d, Rx %d\n",
+                   eth_dev->data->nb_tx_queues,
+                   eth_dev->data->nb_rx_queues);
+       PMD_DRV_LOG(ERR,
+                   "MAX: TxQ %d, RxQ %d, CQ %d Stat %d, Grp %d, Vnic %d\n",
+                   bp->max_tx_rings, bp->max_rx_rings, bp->max_cp_rings,
+                   bp->max_stat_ctx, bp->max_ring_grps, bp->max_vnics);
+       return -ENOSPC;
 }
 
 static void bnxt_print_link_info(struct rte_eth_dev *eth_dev)
@@ -618,12 +723,6 @@ static void bnxt_print_link_info(struct rte_eth_dev *eth_dev)
                        eth_dev->data->port_id);
 }
 
-static int bnxt_dev_lsc_intr_setup(struct rte_eth_dev *eth_dev)
-{
-       bnxt_print_link_info(eth_dev);
-       return 0;
-}
-
 /*
  * Determine whether the current configuration requires support for scattered
  * receive; return 1 if scattered receive is required and 0 if not.
@@ -648,6 +747,7 @@ static eth_rx_burst_t
 bnxt_receive_function(__rte_unused struct rte_eth_dev *eth_dev)
 {
 #ifdef RTE_ARCH_X86
+#ifndef RTE_LIBRTE_IEEE1588
        /*
         * Vector mode receive can be enabled only if scatter rx is not
         * in use and rx offloads are limited to VLAN stripping and
@@ -674,6 +774,7 @@ bnxt_receive_function(__rte_unused struct rte_eth_dev *eth_dev)
                    eth_dev->data->port_id,
                    eth_dev->data->scattered_rx,
                    eth_dev->data->dev_conf.rxmode.offloads);
+#endif
 #endif
        return bnxt_recv_pkts;
 }
@@ -682,14 +783,13 @@ static eth_tx_burst_t
 bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 {
 #ifdef RTE_ARCH_X86
+#ifndef RTE_LIBRTE_IEEE1588
        /*
-        * Vector mode receive can be enabled only if scatter tx is not
-        * in use and tx offloads other than VLAN insertion are not
-        * in use.
+        * Vector mode transmit can be enabled only if not using scatter rx
+        * or tx offloads.
         */
        if (!eth_dev->data->scattered_rx &&
-           !(eth_dev->data->dev_conf.txmode.offloads &
-             ~DEV_TX_OFFLOAD_VLAN_INSERT)) {
+           !eth_dev->data->dev_conf.txmode.offloads) {
                PMD_DRV_LOG(INFO, "Using vector mode transmit for port %d\n",
                            eth_dev->data->port_id);
                return bnxt_xmit_pkts_vec;
@@ -701,13 +801,33 @@ bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
                    eth_dev->data->port_id,
                    eth_dev->data->scattered_rx,
                    eth_dev->data->dev_conf.txmode.offloads);
+#endif
 #endif
        return bnxt_xmit_pkts;
 }
 
+/*
+ * Tear down and rebuild the driver resources; bnxt_dev_start_op calls this
+ * when BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE is set after
+ * bnxt_hwrm_if_change(bp, 1) succeeds.
+ *
+ * Returns the result of bnxt_init_resources(bp, true). The
+ * BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE flag is cleared whether or not the
+ * re-initialisation succeeded.
+ */
+static int bnxt_handle_if_change_status(struct bnxt *bp)
+{
+       int ret;
+
+       /* The firmware went through a reset and lost all contexts: keep the
+        * fatal flag raised during teardown so cleanup issues no hwrm.
+        */
+       bp->flags |= BNXT_FLAG_FATAL_ERROR;
+       bnxt_uninit_resources(bp, true);
+       bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+
+       /* The fatal flag is down again, so re-init can go ahead */
+       ret = bnxt_init_resources(bp, true);
+       bp->flags &= ~BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE;
+
+       return ret;
+}
+
 static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        uint64_t rx_offloads = eth_dev->data->dev_conf.rxmode.offloads;
        int vlan_mask = 0;
        int rc;
@@ -717,7 +837,16 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
                        "RxQ cnt %d > CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS %d\n",
                        bp->rx_cp_nr_rings, RTE_ETHDEV_QUEUE_STAT_CNTRS);
        }
-       bp->dev_stopped = 0;
+
+       bnxt_enable_int(bp);
+       rc = bnxt_hwrm_if_change(bp, 1);
+       if (!rc) {
+               if (bp->flags & BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE) {
+                       rc = bnxt_handle_if_change_status(bp);
+                       if (rc)
+                               return rc;
+               }
+       }
 
        rc = bnxt_init_chip(bp);
        if (rc)
@@ -737,10 +866,15 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 
        eth_dev->rx_pkt_burst = bnxt_receive_function(eth_dev);
        eth_dev->tx_pkt_burst = bnxt_transmit_function(eth_dev);
+
        bp->flags |= BNXT_FLAG_INIT_DONE;
+       eth_dev->data->dev_started = 1;
+       bp->dev_stopped = 0;
+       bnxt_schedule_fw_health_check(bp);
        return 0;
 
 error:
+       bnxt_hwrm_if_change(bp, 0);
        bnxt_shutdown_nic(bp);
        bnxt_free_tx_mbufs(bp);
        bnxt_free_rx_mbufs(bp);
@@ -749,7 +883,7 @@ error:
 
 static int bnxt_dev_set_link_up_op(struct rte_eth_dev *eth_dev)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        int rc = 0;
 
        if (!bp->link_info.link_up)
@@ -763,7 +897,7 @@ static int bnxt_dev_set_link_up_op(struct rte_eth_dev *eth_dev)
 
 static int bnxt_dev_set_link_down_op(struct rte_eth_dev *eth_dev)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
 
        eth_dev->data->dev_link.link_status = 0;
        bnxt_set_hwrm_link_config(bp, false);
@@ -775,24 +909,51 @@ static int bnxt_dev_set_link_down_op(struct rte_eth_dev *eth_dev)
 /* Unload the driver, release resources */
 static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+
+       eth_dev->data->dev_started = 0;
+       /* Prevent crashes when queues are still in use */
+       eth_dev->rx_pkt_burst = &bnxt_dummy_recv_pkts;
+       eth_dev->tx_pkt_burst = &bnxt_dummy_xmit_pkts;
+
+       bnxt_disable_int(bp);
+
+       /* disable uio/vfio intr/eventfd mapping */
+       rte_intr_disable(intr_handle);
+
+       bnxt_cancel_fw_health_check(bp);
 
        bp->flags &= ~BNXT_FLAG_INIT_DONE;
+       /* NOTE(review): dev_started was cleared at the top of this function
+        * through eth_dev; if bp->eth_dev refers to the same device this
+        * branch can never run - confirm and consider removing it.
+        */
        if (bp->eth_dev->data->dev_started) {
                /* TBD: STOP HW queues DMA */
                eth_dev->data->dev_link.link_status = 0;
        }
-       bnxt_set_hwrm_link_config(bp, false);
+       bnxt_dev_set_link_down_op(eth_dev);
+       /* Wait for link to be reset and the async notification to process. */
+       rte_delay_ms(BNXT_LINK_WAIT_INTERVAL * 2);
+
+       /* Clean queue intr-vector mapping */
+       rte_intr_efd_disable(intr_handle);
+       if (intr_handle->intr_vec != NULL) {
+               rte_free(intr_handle->intr_vec);
+               intr_handle->intr_vec = NULL;
+       }
+
        bnxt_hwrm_port_clr_stats(bp);
        bnxt_free_tx_mbufs(bp);
        bnxt_free_rx_mbufs(bp);
+       /* Process any remaining notifications in default completion queue */
+       bnxt_int_handler(eth_dev);
        bnxt_shutdown_nic(bp);
+       /* Mirrors the bnxt_hwrm_if_change(bp, 1) issued by bnxt_dev_start_op */
+       bnxt_hwrm_if_change(bp, 0);
        bp->dev_stopped = 1;
 }
 
 static void bnxt_dev_close_op(struct rte_eth_dev *eth_dev)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
 
        if (bp->dev_stopped == 0)
                bnxt_dev_stop_op(eth_dev);
@@ -812,12 +973,15 @@ static void bnxt_dev_close_op(struct rte_eth_dev *eth_dev)
 static void bnxt_mac_addr_remove_op(struct rte_eth_dev *eth_dev,
                                    uint32_t index)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        uint64_t pool_mask = eth_dev->data->mac_pool_sel[index];
        struct bnxt_vnic_info *vnic;
        struct bnxt_filter_info *filter, *temp_filter;
        uint32_t i;
 
+       if (is_bnxt_in_error(bp))
+               return;
+
        /*
         * Loop through all VNICs from the specified filter flow pools to
         * remove the corresponding MAC addr filter
@@ -848,9 +1012,14 @@ static int bnxt_mac_addr_add_op(struct rte_eth_dev *eth_dev,
                                struct rte_ether_addr *mac_addr,
                                uint32_t index, uint32_t pool)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        struct bnxt_vnic_info *vnic = &bp->vnic_info[pool];
        struct bnxt_filter_info *filter;
+       int rc = 0;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        if (BNXT_VF(bp) & !BNXT_VF_IS_TRUSTED(bp)) {
                PMD_DRV_LOG(ERR, "Cannot add MAC address to a VF interface\n");
@@ -874,19 +1043,33 @@ static int bnxt_mac_addr_add_op(struct rte_eth_dev *eth_dev,
                PMD_DRV_LOG(ERR, "L2 filter alloc failed\n");
                return -ENODEV;
        }
-       STAILQ_INSERT_TAIL(&vnic->filter, filter, next);
+
        filter->mac_index = index;
        memcpy(filter->l2_addr, mac_addr, RTE_ETHER_ADDR_LEN);
-       return bnxt_hwrm_set_l2_filter(bp, vnic->fw_vnic_id, filter);
+
+       rc = bnxt_hwrm_set_l2_filter(bp, vnic->fw_vnic_id, filter);
+       if (!rc) {
+               STAILQ_INSERT_TAIL(&vnic->filter, filter, next);
+       } else {
+               filter->mac_index = INVALID_MAC_INDEX;
+               memset(&filter->l2_addr, 0, RTE_ETHER_ADDR_LEN);
+               bnxt_free_filter(bp, filter);
+       }
+
+       return rc;
 }
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete)
 {
        int rc = 0;
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        struct rte_eth_link new;
        unsigned int cnt = BNXT_LINK_WAIT_CNT;
 
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
        memset(&new, 0, sizeof(new));
        do {
                /* Retrieve link info from hardware */
@@ -898,18 +1081,18 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete)
                                "Failed to retrieve link rc = 0x%x!\n", rc);
                        goto out;
                }
-               rte_delay_ms(BNXT_LINK_WAIT_INTERVAL);
 
-               if (!wait_to_complete)
+               if (!wait_to_complete || new.link_status)
                        break;
-       } while (!new.link_status && cnt--);
+
+               rte_delay_ms(BNXT_LINK_WAIT_INTERVAL);
+       } while (cnt--);
 
 out:
        /* Timed out or success */
        if (new.link_status != eth_dev->data->dev_link.link_status ||
        new.link_speed != eth_dev->data->dev_link.link_speed) {
-               memcpy(&eth_dev->data->dev_link, &new,
-                       sizeof(struct rte_eth_link));
+               rte_eth_linkstatus_set(eth_dev, &new);
 
                _rte_eth_dev_callback_process(eth_dev,
                                              RTE_ETH_EVENT_INTR_LSC,
@@ -921,86 +1104,195 @@ out:
        return rc;
 }
 
+/*
+ * Set BNXT_VNIC_INFO_PROMISC on VNIC 0 and push the rx mask to firmware.
+ *
+ * Returns the is_bnxt_in_error() code when the device is in error, 0 when
+ * bp->vnic_info is NULL (nothing to do), otherwise the result of
+ * bnxt_hwrm_cfa_l2_set_rx_mask(). On failure the VNIC flags are restored.
+ */
-static void bnxt_promiscuous_enable_op(struct rte_eth_dev *eth_dev)
+static int bnxt_promiscuous_enable_op(struct rte_eth_dev *eth_dev)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        struct bnxt_vnic_info *vnic;
+       uint32_t old_flags;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        if (bp->vnic_info == NULL)
-               return;
+               return 0;
 
        vnic = &bp->vnic_info[0];
 
+       /* Keep the previous flags so a failed firmware call can be undone */
+       old_flags = vnic->flags;
        vnic->flags |= BNXT_VNIC_INFO_PROMISC;
-       bnxt_hwrm_cfa_l2_set_rx_mask(bp, vnic, 0, NULL);
+       rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, vnic, 0, NULL);
+       if (rc != 0)
+               vnic->flags = old_flags;
+
+       return rc;
 }
 
+/*
+ * Clear BNXT_VNIC_INFO_PROMISC on VNIC 0 and push the rx mask to firmware.
+ *
+ * Returns the is_bnxt_in_error() code when the device is in error, 0 when
+ * bp->vnic_info is NULL (nothing to do), otherwise the result of
+ * bnxt_hwrm_cfa_l2_set_rx_mask(). On failure the VNIC flags are restored.
+ */
-static void bnxt_promiscuous_disable_op(struct rte_eth_dev *eth_dev)
+static int bnxt_promiscuous_disable_op(struct rte_eth_dev *eth_dev)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        struct bnxt_vnic_info *vnic;
+       uint32_t old_flags;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        if (bp->vnic_info == NULL)
-               return;
+               return 0;
 
        vnic = &bp->vnic_info[0];
 
+       /* Keep the previous flags so a failed firmware call can be undone */
+       old_flags = vnic->flags;
        vnic->flags &= ~BNXT_VNIC_INFO_PROMISC;
-       bnxt_hwrm_cfa_l2_set_rx_mask(bp, vnic, 0, NULL);
+       rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, vnic, 0, NULL);
+       if (rc != 0)
+               vnic->flags = old_flags;
+
+       return rc;
 }
 
+/*
+ * Set BNXT_VNIC_INFO_ALLMULTI on VNIC 0 and push the rx mask to firmware.
+ *
+ * Returns the is_bnxt_in_error() code when the device is in error, 0 when
+ * bp->vnic_info is NULL (nothing to do), otherwise the result of
+ * bnxt_hwrm_cfa_l2_set_rx_mask(). On failure the VNIC flags are restored.
+ */
-static void bnxt_allmulticast_enable_op(struct rte_eth_dev *eth_dev)
+static int bnxt_allmulticast_enable_op(struct rte_eth_dev *eth_dev)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        struct bnxt_vnic_info *vnic;
+       uint32_t old_flags;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        if (bp->vnic_info == NULL)
-               return;
+               return 0;
 
        vnic = &bp->vnic_info[0];
 
+       /* Keep the previous flags so a failed firmware call can be undone */
+       old_flags = vnic->flags;
        vnic->flags |= BNXT_VNIC_INFO_ALLMULTI;
-       bnxt_hwrm_cfa_l2_set_rx_mask(bp, vnic, 0, NULL);
+       rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, vnic, 0, NULL);
+       if (rc != 0)
+               vnic->flags = old_flags;
+
+       return rc;
 }
 
+/*
+ * Clear BNXT_VNIC_INFO_ALLMULTI on VNIC 0 and push the rx mask to firmware.
+ *
+ * Returns the is_bnxt_in_error() code when the device is in error, 0 when
+ * bp->vnic_info is NULL (nothing to do), otherwise the result of
+ * bnxt_hwrm_cfa_l2_set_rx_mask(). On failure the VNIC flags are restored.
+ */
-static void bnxt_allmulticast_disable_op(struct rte_eth_dev *eth_dev)
+static int bnxt_allmulticast_disable_op(struct rte_eth_dev *eth_dev)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        struct bnxt_vnic_info *vnic;
+       uint32_t old_flags;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        if (bp->vnic_info == NULL)
-               return;
+               return 0;
 
        vnic = &bp->vnic_info[0];
 
+       /* Keep the previous flags so a failed firmware call can be undone */
+       old_flags = vnic->flags;
        vnic->flags &= ~BNXT_VNIC_INFO_ALLMULTI;
-       bnxt_hwrm_cfa_l2_set_rx_mask(bp, vnic, 0, NULL);
+       rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, vnic, 0, NULL);
+       if (rc != 0)
+               vnic->flags = old_flags;
+
+       return rc;
+}
+
+/*
+ * Look up the rx queue stored at index qid in the ethdev rx_queues array.
+ * Returns NULL when qid is not below bp->rx_nr_rings.
+ */
+static struct bnxt_rx_queue *bnxt_qid_to_rxq(struct bnxt *bp, uint16_t qid)
+{
+       if (qid < bp->rx_nr_rings)
+               return bp->eth_dev->data->rx_queues[qid];
+
+       return NULL;
+}
+
+/*
+ * Translate an RSS table entry (firmware ring or ring-group id) back to an
+ * rx queue id.
+ *
+ * With ring groups, fwr is matched against bp->grp_info[].fw_grp_id and the
+ * matching array index is returned. Without ring groups, fwr is matched
+ * against each rx queue's rx ring fw_ring_id and that queue's index field
+ * is returned. INVALID_HW_RING_ID is returned when nothing matches.
+ */
+static uint16_t bnxt_rss_to_qid(struct bnxt *bp, uint16_t fwr)
+{
+       unsigned int q;
+
+       if (BNXT_HAS_RING_GRPS(bp)) {
+               for (q = 0; q < bp->rx_nr_rings; q++) {
+                       if (bp->grp_info[q].fw_grp_id == fwr)
+                               return q;
+               }
+
+               return INVALID_HW_RING_ID;
+       }
+
+       for (q = 0; q < bp->rx_nr_rings; q++) {
+               struct bnxt_rx_queue *rxq = bp->eth_dev->data->rx_queues[q];
+
+               if (rxq->rx_ring->rx_ring_struct->fw_ring_id == fwr)
+                       return rxq->index;
+       }
+
+       return INVALID_HW_RING_ID;
 }
 
+/*
+ * Update the RSS redirection table of VNIC 0 from reta_conf.
+ *
+ * reta_size must equal bnxt_rss_hash_tbl_size(bp). Only entries whose bit
+ * is set in the corresponding reta_conf[].mask are written. On THOR each
+ * entry occupies two rss_table slots (rx ring id, completion ring id); on
+ * other chips one slot holds the ring-group id from vnic->fw_grp_ids.
+ *
+ * Returns the is_bnxt_in_error() code when the device is in error, -EINVAL
+ * when the VNIC has no rss_table, RSS is not enabled in mq_mode, reta_size
+ * is wrong or an entry names a queue id that is not below rx_nr_rings,
+ * and 0 otherwise.
+ */
 static int bnxt_reta_update_op(struct rte_eth_dev *eth_dev,
                            struct rte_eth_rss_reta_entry64 *reta_conf,
                            uint16_t reta_size)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf;
-       struct bnxt_vnic_info *vnic;
-       int i;
+       struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
+       uint16_t tbl_size = bnxt_rss_hash_tbl_size(bp);
+       uint16_t idx, sft;
+       int i, rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
+       if (!vnic->rss_table)
+               return -EINVAL;
 
        if (!(dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
                return -EINVAL;
 
-       if (reta_size != HW_HASH_INDEX_SIZE) {
+       if (reta_size != tbl_size) {
                PMD_DRV_LOG(ERR, "The configured hash table lookup size "
                        "(%d) must equal the size supported by the hardware "
-                       "(%d)\n", reta_size, HW_HASH_INDEX_SIZE);
+                       "(%d)\n", reta_size, tbl_size);
                return -EINVAL;
        }
-       /* Update the RSS VNIC(s) */
-       for (i = 0; i < bp->max_vnics; i++) {
-               vnic = &bp->vnic_info[i];
-               memcpy(vnic->rss_table, reta_conf, reta_size);
-               bnxt_hwrm_vnic_rss_cfg(bp, vnic);
+
+       for (i = 0; i < reta_size; i++) {
+               struct bnxt_rx_queue *rxq;
+
+               idx = i / RTE_RETA_GROUP_SIZE;
+               sft = i % RTE_RETA_GROUP_SIZE;
+
+               /* Skip entries the caller did not select in the mask */
+               if (!(reta_conf[idx].mask & (1ULL << sft)))
+                       continue;
+
+               rxq = bnxt_qid_to_rxq(bp, reta_conf[idx].reta[sft]);
+               if (!rxq) {
+                       PMD_DRV_LOG(ERR, "Invalid ring in reta_conf.\n");
+                       return -EINVAL;
+               }
+
+               /* Exactly one of the two layouts is written per entry; an
+                * extra unconditional rss_table[i] store after this branch
+                * would clobber THOR ring-id pairs written earlier.
+                */
+               if (BNXT_CHIP_THOR(bp)) {
+                       vnic->rss_table[i * 2] =
+                               rxq->rx_ring->rx_ring_struct->fw_ring_id;
+                       vnic->rss_table[i * 2 + 1] =
+                               rxq->cp_ring->cp_ring_struct->fw_ring_id;
+               } else {
+                       vnic->rss_table[i] =
+                           vnic->fw_grp_ids[reta_conf[idx].reta[sft]];
+               }
        }
+
+       bnxt_hwrm_vnic_rss_cfg(bp, vnic);
        return 0;
 }
 
@@ -1008,10 +1300,15 @@ static int bnxt_reta_query_op(struct rte_eth_dev *eth_dev,
                              struct rte_eth_rss_reta_entry64 *reta_conf,
                              uint16_t reta_size)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
-       struct rte_intr_handle *intr_handle
-               = &bp->pdev->intr_handle;
+       uint16_t tbl_size = bnxt_rss_hash_tbl_size(bp);
+       uint16_t idx, sft, i;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        /* Retrieve from the default VNIC */
        if (!vnic)
@@ -1019,18 +1316,32 @@ static int bnxt_reta_query_op(struct rte_eth_dev *eth_dev,
        if (!vnic->rss_table)
                return -EINVAL;
 
-       if (reta_size != HW_HASH_INDEX_SIZE) {
+       if (reta_size != tbl_size) {
                PMD_DRV_LOG(ERR, "The configured hash table lookup size "
                        "(%d) must equal the size supported by the hardware "
-                       "(%d)\n", reta_size, HW_HASH_INDEX_SIZE);
+                       "(%d)\n", reta_size, tbl_size);
                return -EINVAL;
        }
-       /* EW - need to revisit here copying from uint64_t to uint16_t */
-       memcpy(reta_conf, vnic->rss_table, reta_size);
 
-       if (rte_intr_allow_others(intr_handle)) {
-               if (eth_dev->data->dev_conf.intr_conf.lsc != 0)
-                       bnxt_dev_lsc_intr_setup(eth_dev);
+       for (idx = 0, i = 0; i < reta_size; i++) {
+               idx = i / RTE_RETA_GROUP_SIZE;
+               sft = i % RTE_RETA_GROUP_SIZE;
+
+               if (reta_conf[idx].mask & (1ULL << sft)) {
+                       uint16_t qid;
+
+                       if (BNXT_CHIP_THOR(bp))
+                               qid = bnxt_rss_to_qid(bp,
+                                                     vnic->rss_table[i * 2]);
+                       else
+                               qid = bnxt_rss_to_qid(bp, vnic->rss_table[i]);
+
+                       if (qid == INVALID_HW_RING_ID) {
+                               PMD_DRV_LOG(ERR, "Inv. entry in rss table.\n");
+                               return -EINVAL;
+                       }
+                       reta_conf[idx].reta[sft] = qid;
+               }
        }
 
        return 0;
@@ -1039,11 +1350,16 @@ static int bnxt_reta_query_op(struct rte_eth_dev *eth_dev,
 static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev,
                                   struct rte_eth_rss_conf *rss_conf)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf;
        struct bnxt_vnic_info *vnic;
        uint16_t hash_type = 0;
        unsigned int i;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        /*
         * If RSS enablement were different than dev_configure,
@@ -1095,11 +1411,15 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev,
 static int bnxt_rss_hash_conf_get_op(struct rte_eth_dev *eth_dev,
                                     struct rte_eth_rss_conf *rss_conf)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
-       int len;
+       int len, rc;
        uint32_t hash_types;
 
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
        /* RSS configuration is the same for all VNICs */
        if (vnic && vnic->rss_hash_key) {
                if (rss_conf->rss_key) {
@@ -1153,10 +1473,14 @@ static int bnxt_rss_hash_conf_get_op(struct rte_eth_dev *eth_dev,
 static int bnxt_flow_ctrl_get_op(struct rte_eth_dev *dev,
                               struct rte_eth_fc_conf *fc_conf)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        struct rte_eth_link link_info;
        int rc;
 
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
        rc = bnxt_get_hwrm_link_config(bp, &link_info);
        if (rc)
                return rc;
@@ -1185,7 +1509,12 @@ static int bnxt_flow_ctrl_get_op(struct rte_eth_dev *dev,
 static int bnxt_flow_ctrl_set_op(struct rte_eth_dev *dev,
                               struct rte_eth_fc_conf *fc_conf)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        if (!BNXT_SINGLE_PF(bp) || BNXT_VF(bp)) {
                PMD_DRV_LOG(ERR, "Flow Control Settings cannot be modified\n");
@@ -1241,10 +1570,14 @@ static int
 bnxt_udp_tunnel_port_add_op(struct rte_eth_dev *eth_dev,
                         struct rte_eth_udp_tunnel *udp_tunnel)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        uint16_t tunnel_type = 0;
        int rc = 0;
 
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
        switch (udp_tunnel->prot_type) {
        case RTE_TUNNEL_TYPE_VXLAN:
                if (bp->vxlan_port_cnt) {
@@ -1289,13 +1622,17 @@ static int
 bnxt_udp_tunnel_port_del_op(struct rte_eth_dev *eth_dev,
                         struct rte_eth_udp_tunnel *udp_tunnel)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        uint16_t tunnel_type = 0;
        uint16_t port = 0;
        int rc = 0;
 
-       switch (udp_tunnel->prot_type) {
-       case RTE_TUNNEL_TYPE_VXLAN:
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
+       switch (udp_tunnel->prot_type) {
+       case RTE_TUNNEL_TYPE_VXLAN:
                if (!bp->vxlan_port_cnt) {
                        PMD_DRV_LOG(ERR, "No Tunnel port configured yet\n");
                        return -EINVAL;
@@ -1348,148 +1685,110 @@ bnxt_udp_tunnel_port_del_op(struct rte_eth_dev *eth_dev,
 
 static int bnxt_del_vlan_filter(struct bnxt *bp, uint16_t vlan_id)
 {
-       struct bnxt_filter_info *filter, *temp_filter, *new_filter;
+       struct bnxt_filter_info *filter;
        struct bnxt_vnic_info *vnic;
-       unsigned int i;
        int rc = 0;
-       uint32_t chk = HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_OVLAN;
-
-       /* Cycle through all VNICs */
-       for (i = 0; i < bp->nr_vnics; i++) {
-               /*
-                * For each VNIC and each associated filter(s)
-                * if VLAN exists && VLAN matches vlan_id
-                *      remove the MAC+VLAN filter
-                *      add a new MAC only filter
-                * else
-                *      VLAN filter doesn't exist, just skip and continue
-                */
-               vnic = &bp->vnic_info[i];
-               filter = STAILQ_FIRST(&vnic->filter);
-               while (filter) {
-                       temp_filter = STAILQ_NEXT(filter, next);
+       uint32_t chk = HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN;
 
-                       if (filter->enables & chk &&
-                           filter->l2_ovlan == vlan_id) {
-                               /* Must delete the filter */
-                               STAILQ_REMOVE(&vnic->filter, filter,
-                                             bnxt_filter_info, next);
-                               bnxt_hwrm_clear_l2_filter(bp, filter);
-                               STAILQ_INSERT_TAIL(&bp->free_filter_list,
-                                                  filter, next);
+       /* Remove the MAC+VLAN filter for vlan_id from the default VNIC.
+        *
+        * The default VNIC's filter list is searched for an entry that has
+        * the inner-VLAN enable bit set, carries vlan_id and matches
+        * bp->mac_addr. The first match is cleared through
+        * bnxt_hwrm_clear_l2_filter() and moved to bp->free_filter_list;
+        * no replacement MAC-only filter is created.
+        *
+        * Returns 0 on success, the bnxt_hwrm_clear_l2_filter() status if
+        * the clear fails, or -ENOENT when no filter matches.
+        */
+       vnic = BNXT_GET_DEFAULT_VNIC(bp);
+       filter = STAILQ_FIRST(&vnic->filter);
+       while (filter) {
+               /* Search for this matching MAC+VLAN filter */
+               if (filter->enables & chk && filter->l2_ivlan == vlan_id &&
+                   !memcmp(filter->l2_addr,
+                           bp->mac_addr,
+                           RTE_ETHER_ADDR_LEN)) {
+                       /* Delete the filter */
+                       rc = bnxt_hwrm_clear_l2_filter(bp, filter);
+                       if (rc)
+                               return rc; /* clear failed: the filter stays on the VNIC list */
+                       STAILQ_REMOVE(&vnic->filter, filter,
+                                     bnxt_filter_info, next);
+                       STAILQ_INSERT_TAIL(&bp->free_filter_list, filter, next);
 
-                               /*
-                                * Need to examine to see if the MAC
-                                * filter already existed or not before
-                                * allocating a new one
-                                */
-
-                               new_filter = bnxt_alloc_filter(bp);
-                               if (!new_filter) {
-                                       PMD_DRV_LOG(ERR,
-                                                       "MAC/VLAN filter alloc failed\n");
-                                       rc = -ENOMEM;
-                                       goto exit;
-                               }
-                               STAILQ_INSERT_TAIL(&vnic->filter,
-                                               new_filter, next);
-                               /* Inherit MAC from previous filter */
-                               new_filter->mac_index =
-                                       filter->mac_index;
-                               memcpy(new_filter->l2_addr, filter->l2_addr,
-                                      RTE_ETHER_ADDR_LEN);
-                               /* MAC only filter */
-                               rc = bnxt_hwrm_set_l2_filter(bp,
-                                                            vnic->fw_vnic_id,
-                                                            new_filter);
-                               if (rc)
-                                       goto exit;
-                               PMD_DRV_LOG(INFO,
-                                           "Del Vlan filter for %d\n",
-                                           vlan_id);
-                       }
-                       filter = temp_filter;
+                       PMD_DRV_LOG(INFO,
+                                   "Del Vlan filter for %d\n",
+                                   vlan_id);
+                       return rc; /* rc is 0 here: only the first match is removed */
                }
+               filter = STAILQ_NEXT(filter, next);
        }
-exit:
-       return rc;
+       return -ENOENT; /* no MAC+VLAN filter for vlan_id on the default VNIC */
 }
 
 static int bnxt_add_vlan_filter(struct bnxt *bp, uint16_t vlan_id)
 {
-       struct bnxt_filter_info *filter, *temp_filter, *new_filter;
+       struct bnxt_filter_info *filter;
        struct bnxt_vnic_info *vnic;
-       unsigned int i;
        int rc = 0;
        uint32_t en = HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN |
                HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN_MASK;
        uint32_t chk = HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN;
 
-       /* Cycle through all VNICs */
-       for (i = 0; i < bp->nr_vnics; i++) {
-               /*
-                * For each VNIC and each associated filter(s)
-                * if VLAN exists:
-                *   if VLAN matches vlan_id
-                *      VLAN filter already exists, just skip and continue
-                *   else
-                *      add a new MAC+VLAN filter
-                * else
-                *   Remove the old MAC only filter
-                *    Add a new MAC+VLAN filter
-                */
-               vnic = &bp->vnic_info[i];
-               filter = STAILQ_FIRST(&vnic->filter);
-               while (filter) {
-                       temp_filter = STAILQ_NEXT(filter, next);
+       /* Add a MAC+VLAN filter for vlan_id on the default VNIC.
+        *
+        * Returns 0 on success, -EEXIST if the default VNIC already holds a
+        * filter for vlan_id and bp->mac_addr, -ENOMEM if no filter could be
+        * allocated, or the bnxt_hwrm_set_l2_filter() status on failure.
+        *
+        * Implementation notes on the use of VNIC in this command:
+        *
+        * By default, these filters belong to default vnic for the function.
+        * Once these filters are set up, only destination VNIC can be modified.
+        * If the destination VNIC is not specified in this command,
+        * then the HWRM shall only create an l2 context id.
+        */
 
-                       if (filter->enables & chk) {
-                               if (filter->l2_ivlan == vlan_id)
-                                       goto cont;
-                       } else {
-                               /* Must delete the MAC filter */
-                               STAILQ_REMOVE(&vnic->filter, filter,
-                                               bnxt_filter_info, next);
-                               bnxt_hwrm_clear_l2_filter(bp, filter);
-                               filter->l2_ovlan = 0;
-                               STAILQ_INSERT_TAIL(&bp->free_filter_list,
-                                                  filter, next);
-                       }
-                       new_filter = bnxt_alloc_filter(bp);
-                       if (!new_filter) {
-                               PMD_DRV_LOG(ERR,
-                                               "MAC/VLAN filter alloc failed\n");
-                               rc = -ENOMEM;
-                               goto exit;
-                       }
-                       STAILQ_INSERT_TAIL(&vnic->filter, new_filter, next);
-                       /* Inherit MAC from the previous filter */
-                       new_filter->mac_index = filter->mac_index;
-                       memcpy(new_filter->l2_addr, filter->l2_addr,
-                              RTE_ETHER_ADDR_LEN);
-                       /* MAC + VLAN ID filter */
-                       new_filter->l2_ivlan = vlan_id;
-                       new_filter->l2_ivlan_mask = 0xF000;
-                       new_filter->enables |= en;
-                       rc = bnxt_hwrm_set_l2_filter(bp,
-                                       vnic->fw_vnic_id,
-                                       new_filter);
-                       if (rc)
-                               goto exit;
-                       PMD_DRV_LOG(INFO,
-                                   "Added Vlan filter for %d\n", vlan_id);
-cont:
-                       filter = temp_filter;
-               }
+       vnic = BNXT_GET_DEFAULT_VNIC(bp);
+       filter = STAILQ_FIRST(&vnic->filter);
+       /* Check if the VLAN has already been added */
+       while (filter) {
+               if (filter->enables & chk && filter->l2_ivlan == vlan_id &&
+                   !memcmp(filter->l2_addr, bp->mac_addr, RTE_ETHER_ADDR_LEN))
+                       return -EEXIST; /* same VLAN and MAC already on the default VNIC */
+
+               filter = STAILQ_NEXT(filter, next);
        }
-exit:
+
+       /* No match found. Alloc a fresh filter and issue the L2_FILTER_ALLOC
+        * command to create MAC+VLAN filter with the right flags, enables set.
+        */
+       filter = bnxt_alloc_filter(bp);
+       if (!filter) {
+               PMD_DRV_LOG(ERR,
+                           "MAC/VLAN filter alloc failed\n");
+               return -ENOMEM;
+       }
+       /* MAC + VLAN ID filter */
+       filter->l2_ivlan = vlan_id;
+       filter->l2_ivlan_mask = 0x0FFF; /* mask covers the low 12 bits of the tag */
+       filter->enables |= en;
+       rc = bnxt_hwrm_set_l2_filter(bp, vnic->fw_vnic_id, filter);
+       if (rc) {
+               /* Free the newly allocated filter as we were
+                * not able to create the filter in hardware.
+                */
+               filter->fw_l2_filter_id = UINT64_MAX; /* NOTE(review): presumably the "no firmware filter" sentinel - confirm */
+               STAILQ_INSERT_TAIL(&bp->free_filter_list, filter, next);
+               return rc;
+       }
+
+       /* Add this new filter to the list */
+       STAILQ_INSERT_TAIL(&vnic->filter, filter, next);
+       PMD_DRV_LOG(INFO,
+                   "Added Vlan filter for %d\n", vlan_id);
        return rc;
 }
 
 static int bnxt_vlan_filter_set_op(struct rte_eth_dev *eth_dev,
                uint16_t vlan_id, int on)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        /* These operations apply to ALL existing MAC/VLAN filters */
        if (on)
@@ -1501,9 +1800,14 @@ static int bnxt_vlan_filter_set_op(struct rte_eth_dev *eth_dev,
 static int
 bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
        unsigned int i;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        if (mask & ETH_VLAN_FILTER_MASK) {
                if (!(rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)) {
@@ -1529,45 +1833,113 @@ bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask)
                        !!(rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP));
        }
 
-       if (mask & ETH_VLAN_EXTEND_MASK)
-               PMD_DRV_LOG(ERR, "Extend VLAN Not supported\n");
+       if (mask & ETH_VLAN_EXTEND_MASK) {
+               if (rx_offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
+                       PMD_DRV_LOG(DEBUG, "Extend VLAN supported\n");
+               else
+                       PMD_DRV_LOG(INFO, "Extend VLAN unsupported\n");
+       }
+
+       return 0;
+}
+
+/*
+ * Select the TPID used for the outer VLAN tag.
+ *
+ * Only ETH_VLAN_TYPE_OUTER is accepted, and only while
+ * DEV_RX_OFFLOAD_VLAN_EXTEND is set in the configured Rx offloads.
+ * Accepted TPIDs are RTE_ETHER_TYPE_QINQ, RTE_ETHER_TYPE_VLAN, 0x9100,
+ * 0x9200 and 0x9300. The matching TX_BD_LONG_CFA_META_VLAN_TPID_*
+ * selector, OR-ed with the TPID value itself, is stored in
+ * bp->outer_tpid_bd.
+ *
+ * Returns 0 on success, or -EINVAL for an unknown VLAN type, for the
+ * inner VLAN type, for an unsupported TPID, or when QinQ is not enabled.
+ */
+static int
+bnxt_vlan_tpid_set_op(struct rte_eth_dev *dev, enum rte_vlan_type vlan_type,
+                     uint16_t tpid)
+{
+       struct bnxt *bp = dev->data->dev_private;
+       int qinq = dev->data->dev_conf.rxmode.offloads &
+                  DEV_RX_OFFLOAD_VLAN_EXTEND;
+
+       if (vlan_type != ETH_VLAN_TYPE_INNER &&
+           vlan_type != ETH_VLAN_TYPE_OUTER) {
+               /* Newline-terminated like every other log call in this file */
+               PMD_DRV_LOG(ERR,
+                           "Unsupported vlan type.\n");
+               return -EINVAL;
+       }
+       if (!qinq) {
+               PMD_DRV_LOG(ERR,
+                           "QinQ not enabled. Needs to be ON as we can "
+                           "accelerate only outer vlan\n");
+               return -EINVAL;
+       }
+       /* Checked after the QinQ test so the reported error is unchanged */
+       if (vlan_type == ETH_VLAN_TYPE_INNER) {
+               PMD_DRV_LOG(ERR,
+                           "Can accelerate only outer vlan in QinQ\n");
+               return -EINVAL;
+       }
+
+       /* Outer VLAN: map the TPID onto its Tx BD selector value */
+       switch (tpid) {
+       case RTE_ETHER_TYPE_QINQ:
+               bp->outer_tpid_bd = TX_BD_LONG_CFA_META_VLAN_TPID_TPID88A8;
+               break;
+       case RTE_ETHER_TYPE_VLAN:
+               bp->outer_tpid_bd = TX_BD_LONG_CFA_META_VLAN_TPID_TPID8100;
+               break;
+       case 0x9100:
+               bp->outer_tpid_bd = TX_BD_LONG_CFA_META_VLAN_TPID_TPID9100;
+               break;
+       case 0x9200:
+               bp->outer_tpid_bd = TX_BD_LONG_CFA_META_VLAN_TPID_TPID9200;
+               break;
+       case 0x9300:
+               bp->outer_tpid_bd = TX_BD_LONG_CFA_META_VLAN_TPID_TPID9300;
+               break;
+       default:
+               PMD_DRV_LOG(ERR, "Invalid TPID: %x\n", tpid);
+               return -EINVAL;
+       }
+       /* NOTE(review): the raw TPID is also OR-ed into the value; how the
+        * Tx path consumes these bits is not visible here - confirm.
+        */
+       bp->outer_tpid_bd |= tpid;
+       PMD_DRV_LOG(INFO, "outer_tpid_bd = %x\n", bp->outer_tpid_bd);
+
+       return 0;
+}
 
 static int
 bnxt_set_default_mac_addr_op(struct rte_eth_dev *dev,
-                       struct rte_ether_addr *addr)
+                            struct rte_ether_addr *addr)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        /* Default Filter is tied to VNIC 0 */
        struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
        struct bnxt_filter_info *filter;
        int rc;
 
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
        if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp))
                return -EPERM;
 
-       memcpy(bp->mac_addr, addr, sizeof(bp->mac_addr));
+       if (rte_is_zero_ether_addr(addr))
+               return -EINVAL;
 
        STAILQ_FOREACH(filter, &vnic->filter, next) {
                /* Default Filter is at Index 0 */
                if (filter->mac_index != 0)
                        continue;
-               rc = bnxt_hwrm_clear_l2_filter(bp, filter);
-               if (rc)
-                       return rc;
+
                memcpy(filter->l2_addr, bp->mac_addr, RTE_ETHER_ADDR_LEN);
                memset(filter->l2_addr_mask, 0xff, RTE_ETHER_ADDR_LEN);
                filter->flags |= HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_RX;
                filter->enables |=
                        HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR |
                        HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR_MASK;
+
                rc = bnxt_hwrm_set_l2_filter(bp, vnic->fw_vnic_id, filter);
                if (rc)
                        return rc;
-               filter->mac_index = 0;
+
+               memcpy(bp->mac_addr, addr, RTE_ETHER_ADDR_LEN);
                PMD_DRV_LOG(DEBUG, "Set MAC addr\n");
+               return 0;
        }
 
        return 0;
@@ -1578,10 +1950,15 @@ bnxt_dev_set_mc_addr_list_op(struct rte_eth_dev *eth_dev,
                          struct rte_ether_addr *mc_addr_set,
                          uint32_t nb_mc_addr)
 {
-       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
+       struct bnxt *bp = eth_dev->data->dev_private;
        char *mc_addr_list = (char *)mc_addr_set;
        struct bnxt_vnic_info *vnic;
        uint32_t off = 0, i = 0;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        vnic = &bp->vnic_info[0];
 
@@ -1607,7 +1984,7 @@ allmulti:
 static int
 bnxt_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        uint8_t fw_major = (bp->fw_ver >> 24) & 0xff;
        uint8_t fw_minor = (bp->fw_ver >> 16) & 0xff;
        uint8_t fw_updt = (bp->fw_ver >> 8) & 0xff;
@@ -1637,7 +2014,7 @@ bnxt_rxq_info_get_op(struct rte_eth_dev *dev, uint16_t queue_id,
 
        qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
        qinfo->conf.rx_drop_en = 0;
-       qinfo->conf.rx_deferred_start = 0;
+       qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
 }
 
 static void
@@ -1662,22 +2039,17 @@ bnxt_txq_info_get_op(struct rte_eth_dev *dev, uint16_t queue_id,
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
 {
        struct bnxt *bp = eth_dev->data->dev_private;
-       struct rte_eth_dev_info dev_info;
        uint32_t new_pkt_size;
        uint32_t rc = 0;
        uint32_t i;
 
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
        new_pkt_size = new_mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN +
                       VLAN_TAG_SIZE * BNXT_NUM_VLANS;
 
-       bnxt_dev_info_get_op(eth_dev, &dev_info);
-
-       if (new_mtu < RTE_ETHER_MIN_MTU || new_mtu > BNXT_MAX_MTU) {
-               PMD_DRV_LOG(ERR, "MTU requested must be within (%d, %d)\n",
-                       RTE_ETHER_MIN_MTU, BNXT_MAX_MTU);
-               return -EINVAL;
-       }
-
 #ifdef RTE_ARCH_X86
        /*
         * If vector-mode tx/rx is active, disallow any MTU change that would
@@ -1707,15 +2079,12 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
 
        eth_dev->data->dev_conf.rxmode.max_rx_pkt_len = new_pkt_size;
 
-       eth_dev->data->mtu = new_mtu;
-       PMD_DRV_LOG(INFO, "New MTU is %d\n", eth_dev->data->mtu);
-
        for (i = 0; i < bp->nr_vnics; i++) {
                struct bnxt_vnic_info *vnic = &bp->vnic_info[i];
                uint16_t size = 0;
 
-               vnic->mru = bp->eth_dev->data->mtu + RTE_ETHER_HDR_LEN +
-                                       RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE * 2;
+               vnic->mru = new_mtu + RTE_ETHER_HDR_LEN +
+                               RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE * 2;
                rc = bnxt_hwrm_vnic_cfg(bp, vnic);
                if (rc)
                        break;
@@ -1730,16 +2099,22 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
                }
        }
 
+       PMD_DRV_LOG(INFO, "New MTU is %d\n", new_mtu);
+
        return rc;
 }
 
 static int
 bnxt_vlan_pvid_set_op(struct rte_eth_dev *dev, uint16_t pvid, int on)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        uint16_t vlan = bp->vlan;
        int rc;
 
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
        if (!BNXT_SINGLE_PF(bp) || BNXT_VF(bp)) {
                PMD_DRV_LOG(ERR,
                        "PVID cannot be modified for this function\n");
@@ -1756,7 +2131,12 @@ bnxt_vlan_pvid_set_op(struct rte_eth_dev *dev, uint16_t pvid, int on)
 static int
 bnxt_dev_led_on_op(struct rte_eth_dev *dev)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
+       int rc; /* status of the device-error check below */
+
+       rc = is_bnxt_in_error(bp); /* a non-zero status is returned before any HWRM request is made */
+       if (rc)
+               return rc;
 
        return bnxt_hwrm_port_led_cfg(bp, true);
 }
@@ -1764,7 +2144,12 @@ bnxt_dev_led_on_op(struct rte_eth_dev *dev)
 static int
 bnxt_dev_led_off_op(struct rte_eth_dev *dev)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
+       int rc; /* status of the device-error check below */
+
+       rc = is_bnxt_in_error(bp); /* a non-zero status is returned before any HWRM request is made */
+       if (rc)
+               return rc;
 
        return bnxt_hwrm_port_led_cfg(bp, false);
 }
@@ -1772,43 +2157,32 @@ bnxt_dev_led_off_op(struct rte_eth_dev *dev)
 static uint32_t
 bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
+       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
        uint32_t desc = 0, raw_cons = 0, cons;
        struct bnxt_cp_ring_info *cpr;
        struct bnxt_rx_queue *rxq;
        struct rx_pkt_cmpl *rxcmp;
-       uint16_t cmp_type;
-       uint8_t cmp = 1;
-       bool valid;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        rxq = dev->data->rx_queues[rx_queue_id];
        cpr = rxq->cp_ring;
-       valid = cpr->valid;
+       raw_cons = cpr->cp_raw_cons;
 
-       while (raw_cons < rxq->nb_rx_desc) {
+       while (1) {
                cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
+               rte_prefetch0(&cpr->cp_desc_ring[cons]);
                rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
 
-               if (!CMPL_VALID(rxcmp, valid))
-                       goto nothing_to_do;
-               valid = FLIP_VALID(cons, cpr->cp_ring_struct->ring_mask, valid);
-               cmp_type = CMP_TYPE(rxcmp);
-               if (cmp_type == RX_TPA_END_CMPL_TYPE_RX_TPA_END) {
-                       cmp = (rte_le_to_cpu_32(
-                                       ((struct rx_tpa_end_cmpl *)
-                                        (rxcmp))->agg_bufs_v1) &
-                              RX_TPA_END_CMPL_AGG_BUFS_MASK) >>
-                               RX_TPA_END_CMPL_AGG_BUFS_SFT;
-                       desc++;
-               } else if (cmp_type == 0x11) {
-                       desc++;
-                       cmp = (rxcmp->agg_bufs_v1 &
-                                  RX_PKT_CMPL_AGG_BUFS_MASK) >>
-                               RX_PKT_CMPL_AGG_BUFS_SFT;
+               if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct)) {
+                       break;
                } else {
-                       cmp = 1;
+                       raw_cons++;
+                       desc++;
                }
-nothing_to_do:
-               raw_cons += cmp ? cmp : 2;
        }
 
        return desc;
@@ -1823,10 +2197,15 @@ bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
        struct bnxt_sw_rx_bd *rx_buf;
        struct rx_pkt_cmpl *rxcmp;
        uint32_t cons, cp_cons;
+       int rc;
 
        if (!rxq)
                return -EINVAL;
 
+       rc = is_bnxt_in_error(rxq->bp);
+       if (rc)
+               return rc;
+
        cpr = rxq->cp_ring;
        rxr = rxq->rx_ring;
 
@@ -1861,10 +2240,15 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
        struct bnxt_sw_tx_bd *tx_buf;
        struct tx_pkt_cmpl *txcmp;
        uint32_t cons, cp_cons;
+       int rc;
 
        if (!txq)
                return -EINVAL;
 
+       rc = is_bnxt_in_error(txq->bp);
+       if (rc)
+               return rc;
+
        cpr = txq->cp_ring;
        txr = txq->tx_ring;
 
@@ -1956,7 +2340,7 @@ bnxt_ethertype_filter(struct rte_eth_dev *dev,
                        enum rte_filter_op filter_op,
                        void *arg)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        struct rte_eth_ethertype_filter *efilter =
                        (struct rte_eth_ethertype_filter *)arg;
        struct bnxt_filter_info *bfilter, *filter1;
@@ -1999,7 +2383,7 @@ bnxt_ethertype_filter(struct rte_eth_dev *dev,
 
                filter1 = bnxt_get_l2_filter(bp, bfilter, vnic0);
                if (filter1 == NULL) {
-                       ret = -1;
+                       ret = -EINVAL;
                        goto cleanup;
                }
                bfilter->enables |=
@@ -2193,7 +2577,7 @@ bnxt_cfg_ntuple_filter(struct bnxt *bp,
        vnic0 = &bp->vnic_info[0];
        filter1 = STAILQ_FIRST(&vnic0->filter);
        if (filter1 == NULL) {
-               ret = -1;
+               ret = -EINVAL;
                goto free_filter;
        }
 
@@ -2260,7 +2644,7 @@ bnxt_ntuple_filter(struct rte_eth_dev *dev,
                        enum rte_filter_op filter_op,
                        void *arg)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        int ret;
 
        if (filter_op == RTE_ETH_FILTER_NOP)
@@ -2577,7 +2961,7 @@ bnxt_fdir_filter(struct rte_eth_dev *dev,
                 enum rte_filter_op filter_op,
                 void *arg)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        struct rte_eth_fdir_filter *fdir  = (struct rte_eth_fdir_filter *)arg;
        struct bnxt_filter_info *filter, *match;
        struct bnxt_vnic_info *vnic, *mvnic;
@@ -2694,6 +3078,10 @@ bnxt_filter_ctrl_op(struct rte_eth_dev *dev __rte_unused,
 {
        int ret = 0;
 
+       ret = is_bnxt_in_error(dev->data->dev_private);
+       if (ret)
+               return ret;
+
        switch (filter_type) {
        case RTE_ETH_FILTER_TUNNEL:
                PMD_DRV_LOG(ERR,
@@ -2864,7 +3252,7 @@ static int
 bnxt_timesync_write_time(struct rte_eth_dev *dev, const struct timespec *ts)
 {
        uint64_t ns;
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
 
        if (!ptp)
@@ -2880,25 +3268,32 @@ bnxt_timesync_write_time(struct rte_eth_dev *dev, const struct timespec *ts)
+/*
+ * Read the current device time for the timesync API.
+ *
+ * Returns 0 without touching *ts when PTP is not configured
+ * (bp->ptp_cfg is NULL). On Thor chips the cycle count is fetched with
+ * bnxt_hwrm_port_ts_query(); on other chips it is read with
+ * bnxt_cc_read(). The cycle count is converted to nanoseconds through
+ * the ptp->tc timecounter and stored in *ts.
+ *
+ * If the HWRM query fails, its return code is propagated and neither
+ * ptp->tc nor *ts is modified.
+ */
 static int
 bnxt_timesync_read_time(struct rte_eth_dev *dev, struct timespec *ts)
 {
-       uint64_t ns, systime_cycles;
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       uint64_t ns, systime_cycles = 0;
+       int rc = 0;
 
        if (!ptp)
                return 0;
 
-       systime_cycles = bnxt_cc_read(bp);
+       if (BNXT_CHIP_THOR(bp)) {
+               rc = bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_CURRENT_TIME,
+                                            &systime_cycles);
+               /* Do not fold an unreliable cycle count into ptp->tc */
+               if (rc)
+                       return rc;
+       } else {
+               systime_cycles = bnxt_cc_read(bp);
+       }
+
        ns = rte_timecounter_update(&ptp->tc, systime_cycles);
        *ts = rte_ns_to_timespec(ns);
 
-       return 0;
+       return rc;
 }
 static int
 bnxt_timesync_enable(struct rte_eth_dev *dev)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
        uint32_t shift = 0;
+       int rc;
 
        if (!ptp)
                return 0;
@@ -2907,8 +3302,9 @@ bnxt_timesync_enable(struct rte_eth_dev *dev)
        ptp->tx_tstamp_en = 1;
        ptp->rxctl = BNXT_PTP_MSG_EVENTS;
 
-       if (!bnxt_hwrm_ptp_cfg(bp))
-               bnxt_map_ptp_regs(bp);
+       rc = bnxt_hwrm_ptp_cfg(bp);
+       if (rc)
+               return rc;
 
        memset(&ptp->tc, 0, sizeof(struct rte_timecounter));
        memset(&ptp->rx_tstamp_tc, 0, sizeof(struct rte_timecounter));
@@ -2926,13 +3322,16 @@ bnxt_timesync_enable(struct rte_eth_dev *dev)
        ptp->tx_tstamp_tc.cc_shift = shift;
        ptp->tx_tstamp_tc.nsec_mask = (1ULL << shift) - 1;
 
+       if (!BNXT_CHIP_THOR(bp))
+               bnxt_map_ptp_regs(bp);
+
        return 0;
 }
 
 static int
 bnxt_timesync_disable(struct rte_eth_dev *dev)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
 
        if (!ptp)
@@ -2944,7 +3343,8 @@ bnxt_timesync_disable(struct rte_eth_dev *dev)
 
        bnxt_hwrm_ptp_cfg(bp);
 
-       bnxt_unmap_ptp_regs(bp);
+       if (!BNXT_CHIP_THOR(bp))
+               bnxt_unmap_ptp_regs(bp);
 
        return 0;
 }
@@ -2954,7 +3354,7 @@ bnxt_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
                                 struct timespec *timestamp,
                                 uint32_t flags __rte_unused)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
        uint64_t rx_tstamp_cycles = 0;
        uint64_t ns;
@@ -2962,7 +3362,11 @@ bnxt_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
        if (!ptp)
                return 0;
 
-       bnxt_get_rx_ts(bp, &rx_tstamp_cycles);
+       if (BNXT_CHIP_THOR(bp))
+               rx_tstamp_cycles = ptp->rx_timestamp;
+       else
+               bnxt_get_rx_ts(bp, &rx_tstamp_cycles);
+
        ns = rte_timecounter_update(&ptp->rx_tstamp_tc, rx_tstamp_cycles);
        *timestamp = rte_ns_to_timespec(ns);
        return  0;
@@ -2972,25 +3376,31 @@ static int
 bnxt_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
                                 struct timespec *timestamp)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
        uint64_t tx_tstamp_cycles = 0;
        uint64_t ns;
+       int rc = 0;
 
        if (!ptp)
                return 0;
 
-       bnxt_get_tx_ts(bp, &tx_tstamp_cycles);
+       if (BNXT_CHIP_THOR(bp))
+               rc = bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_PATH_TX,
+                                            &tx_tstamp_cycles);
+       else
+               rc = bnxt_get_tx_ts(bp, &tx_tstamp_cycles);
+
        ns = rte_timecounter_update(&ptp->tx_tstamp_tc, tx_tstamp_cycles);
        *timestamp = rte_ns_to_timespec(ns);
 
-       return 0;
+       return rc;
 }
 
 static int
 bnxt_timesync_adjust_time(struct rte_eth_dev *dev, int64_t delta)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
 
        if (!ptp)
@@ -3004,11 +3414,15 @@ bnxt_timesync_adjust_time(struct rte_eth_dev *dev, int64_t delta)
 static int
 bnxt_get_eeprom_length_op(struct rte_eth_dev *dev)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        int rc;
        uint32_t dir_entries;
        uint32_t entry_length;
 
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
+
        PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x\n",
                bp->pdev->addr.domain, bp->pdev->addr.bus,
                bp->pdev->addr.devid, bp->pdev->addr.function);
@@ -3024,9 +3438,14 @@ static int
 bnxt_get_eeprom_op(struct rte_eth_dev *dev,
                struct rte_dev_eeprom_info *in_eeprom)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        uint32_t index;
        uint32_t offset;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x in_eeprom->offset = %d "
                "len = %d\n", bp->pdev->addr.domain,
@@ -3095,9 +3514,14 @@ static int
 bnxt_set_eeprom_op(struct rte_eth_dev *dev,
                struct rte_dev_eeprom_info *in_eeprom)
 {
-       struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+       struct bnxt *bp = dev->data->dev_private;
        uint8_t index, dir_op;
        uint16_t type, ext, ordinal, attr;
+       int rc;
+
+       rc = is_bnxt_in_error(bp);
+       if (rc)
+               return rc;
 
        PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x in_eeprom->offset = %d "
                "len = %d\n", bp->pdev->addr.domain,
@@ -3135,7 +3559,6 @@ bnxt_set_eeprom_op(struct rte_eth_dev *dev,
 
        return bnxt_hwrm_flash_nvram(bp, type, ordinal, ext, attr,
                                     in_eeprom->data, in_eeprom->length);
-       return 0;
 }
 
 /*
@@ -3175,6 +3598,7 @@ static const struct eth_dev_ops bnxt_dev_ops = {
        .udp_tunnel_port_del  = bnxt_udp_tunnel_port_del_op,
        .vlan_filter_set = bnxt_vlan_filter_set_op,
        .vlan_offload_set = bnxt_vlan_offload_set_op,
+       .vlan_tpid_set = bnxt_vlan_tpid_set_op,
        .vlan_pvid_set = bnxt_vlan_pvid_set_op,
        .mtu_set = bnxt_mtu_set_op,
        .mac_addr_set = bnxt_set_default_mac_addr_op,
@@ -3210,310 +3634,803 @@ static const struct eth_dev_ops bnxt_dev_ops = {
        .timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
-static bool bnxt_vf_pciid(uint16_t id)
+/*
+ * Map the GRC register 'reg' through GRC window 3 of BAR0.
+ *
+ * Writes the 4KB-aligned base of 'reg' (reg & 0xfffff000) to the window
+ * register at BAR0 offset BNXT_GRCPF_REG_WINDOW_BASE_OUT + 8 and returns
+ * BNXT_GRCP_WINDOW_3_BASE plus the dword-aligned offset of 'reg' within
+ * that 4KB region (reg & 0xffc).
+ */
+static uint32_t bnxt_map_reset_regs(struct bnxt *bp, uint32_t reg)
 {
-       if (id == BROADCOM_DEV_ID_57304_VF ||
-           id == BROADCOM_DEV_ID_57406_VF ||
-           id == BROADCOM_DEV_ID_5731X_VF ||
-           id == BROADCOM_DEV_ID_5741X_VF ||
-           id == BROADCOM_DEV_ID_57414_VF ||
-           id == BROADCOM_DEV_ID_STRATUS_NIC_VF1 ||
-           id == BROADCOM_DEV_ID_STRATUS_NIC_VF2 ||
-           id == BROADCOM_DEV_ID_58802_VF)
-               return true;
-       return false;
-}
+       uint32_t offset;
 
-bool bnxt_stratus_device(struct bnxt *bp)
-{
-       uint16_t id = bp->pdev->id.device_id;
+       /* Only pre-map the reset GRC registers using window 3 */
+       rte_write32(reg & 0xfffff000, (uint8_t *)bp->bar0 +
+                   BNXT_GRCPF_REG_WINDOW_BASE_OUT + 8);
 
-       if (id == BROADCOM_DEV_ID_STRATUS_NIC ||
-           id == BROADCOM_DEV_ID_STRATUS_NIC_VF1 ||
-           id == BROADCOM_DEV_ID_STRATUS_NIC_VF2)
-               return true;
-       return false;
+       offset = BNXT_GRCP_WINDOW_3_BASE + (reg & 0xffc);
+
+       return offset;
 }
 
-static int bnxt_init_board(struct rte_eth_dev *eth_dev)
+/*
+ * Pre-map the GRC-type firmware status registers listed in
+ * bp->recovery_info->status_regs through GRC window 2 of BAR0.
+ *
+ * For every GRC-type entry the window-relative offset is stored in
+ * info->mapped_status_regs[i]; entries of other types are skipped.
+ * The window base register (BAR0 offset
+ * BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4) is written only if at least one
+ * GRC-type register was found.
+ *
+ * Returns 0 on success, or -ERANGE when the GRC-type registers do not
+ * all share the same 4KB-aligned base and so cannot be reached through
+ * a single window.
+ */
+int bnxt_map_fw_health_status_regs(struct bnxt *bp)
 {
-       struct bnxt *bp = eth_dev->data->dev_private;
-       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
-       int rc;
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t reg_base = 0xffffffff;
+       int i;
 
-       /* enable device (incl. PCI PM wakeup), and bus-mastering */
-       if (!pci_dev->mem_resource[0].addr) {
-               PMD_DRV_LOG(ERR,
-                       "Cannot find PCI device base address, aborting\n");
-               rc = -ENODEV;
-               goto init_err_disable;
-       }
+       /* Only pre-map the monitoring GRC registers using window 2 */
+       for (i = 0; i < BNXT_FW_STATUS_REG_CNT; i++) {
+               uint32_t reg = info->status_regs[i];
 
-       bp->eth_dev = eth_dev;
-       bp->pdev = pci_dev;
+               if (BNXT_FW_STATUS_REG_TYPE(reg) != BNXT_FW_STATUS_REG_TYPE_GRC)
+                       continue;
 
-       bp->bar0 = (void *)pci_dev->mem_resource[0].addr;
-       if (!bp->bar0) {
-               PMD_DRV_LOG(ERR, "Cannot map device registers, aborting\n");
-               rc = -ENOMEM;
-               goto init_err_release;
-       }
+               /* 0xffffffff means no GRC register has been seen yet */
+               if (reg_base == 0xffffffff)
+                       reg_base = reg & 0xfffff000;
+               /* every GRC register must fall in the same 4KB window */
+               if ((reg & 0xfffff000) != reg_base)
+                       return -ERANGE;
 
-       if (!pci_dev->mem_resource[2].addr) {
-               PMD_DRV_LOG(ERR,
-                           "Cannot find PCI device BAR 2 address, aborting\n");
-               rc = -ENODEV;
-               goto init_err_release;
-       } else {
-               bp->doorbell_base = (void *)pci_dev->mem_resource[2].addr;
+               /* Use mask 0xffc as the lower 2 bits indicate the
+                * address space location
+                */
+               info->mapped_status_regs[i] = BNXT_GRCP_WINDOW_2_BASE +
+                                               (reg & 0xffc);
        }
 
-       return 0;
-
-init_err_release:
-       if (bp->bar0)
-               bp->bar0 = NULL;
-       if (bp->doorbell_base)
-               bp->doorbell_base = NULL;
+       /* No GRC-type status register: nothing to map */
+       if (reg_base == 0xffffffff)
+               return 0;
 
-init_err_disable:
+       rte_write32(reg_base, (uint8_t *)bp->bar0 +
+                   BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
 
-       return rc;
+       return 0;
 }
 
-
-#define ALLOW_FUNC(x)  \
-       { \
-               typeof(x) arg = (x); \
-               bp->pf.vf_req_fwd[((arg) >> 5)] &= \
-               ~rte_cpu_to_le_32(1 << ((arg) & 0x1f)); \
+/*
+ * Perform reset step 'index' from bp->recovery_info.
+ *
+ * Writes info->reset_reg_val[index] to the register described by
+ * info->reset_reg[index]. The register type selects the access method:
+ * PCI config space, a GRC register reached through
+ * bnxt_map_reset_regs(), or a direct BAR0 offset. If the type matches
+ * none of the three cases nothing is written. In every case the
+ * function then waits info->delay_after_reset[index] milliseconds when
+ * that value is non-zero.
+ */
+static void bnxt_write_fw_reset_reg(struct bnxt *bp, uint32_t index)
+{
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t delay = info->delay_after_reset[index];
+       uint32_t val = info->reset_reg_val[index];
+       uint32_t reg = info->reset_reg[index];
+       uint32_t type, offset;
+
+       type = BNXT_FW_STATUS_REG_TYPE(reg);
+       offset = BNXT_FW_STATUS_REG_OFF(reg);
+
+       switch (type) {
+       case BNXT_FW_STATUS_REG_TYPE_CFG:
+               /* NOTE(review): the config-write result is not checked */
+               rte_pci_write_config(bp->pdev, &val, sizeof(val), offset);
+               break;
+       case BNXT_FW_STATUS_REG_TYPE_GRC:
+               /* translate the GRC address into a BAR0 window offset */
+               offset = bnxt_map_reset_regs(bp, offset);
+               rte_write32(val, (uint8_t *)bp->bar0 + offset);
+               break;
+       case BNXT_FW_STATUS_REG_TYPE_BAR0:
+               rte_write32(val, (uint8_t *)bp->bar0 + offset);
+               break;
        }
-static int
-bnxt_dev_init(struct rte_eth_dev *eth_dev)
+       /* wait on a specific interval of time until core reset is complete */
+       if (delay)
+               rte_delay_ms(delay);
+}
+
+/*
+ * Tear the port down; called from bnxt_dev_reset_and_resume().
+ *
+ * Calls bnxt_set_hwrm_link_config(bp, false), marks the link as down,
+ * stops the port if it is still running (bp->dev_stopped == 0) and
+ * finally calls bnxt_uninit_resources(bp, true).
+ */
+static void bnxt_dev_cleanup(struct bnxt *bp)
 {
-       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
-       char mz_name[RTE_MEMZONE_NAMESIZE];
-       const struct rte_memzone *mz = NULL;
-       static int version_printed;
-       uint32_t total_alloc_len;
-       rte_iova_t mz_phys_addr;
-       struct bnxt *bp;
-       int rc;
+       bnxt_set_hwrm_link_config(bp, false);
+       bp->link_info.link_up = 0;
+       /* only stop a port that is currently started */
+       if (bp->dev_stopped == 0)
+               bnxt_dev_stop_op(bp->eth_dev);
 
-       if (version_printed++ == 0)
-               PMD_DRV_LOG(INFO, "%s\n", bnxt_version);
+       bnxt_uninit_resources(bp, true);
+}
 
-       rte_eth_copy_pci_info(eth_dev, pci_dev);
+/*
+ * Re-apply the receive modes recorded in dev->data after a reset.
+ *
+ * Re-enables all-multicast and promiscuous mode when the corresponding
+ * dev->data flag is set. Returns 0 on success, or the error code of
+ * the first operation that fails; a later success no longer hides an
+ * earlier failure.
+ */
+static int bnxt_restore_filters(struct bnxt *bp)
+{
+       struct rte_eth_dev *dev = bp->eth_dev;
+       int ret = 0;
 
-       bp = eth_dev->data->dev_private;
+       if (dev->data->all_multicast) {
+               ret = bnxt_allmulticast_enable_op(dev);
+               if (ret)
+                       return ret;
+       }
+       if (dev->data->promiscuous)
+               ret = bnxt_promiscuous_enable_op(dev);
 
-       bp->dev_stopped = 1;
+       /* TODO restore other filters as well */
+       return ret;
+}
 
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-               goto skip_init;
+/*
+ * Alarm callback that brings the port back up after a firmware reset.
+ *
+ * Polls bnxt_hwrm_ver_get() every BNXT_FW_READY_WAIT_INTERVAL ms until
+ * it succeeds or bp->fw_reset_max_msecs has elapsed, then
+ * re-initialises resources, restarts the port and restores filters.
+ * On any failure BNXT_FLAG_FATAL_ERROR is set again and resources are
+ * released with bnxt_uninit_resources(bp, false).
+ */
+static void bnxt_dev_recover(void *arg)
+{
+       struct bnxt *bp = arg;
+       int timeout = bp->fw_reset_max_msecs;
+       int rc = 0;
 
-       if (bnxt_vf_pciid(pci_dev->id.device_id))
-               bp->flags |= BNXT_FLAG_VF;
+       /* Clear Error flag so that device re-init should happen */
+       bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+
+       /* "timeout > 0" rather than "timeout": the budget need not be a
+        * multiple of the wait interval, so the counter can step past
+        * zero and a plain non-zero test would then never terminate.
+        */
+       do {
+               rc = bnxt_hwrm_ver_get(bp);
+               if (rc == 0)
+                       break;
+               rte_delay_ms(BNXT_FW_READY_WAIT_INTERVAL);
+               timeout -= BNXT_FW_READY_WAIT_INTERVAL;
+       } while (rc && timeout > 0);
 
-       rc = bnxt_init_board(eth_dev);
+       if (rc) {
+               PMD_DRV_LOG(ERR, "FW is not Ready after reset\n");
+               goto err;
+       }
+
+       rc = bnxt_init_resources(bp, true);
        if (rc) {
                PMD_DRV_LOG(ERR,
-                       "Board initialization failed rc: %x\n", rc);
-               goto error;
+                           "Failed to initialize resources after reset\n");
+               goto err;
        }
-skip_init:
-       eth_dev->dev_ops = &bnxt_dev_ops;
-       eth_dev->rx_pkt_burst = &bnxt_recv_pkts;
-       eth_dev->tx_pkt_burst = &bnxt_xmit_pkts;
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+       /* clear reset flag as the device is initialized now */
+       bp->flags &= ~BNXT_FLAG_FW_RESET;
+
+       rc = bnxt_dev_start_op(bp->eth_dev);
+       if (rc) {
+               PMD_DRV_LOG(ERR, "Failed to start port after reset\n");
+               goto err;
+       }
+
+       rc = bnxt_restore_filters(bp);
+       if (rc)
+               goto err;
+
+       PMD_DRV_LOG(INFO, "Recovered from FW reset\n");
+       return;
+err:
+       bp->flags |= BNXT_FLAG_FATAL_ERROR;
+       bnxt_uninit_resources(bp, false);
+       PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n");
+}
+
+/*
+ * First stage of recovery; scheduled as an EAL alarm callback by
+ * bnxt_fw_reset_cb().
+ *
+ * Tears the port down with bnxt_dev_cleanup(), calls
+ * bnxt_wait_for_device_shutdown(), then arms an alarm that runs
+ * bnxt_dev_recover() after bp->fw_reset_min_msecs milliseconds.
+ * A failure to arm the alarm is logged; nothing is returned.
+ */
+void bnxt_dev_reset_and_resume(void *arg)
+{
+       struct bnxt *bp = arg;
+       int rc;
+
+       bnxt_dev_cleanup(bp);
+
+       bnxt_wait_for_device_shutdown(bp);
+
+       rc = rte_eal_alarm_set(US_PER_MS * bp->fw_reset_min_msecs,
+                              bnxt_dev_recover, (void *)bp);
+       if (rc)
+               PMD_DRV_LOG(ERR, "Error setting recovery alarm\n");
+}
+
+/*
+ * Read firmware status register 'index' from bp->recovery_info.
+ *
+ * The register type selects the access method: PCI config space, the
+ * pre-mapped GRC offset in info->mapped_status_regs[index], or a
+ * direct BAR0 offset. Returns the value read, or 0 when the type is
+ * none of those three.
+ */
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
+{
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t status_reg = info->status_regs[index];
+       uint32_t reg_type = BNXT_FW_STATUS_REG_TYPE(status_reg);
+       uint32_t reg_off = BNXT_FW_STATUS_REG_OFF(status_reg);
+       uint32_t val = 0;
+
+       if (reg_type == BNXT_FW_STATUS_REG_TYPE_CFG) {
+               rte_pci_read_config(bp->pdev, &val, sizeof(val), reg_off);
+               return val;
+       }
+
+       /* GRC registers are read through their pre-mapped BAR0 offset */
+       if (reg_type == BNXT_FW_STATUS_REG_TYPE_GRC)
+               reg_off = info->mapped_status_regs[index];
+       else if (reg_type != BNXT_FW_STATUS_REG_TYPE_BAR0)
+               return val;
+
+       val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + reg_off));
+       return val;
+}
+
+/*
+ * Trigger the firmware reset using the method in recovery_info->flags.
+ *
+ * With BNXT_FLAG_ERROR_RECOVERY_HOST set, every reset register is
+ * written in order and the function then waits
+ * master_func_wait_period_after_reset ms; this path always returns 0.
+ * Otherwise, with BNXT_FLAG_ERROR_RECOVERY_CO_CPU set, the reset is
+ * requested through bnxt_hwrm_fw_reset() and its result is returned.
+ * Returns 0 when neither flag is set.
+ */
+static int bnxt_fw_reset_all(struct bnxt *bp)
+{
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t step;
+       int rc;
+
+       if (info->flags & BNXT_FLAG_ERROR_RECOVERY_HOST) {
+               /* Reset through master function driver */
+               for (step = 0; step < info->reg_array_cnt; step++)
+                       bnxt_write_fw_reset_reg(bp, step);
+               /* Wait for time specified by FW after triggering reset */
+               rte_delay_ms(info->master_func_wait_period_after_reset);
+               return 0;
+       }
+
+       if (!(info->flags & BNXT_FLAG_ERROR_RECOVERY_CO_CPU))
+               return 0;
+
+       /* Reset with the help of Kong processor */
+       rc = bnxt_hwrm_fw_reset(bp);
+       if (rc)
+               PMD_DRV_LOG(ERR, "Failed to reset FW\n");
+
+       return rc;
+}
+
+/*
+ * Alarm callback armed by bnxt_check_fw_health() once a dead firmware
+ * has been detected.
+ *
+ * The master function (when recovery is enabled) triggers the firmware
+ * reset through bnxt_fw_reset_all(). For the host-driven recovery
+ * method, every function then schedules bnxt_dev_reset_and_resume() to
+ * re-initialise the port. A failure to arm that alarm is logged.
+ */
+static void bnxt_fw_reset_cb(void *arg)
+{
+       struct bnxt *bp = arg;
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       int rc = 0;
+
+       /* Only Master function can do FW reset */
+       if (bnxt_is_master_func(bp) &&
+           bnxt_is_recovery_enabled(bp)) {
+               rc = bnxt_fw_reset_all(bp);
+               if (rc) {
+                       PMD_DRV_LOG(ERR, "Adapter recovery failed\n");
+                       return;
+               }
+       }
+
+       /* if recovery method is ERROR_RECOVERY_CO_CPU, KONG will send
+        * EXCEPTION_FATAL_ASYNC event to all the functions
+        * (including MASTER FUNC). After receiving this Async, all the active
+        * drivers should treat this case as FW initiated recovery
+        */
+       if (info->flags & BNXT_FLAG_ERROR_RECOVERY_HOST) {
+               bp->fw_reset_min_msecs = BNXT_MIN_FW_READY_TIMEOUT;
+               bp->fw_reset_max_msecs = BNXT_MAX_FW_RESET_TIMEOUT;
+
+               /* To recover from error */
+               rc = rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
+                                      (void *)bp);
+               if (rc)
+                       PMD_DRV_LOG(ERR, "Error setting reset alarm\n");
+       }
+}
+
+/* Driver should poll FW heartbeat, reset_counter with the frequency
+ * advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
+ * When the driver detects heartbeat stop or change in reset_counter,
+ * it has to trigger a reset to recover from the error condition.
+ * A "master PF" is the function who will have the privilege to
+ * initiate the chimp reset. The master PF will be elected by the
+ * firmware and will be notified through async message.
+ *
+ * This function is the periodic alarm callback that does the polling.
+ * While the firmware looks healthy it re-arms itself after
+ * info->driver_polling_freq ms. Otherwise it sets the FATAL_ERROR and
+ * FW_RESET flags and arms bnxt_fw_reset_cb() after the wait period
+ * configured for this function's role. Failures to arm either alarm
+ * are logged.
+ */
+static void bnxt_check_fw_health(void *arg)
+{
+       struct bnxt *bp = arg;
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t val = 0, wait_msec;
+       int rc;
+
+       if (!info || !bnxt_is_recovery_enabled(bp) ||
+           is_bnxt_in_error(bp))
+               return;
+
+       /* an unchanged heartbeat counter means the firmware stalled */
+       val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+       if (val == info->last_heart_beat)
+               goto reset;
+
+       info->last_heart_beat = val;
+
+       /* a changed reset counter means a reset has already happened */
+       val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+       if (val != info->last_reset_counter)
+               goto reset;
+
+       info->last_reset_counter = val;
+
+       rc = rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq,
+                              bnxt_check_fw_health, (void *)bp);
+       if (rc)
+               PMD_DRV_LOG(ERR, "Failed to re-arm FW health check alarm\n");
+
+       return;
+reset:
+       /* Stop DMA to/from device */
+       bp->flags |= BNXT_FLAG_FATAL_ERROR;
+       bp->flags |= BNXT_FLAG_FW_RESET;
+
+       PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
+
+       if (bnxt_is_master_func(bp))
+               wait_msec = info->master_func_wait_period;
+       else
+               wait_msec = info->normal_func_wait_period;
+
+       rc = rte_eal_alarm_set(US_PER_MS * wait_msec,
+                              bnxt_fw_reset_cb, (void *)bp);
+       if (rc)
+               PMD_DRV_LOG(ERR, "Failed to schedule FW reset alarm\n");
+}
+
+/*
+ * Start the periodic firmware health check if it is not already
+ * running.
+ *
+ * Does nothing when error recovery is disabled or when
+ * BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED is already set. Otherwise arms
+ * bnxt_check_fw_health() after recovery_info->driver_polling_freq ms.
+ * The SCHEDULED flag is set only once the alarm has actually been
+ * armed, so that a later call can retry after a failure.
+ */
+void bnxt_schedule_fw_health_check(struct bnxt *bp)
+{
+       uint32_t polling_freq;
+       int rc;
+
+       if (!bnxt_is_recovery_enabled(bp))
+               return;
+
+       if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED)
+               return;
+
+       polling_freq = bp->recovery_info->driver_polling_freq;
+
+       rc = rte_eal_alarm_set(US_PER_MS * polling_freq,
+                              bnxt_check_fw_health, (void *)bp);
+       if (rc) {
+               PMD_DRV_LOG(ERR, "Failed to schedule FW health check\n");
+               return;
+       }
+       bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+}
+
+/*
+ * Stop the periodic firmware health check: cancel the pending
+ * bnxt_check_fw_health() alarm and clear the SCHEDULED flag.
+ * Does nothing when error recovery is not enabled.
+ */
+static void bnxt_cancel_fw_health_check(struct bnxt *bp)
+{
+       if (bnxt_is_recovery_enabled(bp)) {
+               rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp);
+               bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+       }
+}
+
+/* Return true when PCI device ID 'id' is one of the listed VF IDs. */
+static bool bnxt_vf_pciid(uint16_t id)
+{
+       switch (id) {
+       case BROADCOM_DEV_ID_57304_VF:
+       case BROADCOM_DEV_ID_57406_VF:
+       case BROADCOM_DEV_ID_5731X_VF:
+       case BROADCOM_DEV_ID_5741X_VF:
+       case BROADCOM_DEV_ID_57414_VF:
+       case BROADCOM_DEV_ID_STRATUS_NIC_VF1:
+       case BROADCOM_DEV_ID_STRATUS_NIC_VF2:
+       case BROADCOM_DEV_ID_58802_VF:
+       case BROADCOM_DEV_ID_57500_VF1:
+       case BROADCOM_DEV_ID_57500_VF2:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Return true when the PCI device ID of 'bp' is the Stratus NIC ID or
+ * one of its two VF IDs.
+ */
+bool bnxt_stratus_device(struct bnxt *bp)
+{
+       const uint16_t dev_id = bp->pdev->id.device_id;
+
+       return dev_id == BROADCOM_DEV_ID_STRATUS_NIC ||
+              dev_id == BROADCOM_DEV_ID_STRATUS_NIC_VF1 ||
+              dev_id == BROADCOM_DEV_ID_STRATUS_NIC_VF2;
+}
+
+/*
+ * Record the PCI resources of the port in its private data.
+ *
+ * Stores the addresses of PCI memory resources 0 and 2 as bp->bar0 and
+ * bp->doorbell_base. When both are non-NULL, bp->eth_dev and bp->pdev
+ * are filled in and 0 is returned; otherwise an error is logged and
+ * -ENODEV is returned.
+ */
+static int bnxt_init_board(struct rte_eth_dev *eth_dev)
+{
+       struct bnxt *bp = eth_dev->data->dev_private;
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+
+       bp->bar0 = (void *)pci_dev->mem_resource[0].addr;
+       bp->doorbell_base = (void *)pci_dev->mem_resource[2].addr;
+
+       if (bp->bar0 && bp->doorbell_base) {
+               bp->eth_dev = eth_dev;
+               bp->pdev = pci_dev;
+               return 0;
+       }
+
+       PMD_DRV_LOG(ERR, "Unable to access Hardware\n");
+       return -ENODEV;
+}
+
+/*
+ * Reserve (or look up) the memzones backing one block of context
+ * memory and describe them in ctx_pg->ring_mem.
+ *
+ * bp       - port; its port_id is used in the memzone names
+ * ctx_pg   - context page info whose ring_mem is filled in
+ * mem_size - size in bytes of the context block; 0 is a no-op
+ * suffix   - string embedded in the memzone names
+ * idx      - index embedded in the memzone names
+ *
+ * When the block spans more than one BNXT_PAGE_SIZE page, a separate
+ * page-table memzone with one 64-bit entry per page is set up as well.
+ *
+ * Returns 0 on success (including mem_size == 0) and -ENOMEM when a
+ * memzone cannot be reserved or its address cannot be translated.
+ *
+ * NOTE(review): bp is marked __rte_unused although it is dereferenced
+ * below when the memzone names are built.
+ */
+static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp,
+                                 struct bnxt_ctx_pg_info *ctx_pg,
+                                 uint32_t mem_size,
+                                 const char *suffix,
+                                 uint16_t idx)
+{
+       struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem;
+       const struct rte_memzone *mz = NULL;
+       char mz_name[RTE_MEMZONE_NAMESIZE];
+       rte_iova_t mz_phys_addr;
+       uint64_t valid_bits = 0;
+       uint32_t sz;
+       int i;
+
+       if (!mem_size)
                return 0;
 
-       if (pci_dev->id.device_id != BROADCOM_DEV_ID_NS2) {
+       /* number of BNXT_PAGE_SIZE pages needed to hold mem_size bytes */
+       rmem->nr_pages = RTE_ALIGN_MUL_CEIL(mem_size, BNXT_PAGE_SIZE) /
+                        BNXT_PAGE_SIZE;
+       rmem->page_size = BNXT_PAGE_SIZE;
+       rmem->pg_arr = ctx_pg->ctx_pg_arr;
+       rmem->dma_arr = ctx_pg->ctx_dma_arr;
+       rmem->flags = BNXT_RMEM_VALID_PTE_FLAG;
+
+       valid_bits = PTU_PTE_VALID;
+
+       /* multi-page block: set up the page table (8 bytes per page) */
+       if (rmem->nr_pages > 1) {
                snprintf(mz_name, RTE_MEMZONE_NAMESIZE,
-                        "bnxt_%04x:%02x:%02x:%02x-%s", pci_dev->addr.domain,
-                        pci_dev->addr.bus, pci_dev->addr.devid,
-                        pci_dev->addr.function, "rx_port_stats");
+                        "bnxt_ctx_pg_tbl%s_%x_%d",
+                        suffix, idx, bp->eth_dev->data->port_id);
                mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0;
                mz = rte_memzone_lookup(mz_name);
-               total_alloc_len = RTE_CACHE_LINE_ROUNDUP(
-                                       sizeof(struct rx_port_stats) +
-                                       sizeof(struct rx_port_stats_ext) +
-                                       512);
                if (!mz) {
-                       mz = rte_memzone_reserve(mz_name, total_alloc_len,
-                                       SOCKET_ID_ANY,
-                                       RTE_MEMZONE_2MB |
-                                       RTE_MEMZONE_SIZE_HINT_ONLY |
-                                       RTE_MEMZONE_IOVA_CONTIG);
+                       mz = rte_memzone_reserve_aligned(mz_name,
+                                               rmem->nr_pages * 8,
+                                               SOCKET_ID_ANY,
+                                               RTE_MEMZONE_2MB |
+                                               RTE_MEMZONE_SIZE_HINT_ONLY |
+                                               RTE_MEMZONE_IOVA_CONTIG,
+                                               BNXT_PAGE_SIZE);
                        if (mz == NULL)
                                return -ENOMEM;
                }
+
                memset(mz->addr, 0, mz->len);
                mz_phys_addr = mz->iova;
                if ((unsigned long)mz->addr == mz_phys_addr) {
-                       PMD_DRV_LOG(INFO,
-                               "Memzone physical address same as virtual using rte_mem_virt2iova()\n");
+                       PMD_DRV_LOG(DEBUG,
+                                   "physical address same as virtual\n");
+                       PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
                        mz_phys_addr = rte_mem_virt2iova(mz->addr);
-                       if (mz_phys_addr == 0) {
+                       if (mz_phys_addr == RTE_BAD_IOVA) {
                                PMD_DRV_LOG(ERR,
-                               "unable to map address to physical memory\n");
+                                       "unable to map addr to phys memory\n");
                                return -ENOMEM;
                        }
                }
+               /* NOTE(review): only the first page of the table is locked */
+               rte_mem_lock_page(((char *)mz->addr));
 
-               bp->rx_mem_zone = (const void *)mz;
-               bp->hw_rx_port_stats = mz->addr;
-               bp->hw_rx_port_stats_map = mz_phys_addr;
+               rmem->pg_tbl = mz->addr;
+               rmem->pg_tbl_map = mz_phys_addr;
+               rmem->pg_tbl_mz = mz;
+       }
 
-               snprintf(mz_name, RTE_MEMZONE_NAMESIZE,
-                        "bnxt_%04x:%02x:%02x:%02x-%s", pci_dev->addr.domain,
-                        pci_dev->addr.bus, pci_dev->addr.devid,
-                        pci_dev->addr.function, "tx_port_stats");
-               mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0;
-               mz = rte_memzone_lookup(mz_name);
-               total_alloc_len = RTE_CACHE_LINE_ROUNDUP(
-                                       sizeof(struct tx_port_stats) +
-                                       sizeof(struct tx_port_stats_ext) +
-                                       512);
-               if (!mz) {
-                       mz = rte_memzone_reserve(mz_name,
-                                       total_alloc_len,
-                                       SOCKET_ID_ANY,
-                                       RTE_MEMZONE_2MB |
-                                       RTE_MEMZONE_SIZE_HINT_ONLY |
-                                       RTE_MEMZONE_IOVA_CONTIG);
-                       if (mz == NULL)
-                               return -ENOMEM;
+       /* memzone that backs the context memory itself */
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "bnxt_ctx_%s_%x_%d",
+                suffix, idx, bp->eth_dev->data->port_id);
+       mz = rte_memzone_lookup(mz_name);
+       if (!mz) {
+               mz = rte_memzone_reserve_aligned(mz_name,
+                                                mem_size,
+                                                SOCKET_ID_ANY,
+                                                RTE_MEMZONE_1GB |
+                                                RTE_MEMZONE_SIZE_HINT_ONLY |
+                                                RTE_MEMZONE_IOVA_CONTIG,
+                                                BNXT_PAGE_SIZE);
+               if (mz == NULL)
+                       return -ENOMEM;
+       }
+
+       memset(mz->addr, 0, mz->len);
+       mz_phys_addr = mz->iova;
+       if ((unsigned long)mz->addr == mz_phys_addr) {
+               PMD_DRV_LOG(DEBUG,
+                           "Memzone physical address same as virtual.\n");
+               PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
+               for (sz = 0; sz < mem_size; sz += BNXT_PAGE_SIZE)
+                       rte_mem_lock_page(((char *)mz->addr) + sz);
+               mz_phys_addr = rte_mem_virt2iova(mz->addr);
+               if (mz_phys_addr == RTE_BAD_IOVA) {
+                       PMD_DRV_LOG(ERR,
+                                   "unable to map addr to phys memory\n");
+                       return -ENOMEM;
                }
-               memset(mz->addr, 0, mz->len);
-               mz_phys_addr = mz->iova;
-               if ((unsigned long)mz->addr == mz_phys_addr) {
-                       PMD_DRV_LOG(WARNING,
-                               "Memzone physical address same as virtual.\n");
-                       PMD_DRV_LOG(WARNING,
-                               "Using rte_mem_virt2iova()\n");
-                       mz_phys_addr = rte_mem_virt2iova(mz->addr);
-                       if (mz_phys_addr == 0) {
-                               PMD_DRV_LOG(ERR,
-                               "unable to map address to physical memory\n");
-                               return -ENOMEM;
-                       }
+       }
+
+       /* record the virtual and DMA address of every page and, for
+        * multi-page blocks, write the matching page-table entry
+        */
+       for (sz = 0, i = 0; sz < mem_size; sz += BNXT_PAGE_SIZE, i++) {
+               rte_mem_lock_page(((char *)mz->addr) + sz);
+               rmem->pg_arr[i] = ((char *)mz->addr) + sz;
+               rmem->dma_arr[i] = mz_phys_addr + sz;
+
+               if (rmem->nr_pages > 1) {
+                       /* NOTE(review): rmem->flags was set to
+                        * BNXT_RMEM_VALID_PTE_FLAG above, so these two
+                        * branches are taken only if that value contains
+                        * the BNXT_RMEM_RING_PTE_FLAG bits - TODO confirm
+                        */
+                       if (i == rmem->nr_pages - 2 &&
+                           (rmem->flags & BNXT_RMEM_RING_PTE_FLAG))
+                               valid_bits |= PTU_PTE_NEXT_TO_LAST;
+                       else if (i == rmem->nr_pages - 1 &&
+                                (rmem->flags & BNXT_RMEM_RING_PTE_FLAG))
+                               valid_bits |= PTU_PTE_LAST;
+
+                       rmem->pg_tbl[i] = rte_cpu_to_le_64(rmem->dma_arr[i] |
+                                                          valid_bits);
                }
+       }
 
-               bp->tx_mem_zone = (const void *)mz;
-               bp->hw_tx_port_stats = mz->addr;
-               bp->hw_tx_port_stats_map = mz_phys_addr;
+       rmem->mz = mz;
+       if (rmem->vmem_size)
+               rmem->vmem = (void **)mz->addr;
+       /* same value the loop above already stored for page 0 */
+       rmem->dma_arr[0] = mz_phys_addr;
+       return 0;
+}
+
+/*
+ * Release the context memory of the port.
+ *
+ * Does nothing unless bp->ctx exists and has BNXT_CTX_FLAG_INITED set.
+ * Otherwise clears that flag, frees the data and page-table memzones
+ * of the QP, SRQ, CQ, VNIC and stat contexts, frees the data memzone
+ * of every non-NULL TQM entry, then frees bp->ctx and resets the
+ * pointer to NULL.
+ */
+static void bnxt_free_ctx_mem(struct bnxt *bp)
+{
+       int i;
+
+       if (!bp->ctx || !(bp->ctx->flags & BNXT_CTX_FLAG_INITED))
+               return;
 
-               bp->flags |= BNXT_FLAG_PORT_STATS;
+       bp->ctx->flags &= ~BNXT_CTX_FLAG_INITED;
+       rte_memzone_free(bp->ctx->qp_mem.ring_mem.mz);
+       rte_memzone_free(bp->ctx->srq_mem.ring_mem.mz);
+       rte_memzone_free(bp->ctx->cq_mem.ring_mem.mz);
+       rte_memzone_free(bp->ctx->vnic_mem.ring_mem.mz);
+       rte_memzone_free(bp->ctx->stat_mem.ring_mem.mz);
+       rte_memzone_free(bp->ctx->qp_mem.ring_mem.pg_tbl_mz);
+       rte_memzone_free(bp->ctx->srq_mem.ring_mem.pg_tbl_mz);
+       rte_memzone_free(bp->ctx->cq_mem.ring_mem.pg_tbl_mz);
+       rte_memzone_free(bp->ctx->vnic_mem.ring_mem.pg_tbl_mz);
+       rte_memzone_free(bp->ctx->stat_mem.ring_mem.pg_tbl_mz);
+
+       /* NOTE(review): only ring_mem.mz is freed for the TQM entries;
+        * ring_mem.pg_tbl_mz is not - confirm whether TQM blocks can
+        * span more than one page
+        */
+       for (i = 0; i < BNXT_MAX_Q; i++) {
+               if (bp->ctx->tqm_mem[i])
+                       rte_memzone_free(bp->ctx->tqm_mem[i]->ring_mem.mz);
+       }
 
-               /* Display extended statistics if FW supports it */
-               if (bp->hwrm_spec_code < HWRM_SPEC_CODE_1_8_4 ||
-                   bp->hwrm_spec_code == HWRM_SPEC_CODE_1_9_0)
-                       goto skip_ext_stats;
+       rte_free(bp->ctx);
+       bp->ctx = NULL;
+}
 
-               bp->hw_rx_port_stats_ext = (void *)
-                       (bp->hw_rx_port_stats + sizeof(struct rx_port_stats));
-               bp->hw_rx_port_stats_ext_map = bp->hw_rx_port_stats_map +
-                       sizeof(struct rx_port_stats);
-               bp->flags |= BNXT_FLAG_EXT_RX_PORT_STATS;
+/*
+ * Round x up to the next multiple of y using integer division.
+ * y is expanded three times, so it should be free of side effects.
+ */
+#define bnxt_roundup(x, y)   ((((x) + ((y) - 1)) / (y)) * (y))
 
+/*
+ * Smaller of x and y after both are converted to 'type'. Each argument is
+ * evaluated exactly once; relies on the ({ ... }) statement-expression
+ * compiler extension.
+ */
+#define min_t(type, x, y) ({                    \
+       type __min1 = (x);                      \
+       type __min2 = (y);                      \
+       __min1 < __min2 ? __min1 : __min2; })
 
-               if (bp->hwrm_spec_code < HWRM_SPEC_CODE_1_9_2) {
-                       bp->hw_tx_port_stats_ext = (void *)
-                       (bp->hw_tx_port_stats + sizeof(struct tx_port_stats));
-                       bp->hw_tx_port_stats_ext_map =
-                               bp->hw_tx_port_stats_map +
-                               sizeof(struct tx_port_stats);
-                       bp->flags |= BNXT_FLAG_EXT_TX_PORT_STATS;
-               }
-       }
+/*
+ * Larger of x and y after both are converted to 'type'. Each argument is
+ * evaluated exactly once; relies on the ({ ... }) statement-expression
+ * compiler extension.
+ */
+#define max_t(type, x, y) ({                    \
+       type __max1 = (x);                      \
+       type __max2 = (y);                      \
+       __max1 > __max2 ? __max1 : __max2; })
 
-skip_ext_stats:
-       rc = bnxt_alloc_hwrm_resources(bp);
+/*
+ * Clamp _x into [min, max], comparing in 'type': max_t() first raises the
+ * value to at least min, then min_t() caps the result at max.
+ */
+#define clamp_t(type, _x, min, max)     min_t(type, max_t(type, _x, min), max)
+
+/*
+ * Size and allocate the backing-store context memory and hand it to
+ * firmware.
+ *
+ * Queries the requirements with bnxt_hwrm_func_backing_store_qcaps(), then
+ * allocates one block each for the QP, SRQ, CQ, VNIC and statistics
+ * contexts plus one block per TQM ring (BNXT_MAX_Q of them), and finally
+ * calls bnxt_hwrm_func_backing_store_cfg() with the accumulated enable bits.
+ * BNXT_CTX_FLAG_INITED is set only when that last call succeeds.
+ *
+ * Returns 0 on success, or when bp->ctx is NULL or already marked
+ * BNXT_CTX_FLAG_INITED; otherwise the error from the failing helper.
+ *
+ * NOTE(review): on a mid-way failure the blocks already allocated are not
+ * released here and BNXT_CTX_FLAG_INITED stays clear - confirm the caller
+ * cleans them up.
+ */
+int bnxt_alloc_ctx_mem(struct bnxt *bp)
+{
+       struct bnxt_ctx_pg_info *ctx_pg;
+       struct bnxt_ctx_mem_info *ctx;
+       uint32_t mem_size, ena, entries;
+       int i, rc;
+
+       rc = bnxt_hwrm_func_backing_store_qcaps(bp);
        if (rc) {
-               PMD_DRV_LOG(ERR,
-                       "hwrm resource allocation failure rc: %x\n", rc);
-               goto error_free;
+               PMD_DRV_LOG(ERR, "Query context mem capability failed\n");
+               return rc;
        }
-       rc = bnxt_hwrm_ver_get(bp);
+       /* Nothing to do without a ctx, or when it is already set up */
+       ctx = bp->ctx;
+       if (!ctx || (ctx->flags & BNXT_CTX_FLAG_INITED))
+               return 0;
+
+       ctx_pg = &ctx->qp_mem;
+       ctx_pg->entries = ctx->qp_min_qp1_entries + ctx->qp_max_l2_entries;
+       mem_size = ctx->qp_entry_size * ctx_pg->entries;
+       rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size, "qp_mem", 0);
        if (rc)
-               goto error_free;
-       rc = bnxt_hwrm_queue_qportcfg(bp);
-       if (rc) {
-               PMD_DRV_LOG(ERR, "hwrm queue qportcfg failed\n");
-               goto error_free;
+               return rc;
+
+       ctx_pg = &ctx->srq_mem;
+       ctx_pg->entries = ctx->srq_max_l2_entries;
+       mem_size = ctx->srq_entry_size * ctx_pg->entries;
+       rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size, "srq_mem", 0);
+       if (rc)
+               return rc;
+
+       ctx_pg = &ctx->cq_mem;
+       ctx_pg->entries = ctx->cq_max_l2_entries;
+       mem_size = ctx->cq_entry_size * ctx_pg->entries;
+       rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size, "cq_mem", 0);
+       if (rc)
+               return rc;
+
+       ctx_pg = &ctx->vnic_mem;
+       ctx_pg->entries = ctx->vnic_max_vnic_entries +
+               ctx->vnic_max_ring_table_entries;
+       mem_size = ctx->vnic_entry_size * ctx_pg->entries;
+       rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size, "vnic_mem", 0);
+       if (rc)
+               return rc;
+
+       ctx_pg = &ctx->stat_mem;
+       ctx_pg->entries = ctx->stat_max_entries;
+       mem_size = ctx->stat_entry_size * ctx_pg->entries;
+       rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size, "stat_mem", 0);
+       if (rc)
+               return rc;
+
+       /*
+        * TQM ring depth: qp_max_l2_entries rounded up to a multiple of
+        * tqm_entries_multiple, then clamped to the per-ring min/max.
+        */
+       entries = ctx->qp_max_l2_entries;
+       entries = bnxt_roundup(entries, ctx->tqm_entries_multiple);
+       entries = clamp_t(uint32_t, entries, ctx->tqm_min_entries_per_ring,
+                         ctx->tqm_max_entries_per_ring);
+       /*
+        * NOTE(review): ctx->tqm_mem[i] is dereferenced without a NULL
+        * check, while bnxt_free_ctx_mem() does check it - confirm every
+        * slot is populated before this point.
+        */
+       for (i = 0, ena = 0; i < BNXT_MAX_Q; i++) {
+               ctx_pg = ctx->tqm_mem[i];
+               /* use min tqm entries for now. */
+               ctx_pg->entries = entries;
+               mem_size = ctx->tqm_entry_size * ctx_pg->entries;
+               rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size, "tqm_mem", i);
+               if (rc)
+                       return rc;
+               /* One enable bit per TQM ring, counted up from TQM_SP */
+               ena |= HWRM_FUNC_BACKING_STORE_CFG_INPUT_ENABLES_TQM_SP << i;
        }
 
-       rc = bnxt_hwrm_func_qcfg(bp);
-       if (rc) {
-               PMD_DRV_LOG(ERR, "hwrm func qcfg failed\n");
-               goto error_free;
+       ena |= FUNC_BACKING_STORE_CFG_INPUT_DFLT_ENABLES;
+       rc = bnxt_hwrm_func_backing_store_cfg(bp, ena);
+       if (rc)
+               PMD_DRV_LOG(ERR,
+                           "Failed to configure context mem: rc = %d\n", rc);
+       else
+               ctx->flags |= BNXT_CTX_FLAG_INITED;
+
+       return rc;
+}
+
+/*
+ * Set up the memzones that hold the RX and TX port statistics.
+ *
+ * Returns 0 immediately for the BROADCOM_DEV_ID_NS2 device. Otherwise, for
+ * each direction, a memzone named "bnxt_<pci address>-{rx,tx}_port_stats" is
+ * looked up and reserved if it does not exist yet, zeroed, and its virtual
+ * and IO addresses are stored in bp. BNXT_FLAG_PORT_STATS is then set.
+ *
+ * The extended-statistics pointers are placed directly behind the basic
+ * statistics structure in the same memzone, but only when the HWRM spec
+ * code and BNXT_FLAG_EXT_STATS_SUPPORTED allow it.
+ *
+ * Returns 0 on success and -ENOMEM when a memzone cannot be reserved or
+ * its address cannot be translated.
+ *
+ * NOTE(review): on the -ENOMEM return after a failed rte_mem_virt2iova()
+ * the memzone has not been recorded in bp yet - confirm it is not leaked.
+ */
+static int bnxt_alloc_stats_mem(struct bnxt *bp)
+{
+       struct rte_pci_device *pci_dev = bp->pdev;
+       char mz_name[RTE_MEMZONE_NAMESIZE];
+       const struct rte_memzone *mz = NULL;
+       uint32_t total_alloc_len;
+       rte_iova_t mz_phys_addr;
+
+       if (pci_dev->id.device_id == BROADCOM_DEV_ID_NS2)
+               return 0;
+
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE,
+                "bnxt_" PCI_PRI_FMT "-%s", pci_dev->addr.domain,
+                pci_dev->addr.bus, pci_dev->addr.devid,
+                pci_dev->addr.function, "rx_port_stats");
+       mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0;
+       mz = rte_memzone_lookup(mz_name);
+       /* Room for basic + extended RX stats plus 512 spare bytes */
+       total_alloc_len =
+               RTE_CACHE_LINE_ROUNDUP(sizeof(struct rx_port_stats) +
+                                      sizeof(struct rx_port_stats_ext) + 512);
+       if (!mz) {
+               mz = rte_memzone_reserve(mz_name, total_alloc_len,
+                                        SOCKET_ID_ANY,
+                                        RTE_MEMZONE_2MB |
+                                        RTE_MEMZONE_SIZE_HINT_ONLY |
+                                        RTE_MEMZONE_IOVA_CONTIG);
+               if (mz == NULL)
+                       return -ENOMEM;
+       }
+       /* The zone is cleared even when it was found by the lookup */
+       memset(mz->addr, 0, mz->len);
+       mz_phys_addr = mz->iova;
+       if ((unsigned long)mz->addr == mz_phys_addr) {
+               PMD_DRV_LOG(DEBUG,
+                           "Memzone physical address same as virtual.\n");
+               PMD_DRV_LOG(DEBUG,
+                           "Using rte_mem_virt2iova()\n");
+               mz_phys_addr = rte_mem_virt2iova(mz->addr);
+               if (mz_phys_addr == RTE_BAD_IOVA) {
+                       PMD_DRV_LOG(ERR,
+                                   "Can't map address to physical memory\n");
+                       return -ENOMEM;
+               }
        }
 
-       /* Get the MAX capabilities for this function */
-       rc = bnxt_hwrm_func_qcaps(bp);
-       if (rc) {
-               PMD_DRV_LOG(ERR, "hwrm query capability failure rc: %x\n", rc);
-               goto error_free;
+       bp->rx_mem_zone = (const void *)mz;
+       bp->hw_rx_port_stats = mz->addr;
+       bp->hw_rx_port_stats_map = mz_phys_addr;
+
+       /* Same sequence again for the TX statistics memzone */
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE,
+                "bnxt_" PCI_PRI_FMT "-%s", pci_dev->addr.domain,
+                pci_dev->addr.bus, pci_dev->addr.devid,
+                pci_dev->addr.function, "tx_port_stats");
+       mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0;
+       mz = rte_memzone_lookup(mz_name);
+       total_alloc_len =
+               RTE_CACHE_LINE_ROUNDUP(sizeof(struct tx_port_stats) +
+                                      sizeof(struct tx_port_stats_ext) + 512);
+       if (!mz) {
+               mz = rte_memzone_reserve(mz_name,
+                                        total_alloc_len,
+                                        SOCKET_ID_ANY,
+                                        RTE_MEMZONE_2MB |
+                                        RTE_MEMZONE_SIZE_HINT_ONLY |
+                                        RTE_MEMZONE_IOVA_CONTIG);
+               if (mz == NULL)
+                       return -ENOMEM;
        }
-       if (bp->max_tx_rings == 0) {
-               PMD_DRV_LOG(ERR, "No TX rings available!\n");
-               rc = -EBUSY;
-               goto error_free;
+       memset(mz->addr, 0, mz->len);
+       mz_phys_addr = mz->iova;
+       if ((unsigned long)mz->addr == mz_phys_addr) {
+               PMD_DRV_LOG(DEBUG,
+                           "Memzone physical address same as virtual\n");
+               PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
+               mz_phys_addr = rte_mem_virt2iova(mz->addr);
+               if (mz_phys_addr == RTE_BAD_IOVA) {
+                       PMD_DRV_LOG(ERR,
+                                   "Can't map address to physical memory\n");
+                       return -ENOMEM;
+               }
        }
+
+       bp->tx_mem_zone = (const void *)mz;
+       bp->hw_tx_port_stats = mz->addr;
+       bp->hw_tx_port_stats_map = mz_phys_addr;
+       bp->flags |= BNXT_FLAG_PORT_STATS;
+
+       /* Display extended statistics if FW supports it */
+       if (bp->hwrm_spec_code < HWRM_SPEC_CODE_1_8_4 ||
+           bp->hwrm_spec_code == HWRM_SPEC_CODE_1_9_0 ||
+           !(bp->flags & BNXT_FLAG_EXT_STATS_SUPPORTED))
+               return 0;
+
+       /* Extended RX stats live right after struct rx_port_stats */
+       bp->hw_rx_port_stats_ext = (void *)
+               ((uint8_t *)bp->hw_rx_port_stats +
+                sizeof(struct rx_port_stats));
+       bp->hw_rx_port_stats_ext_map = bp->hw_rx_port_stats_map +
+               sizeof(struct rx_port_stats);
+       bp->flags |= BNXT_FLAG_EXT_RX_PORT_STATS;
+
+       /*
+        * NOTE(review): BNXT_FLAG_EXT_STATS_SUPPORTED is known to be set
+        * here because of the early return above, so this condition is
+        * always true - confirm that is intended.
+        */
+       if (bp->hwrm_spec_code < HWRM_SPEC_CODE_1_9_2 ||
+           bp->flags & BNXT_FLAG_EXT_STATS_SUPPORTED) {
+               bp->hw_tx_port_stats_ext = (void *)
+                       ((uint8_t *)bp->hw_tx_port_stats +
+                        sizeof(struct tx_port_stats));
+               bp->hw_tx_port_stats_ext_map =
+                       bp->hw_tx_port_stats_map +
+                       sizeof(struct tx_port_stats);
+               bp->flags |= BNXT_FLAG_EXT_TX_PORT_STATS;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate the ethdev MAC address table and fill in entry 0.
+ *
+ * The table has room for bp->max_l2_ctx addresses. When bp->dflt_mac_addr
+ * is reported as all-zero by bnxt_check_zero_bytes(), a PF fails with
+ * -EINVAL, while a VF generates a random address, programs it with
+ * bnxt_hwrm_set_mac() and copies it into entry 0 only if that succeeds.
+ * Otherwise the default address is copied to bp->mac_addr and entry 0.
+ *
+ * Returns 0 on success, -ENOMEM if the table cannot be allocated, -EINVAL
+ * for a PF without a default MAC, or the bnxt_hwrm_set_mac() result.
+ *
+ * NOTE(review): the error returns after the allocation leave
+ * eth_dev->data->mac_addrs allocated - confirm the caller's cleanup path
+ * frees it.
+ */
+static int bnxt_setup_mac_addr(struct rte_eth_dev *eth_dev)
+{
+       struct bnxt *bp = eth_dev->data->dev_private;
+       int rc = 0;
+
        eth_dev->data->mac_addrs = rte_zmalloc("bnxt_mac_addr_tbl",
-                                       RTE_ETHER_ADDR_LEN * bp->max_l2_ctx, 0);
+                                              RTE_ETHER_ADDR_LEN *
+                                              bp->max_l2_ctx,
+                                              0);
        if (eth_dev->data->mac_addrs == NULL) {
-               PMD_DRV_LOG(ERR,
-                       "Failed to alloc %u bytes needed to store MAC addr tbl",
-                       RTE_ETHER_ADDR_LEN * bp->max_l2_ctx);
-               rc = -ENOMEM;
-               goto error_free;
+               PMD_DRV_LOG(ERR, "Failed to alloc MAC addr tbl\n");
+               return -ENOMEM;
        }
 
        if (bnxt_check_zero_bytes(bp->dflt_mac_addr, RTE_ETHER_ADDR_LEN)) {
-               PMD_DRV_LOG(ERR,
-                           "Invalid MAC addr %02X:%02X:%02X:%02X:%02X:%02X\n",
-                           bp->dflt_mac_addr[0], bp->dflt_mac_addr[1],
-                           bp->dflt_mac_addr[2], bp->dflt_mac_addr[3],
-                           bp->dflt_mac_addr[4], bp->dflt_mac_addr[5]);
-               rc = -EINVAL;
-               goto error_free;
+               /* A PF without a default MAC is an error (no log emitted) */
+               if (BNXT_PF(bp))
+                       return -EINVAL;
+
+               /* Generate a random MAC address, if none was assigned by PF */
+               PMD_DRV_LOG(INFO, "VF MAC address not assigned by Host PF\n");
+               bnxt_eth_hw_addr_random(bp->mac_addr);
+               PMD_DRV_LOG(INFO,
+                           "Assign random MAC:%02X:%02X:%02X:%02X:%02X:%02X\n",
+                           bp->mac_addr[0], bp->mac_addr[1], bp->mac_addr[2],
+                           bp->mac_addr[3], bp->mac_addr[4], bp->mac_addr[5]);
+
+               /* bp->eth_dev is used here, eth_dev everywhere else */
+               rc = bnxt_hwrm_set_mac(bp);
+               if (!rc)
+                       memcpy(&bp->eth_dev->data->mac_addrs[0], bp->mac_addr,
+                              RTE_ETHER_ADDR_LEN);
+               return rc;
        }
-       /* Copy the permanent MAC from the qcap response address now. */
-       memcpy(bp->mac_addr, bp->dflt_mac_addr, sizeof(bp->mac_addr));
+
+       /* Copy the permanent MAC from the FUNC_QCAPS response */
+       memcpy(bp->mac_addr, bp->dflt_mac_addr, RTE_ETHER_ADDR_LEN);
        memcpy(&eth_dev->data->mac_addrs[0], bp->mac_addr, RTE_ETHER_ADDR_LEN);
 
-       if (bp->max_ring_grps < bp->rx_cp_nr_rings) {
-               /* 1 ring is for default completion ring */
-               PMD_DRV_LOG(ERR, "Insufficient resource: Ring Group\n");
-               rc = -ENOSPC;
-               goto error_free;
-       }
+       return rc;
+}
 
-       bp->grp_info = rte_zmalloc("bnxt_grp_info",
-                               sizeof(*bp->grp_info) * bp->max_ring_grps, 0);
-       if (!bp->grp_info) {
-               PMD_DRV_LOG(ERR,
-                       "Failed to alloc %zu bytes to store group info table\n",
-                       sizeof(*bp->grp_info) * bp->max_ring_grps);
-               rc = -ENOMEM;
-               goto error_free;
+/*
+ * Push the driver's MAC address back to firmware when firmware reports no
+ * default MAC, i.e. bnxt_check_zero_bytes() flags bp->dflt_mac_addr.
+ *
+ * Returns 0 when nothing had to be done or bnxt_hwrm_set_mac() succeeded,
+ * otherwise the error returned by bnxt_hwrm_set_mac().
+ */
+static int bnxt_restore_dflt_mac(struct bnxt *bp)
+{
+       int status;
+
+       if (bnxt_check_zero_bytes(bp->dflt_mac_addr, RTE_ETHER_ADDR_LEN)) {
+               /* Restore the old MAC configured */
+               status = bnxt_hwrm_set_mac(bp);
+               if (status != 0)
+                       PMD_DRV_LOG(ERR, "Failed to restore MAC address\n");
+
+               return status;
+       }
+
+       /* MAC is already configured in FW */
+       return 0;
+}
+
+/*
+ * Initialise the PF's bitmap of VF HWRM requests (bp->pf.vf_req_fwd).
+ *
+ * Does nothing on a VF. The bitmap is set to all ones when the firmware
+ * version falls in the supported ranges and to all zeroes (with a warning)
+ * otherwise; afterwards the bits of the commands VF drivers need for their
+ * own cleanup are cleared through ALLOW_FUNC().
+ */
+static void bnxt_config_vf_req_fwd(struct bnxt *bp)
+{
+       if (!BNXT_PF(bp))
+               return;
+
+/*
+ * Clear bit (x & 0x1f) of 32-bit word (x >> 5) in the bitmap. The shift
+ * uses an unsigned constant so that bit 31 is well defined.
+ */
+#define ALLOW_FUNC(x)  \
+       { \
+               uint32_t arg = (x); \
+               bp->pf.vf_req_fwd[((arg) >> 5)] &= \
+               ~rte_cpu_to_le_32(1U << ((arg) & 0x1f)); \
        }
 
        /* Forward all requests if firmware is new enough */
        if (((bp->fw_ver >= ((20 << 24) | (6 << 16) | (100 << 8))) &&
-           (bp->fw_ver < ((20 << 24) | (7 << 16)))) ||
+            (bp->fw_ver < ((20 << 24) | (7 << 16)))) ||
            ((bp->fw_ver >= ((20 << 24) | (8 << 16))))) {
                memset(bp->pf.vf_req_fwd, 0xff, sizeof(bp->pf.vf_req_fwd));
        } else {
                PMD_DRV_LOG(WARNING,
-                       "Firmware too old for VF mailbox functionality\n");
+                           "Firmware too old for VF mailbox functionality\n");
                memset(bp->pf.vf_req_fwd, 0, sizeof(bp->pf.vf_req_fwd));
        }
 
        /*
-        * The following are used for driver cleanup.  If we disallow these,
+        * The following are used for driver cleanup. If we disallow these,
         * VF drivers can't clean up cleanly.
         */
        ALLOW_FUNC(HWRM_FUNC_DRV_UNRGTR);
@@ -3525,73 +4442,207 @@ skip_ext_stats:
        ALLOW_FUNC(HWRM_STAT_CTX_FREE);
        ALLOW_FUNC(HWRM_PORT_PHY_QCFG);
        ALLOW_FUNC(HWRM_VNIC_TPA_CFG);
-       rc = bnxt_hwrm_func_driver_register(bp);
-       if (rc) {
-               PMD_DRV_LOG(ERR,
-                       "Failed to register driver");
-               rc = -EBUSY;
-               goto error_free;
-       }
+}
 
-       PMD_DRV_LOG(INFO,
-               DRV_MODULE_NAME " found at mem %" PRIx64 ", node addr %pM\n",
-               pci_dev->mem_resource[0].phys_addr,
-               pci_dev->mem_resource[0].addr);
+/*
+ * Run the firmware queries needed before resources can be set up.
+ *
+ * In order: HWRM version, function reset, queue port configuration,
+ * function capabilities, function configuration, error-recovery
+ * configuration and port LED capabilities.
+ *
+ * Returns 0 on success. A failed function reset is reported as -EIO; a
+ * failure of the version, queue, capability or configuration query returns
+ * that helper's error code. A failed error-recovery query is not fatal and
+ * only clears BNXT_FLAG_FW_CAP_ERROR_RECOVERY. The result of
+ * bnxt_hwrm_port_led_qcaps() is ignored.
+ */
+static int bnxt_init_fw(struct bnxt *bp)
+{
+       uint16_t mtu;
+       int rc = 0;
+
+       rc = bnxt_hwrm_ver_get(bp);
+       if (rc)
+               return rc;
 
        rc = bnxt_hwrm_func_reset(bp);
+       if (rc)
+               return -EIO;
+
+       rc = bnxt_hwrm_queue_qportcfg(bp);
+       if (rc)
+               return rc;
+
+       /* Get the MAX capabilities for this function */
+       rc = bnxt_hwrm_func_qcaps(bp);
+       if (rc)
+               return rc;
+
+       /* presumably fills in mtu on success - TODO confirm */
+       rc = bnxt_hwrm_func_qcfg(bp, &mtu);
+       if (rc)
+               return rc;
+
+       /* Get the adapter error recovery support info */
+       rc = bnxt_hwrm_error_recovery_qcfg(bp);
+       if (rc)
+               bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+
+       /* Adopt the reported MTU only when it is in range and differs */
+       if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
+           mtu != bp->eth_dev->data->mtu)
+               bp->eth_dev->data->mtu = mtu;
+
+       bnxt_hwrm_port_led_qcaps(bp);
+
+       return 0;
+}
+
+/*
+ * Bring up firmware state, MAC address, driver registration, PF/VF
+ * resources, memory and interrupts for the port.
+ *
+ * reconfig_dev selects the MAC step: false runs bnxt_setup_mac_addr(),
+ * true runs bnxt_restore_dflt_mac(); it is also passed on to
+ * bnxt_alloc_mem().
+ *
+ * Returns 0 on success. A failed driver registration is reported as
+ * -EBUSY; every other failure returns the failing helper's error code.
+ * Nothing is unwound here on failure.
+ */
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev)
+{
+       int rc;
+
+       rc = bnxt_init_fw(bp);
+       if (rc)
+               return rc;
+
+       if (!reconfig_dev) {
+               rc = bnxt_setup_mac_addr(bp->eth_dev);
+               if (rc)
+                       return rc;
+       } else {
+               rc = bnxt_restore_dflt_mac(bp);
+               if (rc)
+                       return rc;
+       }
+
+       bnxt_config_vf_req_fwd(bp);
+
+       rc = bnxt_hwrm_func_driver_register(bp);
        if (rc) {
-               PMD_DRV_LOG(ERR, "hwrm chip reset failure rc: %x\n", rc);
-               rc = -EIO;
-               goto error_free;
+               PMD_DRV_LOG(ERR, "Failed to register driver\n");
+               return -EBUSY;
        }
 
        if (BNXT_PF(bp)) {
-               //if (bp->pf.active_vfs) {
-                       // TODO: Deallocate VF resources?
-               //}
                if (bp->pdev->max_vfs) {
                        rc = bnxt_hwrm_allocate_vfs(bp, bp->pdev->max_vfs);
                        if (rc) {
                                PMD_DRV_LOG(ERR, "Failed to allocate VFs\n");
-                               goto error_free;
+                               return rc;
                        }
                } else {
                        rc = bnxt_hwrm_allocate_pf_only(bp);
                        if (rc) {
                                PMD_DRV_LOG(ERR,
-                                       "Failed to allocate PF resources\n");
-                               goto error_free;
+                                           "Failed to allocate PF resources\n");
+                               return rc;
                        }
                }
        }
 
-       bnxt_hwrm_port_led_qcaps(bp);
+       rc = bnxt_alloc_mem(bp, reconfig_dev);
+       if (rc)
+               return rc;
 
        rc = bnxt_setup_int(bp);
        if (rc)
-               goto error_free;
+               return rc;
 
-       rc = bnxt_alloc_mem(bp);
-       if (rc)
-               goto error_free_int;
+       /* Any result of bnxt_init_nic() is not checked */
+       bnxt_init_nic(bp);
 
        rc = bnxt_request_int(bp);
        if (rc)
-               goto error_free_int;
+               return rc;
 
-       bnxt_enable_int(bp);
-       bnxt_init_nic(bp);
+       return 0;
+}
+
+/*
+ * ethdev init callback for a bnxt port.
+ *
+ * Installs the dev_ops table and the RX/TX burst functions. A process that
+ * is not the primary returns 0 right after that. The primary process then
+ * copies the PCI info, marks the device stopped, derives the VF, Thor and
+ * Stingray flags from the PCI device ID, initialises the board, allocates
+ * the HWRM resources, runs bnxt_init_resources() and allocates the port
+ * statistics memory.
+ *
+ * Returns 0 on success. A bnxt_init_board() failure is returned directly;
+ * any later failure goes through bnxt_dev_uninit() before returning rc.
+ */
+static int
+bnxt_dev_init(struct rte_eth_dev *eth_dev)
+{
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+       static int version_printed;
+       struct bnxt *bp;
+       int rc;
+
+       /* Print the driver version once, on the first port only */
+       if (version_printed++ == 0)
+               PMD_DRV_LOG(INFO, "%s\n", bnxt_version);
+
+       eth_dev->dev_ops = &bnxt_dev_ops;
+       eth_dev->rx_pkt_burst = &bnxt_recv_pkts;
+       eth_dev->tx_pkt_burst = &bnxt_xmit_pkts;
+
+       /*
+        * For secondary processes, we don't initialise any further
+        * as primary has already done this work.
+        */
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
+       rte_eth_copy_pci_info(eth_dev, pci_dev);
+
+       bp = eth_dev->data->dev_private;
+
+       bp->dev_stopped = 1;
+
+       if (bnxt_vf_pciid(pci_dev->id.device_id))
+               bp->flags |= BNXT_FLAG_VF;
+
+       if (pci_dev->id.device_id == BROADCOM_DEV_ID_57508 ||
+           pci_dev->id.device_id == BROADCOM_DEV_ID_57504 ||
+           pci_dev->id.device_id == BROADCOM_DEV_ID_57502 ||
+           pci_dev->id.device_id == BROADCOM_DEV_ID_57500_VF1 ||
+           pci_dev->id.device_id == BROADCOM_DEV_ID_57500_VF2)
+               bp->flags |= BNXT_FLAG_THOR_CHIP;
+
+       if (pci_dev->id.device_id == BROADCOM_DEV_ID_58802 ||
+           pci_dev->id.device_id == BROADCOM_DEV_ID_58804 ||
+           pci_dev->id.device_id == BROADCOM_DEV_ID_58808 ||
+           pci_dev->id.device_id == BROADCOM_DEV_ID_58802_VF)
+               bp->flags |= BNXT_FLAG_STINGRAY;
+
+       rc = bnxt_init_board(eth_dev);
+       if (rc) {
+               PMD_DRV_LOG(ERR,
+                           "Failed to initialize board rc: %x\n", rc);
+               return rc;
+       }
+
+       rc = bnxt_alloc_hwrm_resources(bp);
+       if (rc) {
+               PMD_DRV_LOG(ERR,
+                           "Failed to allocate hwrm resource rc: %x\n", rc);
+               goto error_free;
+       }
+       rc = bnxt_init_resources(bp, false);
+       if (rc)
+               goto error_free;
+
+       rc = bnxt_alloc_stats_mem(bp);
+       if (rc)
+               goto error_free;
+
+       /* %p, not the kernel-only %pM: the argument is a plain pointer */
+       PMD_DRV_LOG(INFO,
+                   DRV_MODULE_NAME " found at mem %" PRIX64 ", node addr %p\n",
+                   pci_dev->mem_resource[0].phys_addr,
+                   pci_dev->mem_resource[0].addr);
 
        return 0;
 
-error_free_int:
-       bnxt_disable_int(bp);
-       bnxt_hwrm_func_buf_unrgtr(bp);
-       bnxt_free_int(bp);
-       bnxt_free_mem(bp);
 error_free:
        bnxt_dev_uninit(eth_dev);
-error:
+       return rc;
+}
+
+/*
+ * Tear down what bnxt_init_resources() set up.
+ *
+ * Frees interrupts and memory, unregisters the function buffers and the
+ * driver from firmware, clears BNXT_FLAG_REGISTERED and releases the
+ * context memory. When reconfig_dev is false the HWRM resources and
+ * bp->recovery_info are released as well. bp->ptp_cfg is always freed.
+ *
+ * Returns the result of bnxt_hwrm_func_driver_unregister(); the remaining
+ * steps run regardless of it.
+ */
+static int
+bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
+{
+       int rc;
+
+       bnxt_free_int(bp);
+       bnxt_free_mem(bp, reconfig_dev);
+       bnxt_hwrm_func_buf_unrgtr(bp);
+       rc = bnxt_hwrm_func_driver_unregister(bp, 0);
+       bp->flags &= ~BNXT_FLAG_REGISTERED;
+       bnxt_free_ctx_mem(bp);
+       /* HWRM resources and recovery info are kept across a reconfig */
+       if (!reconfig_dev) {
+               bnxt_free_hwrm_resources(bp);
+
+               if (bp->recovery_info != NULL) {
+                       rte_free(bp->recovery_info);
+                       bp->recovery_info = NULL;
+               }
+       }
+
+       rte_free(bp->ptp_cfg);
+       bp->ptp_cfg = NULL;
        return rc;
 }
 
@@ -3605,15 +4656,13 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
                return -EPERM;
 
        PMD_DRV_LOG(DEBUG, "Calling Device uninit\n");
-       bnxt_disable_int(bp);
-       bnxt_free_int(bp);
-       bnxt_free_mem(bp);
+
+       rc = bnxt_uninit_resources(bp, false);
+
        if (bp->grp_info != NULL) {
                rte_free(bp->grp_info);
                bp->grp_info = NULL;
        }
-       rc = bnxt_hwrm_func_driver_unregister(bp, 0);
-       bnxt_free_hwrm_resources(bp);
 
        if (bp->tx_mem_zone) {
                rte_memzone_free((const struct rte_memzone *)bp->tx_mem_zone);
@@ -3654,8 +4703,7 @@ static int bnxt_pci_remove(struct rte_pci_device *pci_dev)
 
+/*
+ * PCI driver descriptor for the bnxt PMD: matches the devices listed in
+ * bnxt_pci_id_map and wires up the probe/remove callbacks. drv_flags now
+ * carries only RTE_PCI_DRV_NEED_MAPPING and RTE_PCI_DRV_INTR_LSC.
+ */
 static struct rte_pci_driver bnxt_rte_pmd = {
        .id_table = bnxt_pci_id_map,
-       .drv_flags = RTE_PCI_DRV_NEED_MAPPING |
-               RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_IOVA_AS_VA,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
        .probe = bnxt_pci_probe,
        .remove = bnxt_pci_remove,
 };