net/bnxt: support for QinQ insertion and stripping
[dpdk.git] / drivers / net / bnxt / bnxt_ethdev.c
index 6bc006a..b3a37e1 100644 (file)
@@ -151,6 +151,7 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
                                     DEV_TX_OFFLOAD_GRE_TNL_TSO | \
                                     DEV_TX_OFFLOAD_IPIP_TNL_TSO | \
                                     DEV_TX_OFFLOAD_GENEVE_TNL_TSO | \
+                                    DEV_TX_OFFLOAD_QINQ_INSERT | \
                                     DEV_TX_OFFLOAD_MULTI_SEGS)
 
 #define BNXT_DEV_RX_OFFLOAD_SUPPORT (DEV_RX_OFFLOAD_VLAN_FILTER | \
@@ -161,6 +162,7 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
                                     DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | \
                                     DEV_RX_OFFLOAD_JUMBO_FRAME | \
                                     DEV_RX_OFFLOAD_KEEP_CRC | \
+                                    DEV_RX_OFFLOAD_VLAN_EXTEND | \
                                     DEV_RX_OFFLOAD_TCP_LRO)
 
 static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
@@ -169,6 +171,7 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
 static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev);
 static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev);
+static void bnxt_cancel_fw_health_check(struct bnxt *bp);
 
 int is_bnxt_in_error(struct bnxt *bp)
 {
@@ -520,6 +523,7 @@ static int bnxt_init_nic(struct bnxt *bp)
 static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
                                struct rte_eth_dev_info *dev_info)
 {
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(eth_dev->device);
        struct bnxt *bp = eth_dev->data->dev_private;
        uint16_t max_vnics, i, j, vpool, vrxq;
        unsigned int max_rx_rings;
@@ -535,7 +539,8 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
 
        /* PF/VF specifics */
        if (BNXT_PF(bp))
-               dev_info->max_vfs = bp->pdev->max_vfs;
+               dev_info->max_vfs = pdev->max_vfs;
+
        max_rx_rings = RTE_MIN(bp->max_rx_rings, bp->max_stat_ctx);
        /* For the sake of symmetry, max_rx_queues = max_tx_queues */
        dev_info->max_rx_queues = max_rx_rings;
@@ -544,10 +549,13 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
        dev_info->hash_key_size = 40;
        max_vnics = bp->max_vnics;
 
+       /* MTU specifics */
+       dev_info->min_mtu = RTE_ETHER_MIN_MTU;
+       dev_info->max_mtu = BNXT_MAX_MTU;
+
        /* Fast path specifics */
        dev_info->min_rx_bufsize = 1;
-       dev_info->max_rx_pktlen = BNXT_MAX_MTU + RTE_ETHER_HDR_LEN +
-               RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE * 2;
+       dev_info->max_rx_pktlen = BNXT_MAX_PKT_LEN;
 
        dev_info->rx_offload_capa = BNXT_DEV_RX_OFFLOAD_SUPPORT;
        if (bp->flags & BNXT_FLAG_PTP_SUPPORTED)
@@ -739,6 +747,7 @@ static eth_rx_burst_t
 bnxt_receive_function(__rte_unused struct rte_eth_dev *eth_dev)
 {
 #ifdef RTE_ARCH_X86
+#ifndef RTE_LIBRTE_IEEE1588
        /*
         * Vector mode receive can be enabled only if scatter rx is not
         * in use and rx offloads are limited to VLAN stripping and
@@ -765,6 +774,7 @@ bnxt_receive_function(__rte_unused struct rte_eth_dev *eth_dev)
                    eth_dev->data->port_id,
                    eth_dev->data->scattered_rx,
                    eth_dev->data->dev_conf.rxmode.offloads);
+#endif
 #endif
        return bnxt_recv_pkts;
 }
@@ -773,6 +783,7 @@ static eth_tx_burst_t
 bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 {
 #ifdef RTE_ARCH_X86
+#ifndef RTE_LIBRTE_IEEE1588
        /*
         * Vector mode transmit can be enabled only if not using scatter rx
         * or tx offloads.
@@ -790,10 +801,30 @@ bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
                    eth_dev->data->port_id,
                    eth_dev->data->scattered_rx,
                    eth_dev->data->dev_conf.txmode.offloads);
+#endif
 #endif
        return bnxt_xmit_pkts;
 }
 
+static int bnxt_handle_if_change_status(struct bnxt *bp)
+{ /* Tear down and re-create driver resources after a FW reset; returns the bnxt_init_resources() result. */
+       int rc;
+
+       /* Firmware has undergone a reset and lost all contexts, so set the
+        * fatal flag to avoid issuing HWRM commands during cleanup.
+        */
+       bp->flags |= BNXT_FLAG_FATAL_ERROR;
+       bnxt_uninit_resources(bp, true); /* reconfig_dev = true; return value is ignored */
+
+       /* Clear the fatal flag so that re-initialisation can happen. */
+       bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+       rc = bnxt_init_resources(bp, true);
+
+       bp->flags &= ~BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE; /* cleared whether or not re-init succeeded */
+
+       return rc;
+}
+
 static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 {
        struct bnxt *bp = eth_dev->data->dev_private;
@@ -807,6 +838,16 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
                        bp->rx_cp_nr_rings, RTE_ETHDEV_QUEUE_STAT_CNTRS);
        }
 
+       bnxt_enable_int(bp);
+       rc = bnxt_hwrm_if_change(bp, 1);
+       if (!rc) {
+               if (bp->flags & BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE) {
+                       rc = bnxt_handle_if_change_status(bp);
+                       if (rc)
+                               return rc;
+               }
+       }
+
        rc = bnxt_init_chip(bp);
        if (rc)
                goto error;
@@ -826,13 +867,14 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
        eth_dev->rx_pkt_burst = bnxt_receive_function(eth_dev);
        eth_dev->tx_pkt_burst = bnxt_transmit_function(eth_dev);
 
-       bnxt_enable_int(bp);
        bp->flags |= BNXT_FLAG_INIT_DONE;
        eth_dev->data->dev_started = 1;
        bp->dev_stopped = 0;
+       bnxt_schedule_fw_health_check(bp);
        return 0;
 
 error:
+       bnxt_hwrm_if_change(bp, 0);
        bnxt_shutdown_nic(bp);
        bnxt_free_tx_mbufs(bp);
        bnxt_free_rx_mbufs(bp);
@@ -881,12 +923,16 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
        /* disable uio/vfio intr/eventfd mapping */
        rte_intr_disable(intr_handle);
 
+       bnxt_cancel_fw_health_check(bp);
+
        bp->flags &= ~BNXT_FLAG_INIT_DONE;
        if (bp->eth_dev->data->dev_started) {
                /* TBD: STOP HW queues DMA */
                eth_dev->data->dev_link.link_status = 0;
        }
-       bnxt_set_hwrm_link_config(bp, false);
+       bnxt_dev_set_link_down_op(eth_dev);
+       /* Wait for link to be reset and the async notification to process. */
+       rte_delay_ms(BNXT_LINK_WAIT_INTERVAL * 2);
 
        /* Clean queue intr-vector mapping */
        rte_intr_efd_disable(intr_handle);
@@ -898,7 +944,10 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
        bnxt_hwrm_port_clr_stats(bp);
        bnxt_free_tx_mbufs(bp);
        bnxt_free_rx_mbufs(bp);
+       /* Process any remaining notifications in default completion queue */
+       bnxt_int_handler(eth_dev);
        bnxt_shutdown_nic(bp);
+       bnxt_hwrm_if_change(bp, 0);
        bp->dev_stopped = 1;
 }
 
@@ -1043,8 +1092,7 @@ out:
        /* Timed out or success */
        if (new.link_status != eth_dev->data->dev_link.link_status ||
        new.link_speed != eth_dev->data->dev_link.link_speed) {
-               memcpy(&eth_dev->data->dev_link, &new,
-                       sizeof(struct rte_eth_link));
+               rte_eth_linkstatus_set(eth_dev, &new);
 
                _rte_eth_dev_callback_process(eth_dev,
                                              RTE_ETH_EVENT_INTR_LSC,
@@ -1785,15 +1833,77 @@ bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask)
                        !!(rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP));
        }
 
-       if (mask & ETH_VLAN_EXTEND_MASK)
-               PMD_DRV_LOG(ERR, "Extend VLAN Not supported\n");
+       if (mask & ETH_VLAN_EXTEND_MASK) {
+               if (rx_offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
+                       PMD_DRV_LOG(DEBUG, "Extend VLAN supported\n");
+               else
+                       PMD_DRV_LOG(INFO, "Extend VLAN unsupported\n");
+       }
+
+       return 0;
+}
+
+static int
+bnxt_vlan_tpid_set_op(struct rte_eth_dev *dev, enum rte_vlan_type vlan_type,
+                     uint16_t tpid)
+{ /* Record the outer-VLAN TPID in bp->outer_tpid_bd; returns 0 on success, -EINVAL on any rejection. */
+       struct bnxt *bp = dev->data->dev_private;
+       int qinq = dev->data->dev_conf.rxmode.offloads &
+                  DEV_RX_OFFLOAD_VLAN_EXTEND; /* non-zero when VLAN_EXTEND is set in the Rx offloads */
+
+       if (vlan_type != ETH_VLAN_TYPE_INNER &&
+           vlan_type != ETH_VLAN_TYPE_OUTER) { /* only inner/outer types get past this check */
+               PMD_DRV_LOG(ERR,
+                           "Unsupported vlan type.");
+               return -EINVAL;
+       }
+       if (!qinq) { /* TPID changes are refused unless the VLAN_EXTEND offload is configured */
+               PMD_DRV_LOG(ERR,
+                           "QinQ not enabled. Needs to be ON as we can "
+                           "accelerate only outer vlan\n");
+               return -EINVAL;
+       }
+
+       if (vlan_type == ETH_VLAN_TYPE_OUTER) {
+               switch (tpid) { /* map the TPID to its TX_BD_LONG_CFA_META_VLAN_TPID_* constant */
+               case RTE_ETHER_TYPE_QINQ:
+                       bp->outer_tpid_bd =
+                               TX_BD_LONG_CFA_META_VLAN_TPID_TPID88A8;
+                               break;
+               case RTE_ETHER_TYPE_VLAN:
+                       bp->outer_tpid_bd =
+                               TX_BD_LONG_CFA_META_VLAN_TPID_TPID8100;
+                               break;
+               case 0x9100:
+                       bp->outer_tpid_bd =
+                               TX_BD_LONG_CFA_META_VLAN_TPID_TPID9100;
+                               break;
+               case 0x9200:
+                       bp->outer_tpid_bd =
+                               TX_BD_LONG_CFA_META_VLAN_TPID_TPID9200;
+                               break;
+               case 0x9300:
+                       bp->outer_tpid_bd =
+                                TX_BD_LONG_CFA_META_VLAN_TPID_TPID9300;
+                               break;
+               default: /* any other TPID value is rejected */
+                       PMD_DRV_LOG(ERR, "Invalid TPID: %x\n", tpid);
+                       return -EINVAL;
+               }
+               bp->outer_tpid_bd |= tpid; /* raw TPID is OR-ed into the same field; NOTE(review): confirm how the Tx path consumes it */
+               PMD_DRV_LOG(INFO, "outer_tpid_bd = %x\n", bp->outer_tpid_bd);
+       } else if (vlan_type == ETH_VLAN_TYPE_INNER) { /* the only other type left after the validation above */
+               PMD_DRV_LOG(ERR,
+                           "Can accelerate only outer vlan in QinQ\n");
+               return -EINVAL;
+       }
 
        return 0;
 }
 
 static int
 bnxt_set_default_mac_addr_op(struct rte_eth_dev *dev,
-                       struct rte_ether_addr *addr)
+                            struct rte_ether_addr *addr)
 {
        struct bnxt *bp = dev->data->dev_private;
        /* Default Filter is tied to VNIC 0 */
@@ -1904,7 +2014,7 @@ bnxt_rxq_info_get_op(struct rte_eth_dev *dev, uint16_t queue_id,
 
        qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
        qinfo->conf.rx_drop_en = 0;
-       qinfo->conf.rx_deferred_start = 0;
+       qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
 }
 
 static void
@@ -1929,7 +2039,6 @@ bnxt_txq_info_get_op(struct rte_eth_dev *dev, uint16_t queue_id,
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
 {
        struct bnxt *bp = eth_dev->data->dev_private;
-       struct rte_eth_dev_info dev_info;
        uint32_t new_pkt_size;
        uint32_t rc = 0;
        uint32_t i;
@@ -1941,18 +2050,6 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
        new_pkt_size = new_mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN +
                       VLAN_TAG_SIZE * BNXT_NUM_VLANS;
 
-       rc = bnxt_dev_info_get_op(eth_dev, &dev_info);
-       if (rc != 0) {
-               PMD_DRV_LOG(ERR, "Error during getting ethernet device info\n");
-               return rc;
-       }
-
-       if (new_mtu < RTE_ETHER_MIN_MTU || new_mtu > BNXT_MAX_MTU) {
-               PMD_DRV_LOG(ERR, "MTU requested must be within (%d, %d)\n",
-                       RTE_ETHER_MIN_MTU, BNXT_MAX_MTU);
-               return -EINVAL;
-       }
-
 #ifdef RTE_ARCH_X86
        /*
         * If vector-mode tx/rx is active, disallow any MTU change that would
@@ -1982,15 +2079,12 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
 
        eth_dev->data->dev_conf.rxmode.max_rx_pkt_len = new_pkt_size;
 
-       eth_dev->data->mtu = new_mtu;
-       PMD_DRV_LOG(INFO, "New MTU is %d\n", eth_dev->data->mtu);
-
        for (i = 0; i < bp->nr_vnics; i++) {
                struct bnxt_vnic_info *vnic = &bp->vnic_info[i];
                uint16_t size = 0;
 
-               vnic->mru = bp->eth_dev->data->mtu + RTE_ETHER_HDR_LEN +
-                                       RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE * 2;
+               vnic->mru = new_mtu + RTE_ETHER_HDR_LEN +
+                               RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE * 2;
                rc = bnxt_hwrm_vnic_cfg(bp, vnic);
                if (rc)
                        break;
@@ -2005,6 +2099,8 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
                }
        }
 
+       PMD_DRV_LOG(INFO, "New MTU is %d\n", new_mtu);
+
        return rc;
 }
 
@@ -2066,9 +2162,6 @@ bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        struct bnxt_cp_ring_info *cpr;
        struct bnxt_rx_queue *rxq;
        struct rx_pkt_cmpl *rxcmp;
-       uint16_t cmp_type;
-       uint8_t cmp = 1;
-       bool valid;
        int rc;
 
        rc = is_bnxt_in_error(bp);
@@ -2077,33 +2170,19 @@ bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 
        rxq = dev->data->rx_queues[rx_queue_id];
        cpr = rxq->cp_ring;
-       valid = cpr->valid;
+       raw_cons = cpr->cp_raw_cons;
 
-       while (raw_cons < rxq->nb_rx_desc) {
+       while (1) {
                cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
+               rte_prefetch0(&cpr->cp_desc_ring[cons]);
                rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
 
-               if (!CMPL_VALID(rxcmp, valid))
-                       goto nothing_to_do;
-               valid = FLIP_VALID(cons, cpr->cp_ring_struct->ring_mask, valid);
-               cmp_type = CMP_TYPE(rxcmp);
-               if (cmp_type == RX_TPA_END_CMPL_TYPE_RX_TPA_END) {
-                       cmp = (rte_le_to_cpu_32(
-                                       ((struct rx_tpa_end_cmpl *)
-                                        (rxcmp))->agg_bufs_v1) &
-                              RX_TPA_END_CMPL_AGG_BUFS_MASK) >>
-                               RX_TPA_END_CMPL_AGG_BUFS_SFT;
-                       desc++;
-               } else if (cmp_type == 0x11) {
-                       desc++;
-                       cmp = (rxcmp->agg_bufs_v1 &
-                                  RX_PKT_CMPL_AGG_BUFS_MASK) >>
-                               RX_PKT_CMPL_AGG_BUFS_SFT;
+               if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct)) {
+                       break;
                } else {
-                       cmp = 1;
+                       raw_cons++;
+                       desc++;
                }
-nothing_to_do:
-               raw_cons += cmp ? cmp : 2;
        }
 
        return desc;
@@ -3189,18 +3268,24 @@ bnxt_timesync_write_time(struct rte_eth_dev *dev, const struct timespec *ts)
 static int
 bnxt_timesync_read_time(struct rte_eth_dev *dev, struct timespec *ts)
 {
-       uint64_t ns, systime_cycles;
        struct bnxt *bp = dev->data->dev_private;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       uint64_t ns, systime_cycles = 0;
+       int rc = 0;
 
        if (!ptp)
                return 0;
 
-       systime_cycles = bnxt_cc_read(bp);
+       if (BNXT_CHIP_THOR(bp))
+               rc = bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_CURRENT_TIME,
+                                            &systime_cycles);
+       else
+               systime_cycles = bnxt_cc_read(bp);
+
        ns = rte_timecounter_update(&ptp->tc, systime_cycles);
        *ts = rte_ns_to_timespec(ns);
 
-       return 0;
+       return rc;
 }
 static int
 bnxt_timesync_enable(struct rte_eth_dev *dev)
@@ -3208,6 +3293,7 @@ bnxt_timesync_enable(struct rte_eth_dev *dev)
        struct bnxt *bp = dev->data->dev_private;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
        uint32_t shift = 0;
+       int rc;
 
        if (!ptp)
                return 0;
@@ -3216,8 +3302,9 @@ bnxt_timesync_enable(struct rte_eth_dev *dev)
        ptp->tx_tstamp_en = 1;
        ptp->rxctl = BNXT_PTP_MSG_EVENTS;
 
-       if (!bnxt_hwrm_ptp_cfg(bp))
-               bnxt_map_ptp_regs(bp);
+       rc = bnxt_hwrm_ptp_cfg(bp);
+       if (rc)
+               return rc;
 
        memset(&ptp->tc, 0, sizeof(struct rte_timecounter));
        memset(&ptp->rx_tstamp_tc, 0, sizeof(struct rte_timecounter));
@@ -3235,6 +3322,9 @@ bnxt_timesync_enable(struct rte_eth_dev *dev)
        ptp->tx_tstamp_tc.cc_shift = shift;
        ptp->tx_tstamp_tc.nsec_mask = (1ULL << shift) - 1;
 
+       if (!BNXT_CHIP_THOR(bp))
+               bnxt_map_ptp_regs(bp);
+
        return 0;
 }
 
@@ -3253,7 +3343,8 @@ bnxt_timesync_disable(struct rte_eth_dev *dev)
 
        bnxt_hwrm_ptp_cfg(bp);
 
-       bnxt_unmap_ptp_regs(bp);
+       if (!BNXT_CHIP_THOR(bp))
+               bnxt_unmap_ptp_regs(bp);
 
        return 0;
 }
@@ -3271,7 +3362,11 @@ bnxt_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
        if (!ptp)
                return 0;
 
-       bnxt_get_rx_ts(bp, &rx_tstamp_cycles);
+       if (BNXT_CHIP_THOR(bp))
+               rx_tstamp_cycles = ptp->rx_timestamp;
+       else
+               bnxt_get_rx_ts(bp, &rx_tstamp_cycles);
+
        ns = rte_timecounter_update(&ptp->rx_tstamp_tc, rx_tstamp_cycles);
        *timestamp = rte_ns_to_timespec(ns);
        return  0;
@@ -3285,15 +3380,21 @@ bnxt_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
        uint64_t tx_tstamp_cycles = 0;
        uint64_t ns;
+       int rc = 0;
 
        if (!ptp)
                return 0;
 
-       bnxt_get_tx_ts(bp, &tx_tstamp_cycles);
+       if (BNXT_CHIP_THOR(bp))
+               rc = bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_PATH_TX,
+                                            &tx_tstamp_cycles);
+       else
+               rc = bnxt_get_tx_ts(bp, &tx_tstamp_cycles);
+
        ns = rte_timecounter_update(&ptp->tx_tstamp_tc, tx_tstamp_cycles);
        *timestamp = rte_ns_to_timespec(ns);
 
-       return 0;
+       return rc;
 }
 
 static int
@@ -3497,6 +3598,7 @@ static const struct eth_dev_ops bnxt_dev_ops = {
        .udp_tunnel_port_del  = bnxt_udp_tunnel_port_del_op,
        .vlan_filter_set = bnxt_vlan_filter_set_op,
        .vlan_offload_set = bnxt_vlan_offload_set_op,
+       .vlan_tpid_set = bnxt_vlan_tpid_set_op,
        .vlan_pvid_set = bnxt_vlan_pvid_set_op,
        .mtu_set = bnxt_mtu_set_op,
        .mac_addr_set = bnxt_set_default_mac_addr_op,
@@ -3532,6 +3634,81 @@ static const struct eth_dev_ops bnxt_dev_ops = {
        .timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+static uint32_t bnxt_map_reset_regs(struct bnxt *bp, uint32_t reg)
+{ /* Program the window register used for window 3 with reg's base and return the matching BAR0 offset. */
+       uint32_t offset;
+
+       /* Only pre-map the reset GRC registers using window 3 */
+       rte_write32(reg & 0xfffff000, (uint8_t *)bp->bar0 +
+                   BNXT_GRCPF_REG_WINDOW_BASE_OUT + 8); /* low 12 bits of reg are masked off for the base */
+
+       offset = BNXT_GRCP_WINDOW_3_BASE + (reg & 0xffc); /* offset inside the window; low 2 bits dropped */
+
+       return offset;
+}
+
+int bnxt_map_fw_health_status_regs(struct bnxt *bp)
+{ /* Pre-map GRC-type FW status registers through window 2; returns 0, or -ERANGE if their bases differ. */
+       struct bnxt_error_recovery_info *info = bp->recovery_info; /* NOTE(review): dereferenced unchecked — confirm callers guarantee non-NULL */
+       uint32_t reg_base = 0xffffffff; /* sentinel: no GRC-type register seen yet */
+       int i;
+
+       /* Only pre-map the monitoring GRC registers using window 2 */
+       for (i = 0; i < BNXT_FW_STATUS_REG_CNT; i++) {
+               uint32_t reg = info->status_regs[i];
+
+               if (BNXT_FW_STATUS_REG_TYPE(reg) != BNXT_FW_STATUS_REG_TYPE_GRC)
+                       continue;
+
+               if (reg_base == 0xffffffff)
+                       reg_base = reg & 0xfffff000; /* first GRC register fixes the base */
+               if ((reg & 0xfffff000) != reg_base)
+                       return -ERANGE; /* only one base is written below, so all GRC registers must share it */
+
+               /* Use mask 0xffc because the lower 2 bits indicate the
+                * address space location.
+                */
+               info->mapped_status_regs[i] = BNXT_GRCP_WINDOW_2_BASE +
+                                               (reg & 0xffc);
+       }
+
+       if (reg_base == 0xffffffff)
+               return 0; /* no GRC-type status registers; nothing to map */
+
+       rte_write32(reg_base, (uint8_t *)bp->bar0 +
+                   BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4); /* program the shared base once */
+
+       return 0;
+}
+
+static void bnxt_write_fw_reset_reg(struct bnxt *bp, uint32_t index)
+{ /* Write reset_reg_val[index] to reset_reg[index] using the access method its type selects, then delay. */
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t delay = info->delay_after_reset[index];
+       uint32_t val = info->reset_reg_val[index];
+       uint32_t reg = info->reset_reg[index];
+       uint32_t type, offset;
+
+       type = BNXT_FW_STATUS_REG_TYPE(reg);
+       offset = BNXT_FW_STATUS_REG_OFF(reg);
+
+       switch (type) { /* no default case: any other type writes nothing */
+       case BNXT_FW_STATUS_REG_TYPE_CFG:
+               rte_pci_write_config(bp->pdev, &val, sizeof(val), offset); /* result is not checked */
+               break;
+       case BNXT_FW_STATUS_REG_TYPE_GRC:
+               offset = bnxt_map_reset_regs(bp, offset); /* remap first, then write through BAR0 */
+               rte_write32(val, (uint8_t *)bp->bar0 + offset);
+               break;
+       case BNXT_FW_STATUS_REG_TYPE_BAR0:
+               rte_write32(val, (uint8_t *)bp->bar0 + offset);
+               break;
+       }
+       /* Wait for this register's delay (in ms) so the core reset can complete */
+       if (delay)
+               rte_delay_ms(delay);
+}
+
 static void bnxt_dev_cleanup(struct bnxt *bp)
 {
        bnxt_set_hwrm_link_config(bp, false);
@@ -3562,6 +3739,9 @@ static void bnxt_dev_recover(void *arg)
        int timeout = bp->fw_reset_max_msecs;
        int rc = 0;
 
+       /* Clear Error flag so that device re-init should happen */
+       bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+
        do {
                rc = bnxt_hwrm_ver_get(bp);
                if (rc == 0)
@@ -3609,12 +3789,168 @@ void bnxt_dev_reset_and_resume(void *arg)
 
        bnxt_dev_cleanup(bp);
 
+       bnxt_wait_for_device_shutdown(bp);
+
        rc = rte_eal_alarm_set(US_PER_MS * bp->fw_reset_min_msecs,
                               bnxt_dev_recover, (void *)bp);
        if (rc)
                PMD_DRV_LOG(ERR, "Error setting recovery alarm");
 }
 
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
+{ /* Read status_regs[index] using the access method its type selects and return the 32-bit value. */
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t reg = info->status_regs[index];
+       uint32_t type, offset, val = 0; /* val stays 0 when the type matches no case below */
+
+       type = BNXT_FW_STATUS_REG_TYPE(reg);
+       offset = BNXT_FW_STATUS_REG_OFF(reg);
+
+       switch (type) {
+       case BNXT_FW_STATUS_REG_TYPE_CFG:
+               rte_pci_read_config(bp->pdev, &val, sizeof(val), offset); /* result is not checked */
+               break;
+       case BNXT_FW_STATUS_REG_TYPE_GRC:
+               offset = info->mapped_status_regs[index]; /* use the offset stored by bnxt_map_fw_health_status_regs() */
+               /* FALLTHROUGH */
+       case BNXT_FW_STATUS_REG_TYPE_BAR0:
+               val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 +
+                                      offset));
+               break;
+       }
+
+       return val;
+}
+
+static int bnxt_fw_reset_all(struct bnxt *bp)
+{ /* Trigger a FW reset by the method info->flags selects; returns 0 or the bnxt_hwrm_fw_reset() error. */
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t i;
+       int rc = 0; /* stays 0 on the host path and when neither flag is set */
+
+       if (info->flags & BNXT_FLAG_ERROR_RECOVERY_HOST) {
+               /* Reset through master function driver */
+               for (i = 0; i < info->reg_array_cnt; i++)
+                       bnxt_write_fw_reset_reg(bp, i); /* each write applies its own post-write delay */
+               /* Wait for time specified by FW after triggering reset */
+               rte_delay_ms(info->master_func_wait_period_after_reset);
+       } else if (info->flags & BNXT_FLAG_ERROR_RECOVERY_CO_CPU) {
+               /* Reset with the help of Kong processor */
+               rc = bnxt_hwrm_fw_reset(bp);
+               if (rc)
+                       PMD_DRV_LOG(ERR, "Failed to reset FW\n");
+       }
+
+       return rc;
+}
+
+static void bnxt_fw_reset_cb(void *arg)
+{ /* Alarm callback armed by bnxt_check_fw_health(); arg is the struct bnxt pointer. */
+       struct bnxt *bp = arg;
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       int rc = 0;
+
+       /* Only Master function can do FW reset */
+       if (bnxt_is_master_func(bp) &&
+           bnxt_is_recovery_enabled(bp)) {
+               rc = bnxt_fw_reset_all(bp);
+               if (rc) {
+                       PMD_DRV_LOG(ERR, "Adapter recovery failed\n");
+                       return; /* no recovery alarm is armed on this path */
+               }
+       }
+
+       /* If the recovery method is ERROR_RECOVERY_CO_CPU, KONG will send an
+        * EXCEPTION_FATAL_ASYNC event to all the functions
+        * (including MASTER FUNC). After receiving this async event, all active
+        * drivers should treat this case as FW-initiated recovery.
+        */
+       if (info->flags & BNXT_FLAG_ERROR_RECOVERY_HOST) {
+               bp->fw_reset_min_msecs = BNXT_MIN_FW_READY_TIMEOUT;
+               bp->fw_reset_max_msecs = BNXT_MAX_FW_RESET_TIMEOUT;
+
+               /* To recover from error */
+               rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
+                                 (void *)bp); /* return value is not checked */
+       }
+}
+
+/* The driver polls the FW heartbeat and reset_counter at the frequency
+ * advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
+ * When the driver detects that the heartbeat has stopped or that
+ * reset_counter has changed, it triggers a reset to recover from the error.
+ * A "master PF" is the function that has the privilege to
+ * initiate the chimp reset. The master PF is elected by the
+ * firmware and is notified through an async message.
+ */
+static void bnxt_check_fw_health(void *arg)
+{ /* Alarm callback; arg is the struct bnxt pointer. Re-arms itself while the FW looks healthy. */
+       struct bnxt *bp = arg;
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t val = 0, wait_msec;
+
+       if (!info || !bnxt_is_recovery_enabled(bp) ||
+           is_bnxt_in_error(bp))
+               return; /* polling stops here: the alarm is not re-armed */
+
+       val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+       if (val == info->last_heart_beat)
+               goto reset; /* heartbeat unchanged since the previous poll */
+
+       info->last_heart_beat = val;
+
+       val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+       if (val != info->last_reset_counter)
+               goto reset; /* reset counter changed since it was last recorded */
+
+       info->last_reset_counter = val; /* val already equals the stored counter here, so the value is unchanged */
+
+       rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq,
+                         bnxt_check_fw_health, (void *)bp); /* schedule the next poll; result not checked */
+
+       return;
+reset:
+       /* Stop DMA to/from device */
+       bp->flags |= BNXT_FLAG_FATAL_ERROR;
+       bp->flags |= BNXT_FLAG_FW_RESET;
+
+       PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
+
+       if (bnxt_is_master_func(bp)) /* the wait before the reset callback depends on the function's role */
+               wait_msec = info->master_func_wait_period;
+       else
+               wait_msec = info->normal_func_wait_period;
+
+       rte_eal_alarm_set(US_PER_MS * wait_msec,
+                         bnxt_fw_reset_cb, (void *)bp); /* result not checked */
+}
+
+void bnxt_schedule_fw_health_check(struct bnxt *bp)
+{ /* Arm the first bnxt_check_fw_health() alarm unless recovery is disabled or one is already scheduled. */
+       uint32_t polling_freq;
+
+       if (!bnxt_is_recovery_enabled(bp))
+               return;
+
+       if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED)
+               return; /* already scheduled */
+
+       polling_freq = bp->recovery_info->driver_polling_freq; /* scaled by US_PER_MS below */
+
+       rte_eal_alarm_set(US_PER_MS * polling_freq,
+                         bnxt_check_fw_health, (void *)bp); /* NOTE(review): result not checked; the flag below is set regardless */
+       bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+}
+
+static void bnxt_cancel_fw_health_check(struct bnxt *bp)
+{ /* Cancel the pending bnxt_check_fw_health() alarm for bp and clear the scheduled flag. */
+       if (!bnxt_is_recovery_enabled(bp))
+               return; /* nothing is cancelled and the flag is left untouched */
+
+       rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp); /* matches on the same callback/argument pair used when arming */
+       bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+}
+
 static bool bnxt_vf_pciid(uint16_t id)
 {
        if (id == BROADCOM_DEV_ID_57304_VF ||
@@ -3708,10 +4044,9 @@ static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp,
                memset(mz->addr, 0, mz->len);
                mz_phys_addr = mz->iova;
                if ((unsigned long)mz->addr == mz_phys_addr) {
-                       PMD_DRV_LOG(WARNING,
-                               "Memzone physical address same as virtual.\n");
-                       PMD_DRV_LOG(WARNING,
-                                   "Using rte_mem_virt2iova()\n");
+                       PMD_DRV_LOG(DEBUG,
+                                   "physical address same as virtual\n");
+                       PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
                        mz_phys_addr = rte_mem_virt2iova(mz->addr);
                        if (mz_phys_addr == RTE_BAD_IOVA) {
                                PMD_DRV_LOG(ERR,
@@ -3744,10 +4079,9 @@ static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp,
        memset(mz->addr, 0, mz->len);
        mz_phys_addr = mz->iova;
        if ((unsigned long)mz->addr == mz_phys_addr) {
-               PMD_DRV_LOG(WARNING,
+               PMD_DRV_LOG(DEBUG,
                            "Memzone physical address same as virtual.\n");
-               PMD_DRV_LOG(WARNING,
-                           "Using rte_mem_virt2iova()\n");
+               PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
                for (sz = 0; sz < mem_size; sz += BNXT_PAGE_SIZE)
                        rte_mem_lock_page(((char *)mz->addr) + sz);
                mz_phys_addr = rte_mem_virt2iova(mz->addr);
@@ -3935,9 +4269,9 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp)
        memset(mz->addr, 0, mz->len);
        mz_phys_addr = mz->iova;
        if ((unsigned long)mz->addr == mz_phys_addr) {
-               PMD_DRV_LOG(WARNING,
+               PMD_DRV_LOG(DEBUG,
                            "Memzone physical address same as virtual.\n");
-               PMD_DRV_LOG(WARNING,
+               PMD_DRV_LOG(DEBUG,
                            "Using rte_mem_virt2iova()\n");
                mz_phys_addr = rte_mem_virt2iova(mz->addr);
                if (mz_phys_addr == RTE_BAD_IOVA) {
@@ -3973,10 +4307,9 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp)
        memset(mz->addr, 0, mz->len);
        mz_phys_addr = mz->iova;
        if ((unsigned long)mz->addr == mz_phys_addr) {
-               PMD_DRV_LOG(WARNING,
+               PMD_DRV_LOG(DEBUG,
                            "Memzone physical address same as virtual\n");
-               PMD_DRV_LOG(WARNING,
-                           "Using rte_mem_virt2iova()\n");
+               PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
                mz_phys_addr = rte_mem_virt2iova(mz->addr);
                if (mz_phys_addr == RTE_BAD_IOVA) {
                        PMD_DRV_LOG(ERR,
@@ -4137,6 +4470,11 @@ static int bnxt_init_fw(struct bnxt *bp)
        if (rc)
                return rc;
 
+       /* Get the adapter error recovery support info */
+       rc = bnxt_hwrm_error_recovery_qcfg(bp);
+       if (rc)
+               bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+
        if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
            mtu != bp->eth_dev->data->mtu)
                bp->eth_dev->data->mtu = mtu;
@@ -4217,12 +4555,6 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
        if (version_printed++ == 0)
                PMD_DRV_LOG(INFO, "%s\n", bnxt_version);
 
-       rte_eth_copy_pci_info(eth_dev, pci_dev);
-
-       bp = eth_dev->data->dev_private;
-
-       bp->dev_stopped = 1;
-
        eth_dev->dev_ops = &bnxt_dev_ops;
        eth_dev->rx_pkt_burst = &bnxt_recv_pkts;
        eth_dev->tx_pkt_burst = &bnxt_xmit_pkts;
@@ -4234,6 +4566,12 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;
 
+       rte_eth_copy_pci_info(eth_dev, pci_dev);
+
+       bp = eth_dev->data->dev_private;
+
+       bp->dev_stopped = 1;
+
        if (bnxt_vf_pciid(pci_dev->id.device_id))
                bp->flags |= BNXT_FLAG_VF;
 
@@ -4288,16 +4626,23 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 {
        int rc;
 
-       bnxt_disable_int(bp);
        bnxt_free_int(bp);
        bnxt_free_mem(bp, reconfig_dev);
        bnxt_hwrm_func_buf_unrgtr(bp);
        rc = bnxt_hwrm_func_driver_unregister(bp, 0);
        bp->flags &= ~BNXT_FLAG_REGISTERED;
        bnxt_free_ctx_mem(bp);
-       if (!reconfig_dev)
+       if (!reconfig_dev) {
                bnxt_free_hwrm_resources(bp);
 
+               if (bp->recovery_info != NULL) {
+                       rte_free(bp->recovery_info);
+                       bp->recovery_info = NULL;
+               }
+       }
+
+       rte_free(bp->ptp_cfg);
+       bp->ptp_cfg = NULL;
        return rc;
 }