net/bnxt: support periodic FW health monitoring
[dpdk.git] / drivers / net / bnxt / bnxt_ethdev.c
index d90a6e4..d28f1bd 100644 (file)
@@ -11,6 +11,7 @@
 #include <rte_ethdev_pci.h>
 #include <rte_malloc.h>
 #include <rte_cycles.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
@@ -166,6 +167,9 @@ static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
 static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev);
+static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev);
+static void bnxt_cancel_fw_health_check(struct bnxt *bp);
 
 int is_bnxt_in_error(struct bnxt *bp)
 {
@@ -201,19 +205,25 @@ static uint16_t  bnxt_rss_hash_tbl_size(const struct bnxt *bp)
        return bnxt_rss_ctxts(bp) * BNXT_RSS_ENTRIES_PER_CTX_THOR;
 }
 
-static void bnxt_free_mem(struct bnxt *bp)
+static void bnxt_free_mem(struct bnxt *bp, bool reconfig)
 {
        bnxt_free_filter_mem(bp);
        bnxt_free_vnic_attributes(bp);
        bnxt_free_vnic_mem(bp);
 
-       bnxt_free_stats(bp);
-       bnxt_free_tx_rings(bp);
-       bnxt_free_rx_rings(bp);
+       /* Stats and tx/rx rings are released only when reconfig is false.
+        * tx/rx rings are configured as part of *_queue_setup callbacks,
+        * so they are kept when the memory is freed for a reconfiguration.
+        * If the number of rings changes across a fw update,
+        * we don't have much choice except to warn the user.
+        */
+       if (!reconfig) {
+               bnxt_free_stats(bp);
+               bnxt_free_tx_rings(bp);
+               bnxt_free_rx_rings(bp);
+       }
        bnxt_free_async_cp_ring(bp);
 }
 
-static int bnxt_alloc_mem(struct bnxt *bp)
+static int bnxt_alloc_mem(struct bnxt *bp, bool reconfig)
 {
        int rc;
 
@@ -244,7 +254,7 @@ static int bnxt_alloc_mem(struct bnxt *bp)
        return 0;
 
 alloc_mem_err:
-       bnxt_free_mem(bp);
+       bnxt_free_mem(bp, reconfig);
        return rc;
 }
 
@@ -785,6 +795,25 @@ bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
        return bnxt_xmit_pkts;
 }
 
+/* Tear down and re-create the driver resources in reconfig mode.
+ * bnxt_dev_start_op() calls this when BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE
+ * is set after bnxt_hwrm_if_change().
+ * Returns the status of bnxt_init_resources().
+ */
+static int bnxt_handle_if_change_status(struct bnxt *bp)
+{
+       int ret;
+
+       /* The fw has undergone a reset and lost all contexts: keep the
+        * fatal flag raised during cleanup so that no hwrm is issued.
+        */
+       bp->flags |= BNXT_FLAG_FATAL_ERROR;
+       bnxt_uninit_resources(bp, true);
+       bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+
+       /* The fatal flag is cleared above so that re-init happens */
+       ret = bnxt_init_resources(bp, true);
+
+       /* The reset indication is dropped whatever the re-init result */
+       bp->flags &= ~BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE;
+
+       return ret;
+}
+
 static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 {
        struct bnxt *bp = eth_dev->data->dev_private;
@@ -798,6 +827,15 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
                        bp->rx_cp_nr_rings, RTE_ETHDEV_QUEUE_STAT_CNTRS);
        }
 
+       rc = bnxt_hwrm_if_change(bp, 1);
+       if (!rc) {
+               if (bp->flags & BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE) {
+                       rc = bnxt_handle_if_change_status(bp);
+                       if (rc)
+                               return rc;
+               }
+       }
+
        rc = bnxt_init_chip(bp);
        if (rc)
                goto error;
@@ -821,9 +859,11 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
        bp->flags |= BNXT_FLAG_INIT_DONE;
        eth_dev->data->dev_started = 1;
        bp->dev_stopped = 0;
+       bnxt_schedule_fw_health_check(bp);
        return 0;
 
 error:
+       bnxt_hwrm_if_change(bp, 0);
        bnxt_shutdown_nic(bp);
        bnxt_free_tx_mbufs(bp);
        bnxt_free_rx_mbufs(bp);
@@ -872,6 +912,8 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
        /* disable uio/vfio intr/eventfd mapping */
        rte_intr_disable(intr_handle);
 
+       bnxt_cancel_fw_health_check(bp);
+
        bp->flags &= ~BNXT_FLAG_INIT_DONE;
        if (bp->eth_dev->data->dev_started) {
                /* TBD: STOP HW queues DMA */
@@ -890,6 +932,7 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
        bnxt_free_tx_mbufs(bp);
        bnxt_free_rx_mbufs(bp);
        bnxt_shutdown_nic(bp);
+       bnxt_hwrm_if_change(bp, 0);
        bp->dev_stopped = 1;
 }
 
@@ -3523,6 +3566,219 @@ static const struct eth_dev_ops bnxt_dev_ops = {
        .timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+/* Pre-map the GRC type FW status registers using GRC window 2.
+ *
+ * Every GRC status register must share the same upper address bits
+ * (reg & 0xfffff000) so that a single window base covers all of them.
+ * Returns 0 on success, also when there is no GRC register to map, and
+ * -ERANGE when two GRC registers do not share the same window base.
+ */
+int bnxt_map_fw_health_status_regs(struct bnxt *bp)
+{
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       const uint32_t no_base = 0xffffffff;
+       const uint32_t base_mask = 0xfffff000;
+       uint32_t win_base = no_base;
+       int idx;
+
+       for (idx = 0; idx < BNXT_FW_STATUS_REG_CNT; idx++) {
+               uint32_t status_reg = info->status_regs[idx];
+               uint32_t base = status_reg & base_mask;
+
+               /* Only the monitoring GRC registers are pre-mapped */
+               if (BNXT_FW_STATUS_REG_TYPE(status_reg) !=
+                   BNXT_FW_STATUS_REG_TYPE_GRC)
+                       continue;
+
+               /* The first GRC register found fixes the window base */
+               if (win_base == no_base)
+                       win_base = base;
+               else if (base != win_base)
+                       return -ERANGE;
+
+               /* Use mask 0xffc as the lower 2 bits indicate
+                * address space location
+                */
+               info->mapped_status_regs[idx] = BNXT_GRCP_WINDOW_2_BASE +
+                                               (status_reg & 0xffc);
+       }
+
+       /* The window register is written only if a GRC register exists */
+       if (win_base != no_base)
+               rte_write32(win_base, (uint8_t *)bp->bar0 +
+                           BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
+
+       return 0;
+}
+
+/* Prepare the device for a FW reset: call bnxt_set_hwrm_link_config()
+ * with false, mark the link as down, stop the port if it is not already
+ * stopped and release the resources in reconfig mode.
+ */
+static void bnxt_dev_cleanup(struct bnxt *bp)
+{
+       bnxt_set_hwrm_link_config(bp, false);
+       bp->link_info.link_up = 0;
+
+       /* A port that is still running has to be stopped first */
+       if (!bp->dev_stopped)
+               bnxt_dev_stop_op(bp->eth_dev);
+
+       bnxt_uninit_resources(bp, true);
+}
+
+/* Re-apply the Rx modes recorded in dev->data (all-multicast and
+ * promiscuous).
+ * Returns 0 on success, otherwise the error of the first mode that
+ * could not be restored, so that a failure is never masked by a later
+ * successful call.
+ */
+static int bnxt_restore_filters(struct bnxt *bp)
+{
+       struct rte_eth_dev *dev = bp->eth_dev;
+       int ret;
+
+       if (dev->data->all_multicast) {
+               ret = bnxt_allmulticast_enable_op(dev);
+               if (ret)
+                       return ret;
+       }
+
+       if (dev->data->promiscuous) {
+               ret = bnxt_promiscuous_enable_op(dev);
+               if (ret)
+                       return ret;
+       }
+
+       /* TODO restore other filters as well */
+       return 0;
+}
+
+/* Alarm callback armed by bnxt_dev_reset_and_resume(): wait for the FW
+ * to answer again, then re-initialize the resources and restart the port.
+ *
+ * The FW is polled with bnxt_hwrm_ver_get() every
+ * BNXT_FW_READY_WAIT_INTERVAL ms until bp->fw_reset_max_msecs is used up.
+ * On any failure the fatal error flag is set and the resources are freed.
+ */
+static void bnxt_dev_recover(void *arg)
+{
+       struct bnxt *bp = arg;
+       int timeout = bp->fw_reset_max_msecs;
+       int rc = 0;
+
+       /* Clear Error flag so that device re-init should happen */
+       bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+
+       /* Test "timeout > 0" and not "timeout != 0": when the budget is
+        * not a multiple of the wait interval the counter steps over zero
+        * and the loop would otherwise spin until the FW answers.
+        */
+       do {
+               rc = bnxt_hwrm_ver_get(bp);
+               if (rc == 0)
+                       break;
+               rte_delay_ms(BNXT_FW_READY_WAIT_INTERVAL);
+               timeout -= BNXT_FW_READY_WAIT_INTERVAL;
+       } while (rc && timeout > 0);
+
+       if (rc) {
+               PMD_DRV_LOG(ERR, "FW is not Ready after reset\n");
+               goto err;
+       }
+
+       rc = bnxt_init_resources(bp, true);
+       if (rc) {
+               PMD_DRV_LOG(ERR,
+                           "Failed to initialize resources after reset\n");
+               goto err;
+       }
+       /* clear reset flag as the device is initialized now */
+       bp->flags &= ~BNXT_FLAG_FW_RESET;
+
+       rc = bnxt_dev_start_op(bp->eth_dev);
+       if (rc) {
+               PMD_DRV_LOG(ERR, "Failed to start port after reset\n");
+               goto err;
+       }
+
+       rc = bnxt_restore_filters(bp);
+       if (rc)
+               goto err;
+
+       PMD_DRV_LOG(INFO, "Recovered from FW reset\n");
+       return;
+err:
+       /* Recovery failed: flag the device as unusable and do a full
+        * (non-reconfig) teardown.
+        */
+       bp->flags |= BNXT_FLAG_FATAL_ERROR;
+       bnxt_uninit_resources(bp, false);
+       PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n");
+}
+
+/* Clean up the device and arm an alarm that runs bnxt_dev_recover()
+ * after bp->fw_reset_min_msecs milliseconds.
+ * arg is the struct bnxt pointer of the port.
+ */
+void bnxt_dev_reset_and_resume(void *arg)
+{
+       struct bnxt *bp = arg;
+       int rc;
+
+       bnxt_dev_cleanup(bp);
+
+       rc = rte_eal_alarm_set(US_PER_MS * bp->fw_reset_min_msecs,
+                              bnxt_dev_recover, (void *)bp);
+       /* Without the alarm the recovery never runs; report it. The
+        * message ends with a newline like every other log in this file.
+        */
+       if (rc)
+               PMD_DRV_LOG(ERR, "Error setting recovery alarm\n");
+}
+
+/* Read the FW status register described by
+ * bp->recovery_info->status_regs[index].
+ *
+ * The register type selects the access method: PCI config space, BAR0
+ * at the register offset, or BAR0 at the pre-mapped GRC window offset.
+ * Returns the value read, or 0 for a register type that is not handled.
+ */
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
+{
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t desc = info->status_regs[index];
+       uint32_t reg_type = BNXT_FW_STATUS_REG_TYPE(desc);
+       uint32_t reg_off = BNXT_FW_STATUS_REG_OFF(desc);
+       uint32_t val = 0;
+
+       if (reg_type == BNXT_FW_STATUS_REG_TYPE_CFG) {
+               rte_pci_read_config(bp->pdev, &val, sizeof(val), reg_off);
+               return val;
+       }
+
+       /* GRC registers are read through their mapped window offset,
+        * BAR0 registers directly at their own offset.
+        */
+       if (reg_type == BNXT_FW_STATUS_REG_TYPE_GRC)
+               reg_off = info->mapped_status_regs[index];
+       else if (reg_type != BNXT_FW_STATUS_REG_TYPE_BAR0)
+               return val;
+
+       val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + reg_off));
+       return val;
+}
+
+/* Driver should poll FW heartbeat, reset_counter with the frequency
+ * advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
+ * When the driver detects heartbeat stop or change in reset_counter,
+ * it has to trigger a reset to recover from the error condition.
+ * A "master PF" is the function who will have the privilege to
+ * initiate the chimp reset. The master PF will be elected by the
+ * firmware and will be notified through async message.
+ *
+ * This is the alarm callback: on a healthy FW it re-arms itself after
+ * info->driver_polling_freq ms, otherwise it sets the fatal error and
+ * FW reset flags and does not re-arm.
+ */
+static void bnxt_check_fw_health(void *arg)
+{
+       struct bnxt *bp = arg;
+       struct bnxt_error_recovery_info *info = bp->recovery_info;
+       uint32_t val = 0;
+       int rc;
+
+       if (!info || !bnxt_is_recovery_enabled(bp) ||
+           is_bnxt_in_error(bp))
+               return;
+
+       /* A heartbeat counter that did not move means the FW is stuck */
+       val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+       if (val == info->last_heart_beat)
+               goto reset;
+
+       info->last_heart_beat = val;
+
+       /* A reset counter that moved means a reset took place */
+       val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+       if (val != info->last_reset_counter)
+               goto reset;
+
+       info->last_reset_counter = val;
+
+       /* Report a failed re-arm: polling stops from this point on */
+       rc = rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq,
+                              bnxt_check_fw_health, (void *)bp);
+       if (rc)
+               PMD_DRV_LOG(ERR, "Failed to re-arm FW health check alarm\n");
+
+       return;
+reset:
+       /* Stop DMA to/from device */
+       bp->flags |= BNXT_FLAG_FATAL_ERROR;
+       bp->flags |= BNXT_FLAG_FW_RESET;
+
+       PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
+}
+
+/* Arm the periodic FW health check alarm.
+ * Nothing is done when error recovery is not enabled or when the check
+ * is already scheduled. BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED is set only
+ * once the alarm is armed, so a failed attempt can be retried later.
+ */
+void bnxt_schedule_fw_health_check(struct bnxt *bp)
+{
+       uint32_t polling_freq;
+       int rc;
+
+       if (!bnxt_is_recovery_enabled(bp))
+               return;
+
+       if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED)
+               return;
+
+       polling_freq = bp->recovery_info->driver_polling_freq;
+
+       rc = rte_eal_alarm_set(US_PER_MS * polling_freq,
+                              bnxt_check_fw_health, (void *)bp);
+       if (rc) {
+               PMD_DRV_LOG(ERR, "Failed to schedule FW health check\n");
+               return;
+       }
+
+       bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+}
+
+/* Cancel the pending FW health check alarm and clear
+ * BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED.
+ * Nothing is done when error recovery is not enabled.
+ */
+static void bnxt_cancel_fw_health_check(struct bnxt *bp)
+{
+       if (bnxt_is_recovery_enabled(bp)) {
+               rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp);
+               bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+       }
+}
+
 static bool bnxt_vf_pciid(uint16_t id)
 {
        if (id == BROADCOM_DEV_ID_57304_VF ||
@@ -3965,6 +4221,22 @@ static int bnxt_setup_mac_addr(struct rte_eth_dev *eth_dev)
        return rc;
 }
 
+/* Restore the previously configured MAC address with bnxt_hwrm_set_mac()
+ * when bnxt_check_zero_bytes() returns non-zero for bp->dflt_mac_addr
+ * (presumably an all-zero address - confirm against the helper).
+ * Returns 0 when nothing has to be done, otherwise the status of
+ * bnxt_hwrm_set_mac().
+ */
+static int bnxt_restore_dflt_mac(struct bnxt *bp)
+{
+       int ret = 0;
+
+       /* Otherwise the MAC is already configured in FW */
+       if (bnxt_check_zero_bytes(bp->dflt_mac_addr, RTE_ETHER_ADDR_LEN)) {
+               /* Restore the old MAC configured */
+               ret = bnxt_hwrm_set_mac(bp);
+               if (ret != 0)
+                       PMD_DRV_LOG(ERR, "Failed to restore MAC address\n");
+       }
+
+       return ret;
+}
+
 static void bnxt_config_vf_req_fwd(struct bnxt *bp)
 {
        if (!BNXT_PF(bp))
@@ -4029,6 +4301,11 @@ static int bnxt_init_fw(struct bnxt *bp)
        if (rc)
                return rc;
 
+       /* Get the adapter error recovery support info */
+       rc = bnxt_hwrm_error_recovery_qcfg(bp);
+       if (rc)
+               bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+
        if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
            mtu != bp->eth_dev->data->mtu)
                bp->eth_dev->data->mtu = mtu;
@@ -4038,7 +4315,7 @@ static int bnxt_init_fw(struct bnxt *bp)
        return 0;
 }
 
-static int bnxt_init_resources(struct bnxt *bp)
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev)
 {
        int rc;
 
@@ -4046,9 +4323,15 @@ static int bnxt_init_resources(struct bnxt *bp)
        if (rc)
                return rc;
 
-       rc = bnxt_setup_mac_addr(bp->eth_dev);
-       if (rc)
-               return rc;
+       if (!reconfig_dev) {
+               rc = bnxt_setup_mac_addr(bp->eth_dev);
+               if (rc)
+                       return rc;
+       } else {
+               rc = bnxt_restore_dflt_mac(bp);
+               if (rc)
+                       return rc;
+       }
 
        bnxt_config_vf_req_fwd(bp);
 
@@ -4075,7 +4358,7 @@ static int bnxt_init_resources(struct bnxt *bp)
                }
        }
 
-       rc = bnxt_alloc_mem(bp);
+       rc = bnxt_alloc_mem(bp, reconfig_dev);
        if (rc)
                return rc;
 
@@ -4149,7 +4432,7 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
                            "Failed to allocate hwrm resource rc: %x\n", rc);
                goto error_free;
        }
-       rc = bnxt_init_resources(bp);
+       rc = bnxt_init_resources(bp, false);
        if (rc)
                goto error_free;
 
@@ -4170,18 +4453,25 @@ error_free:
 }
 
 static int
-bnxt_uninit_resources(struct bnxt *bp)
+bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 {
        int rc;
 
        bnxt_disable_int(bp);
        bnxt_free_int(bp);
-       bnxt_free_mem(bp);
+       bnxt_free_mem(bp, reconfig_dev);
        bnxt_hwrm_func_buf_unrgtr(bp);
        rc = bnxt_hwrm_func_driver_unregister(bp, 0);
        bp->flags &= ~BNXT_FLAG_REGISTERED;
        bnxt_free_ctx_mem(bp);
-       bnxt_free_hwrm_resources(bp);
+       /* The HWRM resources and the error recovery info are released
+        * only when reconfig_dev is false; they are left in place when
+        * the resources are torn down for a reconfiguration.
+        */
+       if (!reconfig_dev) {
+               bnxt_free_hwrm_resources(bp);
+
+               if (bp->recovery_info != NULL) {
+                       rte_free(bp->recovery_info);
+                       bp->recovery_info = NULL;
+               }
+       }
 
        return rc;
 }
@@ -4197,7 +4487,7 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 
        PMD_DRV_LOG(DEBUG, "Calling Device uninit\n");
 
-       rc = bnxt_uninit_resources(bp);
+       rc = bnxt_uninit_resources(bp, false);
 
        if (bp->grp_info != NULL) {
                rte_free(bp->grp_info);