From 9d0cbaecc91aa2385bbcbc3c10cbde85e01a5dc2 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 1 Oct 2019 18:23:29 -0700 Subject: [PATCH] net/bnxt: support periodic FW health monitoring Periodically poll the FW heartbeat register and FW recovery counter registers to check the FW health. Polling frequency will be advertised by the FW in HWRM_ERROR_RECOVERY_QCFG response. Schedule the task upon receiving the async event from FW. Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt.h | 6 +++ drivers/net/bnxt/bnxt_cpr.c | 10 ++++ drivers/net/bnxt/bnxt_ethdev.c | 97 ++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h index ea556d0792..50b9b38565 100644 --- a/drivers/net/bnxt/bnxt.h +++ b/drivers/net/bnxt/bnxt.h @@ -368,6 +368,9 @@ struct bnxt_error_recovery_info { #define BNXT_FLAG_MASTER_FUNC (1 << 2) #define BNXT_FLAG_RECOVERY_ENABLED (1 << 3) uint32_t flags; + + uint32_t last_heart_beat; + uint32_t last_reset_counter; }; /* address space location of register */ @@ -416,6 +419,7 @@ struct bnxt { #define BNXT_FLAG_FW_CAP_IF_CHANGE (1 << 17) #define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE (1 << 18) #define BNXT_FLAG_FW_CAP_ERROR_RECOVERY (1 << 19) +#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED (1 << 20) #define BNXT_FLAG_EXT_STATS_SUPPORTED (1 << 29) #define BNXT_FLAG_NEW_RM (1 << 30) #define BNXT_FLAG_INIT_DONE (1U << 31) @@ -532,6 +536,8 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg); int is_bnxt_in_error(struct bnxt *bp); int bnxt_map_fw_health_status_regs(struct bnxt *bp); +uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index); +void bnxt_schedule_fw_health_check(struct bnxt *bp); bool is_bnxt_supported(struct rte_eth_dev *dev); bool bnxt_stratus_device(struct bnxt *bp); diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c index 1b046bbfac..38931799eb 100644 --- a/drivers/net/bnxt/bnxt_cpr.c +++ b/drivers/net/bnxt/bnxt_cpr.c @@ -90,6 +90,16 @@ void bnxt_handle_async_event(struct bnxt *bp, PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n", bnxt_is_recovery_enabled(bp), bnxt_is_master_func(bp)); + + if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED) + return; + + info->last_heart_beat = + bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG); + info->last_reset_counter = + bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG); + + bnxt_schedule_fw_health_check(bp); break; default: PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id); diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index a23ca2b53d..d28f1bd8f9 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -169,6 +169,7 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu); static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev); static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev); static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev); +static void bnxt_cancel_fw_health_check(struct bnxt *bp); int is_bnxt_in_error(struct bnxt *bp) { @@ -858,6 +859,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) bp->flags |= BNXT_FLAG_INIT_DONE; eth_dev->data->dev_started = 1; bp->dev_stopped = 0; + bnxt_schedule_fw_health_check(bp); return 0; error: @@ -910,6 +912,8 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) /* disable uio/vfio intr/eventfd mapping */ rte_intr_disable(intr_handle); + bnxt_cancel_fw_health_check(bp); + bp->flags &= ~BNXT_FLAG_INIT_DONE; if (bp->eth_dev->data->dev_started) { /* TBD: STOP HW queues DMA */ @@ -3682,6 +3686,99 @@ void bnxt_dev_reset_and_resume(void *arg) PMD_DRV_LOG(ERR, "Error setting recovery alarm"); } +uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index) +{ + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t reg = info->status_regs[index]; + uint32_t type, offset, val = 0; + + type = BNXT_FW_STATUS_REG_TYPE(reg); + offset = BNXT_FW_STATUS_REG_OFF(reg); + + switch (type) { + case BNXT_FW_STATUS_REG_TYPE_CFG: + rte_pci_read_config(bp->pdev, &val, sizeof(val), offset); + break; + case BNXT_FW_STATUS_REG_TYPE_GRC: + offset = info->mapped_status_regs[index]; + /* FALLTHROUGH */ + case BNXT_FW_STATUS_REG_TYPE_BAR0: + val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + + offset)); + break; + } + + return val; +} + +/* Driver should poll FW heartbeat, reset_counter with the frequency + * advertised by FW in HWRM_ERROR_RECOVERY_QCFG. + * When the driver detects heartbeat stop or change in reset_counter, + * it has to trigger a reset to recover from the error condition. + * A “master PF” is the function who will have the privilege to + * initiate the chimp reset. The master PF will be elected by the + * firmware and will be notified through async message. + */ +static void bnxt_check_fw_health(void *arg) +{ + struct bnxt *bp = arg; + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t val = 0; + + if (!info || !bnxt_is_recovery_enabled(bp) || + is_bnxt_in_error(bp)) + return; + + val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG); + if (val == info->last_heart_beat) + goto reset; + + info->last_heart_beat = val; + + val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG); + if (val != info->last_reset_counter) + goto reset; + + info->last_reset_counter = val; + + rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq, + bnxt_check_fw_health, (void *)bp); + + return; +reset: + /* Stop DMA to/from device */ + bp->flags |= BNXT_FLAG_FATAL_ERROR; + bp->flags |= BNXT_FLAG_FW_RESET; + + PMD_DRV_LOG(ERR, "Detected FW dead condition\n"); +} + +void bnxt_schedule_fw_health_check(struct bnxt *bp) +{ + uint32_t polling_freq; + + if (!bnxt_is_recovery_enabled(bp)) + return; + + if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED) + return; + + polling_freq = bp->recovery_info->driver_polling_freq; + + rte_eal_alarm_set(US_PER_MS * polling_freq, + bnxt_check_fw_health, (void *)bp); + bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED; +} + +static void bnxt_cancel_fw_health_check(struct bnxt *bp) +{ + if (!bnxt_is_recovery_enabled(bp)) + return; + + rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp); + bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED; +} + static bool bnxt_vf_pciid(uint16_t id) { if (id == BROADCOM_DEV_ID_57304_VF || -- 2.20.1