From 6f5f3b99821ea5011573f0383582ac2fd3dbb0e0 Mon Sep 17 00:00:00 2001
From: Somnath Kotur
Date: Thu, 24 Dec 2020 15:07:34 +0530
Subject: [PATCH] net/bnxt: check chip reset in stop and close

While the error recovery thread is running, an application can invoke
dev_stop or dev_close_op, thus triggering a race and unwanted
consequences if dev_close is invoked while the recovery is not yet
completed.

Fix by adding another lock to synchronize between the two threads and
return -EAGAIN if the adapter is in the middle of recovery when the
dev_stop or dev_close ops are invoked.

Signed-off-by: Somnath Kotur
Reviewed-by: Ajit Khaparde
---
 drivers/net/bnxt/bnxt.h        |  5 ++++
 drivers/net/bnxt/bnxt_cpr.c    |  2 ++
 drivers/net/bnxt/bnxt_ethdev.c | 49 +++++++++++++++++++++++++++++-----
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 276d807c7b..d2d76b623a 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -738,6 +738,11 @@ struct bnxt {
 	 * health_check_lock
 	 */
 	pthread_mutex_t			health_check_lock;
+	/* synchronize between dev_stop/dev_close_op and
+	 * error recovery thread triggered as part of
+	 * HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY
+	 */
+	pthread_mutex_t			err_recovery_lock;
 	uint16_t			max_req_len;
 	uint16_t			max_resp_len;
 	uint16_t			hwrm_max_ext_req_len;
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index ee96ae81bf..6e172a9eea 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -133,6 +133,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 			return;
 		}
 
+		pthread_mutex_lock(&bp->err_recovery_lock);
 		event_data = rte_le_to_cpu_32(async_cmp->event_data1);
 		/* timestamp_lo/hi values are in units of 100ms */
 		bp->fw_reset_max_msecs = async_cmp->timestamp_hi ?
@@ -152,6 +153,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		}
 
 		bp->flags |= BNXT_FLAG_FW_RESET;
+		pthread_mutex_unlock(&bp->err_recovery_lock);
 		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
 				  (void *)bp);
 		break;
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index b4a23366ee..74b0f3d1dc 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1297,8 +1297,7 @@ static void bnxt_free_switch_domain(struct bnxt *bp)
 	}
 }
 
-/* Unload the driver, release resources */
-static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
+static int bnxt_dev_stop(struct rte_eth_dev *eth_dev)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
@@ -1366,6 +1365,23 @@ static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	return 0;
 }
 
+/* Unload the driver, release resources */
+static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
+{
+	struct bnxt *bp = eth_dev->data->dev_private;
+
+	pthread_mutex_lock(&bp->err_recovery_lock);
+	if (bp->flags & BNXT_FLAG_FW_RESET) {
+		PMD_DRV_LOG(ERR,
+			    "Adapter recovering from error..Please retry\n");
+		pthread_mutex_unlock(&bp->err_recovery_lock);
+		return -EAGAIN;
+	}
+	pthread_mutex_unlock(&bp->err_recovery_lock);
+
+	return bnxt_dev_stop(eth_dev);
+}
+
 static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
@@ -1432,7 +1448,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 	return 0;
 
 error:
-	bnxt_dev_stop_op(eth_dev);
+	bnxt_dev_stop(eth_dev);
 	return rc;
 }
 
@@ -1442,6 +1458,7 @@ bnxt_uninit_locks(struct bnxt *bp)
 	pthread_mutex_destroy(&bp->flow_lock);
 	pthread_mutex_destroy(&bp->def_cp_lock);
 	pthread_mutex_destroy(&bp->health_check_lock);
+	pthread_mutex_destroy(&bp->err_recovery_lock);
 	if (bp->rep_info) {
 		pthread_mutex_destroy(&bp->rep_info->vfr_lock);
 		pthread_mutex_destroy(&bp->rep_info->vfr_start_lock);
@@ -1456,13 +1473,22 @@ static int bnxt_dev_close_op(struct rte_eth_dev *eth_dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
+	pthread_mutex_lock(&bp->err_recovery_lock);
+	if (bp->flags & BNXT_FLAG_FW_RESET) {
+		PMD_DRV_LOG(ERR,
+			    "Adapter recovering from error...Please retry\n");
+		pthread_mutex_unlock(&bp->err_recovery_lock);
+		return -EAGAIN;
+	}
+	pthread_mutex_unlock(&bp->err_recovery_lock);
+
 	/* cancel the recovery handler before remove dev */
 	rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp);
 	rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp);
 	bnxt_cancel_fc_thread(bp);
 
 	if (eth_dev->data->dev_started)
-		ret = bnxt_dev_stop_op(eth_dev);
+		ret = bnxt_dev_stop(eth_dev);
 
 	bnxt_free_switch_domain(bp);
 
@@ -3676,7 +3702,7 @@ static void bnxt_dev_cleanup(struct bnxt *bp)
 	bp->eth_dev->data->dev_link.link_status = 0;
 	bp->link_info->link_up = 0;
 	if (bp->eth_dev->data->dev_started)
-		bnxt_dev_stop_op(bp->eth_dev);
+		bnxt_dev_stop(bp->eth_dev);
 
 	bnxt_uninit_resources(bp, true);
 }
@@ -3777,6 +3803,7 @@ static void bnxt_dev_recover(void *arg)
 	int timeout = bp->fw_reset_max_msecs;
 	int rc = 0;
 
+	pthread_mutex_lock(&bp->err_recovery_lock);
 	/* Clear Error flag so that device re-init should happen */
 	bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
 
@@ -3813,12 +3840,15 @@ static void bnxt_dev_recover(void *arg)
 		goto err_start;
 
 	PMD_DRV_LOG(INFO, "Recovered from FW reset\n");
+	pthread_mutex_unlock(&bp->err_recovery_lock);
+
 	return;
 err_start:
-	bnxt_dev_stop_op(bp->eth_dev);
+	bnxt_dev_stop(bp->eth_dev);
 err:
 	bp->flags |= BNXT_FLAG_FATAL_ERROR;
 	bnxt_uninit_resources(bp, false);
+	pthread_mutex_unlock(&bp->err_recovery_lock);
 	PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n");
 }
 
@@ -4775,8 +4805,15 @@ bnxt_init_locks(struct bnxt *bp)
 	}
 
 	err = pthread_mutex_init(&bp->health_check_lock, NULL);
-	if (err)
+	if (err) {
 		PMD_DRV_LOG(ERR, "Unable to initialize health_check_lock\n");
+		return err;
+	}
+
+	err = pthread_mutex_init(&bp->err_recovery_lock, NULL);
+	if (err)
+		PMD_DRV_LOG(ERR, "Unable to initialize err_recovery_lock\n");
+
 	return err;
 }
 
-- 
2.20.1