net/ice/base: handle critical FW error
author: Qi Zhang <qi.z.zhang@intel.com>
Mon, 30 Mar 2020 11:45:32 +0000 (19:45 +0800)
committer: Ferruh Yigit <ferruh.yigit@intel.com>
Tue, 21 Apr 2020 11:57:05 +0000 (13:57 +0200)
A race condition between FW and SW can occur in the window between admin
queue setup and the first command being sent. If a link event occurs in
that window, FW attempts to notify a non-existent queue, sets the critical
error bit, and disables the queue. When this happens, retry queue setup.

Signed-off-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Acked-by: Qiming Yang <qiming.yang@intel.com>
drivers/net/ice/base/ice_controlq.c
drivers/net/ice/base/ice_controlq.h
drivers/net/ice/base/ice_status.h

index 0fcf62e..41c8c7e 100644 (file)
@@ -13,6 +13,7 @@ do {                                                          \
        (qinfo)->sq.bal = prefix##_ATQBAL;                      \
        (qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M;        \
        (qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M; \
+       (qinfo)->sq.len_crit_mask = prefix##_ATQLEN_ATQCRIT_M;  \
        (qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M;           \
        (qinfo)->rq.head = prefix##_ARQH;                       \
        (qinfo)->rq.tail = prefix##_ARQT;                       \
@@ -21,6 +22,7 @@ do {                                                          \
        (qinfo)->rq.bal = prefix##_ARQBAL;                      \
        (qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M;        \
        (qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M; \
+       (qinfo)->rq.len_crit_mask = prefix##_ARQLEN_ARQCRIT_M;  \
        (qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M;           \
 } while (0)
 
@@ -609,6 +611,53 @@ init_ctrlq_free_sq:
        return ret_code;
 }
 
+/**
+ * ice_shutdown_ctrlq - shutdown routine for any control queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * NOTE: this function does not destroy the control queue locks.
+ */
+static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+       struct ice_ctl_q_info *cq;
+
+       ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__);
+
+       switch (q_type) {
+       case ICE_CTL_Q_ADMIN:
+               cq = &hw->adminq;
+               if (ice_check_sq_alive(hw, cq))
+                       ice_aq_q_shutdown(hw, true);
+               break;
+       case ICE_CTL_Q_MAILBOX:
+               cq = &hw->mailboxq;
+               break;
+       default:
+               return;
+       }
+
+       ice_shutdown_sq(hw, cq);
+       ice_shutdown_rq(hw, cq);
+}
+
+/**
+ * ice_shutdown_all_ctrlq - shutdown routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * NOTE: this function does not destroy the control queue locks. The driver
+ * may call this at runtime to shutdown and later restart control queues, such
+ * as in response to a reset event.
+ */
+void ice_shutdown_all_ctrlq(struct ice_hw *hw)
+{
+       ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__);
+       /* Shutdown FW admin queue */
+       ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+       /* Shutdown PF-VF Mailbox */
+       ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
 /**
  * ice_init_all_ctrlq - main initialization routine for all control queues
  * @hw: pointer to the hardware structure
@@ -625,15 +674,26 @@ init_ctrlq_free_sq:
 enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
 {
        enum ice_status status;
+       u32 retry = 0;
 
        ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__);
 
        /* Init FW admin queue */
-       status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
-       if (status)
-               return status;
+       do {
+               status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
+               if (status)
+                       return status;
+
+               status = ice_init_check_adminq(hw);
+               if (status != ICE_ERR_AQ_FW_CRITICAL)
+                       break;
+
+               ice_debug(hw, ICE_DBG_AQ_MSG,
+                         "Retry Admin Queue init due to FW critical error\n");
+               ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+               ice_msec_delay(ICE_CTL_Q_ADMIN_INIT_MSEC, true);
+       } while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT);
 
-       status = ice_init_check_adminq(hw);
        if (status)
                return status;
        /* Init Mailbox queue */
@@ -676,53 +736,6 @@ enum ice_status ice_create_all_ctrlq(struct ice_hw *hw)
        return ice_init_all_ctrlq(hw);
 }
 
-/**
- * ice_shutdown_ctrlq - shutdown routine for any control queue
- * @hw: pointer to the hardware structure
- * @q_type: specific Control queue type
- *
- * NOTE: this function does not destroy the control queue locks.
- */
-static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
-{
-       struct ice_ctl_q_info *cq;
-
-       ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__);
-
-       switch (q_type) {
-       case ICE_CTL_Q_ADMIN:
-               cq = &hw->adminq;
-               if (ice_check_sq_alive(hw, cq))
-                       ice_aq_q_shutdown(hw, true);
-               break;
-       case ICE_CTL_Q_MAILBOX:
-               cq = &hw->mailboxq;
-               break;
-       default:
-               return;
-       }
-
-       ice_shutdown_sq(hw, cq);
-       ice_shutdown_rq(hw, cq);
-}
-
-/**
- * ice_shutdown_all_ctrlq - shutdown routine for all control queues
- * @hw: pointer to the hardware structure
- *
- * NOTE: this function does not destroy the control queue locks. The driver
- * may call this at runtime to shutdown and later restart control queues, such
- * as in response to a reset event.
- */
-void ice_shutdown_all_ctrlq(struct ice_hw *hw)
-{
-       ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__);
-       /* Shutdown FW admin queue */
-       ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
-       /* Shutdown PF-VF Mailbox */
-       ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
-}
-
 /**
  * ice_destroy_ctrlq_locks - Destroy locks for a control queue
  * @cq: pointer to the control queue
@@ -1025,9 +1038,16 @@ ice_sq_send_cmd_nolock(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 
        /* update the error if time out occurred */
        if (!cmd_completed) {
-               ice_debug(hw, ICE_DBG_AQ_MSG,
-                         "Control Send Queue Writeback timeout.\n");
-               status = ICE_ERR_AQ_TIMEOUT;
+               if (rd32(hw, cq->rq.len) & cq->rq.len_crit_mask ||
+                   rd32(hw, cq->sq.len) & cq->sq.len_crit_mask) {
+                       ice_debug(hw, ICE_DBG_AQ_MSG,
+                                 "Critical FW error.\n");
+                       status = ICE_ERR_AQ_FW_CRITICAL;
+               } else {
+                       ice_debug(hw, ICE_DBG_AQ_MSG,
+                                 "Control Send Queue Writeback timeout.\n");
+                       status = ICE_ERR_AQ_TIMEOUT;
+               }
        }
 
 sq_send_command_error:
index 464a2ad..f47fd43 100644 (file)
@@ -35,6 +35,8 @@ enum ice_ctl_q {
 /* Control Queue timeout settings - max delay 250ms */
 #define ICE_CTL_Q_SQ_CMD_TIMEOUT       2500  /* Count 2500 times */
 #define ICE_CTL_Q_SQ_CMD_USEC          100   /* Check every 100usec */
+#define ICE_CTL_Q_ADMIN_INIT_TIMEOUT   10    /* Count 10 times */
+#define ICE_CTL_Q_ADMIN_INIT_MSEC      100   /* Check every 100msec */
 
 struct ice_ctl_q_ring {
        void *dma_head;                 /* Virtual address to DMA head */
@@ -60,6 +62,7 @@ struct ice_ctl_q_ring {
        u32 bal;
        u32 len_mask;
        u32 len_ena_mask;
+       u32 len_crit_mask;
        u32 head_mask;
 };
 
index 7bcccd3..446702f 100644 (file)
@@ -42,6 +42,7 @@ enum ice_status {
        ICE_ERR_AQ_FULL                         = -102,
        ICE_ERR_AQ_NO_WORK                      = -103,
        ICE_ERR_AQ_EMPTY                        = -104,
+       ICE_ERR_AQ_FW_CRITICAL                  = -105,
 };
 
 #endif /* _ICE_STATUS_H_ */