net/qede/base: add mdump sub-commands
author Rasesh Mody <rasesh.mody@cavium.com>
Tue, 19 Sep 2017 01:29:54 +0000 (18:29 -0700)
committer Ferruh Yigit <ferruh.yigit@intel.com>
Fri, 6 Oct 2017 00:49:49 +0000 (02:49 +0200)
- Add support to retain/clear data for crash dump by introducing the mdump
   GET_RETAIN/CLR_RETAIN sub-commands and the new APIs
   ecore_mcp_mdump_get_retain() and ecore_mcp_mdump_clr_retain()
 - Avoid checking for mdump logs and data when running on an emulator
 - Fix the "deadbeaf" value returned when the PCIe status command read
   fails (prevents false detection)

Signed-off-by: Rasesh Mody <rasesh.mody@cavium.com>
drivers/net/qede/base/ecore_dev.c
drivers/net/qede/base/ecore_mcp.c
drivers/net/qede/base/ecore_mcp.h
drivers/net/qede/base/ecore_mcp_api.h
drivers/net/qede/base/mcp_public.h

index 938834b..93c2306 100644 (file)
@@ -3564,6 +3564,7 @@ ecore_hw_prepare_single(struct ecore_hwfn *p_hwfn,
                        void OSAL_IOMEM * p_doorbells,
                        struct ecore_hw_prepare_params *p_params)
 {
+       struct ecore_mdump_retain_data mdump_retain;
        struct ecore_dev *p_dev = p_hwfn->p_dev;
        struct ecore_mdump_info mdump_info;
        enum _ecore_status_t rc = ECORE_SUCCESS;
@@ -3631,24 +3632,37 @@ ecore_hw_prepare_single(struct ecore_hwfn *p_hwfn,
        /* Sending a mailbox to the MFW should be after ecore_get_hw_info() is
         * called, since among others it sets the ports number in an engine.
         */
-       if (p_params->initiate_pf_flr && p_hwfn == ECORE_LEADING_HWFN(p_dev) &&
+       if (p_params->initiate_pf_flr && IS_LEAD_HWFN(p_hwfn) &&
            !p_dev->recov_in_prog) {
                rc = ecore_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
                if (rc != ECORE_SUCCESS)
                        DP_NOTICE(p_hwfn, false, "Failed to initiate PF FLR\n");
        }
 
-       /* Check if mdump logs are present and update the epoch value */
-       if (p_hwfn == ECORE_LEADING_HWFN(p_hwfn->p_dev)) {
+       /* Check if mdump logs/data are present and update the epoch value */
+       if (IS_LEAD_HWFN(p_hwfn)) {
+#ifndef ASIC_ONLY
+               if (!CHIP_REV_IS_EMUL(p_dev)) {
+#endif
                rc = ecore_mcp_mdump_get_info(p_hwfn, p_hwfn->p_main_ptt,
                                              &mdump_info);
-               if (rc == ECORE_SUCCESS && mdump_info.num_of_logs > 0) {
+               if (rc == ECORE_SUCCESS && mdump_info.num_of_logs)
                        DP_NOTICE(p_hwfn, false,
                                  "* * * IMPORTANT - HW ERROR register dump captured by device * * *\n");
-               }
+
+               rc = ecore_mcp_mdump_get_retain(p_hwfn, p_hwfn->p_main_ptt,
+                                               &mdump_retain);
+               if (rc == ECORE_SUCCESS && mdump_retain.valid)
+                       DP_NOTICE(p_hwfn, false,
+                                 "mdump retained data: epoch 0x%08x, pf 0x%x, status 0x%08x\n",
+                                 mdump_retain.epoch, mdump_retain.pf,
+                                 mdump_retain.status);
 
                ecore_mcp_mdump_set_values(p_hwfn, p_hwfn->p_main_ptt,
                                           p_params->epoch);
+#ifndef ASIC_ONLY
+               }
+#endif
        }
 
        /* Allocate the init RT array and initialize the init-ops engine */
index 868b075..462fcc9 100644 (file)
@@ -1434,11 +1434,16 @@ ecore_mcp_mdump_cmd(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
                return rc;
 
        p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;
+
        if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
-               DP_NOTICE(p_hwfn, false,
-                         "MFW claims that the mdump command is illegal [mdump_cmd 0x%x]\n",
-                         p_mdump_cmd_params->cmd);
-               rc = ECORE_INVAL;
+               DP_INFO(p_hwfn,
+                       "The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
+                       p_mdump_cmd_params->cmd);
+               rc = ECORE_NOTIMPL;
+       } else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+               DP_INFO(p_hwfn,
+                       "The mdump command is not supported by the MFW\n");
+               rc = ECORE_NOTIMPL;
        }
 
        return rc;
@@ -1496,16 +1501,10 @@ ecore_mcp_mdump_get_config(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
        if (rc != ECORE_SUCCESS)
                return rc;
 
-       if (mdump_cmd_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
-               DP_INFO(p_hwfn,
-                       "The mdump command is not supported by the MFW\n");
-               return ECORE_NOTIMPL;
-       }
-
        if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
-               DP_NOTICE(p_hwfn, false,
-                         "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
-                         mdump_cmd_params.mcp_resp);
+               DP_INFO(p_hwfn,
+                       "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
+                       mdump_cmd_params.mcp_resp);
                rc = ECORE_UNKNOWN_ERROR;
        }
 
@@ -1566,17 +1565,71 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
        return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
 }
 
+enum _ecore_status_t
+ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                          struct ecore_mdump_retain_data *p_mdump_retain)
+{      /* Issue the mdump GET_RETAIN sub-command through
+        * ecore_mcp_mdump_cmd() and, when the MFW answers FW_MSG_CODE_OK,
+        * copy the valid/epoch/pf/status fields into *p_mdump_retain.
+        * A failure status from ecore_mcp_mdump_cmd() is returned unchanged;
+        * any other MFW response is logged and yields ECORE_UNKNOWN_ERROR.
+        * *p_mdump_retain is written only on the ECORE_SUCCESS path.
+        */
+       struct ecore_mdump_cmd_params mdump_cmd_params;
+       struct mdump_retain_data_stc mfw_mdump_retain; /* passed as p_data_dst */
+       enum _ecore_status_t rc;
+
+       OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params));
+       mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
+       mdump_cmd_params.p_data_dst = &mfw_mdump_retain;
+       mdump_cmd_params.data_dst_size = sizeof(mfw_mdump_retain);
+
+       rc = ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
+               DP_INFO(p_hwfn,
+                       "Failed to get the mdump retained data [mcp_resp 0x%x]\n",
+                       mdump_cmd_params.mcp_resp);
+               return ECORE_UNKNOWN_ERROR;
+       }
+
+       p_mdump_retain->valid = mfw_mdump_retain.valid;
+       p_mdump_retain->epoch = mfw_mdump_retain.epoch;
+       p_mdump_retain->pf = mfw_mdump_retain.pf;
+       p_mdump_retain->status = mfw_mdump_retain.status;
+
+       return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn,
+                                               struct ecore_ptt *p_ptt)
+{      /* Issue the mdump CLR_RETAIN sub-command. Only the cmd field is set
+        * after OSAL_MEM_ZERO(), so no data buffer is attached, and the
+        * status of ecore_mcp_mdump_cmd() is returned as-is; mcp_resp is not
+        * inspected here.
+        */
+       struct ecore_mdump_cmd_params mdump_cmd_params;
+
+       OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params));
+       mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_CLR_RETAIN;
+
+       return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+}
+
 static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
                                            struct ecore_ptt *p_ptt)
 {
+       struct ecore_mdump_retain_data mdump_retain;
+       enum _ecore_status_t rc;
+
        /* In CMT mode - no need for more than a single acknowledgment to the
         * MFW, and no more than a single notification to the upper driver.
         */
        if (p_hwfn != ECORE_LEADING_HWFN(p_hwfn->p_dev))
                return;
 
-       DP_NOTICE(p_hwfn, false,
-                 "Received a critical error notification from the MFW!\n");
+       rc = ecore_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
+       if (rc == ECORE_SUCCESS && mdump_retain.valid) {
+               DP_NOTICE(p_hwfn, false,
+                         "The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
+                         mdump_retain.epoch, mdump_retain.pf,
+                         mdump_retain.status);
+       } else {
+               DP_NOTICE(p_hwfn, false,
+                         "The MFW notified that a critical error occurred in the device\n");
+       }
 
        if (p_hwfn->p_dev->allow_mdump) {
                DP_NOTICE(p_hwfn, false,
@@ -1584,6 +1637,8 @@ static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
                return;
        }
 
+       DP_NOTICE(p_hwfn, false,
+                 "Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
        ecore_mcp_mdump_ack(p_hwfn, p_ptt);
        ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_HW_ATTN);
 }
@@ -2245,8 +2300,8 @@ enum _ecore_status_t ecore_mcp_mask_parities(struct ecore_hwfn *p_hwfn,
                                             struct ecore_ptt *p_ptt,
                                             u32 mask_parities)
 {
-       enum _ecore_status_t rc;
        u32 resp = 0, param = 0;
+       enum _ecore_status_t rc;
 
        rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES,
                           mask_parities, &resp, &param);
index 9b6a9b4..b84f0d1 100644 (file)
@@ -376,12 +376,33 @@ enum _ecore_status_t ecore_mcp_mdump_set_values(struct ecore_hwfn *p_hwfn,
  *
  * @param p_hwfn
  * @param p_ptt
+ * @param epoch
  *
  * @param return ECORE_SUCCESS upon success.
  */
 enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn,
                                             struct ecore_ptt *p_ptt);
 
+struct ecore_mdump_retain_data {       /* Output of
+                                        * ecore_mcp_mdump_get_retain(): the
+                                        * mdump retained data reported by
+                                        * the MFW.
+                                        */
+       u32 valid;      /* callers report the fields below only if non-zero */
+       u32 epoch;      /* epoch value copied from the MFW reply */
+       u32 pf;         /* presumably the PF index - TODO confirm */
+       u32 status;     /* status word; bit meaning is not defined here */
+};
+
+/**
+ * @brief - Gets the mdump retained data from the MFW.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_mdump_retain - output; written only when ECORE_SUCCESS is returned.
+ *
+ * @param return ECORE_SUCCESS upon success, ECORE_UNKNOWN_ERROR when the MFW
+ *               response is not FW_MSG_CODE_OK, otherwise the failure status
+ *               of the underlying mdump command.
+ */
+enum _ecore_status_t
+ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                          struct ecore_mdump_retain_data *p_mdump_retain);
+
 /**
  * @brief - Sets the MFW's max value for the given resource
  *
index 86fa0cb..059b55e 100644 (file)
@@ -1122,6 +1122,17 @@ ecore_mcp_mdump_get_info(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
 enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
                                                struct ecore_ptt *p_ptt);
 
+/**
+ * @brief - Clear the mdump retained data.
+ *
+ * Sends the DRV_MSG_CODE_MDUMP_CLR_RETAIN mdump sub-command to the MFW.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @param return ECORE_SUCCESS upon success, otherwise the status of the
+ *               underlying mdump command.
+ */
+enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn,
+                                               struct ecore_ptt *p_ptt);
+
 /**
  * @brief - Processes the TLV request from MFW i.e., get the required TLV info
  *          from the ecore client and send it to the MFW.
index 41711cc..f934c17 100644 (file)
@@ -1108,6 +1108,13 @@ struct load_rsp_stc {
 #define LOAD_RSP_FLAGS0_DRV_EXISTS     (0x1 << 0)
 };
 
+struct mdump_retain_data_stc { /* Destination layout used with
+                                * DRV_MSG_CODE_MDUMP_GET_RETAIN; also a
+                                * member of union drv_union_data.
+                                */
+       u32 valid;      /* non-zero: the remaining fields are reported */
+       u32 epoch;
+       u32 pf;         /* presumably the PF index - TODO confirm */
+       u32 status;
+};
+
 union drv_union_data {
        struct mcp_mac wol_mac; /* UNLOAD_DONE */
 
@@ -1138,6 +1145,7 @@ union drv_union_data {
 
        struct load_req_stc load_req;
        struct load_rsp_stc load_rsp;
+       struct mdump_retain_data_stc mdump_retain;
        /* ... */
 };
 
@@ -1350,6 +1358,8 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_MDUMP_SET_ENABLE          0x05
 /* Clear all logs */
 #define DRV_MSG_CODE_MDUMP_CLEAR_LOGS          0x06
+#define DRV_MSG_CODE_MDUMP_GET_RETAIN          0x07 /* Get retained data */
+#define DRV_MSG_CODE_MDUMP_CLR_RETAIN          0x08 /* Clear retained data */
 #define DRV_MSG_CODE_MEM_ECC_EVENTS            0x00260000 /* Param: None */
 /* Param: [0:15] - gpio number */
 #define DRV_MSG_CODE_GPIO_INFO                 0x00270000