net/qede/base: support MFW crash dump
author Rasesh Mody <rasesh.mody@qlogic.com>
Wed, 19 Oct 2016 04:11:28 +0000 (21:11 -0700)
committer Bruce Richardson <bruce.richardson@intel.com>
Wed, 26 Oct 2016 17:42:22 +0000 (19:42 +0200)
Add support for management firmware (MFW) crash dump collection.

Signed-off-by: Rasesh Mody <rasesh.mody@qlogic.com>
drivers/net/qede/base/ecore.h
drivers/net/qede/base/ecore_dev.c
drivers/net/qede/base/ecore_dev_api.h
drivers/net/qede/base/ecore_mcp.c
drivers/net/qede/base/ecore_mcp.h
drivers/net/qede/base/ecore_mcp_api.h
drivers/net/qede/qede_main.c

index 874c3a3..89e2bd0 100644 (file)
@@ -735,6 +735,9 @@ struct ecore_dev {
 
        bool                            attn_clr_en;
 
+       /* Indicates whether the MFW is allowed to collect a crash dump */
+       bool                            mdump_en;
+
        /* Indicates if the reg_fifo is checked after any register access */
        bool                            chk_reg_fifo;
 
index 319edeb..b530173 100644 (file)
@@ -1619,24 +1619,20 @@ static void ecore_reset_mb_shadow(struct ecore_hwfn *p_hwfn,
 }
 
 enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
-                                  struct ecore_tunn_start_params *p_tunn,
-                                  bool b_hw_start,
-                                  enum ecore_int_mode int_mode,
-                                  bool allow_npar_tx_switch,
-                                  const u8 *bin_fw_data)
+                                  struct ecore_hw_init_params *p_params)
 {
        enum _ecore_status_t rc, mfw_rc;
        u32 load_code, param;
        int i, j;
 
-       if ((int_mode == ECORE_INT_MODE_MSI) && (p_dev->num_hwfns > 1)) {
+       if (p_params->int_mode == ECORE_INT_MODE_MSI && p_dev->num_hwfns > 1) {
                DP_NOTICE(p_dev, false,
                          "MSI mode is not supported for CMT devices\n");
                return ECORE_INVAL;
        }
 
        if (IS_PF(p_dev)) {
-               rc = ecore_init_fw_data(p_dev, bin_fw_data);
+               rc = ecore_init_fw_data(p_dev, p_params->bin_fw_data);
                if (rc != ECORE_SUCCESS)
                        return rc;
        }
@@ -1733,9 +1729,11 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
                        /* Fall into */
                case FW_MSG_CODE_DRV_LOAD_FUNCTION:
                        rc = ecore_hw_init_pf(p_hwfn, p_hwfn->p_main_ptt,
-                                             p_tunn, p_hwfn->hw_info.hw_mode,
-                                             b_hw_start, int_mode,
-                                             allow_npar_tx_switch);
+                                             p_params->p_tunn,
+                                             p_hwfn->hw_info.hw_mode,
+                                             p_params->b_hw_start,
+                                             p_params->int_mode,
+                                             p_params->allow_npar_tx_switch);
                        break;
                default:
                        rc = ECORE_NOTIMPL;
@@ -1759,6 +1757,10 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
                        return mfw_rc;
                }
 
+               ecore_mcp_mdump_get_info(p_hwfn, p_hwfn->p_main_ptt);
+               ecore_mcp_mdump_set_values(p_hwfn, p_hwfn->p_main_ptt,
+                                          p_params->epoch);
+
                /* send DCBX attention request command */
                DP_VERBOSE(p_hwfn, ECORE_MSG_DCB,
                           "sending phony dcbx set command to trigger DCBx attention handling\n");
index 1a810b5..042c0af 100644 (file)
@@ -57,26 +57,31 @@ enum _ecore_status_t ecore_resc_alloc(struct ecore_dev *p_dev);
  */
 void ecore_resc_setup(struct ecore_dev *p_dev);
 
+/* Parameters consumed by ecore_hw_init() */
+struct ecore_hw_init_params {
+       /* tunnelling parameters */
+       struct ecore_tunn_start_params *p_tunn;
+       /* forwarded as-is to ecore_hw_init_pf() */
+       bool b_hw_start;
+       /* interrupt mode [msix, inta, etc.] to use */
+       enum ecore_int_mode int_mode;
+       /* npar tx switching to be used for vports configured for
+        * tx-switching
+        */
+       bool allow_npar_tx_switch;
+       /* binary fw data pointer in binary fw file.
+        * Pass NULL if not using binary fw file.
+        */
+       const u8 *bin_fw_data;
+       /* the OS Epoch time in seconds; handed to
+        * ecore_mcp_mdump_set_values() during ecore_hw_init()
+        */
+       u32 epoch;
+};
+
 /**
  * @brief ecore_hw_init -
  *
  * @param p_dev
- * @param p_tunn - tunneling parameters
- * @param b_hw_start
- * @param int_mode - interrupt mode [msix, inta, etc.] to use.
- * @param allow_npar_tx_switch - npar tx switching to be used
- *       for vports configured for tx-switching.
- * @param bin_fw_data - binary fw data pointer in binary fw file.
- *                     Pass NULL if not using binary fw file.
+ * @param p_params
  *
  * @return enum _ecore_status_t
  */
 enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
-                                  struct ecore_tunn_start_params *p_tunn,
-                                  bool b_hw_start,
-                                  enum ecore_int_mode int_mode,
-                                  bool allow_npar_tx_switch,
-                                  const u8 *bin_fw_data);
+                                  struct ecore_hw_init_params *p_params);
 
 /**
  * @brief ecore_hw_timers_stop_all -
index cf67fa1..500368e 100644 (file)
@@ -1043,6 +1043,154 @@ static void ecore_mcp_handle_fan_failure(struct ecore_hwfn *p_hwfn,
        ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_FAN_FAIL);
 }
 
+/* Issues one mdump sub-command to the MFW via the DRV_MSG_CODE_MDUMP_CMD
+ * mailbox command.
+ *
+ * mdump_cmd is sent as the mailbox parameter; p_data_src and p_data_dst are
+ * handed unchanged to ecore_mcp_cmd_and_union(). Once the mailbox exchange
+ * succeeds, *p_mcp_resp receives the MFW response code.
+ *
+ * Returns the mailbox status if the exchange failed (*p_mcp_resp is left
+ * untouched in that case), ECORE_INVAL if the MFW answered
+ * FW_MSG_CODE_MDUMP_INVALID_CMD, and ECORE_SUCCESS otherwise.
+ */
+static enum _ecore_status_t
+ecore_mcp_mdump_cmd(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                   u32 mdump_cmd, union drv_union_data *p_data_src,
+                   union drv_union_data *p_data_dst, u32 *p_mcp_resp)
+{
+       struct ecore_mcp_mb_params mdump_mb;
+       enum _ecore_status_t status;
+
+       /* Build the mailbox request from scratch */
+       OSAL_MEM_ZERO(&mdump_mb, sizeof(mdump_mb));
+       mdump_mb.cmd = DRV_MSG_CODE_MDUMP_CMD;
+       mdump_mb.param = mdump_cmd;
+       mdump_mb.p_data_src = p_data_src;
+       mdump_mb.p_data_dst = p_data_dst;
+
+       status = ecore_mcp_cmd_and_union(p_hwfn, p_ptt, &mdump_mb);
+       if (status != ECORE_SUCCESS)
+               return status;
+
+       /* Report the response to the caller before inspecting it */
+       *p_mcp_resp = mdump_mb.mcp_resp;
+       if (mdump_mb.mcp_resp != FW_MSG_CODE_MDUMP_INVALID_CMD)
+               return ECORE_SUCCESS;
+
+       DP_NOTICE(p_hwfn, false,
+                 "MFW claims that the mdump command is illegal [mdump_cmd 0x%x]\n",
+                 mdump_cmd);
+
+       return ECORE_INVAL;
+}
+
+/* Sends the DRV_MSG_CODE_MDUMP_ACK mdump sub-command with no data buffers.
+ * The MFW response code is not examined; only the status returned by
+ * ecore_mcp_mdump_cmd() is propagated.
+ */
+static enum _ecore_status_t ecore_mcp_mdump_ack(struct ecore_hwfn *p_hwfn,
+                                               struct ecore_ptt *p_ptt)
+{
+       enum _ecore_status_t status;
+       u32 unused_resp;
+
+       status = ecore_mcp_mdump_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MDUMP_ACK,
+                                    OSAL_NULL, OSAL_NULL, &unused_resp);
+
+       return status;
+}
+
+/* Sends the DRV_MSG_CODE_MDUMP_SET_VALUES mdump sub-command, passing the
+ * given epoch value to the MFW in the first sizeof(epoch) bytes of the
+ * mailbox union data.
+ *
+ * Returns the status reported by ecore_mcp_mdump_cmd(); the MFW response
+ * code itself is not examined.
+ */
+enum _ecore_status_t ecore_mcp_mdump_set_values(struct ecore_hwfn *p_hwfn,
+                                               struct ecore_ptt *p_ptt,
+                                               u32 epoch)
+{
+       union drv_union_data union_data;
+       u32 mcp_resp;
+
+       /* Only the leading sizeof(epoch) bytes carry data. Zero the whole
+        * union first so that no indeterminate stack bytes are handed to
+        * the mailbox path along with the epoch.
+        */
+       OSAL_MEM_ZERO(&union_data, sizeof(union_data));
+       OSAL_MEMCPY(&union_data.raw_data, &epoch, sizeof(epoch));
+
+       return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MDUMP_SET_VALUES,
+                                  &union_data, OSAL_NULL, &mcp_resp);
+}
+
+/* Sends the DRV_MSG_CODE_MDUMP_TRIGGER mdump sub-command with no data
+ * buffers and returns the status reported by ecore_mcp_mdump_cmd(). The MFW
+ * response code is not examined.
+ */
+enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn,
+                                            struct ecore_ptt *p_ptt)
+{
+       enum _ecore_status_t status;
+       u32 unused_resp;
+
+       status = ecore_mcp_mdump_cmd(p_hwfn, p_ptt,
+                                    DRV_MSG_CODE_MDUMP_TRIGGER,
+                                    OSAL_NULL, OSAL_NULL, &unused_resp);
+
+       return status;
+}
+
+/* Sends the DRV_MSG_CODE_MDUMP_CLEAR_LOGS mdump sub-command with no data
+ * buffers and returns the status reported by ecore_mcp_mdump_cmd(). The MFW
+ * response code is not examined.
+ */
+enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
+                                               struct ecore_ptt *p_ptt)
+{
+       enum _ecore_status_t status;
+       u32 unused_resp;
+
+       status = ecore_mcp_mdump_cmd(p_hwfn, p_ptt,
+                                    DRV_MSG_CODE_MDUMP_CLEAR_LOGS,
+                                    OSAL_NULL, OSAL_NULL, &unused_resp);
+
+       return status;
+}
+
+/* Sends the DRV_MSG_CODE_MDUMP_GET_CONFIG mdump sub-command and copies the
+ * mdump_config member of the response union into *p_mdump_config.
+ *
+ * Returns:
+ *  - the status of ecore_mcp_mdump_cmd() if the command itself failed;
+ *  - ECORE_NOTIMPL if the MFW response code is zero;
+ *  - ECORE_UNKNOWN_ERROR if the response code is not FW_MSG_CODE_OK
+ *    (*p_mdump_config is still written in this case);
+ *  - ECORE_SUCCESS otherwise.
+ */
+static enum _ecore_status_t
+ecore_mcp_mdump_get_config(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                          struct mdump_config_stc *p_mdump_config)
+{
+       union drv_union_data rsp_data;
+       enum _ecore_status_t status;
+       u32 resp_code;
+
+       status = ecore_mcp_mdump_cmd(p_hwfn, p_ptt,
+                                    DRV_MSG_CODE_MDUMP_GET_CONFIG,
+                                    OSAL_NULL, &rsp_data, &resp_code);
+       if (status != ECORE_SUCCESS)
+               return status;
+
+       /* A zero response implies that the mdump command is not supported */
+       if (resp_code == 0)
+               return ECORE_NOTIMPL;
+
+       if (resp_code != FW_MSG_CODE_OK) {
+               DP_NOTICE(p_hwfn, false,
+                         "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
+                         resp_code);
+               status = ECORE_UNKNOWN_ERROR;
+       }
+
+       /* The copy-out is done regardless of the response code */
+       OSAL_MEMCPY(p_mdump_config, &rsp_data.mdump_config,
+                   sizeof(*p_mdump_config));
+
+       return status;
+}
+
+/* Fetches the MFW mdump configuration via ecore_mcp_mdump_get_config(), logs
+ * its fields at verbose level, and emits a notice when the MFW reports at
+ * least one valid log.
+ *
+ * Returns the failure status of ecore_mcp_mdump_get_config() if the fetch
+ * did not succeed, ECORE_SUCCESS otherwise.
+ */
+enum _ecore_status_t ecore_mcp_mdump_get_info(struct ecore_hwfn *p_hwfn,
+                                             struct ecore_ptt *p_ptt)
+{
+       struct mdump_config_stc cfg;
+       enum _ecore_status_t status;
+
+       status = ecore_mcp_mdump_get_config(p_hwfn, p_ptt, &cfg);
+       if (status != ECORE_SUCCESS)
+               return status;
+
+       DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
+                  "MFW mdump_config: version 0x%x, config 0x%x, epoch 0x%x, num_of_logs 0x%x, valid_logs 0x%x\n",
+                  cfg.version, cfg.config, cfg.epoc,
+                  cfg.num_of_logs, cfg.valid_logs);
+
+       /* Make a previously captured dump visible to the user */
+       if (cfg.valid_logs > 0) {
+               DP_NOTICE(p_hwfn, false,
+                         "* * * IMPORTANT - HW ERROR register dump captured by device * * *\n");
+       }
+
+       return ECORE_SUCCESS;
+}
+
+/* Records the requested setting in p_dev->mdump_en. When the flag is set,
+ * ecore_mcp_handle_critical_error() does not acknowledge a critical error
+ * notification from the MFW.
+ */
+void ecore_mcp_mdump_enable(struct ecore_dev *p_dev, bool mdump_enable)
+{
+       p_dev->mdump_en = mdump_enable;
+}
+
+/* Handles a critical error notification from the MFW.
+ *
+ * Only the leading hwfn reacts. If mdump_en is set on the device the
+ * notification is deliberately left unacknowledged; otherwise it is
+ * acknowledged via ecore_mcp_mdump_ack() and ECORE_HW_ERR_HW_ATTN is
+ * reported through ecore_hw_err_notify().
+ */
+static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
+                                           struct ecore_ptt *p_ptt)
+{
+       /* In CMT mode - no need for more than a single acknowledgment to the
+        * MFW, and no more than a single notification to the upper driver.
+        */
+       if (p_hwfn != ECORE_LEADING_HWFN(p_hwfn->p_dev))
+               return;
+
+       DP_NOTICE(p_hwfn, false,
+                 "Received a critical error notification from the MFW!\n");
+
+       if (!p_hwfn->p_dev->mdump_en) {
+               ecore_mcp_mdump_ack(p_hwfn, p_ptt);
+               ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_HW_ATTN);
+               return;
+       }
+
+       DP_NOTICE(p_hwfn, false,
+                 "Not acknowledging the notification to allow the MFW crash dump\n");
+}
+
 enum _ecore_status_t ecore_mcp_handle_events(struct ecore_hwfn *p_hwfn,
                                             struct ecore_ptt *p_ptt)
 {
@@ -1104,6 +1252,9 @@ enum _ecore_status_t ecore_mcp_handle_events(struct ecore_hwfn *p_hwfn,
                case MFW_DRV_MSG_FAILURE_DETECTED:
                        ecore_mcp_handle_fan_failure(p_hwfn, p_ptt);
                        break;
+               case MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED:
+                       ecore_mcp_handle_critical_error(p_hwfn, p_ptt);
+                       break;
                default:
                        /* @DPDK */
                        DP_NOTICE(p_hwfn, false,
index 64c639f..d3103ff 100644 (file)
@@ -303,6 +303,51 @@ int __ecore_configure_pf_min_bandwidth(struct ecore_hwfn *p_hwfn,
 enum _ecore_status_t ecore_mcp_mask_parities(struct ecore_hwfn *p_hwfn,
                                             struct ecore_ptt *p_ptt,
                                             u32 mask_parities);
+/**
+ * @brief - Sends crash mdump related info to the MFW.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param epoch - the OS Epoch time in seconds
+ *
+ * @return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t ecore_mcp_mdump_set_values(struct ecore_hwfn *p_hwfn,
+                                               struct ecore_ptt *p_ptt,
+                                               u32 epoch);
+
+/**
+ * @brief - Triggers a MFW crash dump procedure.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn,
+                                            struct ecore_ptt *p_ptt);
+
+/**
+ * @brief - Clears the MFW crash dump logs.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
+                                               struct ecore_ptt *p_ptt);
+
+/**
+ * @brief - Gets the MFW crash dump configuration and logs info.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t ecore_mcp_mdump_get_info(struct ecore_hwfn *p_hwfn,
+                                             struct ecore_ptt *p_ptt);
+
 enum _ecore_status_t ecore_mcp_get_resc_info(struct ecore_hwfn *p_hwfn,
                                             struct ecore_ptt *p_ptt,
                                             struct resource_info *p_resc_info,
index ff4f1ca..c26b494 100644 (file)
@@ -792,4 +792,14 @@ enum _ecore_status_t ecore_mcp_mem_ecc_events(struct ecore_hwfn *p_hwfn,
                                              struct ecore_ptt *p_ptt,
                                              u64 *num_events);
 
+/**
+ * @brief Sets whether a critical error notification from the MFW is
+ *        acknowledged, or ignored so that the MFW can collect a crash dump.
+ *
+ * @param p_dev
+ * @param mdump_enable
+ *
+ */
+void ecore_mcp_mdump_enable(struct ecore_dev *p_dev, bool mdump_enable);
+
 #endif
index e4ef4f0..60655b7 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #include <limits.h>
+#include <time.h>
 #include <rte_alarm.h>
 
 #include "qede_ethdev.h"
@@ -221,6 +222,7 @@ static int qed_slowpath_start(struct ecore_dev *edev,
        const uint8_t *data = NULL;
        struct ecore_hwfn *hwfn;
        struct ecore_mcp_drv_version drv_version;
+       struct ecore_hw_init_params hw_init_params;
        struct qede_dev *qdev = (struct qede_dev *)edev;
        int rc;
 #ifdef QED_ENC_SUPPORTED
@@ -259,7 +261,6 @@ static int qed_slowpath_start(struct ecore_dev *edev,
        qed_start_iov_task(edev);
 #endif
 
-       /* Start the slowpath */
 #ifdef CONFIG_ECORE_BINARY_FW
        if (IS_PF(edev))
                data = (const uint8_t *)edev->firmware + sizeof(u32);
@@ -267,6 +268,8 @@ static int qed_slowpath_start(struct ecore_dev *edev,
 
        allow_npar_tx_switching = npar_tx_switching ? true : false;
 
+       /* Start the slowpath */
+       memset(&hw_init_params, 0, sizeof(hw_init_params));
 #ifdef QED_ENC_SUPPORTED
        memset(&tunn_info, 0, sizeof(tunn_info));
        tunn_info.tunn_mode |= 1 << QED_MODE_VXLAN_TUNN |
@@ -276,12 +279,14 @@ static int qed_slowpath_start(struct ecore_dev *edev,
        tunn_info.tunn_clss_vxlan = QED_TUNN_CLSS_MAC_VLAN;
        tunn_info.tunn_clss_l2gre = QED_TUNN_CLSS_MAC_VLAN;
        tunn_info.tunn_clss_ipgre = QED_TUNN_CLSS_MAC_VLAN;
-       rc = ecore_hw_init(edev, &tunn_info, true, ECORE_INT_MODE_MSIX,
-                          allow_npar_tx_switching, data);
-#else
-       rc = ecore_hw_init(edev, NULL, true, ECORE_INT_MODE_MSIX,
-                          allow_npar_tx_switching, data);
+       hw_init_params.p_tunn = &tunn_info;
 #endif
+       hw_init_params.b_hw_start = true;
+       hw_init_params.int_mode = ECORE_INT_MODE_MSIX;
+       hw_init_params.allow_npar_tx_switch = allow_npar_tx_switching;
+       hw_init_params.bin_fw_data = data;
+       hw_init_params.epoch = (u32)time(NULL);
+       rc = ecore_hw_init(edev, &hw_init_params);
        if (rc) {
                DP_ERR(edev, "ecore_hw_init failed\n");
                goto err2;