To implement debug dump retrieval on a live system, we add callbacks that collect the same data that is currently collected during a manual `ethtool -d` call.
But instead we collect the dump immediately at the moment a bad thing happens, and save it for later retrieval by the same `ethtool -d`. To make it possible to track this event, we add a kobject uevent trigger, so that a udev event handler script can be used to automatically collect dumps. Signed-off-by: Ariel Elior <ariel.el...@marvell.com> Signed-off-by: Michal Kalderon <michal.kalde...@marvell.com> Signed-off-by: Igor Russkikh <irussk...@marvell.com> --- drivers/net/ethernet/qlogic/qed/qed.h | 2 + drivers/net/ethernet/qlogic/qed/qed_debug.c | 76 ++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_debug.h | 1 + drivers/net/ethernet/qlogic/qed/qed_main.c | 1 + drivers/net/ethernet/qlogic/qede/qede_main.c | 3 + include/linux/qed/qed_if.h | 1 + 6 files changed, 84 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 07f6ef930b52..47679e67ac48 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -876,6 +876,8 @@ struct qed_dev { DECLARE_HASHTABLE(connections, 10); const struct firmware *firmware; + u8 *p_dbg_data_buf; + u32 dbg_data_buf_size; bool print_dbg_data; u32 rdma_max_sge; diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 57a0dab88431..6c8c44052012 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -7776,6 +7776,12 @@ static u32 qed_calc_regdump_header(struct qed_dev *cdev, return res; } +static void qed_dbg_all_data_free_buf(struct qed_dev *cdev) +{ + vfree(cdev->p_dbg_data_buf); + cdev->p_dbg_data_buf = NULL; +} + int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) { u8 cur_engine, omit_engine = 0, org_engine; @@ -7786,6 +7792,14 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) u32 offset = 0, feature_size; int rc; + if (cdev->p_dbg_data_buf) { + DP_NOTICE(cdev, + "Using a debug data buffer that was previously obtained and 
saved\n"); + memcpy(buffer, cdev->p_dbg_data_buf, cdev->dbg_data_buf_size); + qed_dbg_all_data_free_buf(cdev); + return 0; + } + for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) grc_params[i] = dev_data->grc.param_val[i]; @@ -8004,6 +8018,8 @@ int qed_dbg_all_data_size(struct qed_dev *cdev) u32 regs_len = 0, image_len = 0, ilt_len = 0, total_ilt_len = 0; u8 cur_engine, org_engine; + if (cdev->p_dbg_data_buf) + return cdev->dbg_data_buf_size; cdev->disable_ilt_dump = false; org_engine = qed_get_debug_engine(cdev); for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { @@ -8055,6 +8071,63 @@ int qed_dbg_all_data_size(struct qed_dev *cdev) return regs_len; } +static void qed_dbg_send_uevent(struct qed_dev *cdev, char *uevent) +{ + struct device *dev = &cdev->pdev->dev; + char bdf[64]; + char *envp_ext[] = { bdf, NULL }; + int rc; + + snprintf(bdf, sizeof(bdf), "QED_DEBUGFS_BDF_%s=%02x:%02x.%x", + uevent, cdev->pdev->bus->number, PCI_SLOT(cdev->pdev->devfn), + PCI_FUNC(cdev->pdev->devfn)); + + rc = kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp_ext); + if (rc) + DP_NOTICE(cdev, "Failed to send uevent %s\n", uevent); +} + +static int __qed_dbg_save_all_data(struct qed_dev *cdev) +{ + u32 dbg_data_buf_size; + u8 *p_dbg_data_buf; + int rc; + + dbg_data_buf_size = qed_dbg_all_data_size(cdev); + p_dbg_data_buf = vzalloc(dbg_data_buf_size); + if (!p_dbg_data_buf) { + DP_NOTICE(cdev, + "Failed to allocate memory for a debug data buffer\n"); + return -ENOMEM; + } + + rc = qed_dbg_all_data(cdev, p_dbg_data_buf); + if (rc) { + DP_NOTICE(cdev, "Failed to obtain debug data\n"); + vfree(p_dbg_data_buf); + return rc; + } + + cdev->p_dbg_data_buf = p_dbg_data_buf; + cdev->dbg_data_buf_size = dbg_data_buf_size; + + return 0; +} + +void qed_dbg_save_all_data(struct qed_dev *cdev, bool print_dbg_data) +{ + bool curr_print_flag = cdev->print_dbg_data; + + /* Only one debug buffer is kept, so remove anything collected + * before this request + */ + qed_dbg_all_data_free_buf(cdev); + 
+ cdev->print_dbg_data = print_dbg_data; + __qed_dbg_save_all_data(cdev); + qed_dbg_send_uevent(cdev, "DBG"); + cdev->print_dbg_data = curr_print_flag; +} int qed_dbg_feature(struct qed_dev *cdev, void *buffer, enum qed_dbg_features feature, u32 *num_dumped_bytes) { @@ -8164,4 +8237,7 @@ void qed_dbg_pf_exit(struct qed_dev *cdev) feature->dump_buf = NULL; } } + + /* free a previously saved buffer if exists */ + qed_dbg_all_data_free_buf(cdev); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.h b/drivers/net/ethernet/qlogic/qed/qed_debug.h index edf99d296bd1..7ba42375287a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.h +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.h @@ -46,6 +46,7 @@ int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer, int qed_dbg_mcp_trace_size(struct qed_dev *cdev); int qed_dbg_all_data(struct qed_dev *cdev, void *buffer); int qed_dbg_all_data_size(struct qed_dev *cdev); +void qed_dbg_save_all_data(struct qed_dev *cdev, bool print_dbg_data); u8 qed_get_debug_engine(struct qed_dev *cdev); void qed_set_debug_engine(struct qed_dev *cdev, int engine_number); int qed_dbg_feature(struct qed_dev *cdev, void *buffer, diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index d7c9d94e4c59..4411bc8fce98 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -2710,6 +2710,7 @@ const struct qed_common_ops qed_common_ops_pass = { .update_msglvl = &qed_init_dp, .dbg_all_data = &qed_dbg_all_data, .dbg_all_data_size = &qed_dbg_all_data_size, + .dbg_save_all_data = &qed_dbg_save_all_data, .chain_alloc = &qed_chain_alloc, .chain_free = &qed_chain_free, .nvm_flash = &qed_nvm_flash, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 3a3e0089a03c..590b0bfa7030 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2527,6 +2527,9 
@@ static void qede_generic_hw_err_handler(struct qede_dev *edev) "Generic sleepable HW error handling started - err_flags 0x%lx\n", edev->err_flags); + if (test_and_clear_bit(QEDE_ERR_GET_DBG_INFO, &edev->err_flags)) + edev->ops->common->dbg_save_all_data(cdev, true); + /* Trigger a recovery process. * This is placed in the sleep requiring section just to make * sure it is the last one, and that all the other operations diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 1b7d9548ee43..47f69964da27 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -940,6 +940,7 @@ struct qed_common_ops { int (*dbg_all_data_size) (struct qed_dev *cdev); + void (*dbg_save_all_data)(struct qed_dev *cdev, bool print_dbg_data); /** * @brief can_link_change - can the instance change the link or not * -- 2.25.1