From: Moshe Shemesh <mo...@mellanox.com>

Add support for a dump callback to the mlx5 FW fatal reporter. The FW fatal dump uses the cr-dump functionality to gather cr-space data for debugging. The cr-dump uses the vsc interface, which remains valid even when the FW command interface is not functional, as is the case in FW fatal errors.

Signed-off-by: Moshe Shemesh <mo...@mellanox.com>
Signed-off-by: Eran Ben Elisha <era...@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/devlink.c | 113 +++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index ae08af00b101..406856002dd1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -252,9 +252,122 @@ mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
 	return 0;
 }
 
+/* Append one object to @dh_buffer holding two name/value pairs:
+ * "devlink region name" -> @crdump_region (string) and
+ * "snapshot id" -> @snapshot_id (u32).
+ * Returns 0 on success, or the first error reported by a devlink health
+ * buffer helper. On error the function returns immediately, without
+ * closing the nests it has already opened.
+ */
+static int
+mlx5_devlink_health_buffer_fill_snapshot(struct devlink_health_buffer *dh_buffer,
+					 char *crdump_region, u32 snapshot_id)
+{
+	int err;
+
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT);
+	if (err)
+		return err;
+	/* First pair: "devlink region name" -> crdump_region */
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
+	if (err)
+		return err;
+	err = devlink_health_buffer_put_object_name(dh_buffer,
+						    "devlink region name");
+	if (err)
+		return err;
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
+	if (err)
+		return err;
+	err = devlink_health_buffer_put_value_string(dh_buffer, crdump_region);
+	if (err)
+		return err;
+	/* Close the value nest, then the pair nest */
+	devlink_health_buffer_nest_end(dh_buffer);
+	devlink_health_buffer_nest_end(dh_buffer);
+
+	/* Second pair: "snapshot id" -> snapshot_id */
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
+	if (err)
+		return err;
+	err = devlink_health_buffer_put_object_name(dh_buffer, "snapshot id");
+	if (err)
+		return err;
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
+	if (err)
+		return err;
+	err = devlink_health_buffer_put_value_u32(dh_buffer, snapshot_id);
+	if (err)
+		return err;
+	/* Close the value nest, the pair nest and the enclosing object */
+	devlink_health_buffer_nest_end(dh_buffer);
+	devlink_health_buffer_nest_end(dh_buffer);
+	devlink_health_buffer_nest_end(dh_buffer);
+
+	return 0;
+}
+
+/* Minimum buffer size accepted by the dump callback; also set as .dump_size */
+#define MLX5_FW_FATAL_REPORTER_DUMP_SIZE_BYTE 256
+/* Dump callback of the FW fatal reporter.
+ * Collects a cr-dump through mlx5_crdump_collect() and writes the resulting
+ * region name and snapshot id into the first buffer of @buffers_array. When
+ * @priv_ctx is set it is treated as a struct mlx5_fw_reporter_ctx and its
+ * err_synd is written first.
+ * Returns -EPERM when the device is not a PF, -EINVAL when no buffer is
+ * supplied or @buff_size is below MLX5_FW_FATAL_REPORTER_DUMP_SIZE_BYTE,
+ * otherwise 0 or the error of the first failing step.
+ */
+static int
+mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
+			    struct devlink_health_buffer **buffers_array,
+			    unsigned int buff_size, unsigned int num_buffers,
+			    void *priv_ctx)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	struct devlink_health_buffer *buffer;
+	char crdump_region[20] = "";
+	u32 snapshot_id;
+	int err;
+
+	if (!mlx5_core_is_pf(dev)) {
+		mlx5_core_err(dev, "Only PF is permitted to run FW fatal dump\n");
+		return -EPERM;
+	}
+
+	if (!buffers_array || num_buffers < 1 ||
+	    buff_size < MLX5_FW_FATAL_REPORTER_DUMP_SIZE_BYTE)
+		return -EINVAL;
+
+	err = mlx5_crdump_collect(dev, crdump_region, &snapshot_id);
+	if (err)
+		return err;
+
+	/* Only the first buffer is written */
+	buffer = buffers_array[0];
+	if (priv_ctx) {
+		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+		err = mlx5_devlink_health_buffer_fill_syndrom(buffer,
+							      fw_reporter_ctx->err_synd);
+		if (err)
+			return err;
+	}
+
+	return mlx5_devlink_health_buffer_fill_snapshot(buffer, crdump_region,
+							snapshot_id);
+}
+
 static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
 		.name = "FW_fatal",
 		.recover = mlx5_fw_fatal_reporter_recover,
+		.dump_size = MLX5_FW_FATAL_REPORTER_DUMP_SIZE_BYTE,
+		.dump = mlx5_fw_fatal_reporter_dump,
 };
 
 #define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000
-- 
2.17.1