Mon, Sep 14, 2020 at 08:07:57AM CEST, mo...@mellanox.com wrote:
>Add support for devlink reload action fw_activate. To activate firmware
>image the mlx5 driver resets the firmware and reloads it from flash. If
>a new image was stored on flash it will be loaded. Once this reload
>command is executed the driver initiates fw sync reset flow, where the
>firmware synchronizes all PFs on coming reset and driver reload.
>
>Signed-off-by: Moshe Shemesh <mo...@mellanox.com>
>---
>v3 -> v4:
>- Renamed actions_done to actions_performed
>v2 -> v3:
>- Return the reload actions done
>- Update reload action counters if reset initiated by remote host
>v1 -> v2:
>- Have fw_activate action instead of fw_reset level
>---
> .../net/ethernet/mellanox/mlx5/core/devlink.c | 62 ++++++++++++++++---
> .../ethernet/mellanox/mlx5/core/fw_reset.c    | 60 ++++++++++++++++--
> .../ethernet/mellanox/mlx5/core/fw_reset.h    |  1 +
> 3 files changed, 109 insertions(+), 14 deletions(-)
>
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c 
>b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>index f6b29deaf02e..fa8f6abbea4e 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>@@ -4,6 +4,7 @@
> #include <devlink.h>
> 
> #include "mlx5_core.h"
>+#include "fw_reset.h"
> #include "fs_core.h"
> #include "eswitch.h"
> 
>@@ -88,6 +89,32 @@ mlx5_devlink_info_get(struct devlink *devlink, struct 
>devlink_info_req *req,
>       return 0;
> }
> 
>+static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct 
>netlink_ext_ack *extack)
>+{
>+      struct mlx5_core_dev *dev = devlink_priv(devlink);
>+      u8 reset_level, reset_type, net_port_alive;
>+      int err;
>+
>+      err = mlx5_reg_mfrl_query(dev, &reset_level, &reset_type);
>+      if (err)
>+              return err;
>+      if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL3)) {
>+              NL_SET_ERR_MSG_MOD(extack, "FW activate requires reboot");
>+              return -EINVAL;
>+      }
>+
>+      net_port_alive = !!(reset_type & 
>MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE);
>+      err = mlx5_fw_set_reset_sync(dev, net_port_alive);
>+      if (err)
>+              goto out;
>+
>+      err = mlx5_fw_wait_fw_reset_done(dev);
>+out:
>+      if (err)
>+              NL_SET_ERR_MSG_MOD(extack, "FW activate command failed");
>+      return err;
>+}
>+
> static int mlx5_devlink_reload_down(struct devlink *devlink, bool 
> netns_change,
>                                   enum devlink_reload_action action,
>                                   enum devlink_reload_action_limit_level 
> limit_level,
>@@ -95,8 +122,17 @@ static int mlx5_devlink_reload_down(struct devlink 
>*devlink, bool netns_change,
> {
>       struct mlx5_core_dev *dev = devlink_priv(devlink);
> 
>-      mlx5_unload_one(dev, false);
>-      return 0;
>+      switch (action) {
>+      case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
>+              mlx5_unload_one(dev, false);
>+              return 0;
>+      case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
>+              return mlx5_devlink_reload_fw_activate(devlink, extack);
>+      default:
>+              /* Unsupported action should not get to this function */
>+              WARN_ON(1);
>+              return -EOPNOTSUPP;
>+      }
> }
> 
> static int mlx5_devlink_reload_up(struct devlink *devlink, enum 
> devlink_reload_action action,
>@@ -104,13 +140,22 @@ static int mlx5_devlink_reload_up(struct devlink 
>*devlink, enum devlink_reload_a
>                                 struct netlink_ext_ack *extack, unsigned long 
> *actions_performed)
> {
>       struct mlx5_core_dev *dev = devlink_priv(devlink);
>-      int err;
> 
>-      err = mlx5_load_one(dev, false);
>-      if (err)
>-              return err;
>       if (actions_performed)
>-              *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
>+              *actions_performed = BIT(action);
>+
>+      switch (action) {
>+      case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
>+      case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
>+              /* On fw_activate action, also driver is reloaded and reinit 
>performed */
>+              if (actions_performed)
>+                      *actions_performed |= 
>BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);

You should set DEVLINK_RELOAD_ACTION_FW_ACTIVATE bit in actions_performed
upon activation.


>+              return mlx5_load_one(dev, false);
>+      default:
>+              /* Unsupported action should not get to this function */
>+              WARN_ON(1);
>+              return -EOPNOTSUPP;
>+      }
> 
>       return 0;
> }
>@@ -128,7 +173,8 @@ static const struct devlink_ops mlx5_devlink_ops = {
> #endif
>       .flash_update = mlx5_devlink_flash_update,
>       .info_get = mlx5_devlink_info_get,
>-      .supported_reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
>+      .supported_reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
>+                                  BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
>       .supported_reload_action_limit_levels = 
> BIT(DEVLINK_RELOAD_ACTION_LIMIT_LEVEL_NONE),
>       .reload_down = mlx5_devlink_reload_down,
>       .reload_up = mlx5_devlink_reload_up,
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c 
>b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
>index 61237f4836cc..550f67b00473 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
>@@ -5,6 +5,7 @@
> 
> enum {
>       MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
>+      MLX5_FW_RESET_FLAGS_PENDING_COMP
> };
> 
> struct mlx5_fw_reset {
>@@ -17,6 +18,8 @@ struct mlx5_fw_reset {
>       struct work_struct reset_abort_work;
>       unsigned long reset_flags;
>       struct timer_list timer;
>+      struct completion done;
>+      int ret;
> };
> 
> static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
>@@ -53,7 +56,14 @@ int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 
>*reset_level, u8 *reset_ty
> 
> int mlx5_fw_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel)
> {
>-      return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 
>reset_type_sel, 0, true);
>+      struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
>+      int err;
>+
>+      set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
>+      err =  mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 
>reset_type_sel, 0, true);
>+      if (err)
>+              clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, 
>&fw_reset->reset_flags);
>+      return err;
> }
> 
> int mlx5_fw_set_live_patch(struct mlx5_core_dev *dev)
>@@ -66,19 +76,36 @@ static int mlx5_fw_set_reset_sync_ack(struct mlx5_core_dev 
>*dev)
>       return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 1, false);
> }
> 
>+static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
>+{
>+      struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
>+
>+      /* if this is the driver that initiated the fw reset, devlink completed 
>the reload */
>+      if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) 
>{
>+              complete(&fw_reset->done);
>+      } else {
>+              mlx5_load_one(dev, false);
>+              devlink_reload_implicit_actions_performed(priv_to_devlink(dev),
>+                                                        
>DEVLINK_RELOAD_ACTION_LIMIT_LEVEL_NONE,
>+                                                        
>BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
>+                                                        
>BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
>+      }
>+}
>+
> static void mlx5_sync_reset_reload_work(struct work_struct *work)
> {
>       struct mlx5_fw_reset *fw_reset = container_of(work, struct 
> mlx5_fw_reset,
>                                                     reset_reload_work);
>       struct mlx5_core_dev *dev = fw_reset->dev;
>+      int err;
> 
>       mlx5_enter_error_state(dev, true);
>       mlx5_unload_one(dev, false);
>-      if (mlx5_health_wait_pci_up(dev)) {
>+      err = mlx5_health_wait_pci_up(dev);
>+      if (err)
>               mlx5_core_err(dev, "reset reload flow aborted, PCI reads still 
> not working\n");
>-              return;
>-      }
>-      mlx5_load_one(dev, false);
>+      fw_reset->ret = err;
>+      mlx5_fw_reset_complete_reload(dev);
> }
> 
> static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
>@@ -264,7 +291,8 @@ static void mlx5_sync_reset_now_event(struct work_struct 
>*work)
> done:
>       if (err)
>               mlx5_start_health_poll(dev);
>-      mlx5_load_one(dev, false);
>+      fw_reset->ret = err;
>+      mlx5_fw_reset_complete_reload(dev);
> }
> 
> static void mlx5_sync_reset_abort_event(struct work_struct *work)
>@@ -313,6 +341,25 @@ static int fw_reset_event_notifier(struct notifier_block 
>*nb, unsigned long acti
>       return NOTIFY_OK;
> }
> 
>+#define MLX5_FW_RESET_TIMEOUT_MSEC 5000
>+int mlx5_fw_wait_fw_reset_done(struct mlx5_core_dev *dev)
>+{
>+      unsigned long timeout = msecs_to_jiffies(MLX5_FW_RESET_TIMEOUT_MSEC);
>+      struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
>+      int err;
>+
>+      if (!wait_for_completion_timeout(&fw_reset->done, timeout)) {
>+              mlx5_core_warn(dev, "FW sync reset timeout after %d seconds\n",
>+                             MLX5_FW_RESET_TIMEOUT_MSEC / 1000);
>+              err = -ETIMEDOUT;
>+              goto out;
>+      }
>+      err = fw_reset->ret;
>+out:
>+      clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
>+      return err;
>+}
>+
> int mlx5_fw_reset_events_init(struct mlx5_core_dev *dev)
> {
>       struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
>@@ -336,6 +383,7 @@ int mlx5_fw_reset_events_init(struct mlx5_core_dev *dev)
>       MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT);
>       mlx5_eq_notifier_register(dev, &fw_reset->nb);
> 
>+      init_completion(&fw_reset->done);
>       return 0;
> }
> 
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h 
>b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
>index 278f538ea92a..d7ee951a2258 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
>@@ -10,6 +10,7 @@ int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 
>*reset_level, u8 *reset_ty
> int mlx5_fw_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel);
> int mlx5_fw_set_live_patch(struct mlx5_core_dev *dev);
> 
>+int mlx5_fw_wait_fw_reset_done(struct mlx5_core_dev *dev);
> int mlx5_fw_reset_events_init(struct mlx5_core_dev *dev);
> void mlx5_fw_reset_events_cleanup(struct mlx5_core_dev *dev);
> 
>-- 
>2.17.1
>

Reply via email to