Sun, Jul 07, 2019 at 01:53:06PM CEST, tar...@mellanox.com wrote:
>From: Aya Levin <a...@mellanox.com>
>
>Add support for recovery from rx timeout. On driver open we post a NOP
>work request on the rx channels to trigger napi in order to fill up the
>rx rings. In case napi wasn't scheduled due to a lost interrupt, perform
>EQ recovery.
>
>Signed-off-by: Aya Levin <a...@mellanox.com>
>Signed-off-by: Tariq Toukan <tar...@mellanox.com>
>---
> .../net/ethernet/mellanox/mlx5/core/en/health.h    |  1 +
> .../ethernet/mellanox/mlx5/core/en/reporter_rx.c   | 30 ++++++++++++++++++++++
> drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  1 +
> 3 files changed, 32 insertions(+)
>
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
>index e8c5d3bd86f1..aa46f7ecae53 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
>@@ -19,6 +19,7 @@
> int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
> void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
> void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
>+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
> 
> #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
> 
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
>index c47e9a53bd53..7e7dba129330 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
>@@ -109,6 +109,36 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
>       mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
> }
> 
>+static int mlx5e_rx_reporter_timeout_recover(void *ctx)
>+{
>+      struct mlx5e_rq *rq = (struct mlx5e_rq *)ctx;

No need to cast here: in C a void pointer converts implicitly to any
object pointer type. Please fix this in the rest of the patchset too.


>+      struct mlx5e_icosq *icosq = &rq->channel->icosq;
>+      struct mlx5_eq_comp *eq = rq->cq.mcq.eq;
>+      int err;
>+
>+      err = mlx5e_health_channel_eq_recover(eq, rq->channel);
>+      if (err)
>+              clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
>+
>+      return err;
>+}
>+
>+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
>+{
>+      struct mlx5e_icosq *icosq = &rq->channel->icosq;
>+      struct mlx5e_priv *priv = rq->channel->priv;
>+      char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
>+      struct mlx5e_err_ctx err_ctx = {};
>+
>+      err_ctx.ctx = rq;
>+      err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
>+      sprintf(err_str,
>+              "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n",
>+              icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn);
>+
>+      mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
>+}
>+
> static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
> {
>       return err_ctx->recover(err_ctx->ctx);
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>index 2d57611ac579..1ebdeccf395d 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>@@ -809,6 +809,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
>       netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
>                   c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
> 
>+      mlx5e_reporter_rx_timeout(rq);
>       return -ETIMEDOUT;
> }
> 
>-- 
>1.8.3.1
>

Reply via email to