Sun, Jul 07, 2019 at 01:53:06PM CEST, tar...@mellanox.com wrote: >From: Aya Levin <a...@mellanox.com> > >Add support for recovery from rx timeout. On driver open we post NOP >work request on the rx channels to trigger napi in order to fillup the >rx rings. In case napi wasn't scheduled due to a lost interrupt, perform >EQ recovery. > >Signed-off-by: Aya Levin <a...@mellanox.com> >Signed-off-by: Tariq Toukan <tar...@mellanox.com> >--- > .../net/ethernet/mellanox/mlx5/core/en/health.h | 1 + > .../ethernet/mellanox/mlx5/core/en/reporter_rx.c | 30 ++++++++++++++++++++++ > drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 + > 3 files changed, 32 insertions(+) > >diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h >b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h >index e8c5d3bd86f1..aa46f7ecae53 100644 >--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h >+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h >@@ -19,6 +19,7 @@ > int mlx5e_reporter_rx_create(struct mlx5e_priv *priv); > void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); > void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); >+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); > > #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 > >diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c >b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c >index c47e9a53bd53..7e7dba129330 100644 >--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c >+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c >@@ -109,6 +109,36 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq >*icosq) > mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); > } > >+static int mlx5e_rx_reporter_timeout_recover(void *ctx) >+{ >+ struct mlx5e_rq *rq = (struct mlx5e_rq *)ctx;
No need to cast here: ctx is a void *, which C converts implicitly to struct mlx5e_rq *. Please fix this in the rest of the patchset too. >+ struct mlx5e_icosq *icosq = &rq->channel->icosq; >+ struct mlx5_eq_comp *eq = rq->cq.mcq.eq; >+ int err; >+ >+ err = mlx5e_health_channel_eq_recover(eq, rq->channel); >+ if (err) >+ clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); >+ >+ return err; >+} >+ >+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) >+{ >+ struct mlx5e_icosq *icosq = &rq->channel->icosq; >+ struct mlx5e_priv *priv = rq->channel->priv; >+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; >+ struct mlx5e_err_ctx err_ctx = {}; >+ >+ err_ctx.ctx = rq; >+ err_ctx.recover = mlx5e_rx_reporter_timeout_recover; >+ sprintf(err_str, >+ "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n", >+ icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn); >+ >+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); >+} >+ > static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) > { > return err_ctx->recover(err_ctx->ctx); >diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c >b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c >index 2d57611ac579..1ebdeccf395d 100644 >--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c >+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c >@@ -809,6 +809,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int >wait_time) > netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] > RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", > c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); > >+ mlx5e_reporter_rx_timeout(rq); > return -ETIMEDOUT; > } > >-- >1.8.3.1 >