From: Tariq Toukan <tar...@mellanox.com>

HW expects the data size in DUMP WQEs to be up to MTU.
Make sure they are in range.

We elevate the frag page refcount by 'n-1', in addition to the
one obtained in tx_sync_info_get(), having an overall of 'n'
references. We bulk the increments using a single page_ref_add()
call, to optimize performance.
The refcounts are released one by one, by the corresponding completions.

Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support")
Signed-off-by: Tariq Toukan <tar...@mellanox.com>
Reviewed-by: Eran Ben Elisha <era...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
 .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 11 +++---
 .../mellanox/mlx5/core/en_accel/ktls.h        | 11 +++++-
 .../mellanox/mlx5/core/en_accel/ktls_tx.c     | 34 ++++++++++++++++---
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  7 +++-
 5 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index cb6f7b87e38f..f1a7bc46f1c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -410,6 +410,7 @@ struct mlx5e_txqsq {
        struct device             *pdev;
        __be32                     mkey_be;
        unsigned long              state;
+       unsigned int               hw_mtu;
        struct hwtstamp_config    *tstamp;
        struct mlx5_clock         *clock;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 25f9dda578ac..7c8796d9743f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -15,15 +15,14 @@
 #else
 /* TLS offload requires additional stop_room for:
  *  - a resync SKB.
- * kTLS offload requires additional stop_room for:
- * - static params WQE,
- * - progress params WQE, and
- * - resync DUMP per frag.
+ * kTLS offload requires fixed additional stop_room for:
+ * - a static params WQE, and a progress params WQE.
+ * The additional MTU-depending room for the resync DUMP WQEs
+ * will be calculated and added in runtime.
  */
 #define MLX5E_SQ_TLS_ROOM  \
        (MLX5_SEND_WQE_MAX_WQEBBS + \
-        MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \
-        MAX_SKB_FRAGS * MLX5E_KTLS_DUMP_WQEBBS)
+        MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)
 #endif
 
 #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg 
*)NULL)->inline_hdr.start))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index eb692feba4a6..929966e6fbc4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -94,7 +94,16 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device 
*netdev,
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
                                           struct mlx5e_tx_wqe_info *wi,
                                           u32 *dma_fifo_cc);
-
+static inline u8
+mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags,
+                           unsigned int sync_len)
+{
+       /* Given the MTU and sync_len, calculates an upper bound for the
+        * number of WQEBBs needed for the TX resync DUMP WQEs of a record.
+        */
+       return MLX5E_KTLS_DUMP_WQEBBS *
+               (nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu));
+}
 #else
 
 static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 59e3f48470d9..e10b0bb696da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -373,7 +373,7 @@ mlx5e_ktls_tx_handle_ooo(struct 
mlx5e_ktls_offload_context_tx *priv_tx,
                return skb;
        }
 
-       num_wqebbs = info.nr_frags * MLX5E_KTLS_DUMP_WQEBBS;
+       num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, 
info.sync_len);
        pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
        contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
 
@@ -382,14 +382,40 @@ mlx5e_ktls_tx_handle_ooo(struct 
mlx5e_ktls_offload_context_tx *priv_tx,
 
        tx_post_resync_params(sq, priv_tx, info.rcd_sn);
 
-       for (; i < info.nr_frags; i++)
-               if (tx_post_resync_dump(sq, &info.frags[i], priv_tx->tisn, !i))
-                       goto err_out;
+       for (; i < info.nr_frags; i++) {
+               unsigned int orig_fsz, frag_offset = 0, n = 0;
+               skb_frag_t *f = &info.frags[i];
+
+               orig_fsz = skb_frag_size(f);
+
+               do {
+                       bool fence = !(i || frag_offset);
+                       unsigned int fsz;
+
+                       n++;
+                       fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - 
frag_offset);
+                       skb_frag_size_set(f, fsz);
+                       if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) {
+                               page_ref_add(skb_frag_page(f), n - 1);
+                               goto err_out;
+                       }
+
+                       skb_frag_off_add(f, fsz);
+                       frag_offset += fsz;
+               } while (frag_offset < orig_fsz);
+
+               page_ref_add(skb_frag_page(f), n - 1);
+       }
 
        return skb;
 
 err_out:
        for (; i < info.nr_frags; i++)
+               /* The put_page() here undoes the page ref obtained in 
tx_sync_info_get().
+                * Page refs obtained for the DUMP WQEs above (by page_ref_add) 
will be
+                * released only upon their completions (or in 
mlx5e_free_txqsq_descs,
+                * if channel closes).
+                */
                put_page(skb_frag_page(&info.frags[i]));
 
        dev_kfree_skb_any(skb);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b476b007f093..772bfdbdeb9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1128,6 +1128,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        sq->txq_ix    = txq_ix;
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
        sq->min_inline_mode = params->tx_min_inline_mode;
+       sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
        sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
        sq->stop_room = MLX5E_SQ_STOP_ROOM;
        INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
@@ -1135,10 +1136,14 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
                set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
        if (MLX5_IPSEC_DEV(c->priv->mdev))
                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
+#ifdef CONFIG_MLX5_EN_TLS
        if (mlx5_accel_is_tls_device(c->priv->mdev)) {
                set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
-               sq->stop_room += MLX5E_SQ_TLS_ROOM;
+               sq->stop_room += MLX5E_SQ_TLS_ROOM +
+                       mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS,
+                                                   TLS_MAX_PAYLOAD_SIZE);
        }
+#endif
 
        param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
-- 
2.21.0

Reply via email to