From: Maxim Mikityanskiy <maxi...@mellanox.com>

TX MPWQE support for SKBs is coming in one of the following patches, and
a single MPWQE can send multiple SKBs. This commit prepares the TX path
code to handle such cases:

1. An additional FIFO for SKBs is added, just like the FIFO for DMA
chunks.

2. struct mlx5e_tx_wqe_info will contain num_fifo_pkts. If a given WQE
contains only one packet, num_fifo_pkts will be zero, and the SKB will
be stored in mlx5e_tx_wqe_info, as usual. If num_fifo_pkts > 0, the SKB
pointer will be NULL, and the SKBs will be stored in the FIFO.

This change has no performance impact in the TCP single-stream test or
the XDP_TX single-stream test.

UDP pktgen (burst 32), single stream:
  Packet rate: 19.23 Mpps -> 19.12 Mpps
  Instructions per packet: 360 -> 354
  Cycles per packet: 142 -> 140

CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx

Signed-off-by: Maxim Mikityanskiy <maxi...@mellanox.com>
Reviewed-by: Tariq Toukan <tar...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  4 ++
 .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 18 +++++
 .../mellanox/mlx5/core/en_accel/ktls_txrx.h   | 10 ++-
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  7 +-
 .../net/ethernet/mellanox/mlx5/core/en_tx.c   | 71 ++++++++++++++-----
 5 files changed, 89 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 4f33658da25a..6ab60074fca9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -317,11 +317,13 @@ struct mlx5e_txqsq {
 
        /* dirtied @completion */
        u16                        cc;
+       u16                        skb_fifo_cc;
        u32                        dma_fifo_cc;
        struct dim                 dim; /* Adaptive Moderation */
 
        /* dirtied @xmit */
        u16                        pc ____cacheline_aligned_in_smp;
+       u16                        skb_fifo_pc;
        u32                        dma_fifo_pc;
 
        struct mlx5e_cq            cq;
@@ -329,9 +331,11 @@ struct mlx5e_txqsq {
        /* read only */
        struct mlx5_wq_cyc         wq;
        u32                        dma_fifo_mask;
+       u16                        skb_fifo_mask;
        struct mlx5e_sq_stats     *stats;
        struct {
                struct mlx5e_sq_dma       *dma_fifo;
+               struct sk_buff           **skb_fifo;
                struct mlx5e_tx_wqe_info  *wqe_info;
        } db;
        void __iomem              *uar_map;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 155b89998891..7baac2971758 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -105,6 +105,7 @@ struct mlx5e_tx_wqe_info {
        u32 num_bytes;
        u8 num_wqebbs;
        u8 num_dma;
+       u8 num_fifo_pkts;
 #ifdef CONFIG_MLX5_EN_TLS
        struct page *resync_dump_frag_page;
 #endif
@@ -231,6 +232,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
        dma->type = map_type;
 }
 
+static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i)
+{
+       return &sq->db.skb_fifo[i & sq->skb_fifo_mask];
+}
+
+static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+{
+       struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++);
+
+       *skb_item = skb;
+}
+
+static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq)
+{
+       return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++);
+}
+
 static inline void
 mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
index fcfb156cf09d..7521c9be735b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -29,20 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
                                           struct mlx5e_tx_wqe_info *wi,
                                           u32 *dma_fifo_cc);
-static inline void
+static inline bool
 mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
                                          struct mlx5e_tx_wqe_info *wi,
                                          u32 *dma_fifo_cc)
 {
-       if (unlikely(wi->resync_dump_frag_page))
+       if (unlikely(wi->resync_dump_frag_page)) {
                mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);
+               return true;
+       }
+       return false;
 }
 #else
-static inline void
+static inline bool
 mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
                                          struct mlx5e_tx_wqe_info *wi,
                                          u32 *dma_fifo_cc)
 {
+       return false;
 }
 
 #endif /* CONFIG_MLX5_EN_TLS */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 26834625556d..b413aa168e4e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1040,6 +1040,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
 static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
 {
        kvfree(sq->db.wqe_info);
+       kvfree(sq->db.skb_fifo);
        kvfree(sq->db.dma_fifo);
 }
 
@@ -1051,15 +1052,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
        sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
                                                   sizeof(*sq->db.dma_fifo)),
                                        GFP_KERNEL, numa);
+       sq->db.skb_fifo = kvzalloc_node(array_size(df_sz,
+                                                  sizeof(*sq->db.skb_fifo)),
+                                       GFP_KERNEL, numa);
        sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
                                                   sizeof(*sq->db.wqe_info)),
                                        GFP_KERNEL, numa);
-       if (!sq->db.dma_fifo || !sq->db.wqe_info) {
+       if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) {
                mlx5e_free_txqsq_db(sq);
                return -ENOMEM;
        }
 
        sq->dma_fifo_mask = df_sz - 1;
+       sq->skb_fifo_mask = df_sz - 1;
 
        return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 869b3313dabf..9ced350150b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -326,6 +326,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                .num_bytes = attr->num_bytes,
                .num_dma = num_dma,
                .num_wqebbs = wqe_attr->num_wqebbs,
+               .num_fifo_pkts = 0,
        };
 
        cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
@@ -474,6 +475,20 @@ static inline void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb
        napi_consume_skb(skb, napi_budget);
 }
 
+static inline void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq,
+                                                struct mlx5e_tx_wqe_info *wi,
+                                                struct mlx5_cqe64 *cqe,
+                                                int napi_budget)
+{
+       int i;
+
+       for (i = 0; i < wi->num_fifo_pkts; i++) {
+               struct sk_buff *skb = mlx5e_skb_fifo_pop(sq);
+
+               mlx5e_consume_skb(sq, skb, cqe, napi_budget);
+       }
+}
+
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
        struct mlx5e_sq_stats *stats;
@@ -519,26 +534,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
                wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
                do {
-                       struct sk_buff *skb;
-
                        last_wqe = (sqcc == wqe_counter);
 
                        ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
                        wi = &sq->db.wqe_info[ci];
-                       skb = wi->skb;
 
                        sqcc += wi->num_wqebbs;
 
-                       if (unlikely(!skb)) {
-                               mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
+                       if (likely(wi->skb)) {
+                               mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+                               mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
+
+                               npkts++;
+                               nbytes += wi->num_bytes;
                                continue;
                        }
 
-                       mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
-                       mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
+                       if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
+                                                                              &dma_fifo_cc)))
+                               continue;
 
-                       npkts++;
-                       nbytes += wi->num_bytes;
+                       if (wi->num_fifo_pkts) {
+                               mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+                               mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);
+
+                               npkts += wi->num_fifo_pkts;
+                               nbytes += wi->num_bytes;
+                       }
                } while (!last_wqe);
 
                if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
@@ -577,12 +599,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
        return (i == MLX5E_TX_CQ_POLL_BUDGET);
 }
 
+static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
+{
+       int i;
+
+       for (i = 0; i < wi->num_fifo_pkts; i++)
+               dev_kfree_skb_any(mlx5e_skb_fifo_pop(sq));
+}
+
 void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
 {
        struct mlx5e_tx_wqe_info *wi;
        u32 dma_fifo_cc, nbytes = 0;
        u16 ci, sqcc, npkts = 0;
-       struct sk_buff *skb;
 
        sqcc = sq->cc;
        dma_fifo_cc = sq->dma_fifo_cc;
@@ -590,20 +619,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
        while (sqcc != sq->pc) {
                ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
                wi = &sq->db.wqe_info[ci];
-               skb = wi->skb;
 
                sqcc += wi->num_wqebbs;
 
-               if (!skb) {
-                       mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
+               if (likely(wi->skb)) {
+                       mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+                       dev_kfree_skb_any(wi->skb);
+
+                       npkts++;
+                       nbytes += wi->num_bytes;
                        continue;
                }
 
-               mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
-               dev_kfree_skb_any(skb);
+               if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
+                       continue;
 
-               npkts++;
-               nbytes += wi->num_bytes;
+               if (wi->num_fifo_pkts) {
+                       mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+                       mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);
+
+                       npkts += wi->num_fifo_pkts;
+                       nbytes += wi->num_bytes;
+               }
        }
 
        sq->dma_fifo_cc = dma_fifo_cc;
-- 
2.26.2

Reply via email to