When the alignment is non-zero, a single umem and MR allocation will be
used for all Tx queues.

In this commit, the total lengths of the SQs and their associated CQs
are calculated and saved.
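
As a rough sketch of the arithmetic (illustrative pseudo-C only, with
hypothetical local variable names; the actual logic is in mlx5_txq_new()
in the diff below), each queue contributes a power-of-two sized region
and the per-port totals are accumulated:

    /* Per-queue sizes, descriptor counts rounded up to a power of two. */
    uint32_t sq_len = MLX5_WQE_SIZE * (1U << log2above(wqebb_cnt));
    uint32_t cq_len = sizeof(struct mlx5_cqe) * (1U << log2above(cqe_cnt));

    /* Running totals, later used for the single umem/MR allocation. */
    priv->consec_tx_mem.sq_total_size += sq_len;
    priv->consec_tx_mem.cq_total_size += cq_len;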

Signed-off-by: Bing Zhao <bi...@nvidia.com>
---
 drivers/net/mlx5/mlx5.h     |  4 +++
 drivers/net/mlx5/mlx5_tx.h  |  2 ++
 drivers/net/mlx5/mlx5_txq.c | 67 +++++++++++++++++++++++++++++++++++--
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 6b8d29a2bf..285c9ba396 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -2138,6 +2138,10 @@ struct mlx5_priv {
        struct mlx5_nta_sample_ctx *nta_sample_ctx;
 #endif
        struct rte_eth_dev *shared_host; /* Host device for HW steering. */
+       struct {
+               uint32_t sq_total_size;
+               uint32_t cq_total_size;
+       } consec_tx_mem;
        RTE_ATOMIC(uint16_t) shared_refcnt; /* HW steering host reference counter. */
 };
 
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index 55568c41b1..94f2028513 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -149,6 +149,7 @@ struct __rte_cache_aligned mlx5_txq_data {
        uint16_t inlen_mode; /* Minimal data length to inline. */
        uint8_t tx_aggr_affinity; /* TxQ affinity configuration. */
        uint32_t qp_num_8s; /* QP number shifted by 8. */
+       uint32_t sq_mem_len; /* Length of TxQ memory for WQEs. */
        uint64_t offloads; /* Offloads for Tx Queue. */
        struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
        struct mlx5_wqe *wqes; /* Work queue. */
@@ -167,6 +168,7 @@ struct __rte_cache_aligned mlx5_txq_data {
        uint64_t ts_mask; /* Timestamp flag dynamic mask. */
        uint64_t ts_last; /* Last scheduled timestamp. */
        int32_t ts_offset; /* Timestamp field dynamic offset. */
+       uint32_t cq_mem_len; /* Length of TxQ memory for CQEs. */
        struct mlx5_dev_ctx_shared *sh; /* Shared context. */
        struct mlx5_txq_stats stats; /* TX queue counters. */
        struct mlx5_txq_stats stats_reset; /* stats on last reset. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 8ee8108497..1948a700f1 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -17,6 +17,7 @@
 #include <bus_pci_driver.h>
 #include <rte_common.h>
 #include <rte_eal_paging.h>
+#include <rte_bitops.h>
 
 #include <mlx5_common.h>
 #include <mlx5_common_mr.h>
@@ -1032,6 +1033,57 @@ txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl)
                    !txq_ctrl->txq.inlen_empw);
 }
 
+/**
+ * Calculate WQ memory length for a Tx queue.
+ *
+ * @param log_wqe_cnt
+ *   Logarithm value of WQE numbers.
+ *
+ * @return
+ *   Memory length of this WQ.
+ */
+static uint32_t mlx5_txq_wq_mem_length(uint32_t log_wqe_cnt)
+{
+       uint32_t num_of_wqbbs = 1U << log_wqe_cnt;
+       uint32_t umem_size;
+
+       umem_size = MLX5_WQE_SIZE * num_of_wqbbs;
+       return umem_size;
+}
+
+/**
+ * Calculate CQ memory length for a Tx queue.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param txq_ctrl
+ *   Pointer to the TxQ control structure of the CQ.
+ *
+ * @return
+ *   Memory length of this CQ.
+ */
+static uint32_t
+mlx5_txq_cq_mem_length(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl)
+{
+       uint32_t cqe_n, log_desc_n;
+
+       if (__rte_trace_point_fp_is_enabled() &&
+           txq_ctrl->txq.offloads & RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP)
+               cqe_n = UINT16_MAX / 2 - 1;
+       else
+               cqe_n = (1UL << txq_ctrl->txq.elts_n) / MLX5_TX_COMP_THRESH +
+                       1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
+       log_desc_n = log2above(cqe_n);
+       cqe_n = 1UL << log_desc_n;
+       if (cqe_n > UINT16_MAX) {
+               DRV_LOG(ERR, "Port %u Tx queue %u requests too many CQEs %u.",
+                       dev->data->port_id, txq_ctrl->txq.idx, cqe_n);
+               rte_errno = EINVAL;
+               return 0;
+       }
+       return sizeof(struct mlx5_cqe) * cqe_n;
+}
+
 /**
  * Create a DPDK Tx queue.
  *
@@ -1057,6 +1109,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *tmpl;
        uint16_t max_wqe;
+       uint32_t wqebb_cnt, log_desc_n;
 
        if (socket != (unsigned int)SOCKET_ID_ANY) {
                tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
@@ -1099,15 +1152,25 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        tmpl->txq.idx = idx;
        txq_set_params(tmpl);
        txq_adjust_params(tmpl);
+       wqebb_cnt = txq_calc_wqebb_cnt(tmpl);
        max_wqe = mlx5_dev_get_max_wq_size(priv->sh);
-       if (txq_calc_wqebb_cnt(tmpl) > max_wqe) {
+       if (wqebb_cnt > max_wqe) {
                DRV_LOG(ERR,
                        "port %u Tx WQEBB count (%d) exceeds the limit (%d),"
                        " try smaller queue size",
-                       dev->data->port_id, txq_calc_wqebb_cnt(tmpl), max_wqe);
+                       dev->data->port_id, wqebb_cnt, max_wqe);
                rte_errno = ENOMEM;
                goto error;
        }
+       if (priv->sh->config.txq_mem_algn != 0) {
+               log_desc_n = log2above(wqebb_cnt);
+               tmpl->txq.sq_mem_len = mlx5_txq_wq_mem_length(log_desc_n);
+               tmpl->txq.cq_mem_len = mlx5_txq_cq_mem_length(dev, tmpl);
+               DRV_LOG(DEBUG, "Port %u TxQ %u WQ length %u, CQ length %u before align.",
+                       dev->data->port_id, idx, tmpl->txq.sq_mem_len, tmpl->txq.cq_mem_len);
+               priv->consec_tx_mem.sq_total_size += tmpl->txq.sq_mem_len;
+               priv->consec_tx_mem.cq_total_size += tmpl->txq.cq_mem_len;
+       }
        rte_atomic_fetch_add_explicit(&tmpl->refcnt, 1, rte_memory_order_relaxed);
        tmpl->is_hairpin = false;
        LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
-- 
2.34.1
