If the unique umem and MR method is enabled, the memory is
pre-allocated and the MR is registered in the device start stage,
before the Tx queues are started, for the Tx queues to use later.
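
The single buffer holds all SQ memory first and all CQ memory right
after it; room for two doorbell records per queue is added on top of
the two totals. A sketch of the layout, with the offsets as the code
initializes them:

  0                 sq_total_size        sq_total_size + cq_total_size
  | SQ area         | CQ area            | 2 * txqs_n doorbells |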

Signed-off-by: Bing Zhao <bi...@nvidia.com>
---
 drivers/net/mlx5/mlx5.h         |  4 ++
 drivers/net/mlx5/mlx5_trigger.c | 85 +++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 285c9ba396..c08894cd03 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -2141,6 +2141,10 @@ struct mlx5_priv {
        struct {
                uint32_t sq_total_size;
                uint32_t cq_total_size;
+               void *umem; /* Consecutive memory buffer for all Tx queues. */
+               void *umem_obj; /* Umem object registered for the buffer. */
+               uint32_t sq_cur_off; /* Current allocation offset in the SQ area. */
+               uint32_t cq_cur_off; /* Current allocation offset in the CQ area. */
        } consec_tx_mem;
        RTE_ATOMIC(uint16_t) shared_refcnt; /* HW steering host reference counter. */
 };
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 3aa7d01ee2..0fdf66d696 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1135,6 +1135,83 @@ mlx5_hw_representor_port_allowed_start(struct rte_eth_dev *dev)
 
 #endif
 
+/**
+ * Allocate TxQs unique umem and register its MR.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_dev_allocate_consec_tx_mem(struct rte_eth_dev *dev)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       size_t alignment;
+       uint32_t total_size;
+       struct mlx5dv_devx_umem *umem_obj = NULL;
+       void *umem_buf = NULL;
+
+       /* Legacy per queue allocation, do nothing here. */
+       if (priv->sh->config.txq_mem_algn == 0)
+               return 0;
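+       /* txq_mem_algn holds log2 of the requested alignment, convert it to bytes. */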
+       alignment = RTE_BIT32(priv->sh->config.txq_mem_algn);
+       total_size = priv->consec_tx_mem.sq_total_size + priv->consec_tx_mem.cq_total_size;
+       /*
+        * Hairpin queues can be skipped later.
+        * The queue size alignment is bigger than the doorbell alignment,
+        * so there is no need to align or round up again. One queue has 2 DBs.
+        */
+       total_size += MLX5_DBR_SIZE * priv->txqs_n * 2;
+       umem_buf = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, total_size,
+                                            alignment, priv->sh->numa_node);
+       if (!umem_buf) {
+               DRV_LOG(ERR, "Failed to allocate consecutive memory for TxQs.");
+               rte_errno = ENOMEM;
+               return -rte_errno;
+       }
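+       /* One registration covers every SQ, CQ and doorbell in the buffer. */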
+       umem_obj = mlx5_os_umem_reg(priv->sh->cdev->ctx, (void *)(uintptr_t)umem_buf,
+                                   total_size, IBV_ACCESS_LOCAL_WRITE);
+       if (!umem_obj) {
+               DRV_LOG(ERR, "Failed to register unique umem for all SQs.");
+               rte_errno = errno;
+               mlx5_free(umem_buf);
+               return -rte_errno;
+       }
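+       /* The SQ area starts at offset 0, the CQ area right after the SQ area. */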
+       priv->consec_tx_mem.umem = umem_buf;
+       priv->consec_tx_mem.sq_cur_off = 0;
+       priv->consec_tx_mem.cq_cur_off = priv->consec_tx_mem.sq_total_size;
+       priv->consec_tx_mem.umem_obj = umem_obj;
+       DRV_LOG(DEBUG, "Allocated umem %p with size %u for %u queues with sq_len %u,"
+               " cq_len %u and registered object %p on port %u",
+               umem_buf, total_size, priv->txqs_n, priv->consec_tx_mem.sq_total_size,
+               priv->consec_tx_mem.cq_total_size, (void *)umem_obj, dev->data->port_id);
+       return 0;
+}
+
+/**
+ * Release TxQs unique umem and deregister its MR.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx5_dev_free_consec_tx_mem(struct rte_eth_dev *dev)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+
+       if (priv->sh->config.txq_mem_algn == 0)
+               return;
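+       /* Deregister the umem object before freeing the buffer it references. */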
+       if (priv->consec_tx_mem.umem_obj) {
+               mlx5_os_umem_dereg(priv->consec_tx_mem.umem_obj);
+               priv->consec_tx_mem.umem_obj = NULL;
+       }
+       if (priv->consec_tx_mem.umem) {
+               mlx5_free(priv->consec_tx_mem.umem);
+               priv->consec_tx_mem.umem = NULL;
+       }
+}
+
 /**
  * DPDK callback to start the device.
  *
@@ -1225,6 +1302,12 @@ mlx5_dev_start(struct rte_eth_dev *dev)
                if (ret)
                        goto error;
        }
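+       /* Pre-allocate the unique umem and MR before any Tx queue is started. */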
+       ret = mlx5_dev_allocate_consec_tx_mem(dev);
+       if (ret) {
+               DRV_LOG(ERR, "port %u Tx queues memory allocation failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
+               goto error;
+       }
        ret = mlx5_txq_start(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
@@ -1358,6 +1441,7 @@ mlx5_dev_start(struct rte_eth_dev *dev)
        mlx5_rxq_stop(dev);
        if (priv->obj_ops.lb_dummy_queue_release)
                priv->obj_ops.lb_dummy_queue_release(dev);
+       mlx5_dev_free_consec_tx_mem(dev);
        mlx5_txpp_stop(dev); /* Stop last. */
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
@@ -1470,6 +1554,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
        priv->sh->port[priv->dev_port - 1].nl_ih_port_id = RTE_MAX_ETHPORTS;
        mlx5_txq_stop(dev);
        mlx5_rxq_stop(dev);
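+       /* All Tx queues are stopped now, the consecutive memory can be freed. */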
+       mlx5_dev_free_consec_tx_mem(dev);
        if (priv->obj_ops.lb_dummy_queue_release)
                priv->obj_ops.lb_dummy_queue_release(dev);
        mlx5_txpp_stop(dev);
-- 
2.34.1
