On Thu, Mar 18, 2021 at 01:15:47PM +0200, Leon Romanovsky wrote:
> From: Maor Gottlieb <ma...@nvidia.com>
> 
> The MEMIC buffer, in addition to regular read and write operations,
> supports atomic operations from the host.
> 
> Introduce and implement a new UAPI to allocate address space for MEMIC
> operations such as atomics. This includes:
> 
> 1. Expose a new IOCTL to request the mapping of a MEMIC operation.
> 2. Hold the operation addresses in a list, so the same operation on the
>    same DM is allocated only once.
> 3. Manage a refcount on the mlx5_ib_dm object, so it is kept valid
>    until all addresses have been unmapped.
> 
> Signed-off-by: Maor Gottlieb <ma...@nvidia.com>
> Signed-off-by: Leon Romanovsky <leo...@nvidia.com>
> ---
>  drivers/infiniband/hw/mlx5/dm.c          | 196 +++++++++++++++++++++--
>  drivers/infiniband/hw/mlx5/dm.h          |   2 +
>  drivers/infiniband/hw/mlx5/main.c        |   7 +-
>  drivers/infiniband/hw/mlx5/mlx5_ib.h     |  16 +-
>  include/uapi/rdma/mlx5_user_ioctl_cmds.h |  11 ++
>  5 files changed, 214 insertions(+), 18 deletions(-)
> 
> --
> 2.30.2
> 
> diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
> index 97a925d43312..ee4ee197a626 100644
> --- a/drivers/infiniband/hw/mlx5/dm.c
> +++ b/drivers/infiniband/hw/mlx5/dm.c
> @@ -150,12 +150,14 @@ static int mlx5_cmd_alloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
>  }
> 
>  static int add_dm_mmap_entry(struct ib_ucontext *context,
> -                          struct mlx5_ib_dm *mdm, u64 address)
> +                          struct mlx5_user_mmap_entry *mentry, u8 mmap_flag,
> +                          size_t size, u64 address)
>  {
> -     mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC;
> -     mdm->mentry.address = address;
> +     mentry->mmap_flag = mmap_flag;
> +     mentry->address = address;
> +
>       return rdma_user_mmap_entry_insert_range(
> -             context, &mdm->mentry.rdma_entry, mdm->size,
> +             context, &mentry->rdma_entry, size,
>               MLX5_IB_MMAP_DEVICE_MEM << 16,
>               (MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
>  }
> @@ -183,6 +185,114 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev, u32 type)
>       return 0;
>  }
> 
> +void mlx5_ib_dm_memic_free(struct kref *kref)
> +{
> +     struct mlx5_ib_dm *dm =
> +             container_of(kref, struct mlx5_ib_dm, memic.ref);
> +     struct mlx5_ib_dev *dev = to_mdev(dm->ibdm.device);
> +
> +     mlx5_cmd_dealloc_memic(&dev->dm, dm->dev_addr, dm->size);
> +     kfree(dm);
> +}
> +
> +static int copy_op_to_user(struct mlx5_ib_dm_op_entry *op_entry,
> +                        struct uverbs_attr_bundle *attrs)
> +{
> +     u64 start_offset;
> +     u16 page_idx;
> +     int err;
> +
> +     page_idx = op_entry->mentry.rdma_entry.start_pgoff & 0xFFFF;
> +     start_offset = op_entry->op_addr & ~PAGE_MASK;
> +     err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX,
> +                          &page_idx, sizeof(page_idx));
> +     if (err)
> +             return err;
> +
> +     return uverbs_copy_to(attrs,
> +                           MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET,
> +                           &start_offset, sizeof(start_offset));
> +}
> +
> +static int map_existing_op(struct mlx5_ib_dm *dm, u8 op,
> +                        struct uverbs_attr_bundle *attrs)
> +{
> +     struct mlx5_ib_dm_op_entry *op_entry;
> +
> +     op_entry = xa_load(&dm->memic.ops, op);
> +     if (!op_entry)
> +             return -ENOENT;
> +
> +     return copy_op_to_user(op_entry, attrs);
> +}
> +
> +static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_MAP_OP_ADDR)(
> +     struct uverbs_attr_bundle *attrs)
> +{
> +     struct ib_uobject *uobj = uverbs_attr_get_uobject(
> +             attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE);
> +     struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
> +     struct ib_dm *ibdm = uobj->object;
> +     struct mlx5_ib_dm *dm = to_mdm(ibdm);
> +     struct mlx5_ib_dm_op_entry *op_entry;
> +     int err;
> +     u8 op;
> +
> +     err = uverbs_copy_from(&op, attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP);
> +     if (err)
> +             return err;
> +
> +     if (!(MLX5_CAP_DEV_MEM(dev->mdev, memic_operations) & BIT(op)))
> +             return -EOPNOTSUPP;
> +
> +     mutex_lock(&dm->memic.ops_xa_lock);
> +     err = map_existing_op(dm, op, attrs);
> +     if (!err || err != -ENOENT)
> +             goto err_unlock;
> +
> +     op_entry = kzalloc(sizeof(*op_entry), GFP_KERNEL);
> +     if (!op_entry) {
> +             err = -ENOMEM;
> +             goto err_unlock;
> +     }
> +
> +     err = mlx5_cmd_alloc_memic_op(&dev->dm, dm->dev_addr, op,
> +                                   &op_entry->op_addr);
> +     if (err) {
> +             kfree(op_entry);
> +             goto err_unlock;
> +     }
> +     op_entry->op = op;
> +     op_entry->dm = dm;
> +
> +     err = add_dm_mmap_entry(uobj->context, &op_entry->mentry,
> +                             MLX5_IB_MMAP_TYPE_MEMIC_OP, dm->size,
> +                             op_entry->op_addr & PAGE_MASK);
> +     if (err) {
> +             mlx5_cmd_dealloc_memic_op(&dev->dm, dm->dev_addr, op);
> +             kfree(op_entry);
> +             goto err_unlock;
> +     }
> +     /* From this point, entry will be freed by mmap_free */
> +     kref_get(&dm->memic.ref);
> +
> +     err = copy_op_to_user(op_entry, attrs);
> +     if (err)
> +             goto err_remove;
> +
> +     err = xa_insert(&dm->memic.ops, op, op_entry, GFP_KERNEL);
> +     if (err)
> +             goto err_remove;
> +     mutex_unlock(&dm->memic.ops_xa_lock);
> +
> +     return 0;
> +
> +err_remove:
> +     rdma_user_mmap_entry_remove(&op_entry->mentry.rdma_entry);
> +err_unlock:
> +     mutex_unlock(&dm->memic.ops_xa_lock);
> +
> +     return err;
> +}
> +
>  static int handle_alloc_dm_memic(struct ib_ucontext *ctx, struct mlx5_ib_dm *dm,
>                                struct ib_dm_alloc_attr *attr,
>                                struct uverbs_attr_bundle *attrs)
> @@ -193,6 +303,9 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx, struct mlx5_ib_dm *dm,
>       int err;
>       u64 address;
> 
> +     kref_init(&dm->memic.ref);
> +     xa_init(&dm->memic.ops);
> +     mutex_init(&dm->memic.ops_xa_lock);
>       dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
> 
>       err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr,
> @@ -203,18 +316,17 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx, struct mlx5_ib_dm *dm,
>       }
> 
>       address = dm->dev_addr & PAGE_MASK;
> -     err = add_dm_mmap_entry(ctx, dm, address);
> +     err = add_dm_mmap_entry(ctx, &dm->memic.mentry, MLX5_IB_MMAP_TYPE_MEMIC,
> +                             dm->size, address);
>       if (err) {
>               mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
>               kfree(dm);
>               return err;
>       }
> 
> -     page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
> -     err = uverbs_copy_to(attrs,
> -                          MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
> -                          &page_idx,
> -                          sizeof(page_idx));
> +     page_idx = dm->memic.mentry.rdma_entry.start_pgoff & 0xFFFF;
> +     err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
> +                          &page_idx, sizeof(page_idx));
>       if (err)
>               goto err_copy;
> 
> @@ -228,7 +340,7 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx, struct mlx5_ib_dm *dm,
>       return 0;
> 
>  err_copy:
> -     rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
> +     rdma_user_mmap_entry_remove(&dm->memic.mentry.rdma_entry);
> 
>       return err;
>  }
> @@ -292,6 +404,7 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
>               return ERR_PTR(-ENOMEM);
> 
>       dm->type = type;
> +     dm->ibdm.device = ibdev;
> 
>       switch (type) {
>       case MLX5_IB_UAPI_DM_TYPE_MEMIC:
> @@ -323,6 +436,19 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
>       return ERR_PTR(err);
>  }
> 
> +static void dm_memic_remove_ops(struct mlx5_ib_dm *dm)
> +{
> +     struct mlx5_ib_dm_op_entry *entry;
> +     unsigned long idx;
> +
> +     mutex_lock(&dm->memic.ops_xa_lock);
> +     xa_for_each(&dm->memic.ops, idx, entry) {
> +             xa_erase(&dm->memic.ops, idx);
> +             rdma_user_mmap_entry_remove(&entry->mentry.rdma_entry);
> +     }
> +     mutex_unlock(&dm->memic.ops_xa_lock);
> +}
> +
>  int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
>  {
>       struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
> @@ -333,7 +459,8 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
> 
>       switch (dm->type) {
>       case MLX5_IB_UAPI_DM_TYPE_MEMIC:
> -             rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
> +             dm_memic_remove_ops(dm);
> +             rdma_user_mmap_entry_remove(&dm->memic.mentry.rdma_entry);
>               return 0;
>       case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
>               ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
> @@ -359,6 +486,31 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
>       return 0;
>  }
> 
> +void mlx5_ib_dm_mmap_free(struct mlx5_ib_dev *dev,
> +                       struct mlx5_user_mmap_entry *mentry)
> +{
> +     struct mlx5_ib_dm_op_entry *op_entry;
> +     struct mlx5_ib_dm *mdm;
> +
> +     switch (mentry->mmap_flag) {
> +     case MLX5_IB_MMAP_TYPE_MEMIC:
> +             mdm = container_of(mentry, struct mlx5_ib_dm, memic.mentry);
> +             kref_put(&mdm->memic.ref, mlx5_ib_dm_memic_free);
> +             break;
> +     case MLX5_IB_MMAP_TYPE_MEMIC_OP:
> +             op_entry = container_of(mentry, struct mlx5_ib_dm_op_entry,
> +                                     mentry);
> +             mdm = op_entry->dm;
> +             mlx5_cmd_dealloc_memic_op(&dev->dm, mdm->dev_addr,
> +                                       op_entry->op);
> +             kfree(op_entry);
> +             kref_put(&mdm->memic.ref, mlx5_ib_dm_memic_free);
> +             break;
> +     default:
> +             WARN_ON(true);
> +     }
> +}
> +
>  ADD_UVERBS_ATTRIBUTES_SIMPLE(
>       mlx5_ib_dm, UVERBS_OBJECT_DM, UVERBS_METHOD_DM_ALLOC,
>       UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
> @@ -368,8 +520,28 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
>       UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
>                            enum mlx5_ib_uapi_dm_type, UA_OPTIONAL));
> 
> +DECLARE_UVERBS_NAMED_METHOD(
> +     MLX5_IB_METHOD_DM_MAP_OP_ADDR,
> +     UVERBS_ATTR_IDR(MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE,
> +                     UVERBS_OBJECT_DM,
> +                     UVERBS_ACCESS_READ,
> +                     UA_MANDATORY),
> +     UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP,
> +                        UVERBS_ATTR_TYPE(u8),
> +                        UA_MANDATORY),
> +     UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET,
> +                         UVERBS_ATTR_TYPE(u64),
> +                         UA_MANDATORY),
> +     UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX,
> +                         UVERBS_ATTR_TYPE(u16),
> +                         UA_OPTIONAL));
> +
> +DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DM,
> +                           &UVERBS_METHOD(MLX5_IB_METHOD_DM_MAP_OP_ADDR));
> +
>  const struct uapi_definition mlx5_ib_dm_defs[] = {
>       UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
> +     UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM),
>       {},
>  };
> 
> diff --git a/drivers/infiniband/hw/mlx5/dm.h b/drivers/infiniband/hw/mlx5/dm.h
> index adb39d3d8131..56cf1ba9c985 100644
> --- a/drivers/infiniband/hw/mlx5/dm.h
> +++ b/drivers/infiniband/hw/mlx5/dm.h
> @@ -8,6 +8,8 @@
> 
>  #include "mlx5_ib.h"
> 
> +void mlx5_ib_dm_mmap_free(struct mlx5_ib_dev *dev,
> +                       struct mlx5_user_mmap_entry *mentry);
>  void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr,
>                           u64 length);
>  void mlx5_cmd_dealloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
> diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> index 49c8c60d9520..6908db28b796 100644
> --- a/drivers/infiniband/hw/mlx5/main.c
> +++ b/drivers/infiniband/hw/mlx5/main.c
> @@ -2090,14 +2090,11 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
>       struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
>       struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
>       struct mlx5_var_table *var_table = &dev->var_table;
> -     struct mlx5_ib_dm *mdm;
> 
>       switch (mentry->mmap_flag) {
>       case MLX5_IB_MMAP_TYPE_MEMIC:
> -             mdm = container_of(mentry, struct mlx5_ib_dm, mentry);
> -             mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr,
> -                                    mdm->size);
> -             kfree(mdm);
> +     case MLX5_IB_MMAP_TYPE_MEMIC_OP:
> +             mlx5_ib_dm_mmap_free(dev, mentry);
>               break;
>       case MLX5_IB_MMAP_TYPE_VAR:
>               mutex_lock(&var_table->bitmap_lock);
> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> index ae971de6e934..b714131f87b7 100644
> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> @@ -166,6 +166,7 @@ enum mlx5_ib_mmap_type {
>       MLX5_IB_MMAP_TYPE_VAR = 2,
>       MLX5_IB_MMAP_TYPE_UAR_WC = 3,
>       MLX5_IB_MMAP_TYPE_UAR_NC = 4,
> +     MLX5_IB_MMAP_TYPE_MEMIC_OP = 5,
>  };
> 
>  struct mlx5_bfreg_info {
> @@ -618,18 +619,30 @@ struct mlx5_user_mmap_entry {
>       u32 page_idx;
>  };
> 
> +struct mlx5_ib_dm_op_entry {
> +     struct mlx5_user_mmap_entry     mentry;
> +     phys_addr_t                     op_addr;
> +     struct mlx5_ib_dm               *dm;
> +     u8                              op;
> +};
> +
>  struct mlx5_ib_dm {
>       struct ib_dm            ibdm;
>       phys_addr_t             dev_addr;
>       u32                     type;
>       size_t                  size;
>       union {
> +             struct {
> +                     struct mlx5_user_mmap_entry mentry;
> +                     struct xarray           ops;
> +                     struct mutex            ops_xa_lock;
> +                     struct kref             ref;
> +             } memic;
>               struct {
>                       u32     obj_id;
>               } icm_dm;

This union is making it much too difficult to read and understand now.
An optional kref buried inside one arm of the union is a step too far.

Please split it into distinct types and have proper type safety throughout.
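
Something like this, as a rough sketch (not compile-tested, names are
only placeholders): give each type its own struct embedding a common
base, and recover it with container_of():

struct mlx5_ib_dm {
        struct ib_dm            ibdm;
        u32                     type;
        phys_addr_t             dev_addr;
        size_t                  size;
};

struct mlx5_ib_dm_memic {
        struct mlx5_ib_dm           base;
        struct mlx5_user_mmap_entry mentry;
        struct xarray               ops;
        struct mutex                ops_xa_lock;
        struct kref                 ref;
};

struct mlx5_ib_dm_icm {
        struct mlx5_ib_dm       base;
        u32                     obj_id;
};

/* Only the MEMIC type ever touches the kref/xarray */
static inline struct mlx5_ib_dm_memic *to_memic(struct ib_dm *ibdm)
{
        return container_of(ibdm, struct mlx5_ib_dm_memic, base.ibdm);
}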

It looks mostly fine otherwise. The error flows are a bit hard to read,
though; when a new type is added, this should also get re-organized so
we don't do stuff like:

err_free:
        /* In MEMIC error flow, dm will be freed internally */
        if (type != MLX5_IB_UAPI_DM_TYPE_MEMIC)
                kfree(dm);

I'd inline the checks from check_dm_type_support() into their
respective allocation functions too.
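
Roughly, each allocation function does its own capability check and owns
its complete error unwind; another not-compile-tested sketch, and the
signature is my guess at how the split would look:

static struct ib_dm *handle_alloc_dm_memic(struct ib_ucontext *ctx,
                                           struct ib_dm_alloc_attr *attr,
                                           struct uverbs_attr_bundle *attrs)
{
        struct mlx5_ib_dev *dev = to_mdev(ctx->device);
        struct mlx5_ib_dm_memic *dm;

        /* check_dm_type_support() check inlined here */
        if (!MLX5_CAP_DEV_MEM(dev->mdev, memic))
                return ERR_PTR(-EOPNOTSUPP);

        dm = kzalloc(sizeof(*dm), GFP_KERNEL);
        if (!dm)
                return ERR_PTR(-ENOMEM);

        /* ... MEMIC-specific setup; every failure path frees dm ... */

        return &dm->base.ibdm;
}

Then mlx5_ib_alloc_dm() becomes a plain switch over the helpers and the
conditional kfree() above goes away entirely.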

Jason
