From: David Zhang <[email protected]>

Expose the command doorbell register to userspace on a per-hardware-context basis, enabling applications to notify the firmware of pending commands via doorbell writes.
Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for completion of individual commands. Co-developed-by: Hayden Laccabue <[email protected]> Signed-off-by: Hayden Laccabue <[email protected]> Signed-off-by: David Zhang <[email protected]> Signed-off-by: Lizhi Hou <[email protected]> --- drivers/accel/amdxdna/aie4_ctx.c | 75 +++++++++++++++++++++++++ drivers/accel/amdxdna/aie4_host_queue.h | 2 + drivers/accel/amdxdna/aie4_pci.c | 34 +++++++++++ drivers/accel/amdxdna/aie4_pci.h | 3 + drivers/accel/amdxdna/amdxdna_ctx.c | 34 +++++++++++ drivers/accel/amdxdna/amdxdna_ctx.h | 4 +- drivers/accel/amdxdna/amdxdna_gem.c | 5 +- drivers/accel/amdxdna/amdxdna_pci_drv.c | 18 +++++- drivers/accel/amdxdna/amdxdna_pci_drv.h | 3 + drivers/accel/amdxdna/npu3_regs.c | 5 ++ include/uapi/drm/amdxdna_accel.h | 22 +++++++- 11 files changed, 198 insertions(+), 7 deletions(-) diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c index 84ac706d0ffb..8408b0d2696f 100644 --- a/drivers/accel/amdxdna/aie4_ctx.c +++ b/drivers/accel/amdxdna/aie4_ctx.c @@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx) aie4_hwctx_umq_fini(hwctx); kfree(hwctx->priv); } + +static inline bool valid_queue_index(u64 read, u64 write, u32 capacity) +{ + return (write >= read) && ((write - read) <= capacity); +} + +static u64 get_read_index(struct amdxdna_hwctx *hwctx) +{ + u64 wi = READ_ONCE(*hwctx->priv->umq_write_index); + u64 ri = READ_ONCE(*hwctx->priv->umq_read_index); + struct amdxdna_dev *xdna = hwctx->client->xdna; + + /* + * CERT cannot update read index as uint64 atomically. Driver may read + * half-updated read index when it has bits in high 32bit. In case read + * index is not valid, wait for some time and retry once. It should + * allow CERT to complete the read index update. 
+ */ + if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) { + XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi); + usleep_range(100, 200); + ri = READ_ONCE(*hwctx->priv->umq_read_index); + if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) { + XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi %llu", ri, wi); + ri = 0; + } + } + + return ri; +} + +static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq) +{ + u64 read_idx = get_read_index(hwctx); + + return read_idx > seq; +} + +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout) +{ + unsigned long wait_jifs = MAX_SCHEDULE_TIMEOUT; + struct amdxdna_hwctx_priv *priv = hwctx->priv; + struct cert_comp *cert_comp = priv->cert_comp; + long ret; + + if (timeout) + wait_jifs = msecs_to_jiffies(timeout); + + ret = wait_event_interruptible_timeout(cert_comp->waitq, + (check_cmd_done(hwctx, seq)), + wait_jifs); + + if (!ret) + ret = -ETIME; + + return ret <= 0 ? ret : 0; +} + +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff) +{ + struct amdxdna_hwctx *hwctx; + unsigned long hwctx_id; + int idx; + + idx = srcu_read_lock(&client->hwctx_srcu); + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { + if (vm_pgoff == (hwctx->doorbell_offset >> PAGE_SHIFT)) { + srcu_read_unlock(&client->hwctx_srcu, idx); + return 1; + } + } + srcu_read_unlock(&client->hwctx_srcu, idx); + + return 0; +} diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h index eb6a38dfb53e..1b33eda3f727 100644 --- a/drivers/accel/amdxdna/aie4_host_queue.h +++ b/drivers/accel/amdxdna/aie4_host_queue.h @@ -8,6 +8,8 @@ #include <linux/types.h> +#define CTX_MAX_CMDS 32 + struct host_queue_header { __u64 read_index; struct { diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c index 3be9066b7178..9ff34ce57fcb 100644 --- a/drivers/accel/amdxdna/aie4_pci.c +++ b/drivers/accel/amdxdna/aie4_pci.c @@ -503,6 +503,38 @@ static int 
aie4m_pcidev_init(struct amdxdna_dev *xdna) return 0; } +static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_struct *vma) +{ + struct amdxdna_dev *xdna = client->xdna; + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); + const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv; + phys_addr_t res_start; + unsigned long pfn; + int ret; + + if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) { + XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx", vma->vm_pgoff); + return -EINVAL; + } + + if (vma_pages(vma) != 1) { + XDNA_ERR(xdna, "can only map one page, got %ld", vma_pages(vma)); + return -EINVAL; + } + + res_start = pci_resource_start(pdev, xdna->dev_info->doorbell_bar) + npriv->doorbell_off; + pfn = PHYS_PFN(res_start) + vma->vm_pgoff; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP); + ret = io_remap_pfn_range(vma, vma->vm_start, + pfn, + PAGE_SIZE, + vma->vm_page_prot); + + XDNA_DBG(xdna, "doorbell ret %d", ret); + return ret; +} + static int aie4_pf_init(struct amdxdna_dev *xdna) { int ret; @@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = { .fini = aie4_vf_fini, .hwctx_init = aie4_hwctx_init, .hwctx_fini = aie4_hwctx_fini, + .mmap = aie4_doorbell_mmap, + .cmd_wait = aie4_cmd_wait, }; diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h index 6103007e6d2f..b69489acd53d 100644 --- a/drivers/accel/amdxdna/aie4_pci.h +++ b/drivers/accel/amdxdna/aie4_pci.h @@ -36,6 +36,7 @@ struct amdxdna_dev_priv { u32 mbox_bar; u32 mbox_rbuf_bar; u64 mbox_info_off; + u32 doorbell_off; struct aie_bar_off_pair psp_regs_off[PSP_MAX_REGS]; struct aie_bar_off_pair smu_regs_off[SMU_MAX_REGS]; @@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev); /* aie4_ctx.c */ int aie4_hwctx_init(struct amdxdna_hwctx *hwctx); void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx); +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 
timeout); +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff); /* aie4_sriov.c */ #if IS_ENABLED(CONFIG_PCI_IOV) diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index b5ad60d4b734..b79229a63af3 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_ XDNA_ERR(client->xdna, "Invalid command type %d", args->type); return -EINVAL; } + +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_drm_wait_cmd *args = data; + struct amdxdna_hwctx *hwctx; + int ret, idx; + + XDNA_DBG(xdna, "PID %d ctx %d timeout set %d ms for cmd %llu", + client->pid, args->hwctx, args->timeout, args->seq); + + if (!xdna->dev_info->ops->cmd_wait) + return -EOPNOTSUPP; + + idx = srcu_read_lock(&client->hwctx_srcu); + hwctx = xa_load(&client->hwctx_xa, args->hwctx); + if (!hwctx) { + XDNA_DBG(xdna, "PID %d failed to get ctx %d", client->pid, args->hwctx); + ret = -EINVAL; + goto unlock_ctx_srcu; + } + + ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq, args->timeout); + + XDNA_DBG(xdna, "PID %d ctx %d cmd %lld wait finished, ret %d", + client->pid, args->hwctx, args->seq, ret); + + trace_amdxdna_debug_point(current->comm, args->seq, "job returned to user"); + +unlock_ctx_srcu: + srcu_read_unlock(&client->hwctx_srcu, idx); + return ret; +} diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index c5622718b4d5..6e3c6371a088 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, u32 *arg_bo_hdls, u32 arg_bo_cnt, u32 hwctx_hdl, u64 *seq); -int amdxdna_cmd_wait(struct amdxdna_client *client, u32 
hwctx_hdl, - u64 seq, u32 timeout); - int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); #endif /* _AMDXDNA_CTX_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index ebfc472aa9e7..319d2064fafa 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni, mmu_interval_set_seq(&mapp->notifier, cur_seq); up_write(&xdna->notifier_lock); - xdna->dev_info->ops->hmm_invalidate(abo, cur_seq); + if (xdna->dev_info->ops->hmm_invalidate) + xdna->dev_info->ops->hmm_invalidate(abo, cur_seq); if (range->event == MMU_NOTIFY_UNMAP) { down_write(&xdna->notifier_lock); @@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, u32 nr_pages; int ret; - if (!xdna->dev_info->ops->hmm_invalidate) + if (!amdxdna_pasid_on(abo->client)) return 0; mapp = kzalloc_obj(*mapp); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 39ad081ac082..c0d00db25cde 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struc return ret; } +static int amdxdna_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *drm_filp = filp->private_data; + struct amdxdna_client *client = drm_filp->driver_priv; + struct amdxdna_dev *xdna = client->xdna; + + if (likely(vma->vm_pgoff >= 
DRM_FILE_PAGE_OFFSET_START)) + return drm_gem_mmap(filp, vma); + + if (!xdna->dev_info->ops->mmap) + return -EOPNOTSUPP; + + return xdna->dev_info->ops->mmap(client, vma); +} + static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = { /* Context */ DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0), @@ -235,6 +250,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0), /* Execution */ DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0), /* AIE hardware */ DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0), DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0), @@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = { .poll = drm_poll, .read = drm_read, .llseek = noop_llseek, - .mmap = drm_gem_mmap, + .mmap = amdxdna_drm_gem_mmap, .show_fdinfo = drm_show_fdinfo, .fop_flags = FOP_UNSIGNED_OFFSET, }; diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h index caed11c09e55..471b72299aee 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -56,12 +56,14 @@ struct amdxdna_dev_ops { int (*resume)(struct amdxdna_dev *xdna); int (*suspend)(struct amdxdna_dev *xdna); int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs); + int (*mmap)(struct amdxdna_client *client, struct vm_area_struct *vma); int (*hwctx_init)(struct amdxdna_hwctx *hwctx); void (*hwctx_fini)(struct amdxdna_hwctx *hwctx); int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl); void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq); int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); + int (*cmd_wait)(struct 
amdxdna_hwctx *hwctx, u64 seq, u32 timeout); int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args); int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args); int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args); @@ -85,6 +87,7 @@ struct amdxdna_dev_info { int sram_bar; int psp_bar; int smu_bar; + int doorbell_bar; int device_type; int first_col; u32 dev_mem_buf_shift; diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c index 6d5da779232b..d76b2e99c308 100644 --- a/drivers/accel/amdxdna/npu3_regs.c +++ b/drivers/accel/amdxdna/npu3_regs.c @@ -14,6 +14,9 @@ #define NPU3_MBOX_BUFFER_BAR 2 #define NPU3_MBOX_INFO_OFF 0x0 +#define NPU3_DOORBELL_BAR 2 +#define NPU3_DOORBELL_OFF 0x0 + /* PCIe BAR Index for NPU3 */ #define NPU3_REG_BAR_INDEX 0 #define NPU3_PSP_BAR_INDEX 4 @@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = { .mbox_bar = NPU3_MBOX_BAR, .mbox_rbuf_bar = NPU3_MBOX_BUFFER_BAR, .mbox_info_off = NPU3_MBOX_INFO_OFF, + .doorbell_off = NPU3_DOORBELL_OFF, .psp_regs_off = { DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1), DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU3_PSP, MPASP_C2PMSG_156_ALT_1), @@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = { const struct amdxdna_dev_info dev_npu3_vf_info = { .mbox_bar = NPU3_MBOX_BAR, .sram_bar = NPU3_MBOX_BUFFER_BAR, + .doorbell_bar = NPU3_DOORBELL_BAR, .default_vbnv = "RyzenAI-npu3-vf", .device_type = AMDXDNA_DEV_TYPE_UMQ, .dev_priv = &npu3_dev_vf_priv, diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index ad9b33dd7b13..51a507561df6 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id { DRM_AMDXDNA_EXEC_CMD, DRM_AMDXDNA_GET_INFO, DRM_AMDXDNA_SET_STATE, - DRM_AMDXDNA_GET_ARRAY = 10, + DRM_AMDXDNA_WAIT_CMD, + DRM_AMDXDNA_GET_ARRAY, }; /** @@ -274,6 +275,21 
@@ struct amdxdna_drm_exec_cmd { __u64 seq; }; +/** + * struct amdxdna_drm_wait_cmd - Wait execution command. + * + * @hwctx: Context handle. + * @timeout: timeout in ms, 0 implies infinite wait. + * @seq: sequence number of the command returned by execute command. + * + * Wait a command specified by seq to be completed. + */ +struct amdxdna_drm_wait_cmd { + __u32 hwctx; + __u32 timeout; + __u64 seq; +}; + /** * struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware * @buffer: The user space buffer that will return the AIE status. @@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode { DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \ struct amdxdna_drm_get_array) +#define DRM_IOCTL_AMDXDNA_WAIT_CMD \ + DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \ + struct amdxdna_drm_wait_cmd) + #if defined(__cplusplus) } /* extern c end */ #endif -- 2.34.1
