From: Nishad Saraf <[email protected]> NPU firmware requires a host-allocated work buffer for hardware contexts. Allocate a 4 MB host buffer and attach it to the device during device init.
Refactor aie2_alloc_msg_buffer() and aie2_free_msg_buffer() into common helpers by moving them to aie.c and renaming them to amdxdna_alloc_msg_buffer() and amdxdna_free_msg_buffer(), allowing both AIE2 and AIE4 to reuse the implementation. Signed-off-by: Nishad Saraf <[email protected]> Signed-off-by: Lizhi Hou <[email protected]> --- drivers/accel/amdxdna/aie.c | 34 +++++++++++++++ drivers/accel/amdxdna/aie.h | 4 ++ drivers/accel/amdxdna/aie2_error.c | 7 ++-- drivers/accel/amdxdna/aie2_message.c | 49 +++------------------- drivers/accel/amdxdna/aie2_pci.h | 4 -- drivers/accel/amdxdna/aie4_message.c | 18 ++++++++ drivers/accel/amdxdna/aie4_msg_priv.h | 14 +++++++ drivers/accel/amdxdna/aie4_pci.c | 55 ++++++++++++++++++++++++- drivers/accel/amdxdna/aie4_pci.h | 5 +++ drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +- 10 files changed, 141 insertions(+), 52 deletions(-) diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c index a31051cc1ec8..4db2fd80a032 100644 --- a/drivers/accel/amdxdna/aie.c +++ b/drivers/accel/amdxdna/aie.c @@ -162,3 +162,37 @@ int amdxdna_get_metadata(struct aie_device *aie, kfree(meta); return ret; } + +void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size, + dma_addr_t *dma_addr) +{ + void *vaddr; + int order; + + *size = max_t(u32, *size, SZ_8K); + order = get_order(*size); + if (order > MAX_PAGE_ORDER) + return ERR_PTR(-EINVAL); + *size = PAGE_SIZE << order; + + if (amdxdna_iova_on(xdna)) + return amdxdna_iommu_alloc(xdna, *size, dma_addr); + + vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr, + DMA_FROM_DEVICE, GFP_KERNEL); + if (!vaddr) + return ERR_PTR(-ENOMEM); + + return vaddr; +} + +void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size, + void *cpu_addr, dma_addr_t dma_addr) +{ + if (amdxdna_iova_on(xdna)) { + amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr); + return; + } + + dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE); +} diff --git 
a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h index 4bb3719ee0c0..70618204c0ab 100644 --- a/drivers/accel/amdxdna/aie.h +++ b/drivers/accel/amdxdna/aie.h @@ -121,6 +121,10 @@ int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32 fw_minor); void amdxdna_vbnv_init(struct amdxdna_dev *xdna); int amdxdna_get_metadata(struct aie_device *aie, struct amdxdna_client *client, struct amdxdna_drm_get_info *args); +void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size, + dma_addr_t *dma_addr); +void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size, + void *cpu_addr, dma_addr_t dma_addr); /* aie_psp.c */ struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf); diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c index 70007b4363cd..babdac0157ab 100644 --- a/drivers/accel/amdxdna/aie2_error.c +++ b/drivers/accel/amdxdna/aie2_error.c @@ -11,6 +11,7 @@ #include <linux/kthread.h> #include <linux/kernel.h> +#include "aie.h" #include "aie2_msg_priv.h" #include "aie2_pci.h" #include "amdxdna_error.h" @@ -338,7 +339,7 @@ void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev) destroy_workqueue(events->wq); mutex_lock(&xdna->dev_lock); - aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr); + amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr); kfree(events); } @@ -354,7 +355,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev) if (!events) return -ENOMEM; - events->buf = aie2_alloc_msg_buffer(ndev, &total_size, &events->addr); + events->buf = amdxdna_alloc_msg_buffer(xdna, &total_size, &events->addr); if (IS_ERR(events->buf)) { ret = PTR_ERR(events->buf); goto free_events; @@ -394,7 +395,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev) free_wq: destroy_workqueue(events->wq); free_buf: - aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr); + amdxdna_free_msg_buffer(xdna, events->size, 
events->buf, events->addr); free_events: kfree(events); return ret; diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index f555ffecea6f..0417c6a4c80a 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -27,43 +27,6 @@ #define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops) -void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size, - dma_addr_t *dma_addr) -{ - struct amdxdna_dev *xdna = ndev->aie.xdna; - void *vaddr; - int order; - - *size = max(*size, SZ_8K); - order = get_order(*size); - if (order > MAX_PAGE_ORDER) - return ERR_PTR(-EINVAL); - *size = PAGE_SIZE << order; - - if (amdxdna_iova_on(xdna)) - return amdxdna_iommu_alloc(xdna, *size, dma_addr); - - vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr, - DMA_FROM_DEVICE, GFP_KERNEL); - if (!vaddr) - return ERR_PTR(-ENOMEM); - - return vaddr; -} - -void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size, - void *cpu_addr, dma_addr_t dma_addr) -{ - struct amdxdna_dev *xdna = ndev->aie.xdna; - - if (amdxdna_iova_on(xdna)) { - amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr); - return; - } - - dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE); -} - int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev) { DECLARE_AIE_MSG(suspend, MSG_OP_SUSPEND); @@ -376,7 +339,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, int ret; buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size; - buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr); + buff_addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr); if (IS_ERR(buff_addr)) return PTR_ERR(buff_addr); @@ -415,7 +378,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, *cols_filled = aie_bitmap; fail: - aie2_free_msg_buffer(ndev, buf_sz, buff_addr, dma_addr); + amdxdna_free_msg_buffer(xdna, buf_sz, buff_addr, dma_addr); return ret; } @@ -434,7 +397,7 @@ int 
aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, return -EINVAL; buf_sz = min(size, SZ_4M); - addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr); + addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr); if (IS_ERR(addr)) return PTR_ERR(addr); @@ -466,7 +429,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, header->minor = resp.minor; free_buf: - aie2_free_msg_buffer(ndev, buf_sz, addr, dma_addr); + amdxdna_free_msg_buffer(xdna, buf_sz, addr, dma_addr); return ret; } @@ -1176,7 +1139,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id, } buf_size = sizeof(*report); - buf = aie2_alloc_msg_buffer(ndev, &buf_size, &dma_addr); + buf = amdxdna_alloc_msg_buffer(xdna, &buf_size, &dma_addr); if (IS_ERR(buf)) { XDNA_ERR(xdna, "Failed to allocate buffer for app health"); return PTR_ERR(buf); @@ -1197,7 +1160,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id, memcpy(report, buf, sizeof(*report)); free_buf: - aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr); + amdxdna_free_msg_buffer(xdna, buf_size, buf, dma_addr); return ret; } diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index c884fed610f9..33b6c84e8b6e 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -290,10 +290,6 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, int (*notify_cb)(void *, void __iomem *, size_t)); int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us); -void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size, - dma_addr_t *dma_addr); -void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size, - void *cpu_addr, dma_addr_t dma_addr); /* aie2_hwctx.c */ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); diff --git a/drivers/accel/amdxdna/aie4_message.c b/drivers/accel/amdxdna/aie4_message.c index 
ac89a9a842b2..d85df04c5f6b 100644 --- a/drivers/accel/amdxdna/aie4_message.c +++ b/drivers/accel/amdxdna/aie4_message.c @@ -62,3 +62,21 @@ int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *m return 0; } + +int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev) +{ + DECLARE_AIE_MSG(aie4_msg_attach_work_buffer, AIE4_MSG_OP_ATTACH_WORK_BUFFER); + struct amdxdna_dev *xdna = ndev->aie.xdna; + int ret; + + req.buff_addr = ndev->work_buf_addr; + req.buff_size = AIE4_WORK_BUFFER_MIN_SIZE; + + ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg); + if (ret) + XDNA_ERR(xdna, "Failed to attach work buffer, ret %d", ret); + else + XDNA_DBG(xdna, "Attached work buffer"); + + return ret; +} diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h index 69e220e40900..af0866045b91 100644 --- a/drivers/accel/amdxdna/aie4_msg_priv.h +++ b/drivers/accel/amdxdna/aie4_msg_priv.h @@ -6,10 +6,12 @@ #ifndef _AIE4_MSG_PRIV_H_ #define _AIE4_MSG_PRIV_H_ +#include <linux/sizes.h> #include <linux/types.h> enum aie4_msg_opcode { AIE4_MSG_OP_SUSPEND = 0x10003, + AIE4_MSG_OP_ATTACH_WORK_BUFFER = 0x1000D, AIE4_MSG_OP_CREATE_VFS = 0x20001, AIE4_MSG_OP_DESTROY_VFS = 0x20002, @@ -130,4 +132,16 @@ struct aie4_msg_aie4_tile_info_resp { struct aie4_tile_info info; } __packed; +#define AIE4_WORK_BUFFER_MIN_SIZE SZ_4M + +struct aie4_msg_attach_work_buffer_req { + __u64 buff_addr; + __u32 reserved; + __u32 buff_size; +} __packed; + +struct aie4_msg_attach_work_buffer_resp { + enum aie4_msg_status status; +} __packed; + #endif /* _AIE4_MSG_PRIV_H_ */ diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c index 8b5eff0e45c1..a58a83af42a4 100644 --- a/drivers/accel/amdxdna/aie4_pci.c +++ b/drivers/accel/amdxdna/aie4_pci.c @@ -286,8 +286,14 @@ static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev) if (ret) goto stop_fw; + ret = aie4_attach_work_buffer(ndev); + if (ret) + goto mbox_fini; + return 0; +mbox_fini: + 
aie4_mailbox_fini(ndev); stop_fw: aie4_fw_stop(ndev); @@ -564,6 +570,40 @@ static int aie4_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i return ret; } +static int aie4_alloc_work_buffer(struct amdxdna_dev_hdl *ndev) +{ + struct amdxdna_dev *xdna = ndev->aie.xdna; + u32 buf_size = AIE4_WORK_BUFFER_MIN_SIZE; + + ndev->work_buf = amdxdna_alloc_msg_buffer(xdna, &buf_size, + &ndev->work_buf_addr); + if (IS_ERR(ndev->work_buf)) { + int ret = PTR_ERR(ndev->work_buf); + + XDNA_ERR(xdna, "Failed to alloc work buffer, size 0x%x", + AIE4_WORK_BUFFER_MIN_SIZE); + ndev->work_buf = NULL; + return ret; + } + + ndev->work_buf_size = buf_size; + XDNA_DBG(xdna, "Work buffer allocated: size 0x%x", buf_size); + + return 0; +} + +static void aie4_free_work_buffer(struct amdxdna_dev_hdl *ndev) +{ + struct amdxdna_dev *xdna = ndev->aie.xdna; + + if (!ndev->work_buf) + return; + + amdxdna_free_msg_buffer(xdna, ndev->work_buf_size, ndev->work_buf, + ndev->work_buf_addr); + ndev->work_buf = NULL; +} + static int aie4_pf_init(struct amdxdna_dev *xdna) { int ret; @@ -572,7 +612,19 @@ static int aie4_pf_init(struct amdxdna_dev *xdna) if (ret) return ret; - return aie4_pf_hw_start(xdna->dev_handle); + ret = aie4_alloc_work_buffer(xdna->dev_handle); + if (ret) + return ret; + + ret = aie4_pf_hw_start(xdna->dev_handle); + if (ret) + goto free_work_buf; + + return 0; + +free_work_buf: + aie4_free_work_buffer(xdna->dev_handle); + return ret; } static int aie4_vf_init(struct amdxdna_dev *xdna) @@ -590,6 +642,7 @@ static void aie4_pf_fini(struct amdxdna_dev *xdna) { aie4_sriov_stop(xdna->dev_handle); aie4_pf_hw_stop(xdna->dev_handle); + aie4_free_work_buffer(xdna->dev_handle); } static void aie4_vf_fini(struct amdxdna_dev *xdna) diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h index 1886cffc62db..390864876ca5 100644 --- a/drivers/accel/amdxdna/aie4_pci.h +++ b/drivers/accel/amdxdna/aie4_pci.h @@ -53,11 +53,16 @@ struct amdxdna_dev_hdl { struct xarray 
cert_comp_xa; /* device level indexed by msix id */ struct mutex cert_comp_lock; /* protects cert_comp operations*/ + + void *work_buf; + dma_addr_t work_buf_addr; + u32 work_buf_size; }; /* aie4_message.c */ int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata); int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev); +int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev); /* aie4_ctx.c */ int aie4_hwctx_init(struct amdxdna_hwctx *hwctx); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index c0d00db25cde..a6e9be7960c2 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -40,9 +40,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin"); * 0.7: Support getting power and utilization data * 0.8: Support BO usage query * 0.9: Add new device type AMDXDNA_DEV_TYPE_PF + * 0.10: Support AIE4 UMQ */ #define AMDXDNA_DRIVER_MAJOR 0 -#define AMDXDNA_DRIVER_MINOR 9 +#define AMDXDNA_DRIVER_MINOR 10 /* * Bind the driver base on (vendor_id, device_id) pair and later use the -- 2.34.1
