From: David Zhang <[email protected]> Implement hardware context creation and destruction for AIE4 VF devices.
Co-developed-by: Hayden Laccabue <[email protected]> Signed-off-by: Hayden Laccabue <[email protected]> Signed-off-by: David Zhang <[email protected]> Signed-off-by: Lizhi Hou <[email protected]> --- drivers/accel/amdxdna/Makefile | 1 + drivers/accel/amdxdna/aie4_ctx.c | 258 ++++++++++++++++++++++++ drivers/accel/amdxdna/aie4_host_queue.h | 22 ++ drivers/accel/amdxdna/aie4_msg_priv.h | 29 +++ drivers/accel/amdxdna/aie4_pci.c | 5 + drivers/accel/amdxdna/aie4_pci.h | 24 +++ drivers/accel/amdxdna/amdxdna_ctx.c | 6 + drivers/accel/amdxdna/amdxdna_ctx.h | 3 + include/uapi/drm/amdxdna_accel.h | 1 + 9 files changed, 349 insertions(+) create mode 100644 drivers/accel/amdxdna/aie4_ctx.c create mode 100644 drivers/accel/amdxdna/aie4_host_queue.h diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile index d7720c8c8a98..05cce0a38692 100644 --- a/drivers/accel/amdxdna/Makefile +++ b/drivers/accel/amdxdna/Makefile @@ -10,6 +10,7 @@ amdxdna-y := \ aie2_pci.o \ aie2_pm.o \ aie2_solver.o \ + aie4_ctx.o \ aie4_message.o \ aie4_pci.o \ amdxdna_cbuf.o \ diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c new file mode 100644 index 000000000000..84ac706d0ffb --- /dev/null +++ b/drivers/accel/amdxdna/aie4_ctx.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2026, Advanced Micro Devices, Inc. 
+ */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> +#include <linux/types.h> + +#include "aie.h" +#include "aie4_host_queue.h" +#include "aie4_msg_priv.h" +#include "aie4_pci.h" +#include "amdxdna_ctx.h" +#include "amdxdna_gem.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_mailbox_helper.h" +#include "amdxdna_pci_drv.h" +/* IRQ handler for a cert_comp: wakes every waiter on cert_comp->waitq. */ +static irqreturn_t cert_comp_isr(int irq, void *p) +{ + struct cert_comp *cert_comp = p; + + wake_up_all(&cert_comp->waitq); + return IRQ_HANDLED; +} +/* Get a referenced cert_comp for msix_idx, creating it on first use; NULL on failure. */ +static struct cert_comp *aie4_lookup_cert_comp(struct amdxdna_dev_hdl *ndev, u32 msix_idx) +{ + struct amdxdna_dev *xdna = ndev->aie.xdna; + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); + struct cert_comp *cert_comp; + int ret; + + guard(mutex)(&ndev->cert_comp_lock); + /* Reuse the existing entry for this MSI-X index if there is one. */ + cert_comp = xa_load(&ndev->cert_comp_xa, msix_idx); + if (cert_comp) { + kref_get(&cert_comp->kref); + return cert_comp; + } + /* No entry for this index yet: allocate one, request its IRQ, then store it. */ + cert_comp = kzalloc_obj(*cert_comp); + if (!cert_comp) + return NULL; + + cert_comp->ndev = ndev; + cert_comp->msix_idx = msix_idx; + init_waitqueue_head(&cert_comp->waitq); + kref_init(&cert_comp->kref); + + ret = pci_irq_vector(pdev, cert_comp->msix_idx); + if (ret < 0) { + XDNA_ERR(xdna, "MSI-X idx %u is invalid, ret:%d", msix_idx, ret); + goto free_cert_comp; + } + cert_comp->irq = ret; + + ret = request_irq(cert_comp->irq, cert_comp_isr, 0, "xdna_hsa", cert_comp); + if (ret) { + XDNA_ERR(xdna, "request irq %d failed %d", cert_comp->irq, ret); + goto free_cert_comp; + } + + ret = xa_err(xa_store(&ndev->cert_comp_xa, msix_idx, cert_comp, GFP_KERNEL)); + if (ret) { + XDNA_ERR(xdna, "store cert_comp for msix idx %d failed %d", msix_idx, ret); + goto free_irq; + } + + return cert_comp; + +free_irq: + free_irq(cert_comp->irq, cert_comp); +free_cert_comp: + kfree(cert_comp); + return NULL; +} +/* kref release, expects cert_comp_lock held: erase xarray entry, free IRQ, free object. */ +static void cert_comp_release(struct kref *kref) +{ + 
struct cert_comp *cert_comp = container_of(kref, struct cert_comp, kref); + struct amdxdna_dev_hdl *ndev = cert_comp->ndev; + + drm_WARN_ON(&ndev->aie.xdna->ddev, !mutex_is_locked(&ndev->cert_comp_lock)); + + xa_erase(&ndev->cert_comp_xa, cert_comp->msix_idx); + free_irq(cert_comp->irq, cert_comp); + kfree(cert_comp); +} +/* Drop a reference under cert_comp_lock; the final put runs cert_comp_release(). */ +static void aie4_put_cert_comp(struct cert_comp *cert_comp) +{ + struct amdxdna_dev_hdl *ndev; + + ndev = cert_comp->ndev; + guard(mutex)(&ndev->cert_comp_lock); + kref_put(&cert_comp->kref, cert_comp_release); +} +/* Send AIE4_MSG_OP_DESTROY_HW_CONTEXT for hw_context_id via aie_send_mgmt_msg_wait(). */ +static int aie4_msg_destroy_context(struct amdxdna_dev_hdl *ndev, u32 hw_context_id) +{ + DECLARE_AIE_MSG(aie4_msg_destroy_hw_context, AIE4_MSG_OP_DESTROY_HW_CONTEXT); + + req.hw_context_id = hw_context_id; + return aie_send_mgmt_msg_wait(&ndev->aie, &msg); +} +/* Send CREATE_HW_CONTEXT for hwctx, then take a cert_comp for the returned MSI-X index. */ +static int aie4_hwctx_create(struct amdxdna_hwctx *hwctx) +{ + DECLARE_AIE_MSG(aie4_msg_create_hw_context, AIE4_MSG_OP_CREATE_HW_CONTEXT); + struct amdxdna_client *client = hwctx->client; + struct amdxdna_hwctx_priv *priv = hwctx->priv; + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct amdxdna_dev_hdl *ndev = xdna->dev_handle; + int ret; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + if (!ndev->partition_id || !hwctx->num_tiles) { + XDNA_ERR(xdna, "invalid request partition_id %d, num_tiles %d", + ndev->partition_id, hwctx->num_tiles); + return -EINVAL; + } + + req.partition_id = ndev->partition_id; + req.request_num_tiles = hwctx->num_tiles; + req.pasid = FIELD_PREP(AIE4_MSG_PASID, client->pasid) | + FIELD_PREP(AIE4_MSG_PASID_VLD, 1); + req.priority_band = hwctx->qos.priority; + /* Pass the umq BO device address as two 32-bit halves. */ + req.hsa_addr_high = upper_32_bits(amdxdna_gem_dev_addr(priv->umq_bo)); + req.hsa_addr_low = lower_32_bits(amdxdna_gem_dev_addr(priv->umq_bo)); + + XDNA_DBG(xdna, "pasid 0x%x, num_tiles %d, hsa[0x%x 0x%x]", + req.pasid, req.request_num_tiles, req.hsa_addr_high, req.hsa_addr_low); + + ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg); + if (ret) { + XDNA_ERR(xdna, 
"create ctx failed: %d", ret); + return ret; + } + + XDNA_DBG(xdna, "resp msix: %d, ctx id: %d, doorbell: %d", + resp.job_complete_msix_idx, + resp.hw_context_id, + resp.doorbell_offset); + + /* Set up interrupt completion for the MSI-X index returned in the response. */ + priv->cert_comp = aie4_lookup_cert_comp(ndev, resp.job_complete_msix_idx); + if (!priv->cert_comp) { + aie4_msg_destroy_context(ndev, resp.hw_context_id); + return -EINVAL; + } + /* NOTE(review): any lookup failure above (alloc, IRQ, xa_store) is reported as -EINVAL. */ + priv->hw_ctx_id = resp.hw_context_id; + hwctx->doorbell_offset = resp.doorbell_offset; + + return 0; +} +/* Destroy the hw context and drop the cert_comp reference; the message status is ignored. */ +static void aie4_hwctx_destroy(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_client *client = hwctx->client; + struct amdxdna_hwctx_priv *priv = hwctx->priv; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev = xdna->dev_handle; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + aie4_msg_destroy_context(ndev, priv->hw_ctx_id); + aie4_put_cert_comp(priv->cert_comp); +} +/* Put the umq BO reference, if hwctx->priv and its umq_bo are set. */ +static void aie4_hwctx_umq_fini(struct amdxdna_hwctx *hwctx) +{ + if (hwctx->priv && hwctx->priv->umq_bo) + amdxdna_gem_put_obj(hwctx->priv->umq_bo); +} +/* Look up the umq BO by handle, vmap it and cache pointers to its read/write indices. */ +static int aie4_hwctx_umq_init(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_hwctx_priv *priv = hwctx->priv; + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct amdxdna_gem_obj *umq_bo; + struct host_queue_header *qhdr; + int ret; + + umq_bo = amdxdna_gem_get_obj(hwctx->client, hwctx->umq_bo_hdl, AMDXDNA_BO_SHARE); + if (!umq_bo) { + XDNA_ERR(xdna, "cannot find umq_bo handle %d", hwctx->umq_bo_hdl); + return -ENOENT; + } + if (umq_bo->mem.size < sizeof(*qhdr)) { + XDNA_ERR(xdna, "umq_bo size is too small"); + ret = -EINVAL; + goto put_umq_bo; + } + + /* Get kernel virtual addresses of the host queue read and write indices. */ + qhdr = amdxdna_gem_vmap(umq_bo); + if (!qhdr) { + ret = -ENOMEM; + goto put_umq_bo; + } + + priv->umq_bo = umq_bo; + priv->umq_read_index = &qhdr->read_index; + priv->umq_write_index = &qhdr->write_index; + + return 0; + +put_umq_bo: + 
amdxdna_gem_put_obj(umq_bo); + return ret; +} +/* Allocate hwctx->priv, set up the umq BO, then create the hw context; 0 or -errno. */ +int aie4_hwctx_init(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_client *client = hwctx->client; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_hwctx_priv *priv; + int ret; + + priv = kzalloc_obj(*priv); + if (!priv) + return -ENOMEM; + hwctx->priv = priv; + + ret = aie4_hwctx_umq_init(hwctx); + if (ret) + goto free_priv; + + ret = aie4_hwctx_create(hwctx); + if (ret) + goto umq_fini; + + XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); + return 0; + +umq_fini: + aie4_hwctx_umq_fini(hwctx); +free_priv: + kfree(priv); + hwctx->priv = NULL; + return ret; +} +/* Tear down in reverse order of aie4_hwctx_init() and free hwctx->priv. */ +void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx) +{ + aie4_hwctx_destroy(hwctx); + aie4_hwctx_umq_fini(hwctx); + kfree(hwctx->priv); +} diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h new file mode 100644 index 000000000000..eb6a38dfb53e --- /dev/null +++ b/drivers/accel/amdxdna/aie4_host_queue.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2026, Advanced Micro Devices, Inc. + */ + +#ifndef _AIE4_HOST_QUEUE_H_ +#define _AIE4_HOST_QUEUE_H_ + +#include <linux/types.h> +/* Queue header; the driver maps the umq BO as this and keeps pointers to both indices. */ +struct host_queue_header { + __u64 read_index; + struct { + __u16 major; + __u16 minor; + } version; + __u32 capacity; /* Queue capacity, must be power of two. */ + __u64 write_index; + __u64 data_address; /* The xdna dev addr for payload. 
+ */ +}; + +#endif /* _AIE4_HOST_QUEUE_H_ */ diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h index cada53257921..7faa01ca3436 100644 --- a/drivers/accel/amdxdna/aie4_msg_priv.h +++ b/drivers/accel/amdxdna/aie4_msg_priv.h @@ -16,6 +16,8 @@ enum aie4_msg_opcode { AIE4_MSG_OP_CREATE_PARTITION = 0x30001, AIE4_MSG_OP_DESTROY_PARTITION = 0x30002, + AIE4_MSG_OP_CREATE_HW_CONTEXT = 0x30003, + AIE4_MSG_OP_DESTROY_HW_CONTEXT = 0x30004, }; enum aie4_msg_status { @@ -67,4 +69,31 @@ struct aie4_msg_destroy_partition_resp { enum aie4_msg_status status; } __packed; +struct aie4_msg_create_hw_context_req { + __u32 partition_id; + __u32 request_num_tiles; + __u32 hsa_addr_high; /* upper 32 bits of the umq BO device address */ + __u32 hsa_addr_low; /* lower 32 bits of the umq BO device address */ +#define AIE4_MSG_PASID GENMASK(19, 0) +#define AIE4_MSG_PASID_VLD GENMASK(31, 31) + __u32 pasid; /* PASID in bits 19:0, valid flag in bit 31 */ + __u32 priority_band; +} __packed; +/* Reply to CREATE_HW_CONTEXT: context id, doorbell offset and completion MSI-X index. */ +struct aie4_msg_create_hw_context_resp { + enum aie4_msg_status status; + __u32 hw_context_id; + __u32 doorbell_offset; + __u32 job_complete_msix_idx; +} __packed; +/* Request to destroy the hw context identified by hw_context_id. */ +struct aie4_msg_destroy_hw_context_req { + __u32 hw_context_id; + __u32 resvd1; +} __packed; + +struct aie4_msg_destroy_hw_context_resp { + enum aie4_msg_status status; +} __packed; + #endif /* _AIE4_MSG_PRIV_H_ */ diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c index 13f5d45e388d..3be9066b7178 100644 --- a/drivers/accel/amdxdna/aie4_pci.c +++ b/drivers/accel/amdxdna/aie4_pci.c @@ -451,6 +451,9 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna) ndev->aie.xdna = xdna; xdna->dev_handle = ndev; + xa_init_flags(&ndev->cert_comp_xa, XA_FLAGS_ALLOC); + mutex_init(&ndev->cert_comp_lock); + /* Enable managed PCI device */ ret = pcim_enable_device(pdev); if (ret) { @@ -542,4 +545,6 @@ const struct amdxdna_dev_ops aie4_pf_ops = { const struct amdxdna_dev_ops aie4_vf_ops = { .init = aie4_vf_init, .fini = aie4_vf_fini, + .hwctx_init = aie4_hwctx_init, + .hwctx_fini = aie4_hwctx_fini, }; diff --git 
a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h index 620fb5bd23e4..6103007e6d2f 100644 --- a/drivers/accel/amdxdna/aie4_pci.h +++ b/drivers/accel/amdxdna/aie4_pci.h @@ -13,6 +13,23 @@ #include "aie.h" #include "amdxdna_mailbox.h" +struct cert_comp { + struct amdxdna_dev_hdl *ndev; /* owning device handle */ + u32 msix_idx; /* key in ndev->cert_comp_xa */ + int irq; /* Linux IRQ from pci_irq_vector() */ + struct kref kref; /* taken by aie4_lookup_cert_comp() */ + wait_queue_head_t waitq; /* woken by cert_comp_isr() */ +}; + +struct amdxdna_hwctx_priv { + struct amdxdna_gem_obj *umq_bo; /* put in aie4_hwctx_umq_fini() */ + u64 *umq_read_index; /* points into the vmap'ed umq_bo header */ + u64 *umq_write_index; /* points into the vmap'ed umq_bo header */ + + struct cert_comp *cert_comp; /* entry for this context's completion MSI-X index */ + u32 hw_ctx_id; /* hw_context_id from the create response */ +}; + struct amdxdna_dev_priv { const char *npufw_path; const char *certfw_path; @@ -32,11 +49,18 @@ struct amdxdna_dev_hdl { struct mailbox *mbox; u32 partition_id; + + struct xarray cert_comp_xa; /* per-device cert_comp entries, indexed by MSI-X index */ + struct mutex cert_comp_lock; /* protects cert_comp_xa and cert_comp refcount changes */ }; /* aie4_message.c */ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev); +/* aie4_ctx.c */ +int aie4_hwctx_init(struct amdxdna_hwctx *hwctx); +void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx); + /* aie4_sriov.c */ #if IS_ENABLED(CONFIG_PCI_IOV) int aie4_sriov_configure(struct amdxdna_dev *xdna, int num_vfs); diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 2c2c21992c87..b5ad60d4b734 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -207,6 +207,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr if (args->ext || args->ext_flags) return -EINVAL; + if (!xdna->dev_info->ops->hwctx_init) + return -EOPNOTSUPP; + hwctx = kzalloc_obj(*hwctx); if (!hwctx) return -ENOMEM; @@ -220,6 +223,8 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr hwctx->client = client; hwctx->fw_ctx_id = -1; hwctx->num_tiles = args->num_tiles; + hwctx->umq_bo_hdl = args->umq_bo; + hwctx->doorbell_offset = AMDXDNA_INVALID_DOORBELL_OFFSET; hwctx->mem_size = args->mem_size; hwctx->max_opc = args->max_opc; @@ -252,6 
+257,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr args->handle = hwctx->id; args->syncobj_handle = hwctx->syncobj_hdl; + args->umq_doorbell = hwctx->doorbell_offset; atomic64_set(&hwctx->job_submit_cnt, 0); atomic64_set(&hwctx->job_free_cnt, 0); diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index 355798687376..c5622718b4d5 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -14,6 +14,7 @@ struct amdxdna_hwctx_priv; enum ert_cmd_opcode { ERT_START_CU = 0, + ERT_START_DPU = 18, ERT_CMD_CHAIN = 19, ERT_START_NPU = 20, ERT_START_NPU_PREEMPT = 21, @@ -105,6 +106,8 @@ struct amdxdna_hwctx { u32 *col_list; u32 start_col; u32 num_col; + u32 umq_bo_hdl; + u32 doorbell_offset; u32 num_unused_col; struct amdxdna_qos_info qos; diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index 34212feee15c..ad9b33dd7b13 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -18,6 +18,7 @@ extern "C" { #define AMDXDNA_INVALID_CTX_HANDLE 0 #define AMDXDNA_INVALID_BO_HANDLE 0 #define AMDXDNA_INVALID_FENCE_HANDLE 0 +#define AMDXDNA_INVALID_DOORBELL_OFFSET (~0U) /* * Define hardware context priority -- 2.34.1
