MSG_OP_CHAIN_EXEC_NPU is a unified mailbox message that replaces
MSG_OP_CHAIN_EXEC_BUFFER_CF and MSG_OP_CHAIN_EXEC_DPU.

Add driver logic to check the firmware version and, if MSG_OP_CHAIN_EXEC_NPU
is supported, use it to submit firmware commands.
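
For reference, the version gate reduces to the sketch below (a standalone,
simplified illustration using userspace types; fw_supports() is a
hypothetical helper, while the table layout mirrors the new
aie2_fw_feature_tbl). The driver itself records the result in a feature
mask and selects exec_msg_ops in aie2_msg_init().

	/* Sketch: report a feature when fw_minor falls inside the table's range. */
	#include <stdbool.h>
	#include <stdint.h>

	enum aie2_fw_feature { AIE2_NPU_COMMAND, AIE2_FEATURE_MAX };

	struct aie2_fw_feature_tbl {
		enum aie2_fw_feature feature;
		uint32_t max_minor;	/* 0 means no upper bound */
		uint32_t min_minor;	/* 0 terminates the table */
	};

	static bool fw_supports(const struct aie2_fw_feature_tbl *tbl,
				uint32_t fw_minor, enum aie2_fw_feature f)
	{
		for (; tbl && tbl->min_minor; tbl++) {
			if (tbl->feature != f || fw_minor < tbl->min_minor)
				continue;
			if (tbl->max_minor && fw_minor > tbl->max_minor)
				continue;
			return true;	/* e.g. submit via MSG_OP_CHAIN_EXEC_NPU */
		}
		return false;
	}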

Signed-off-by: Lizhi Hou <[email protected]>
---
 drivers/accel/amdxdna/aie2_message.c  | 443 +++++++++++++++++---------
 drivers/accel/amdxdna/aie2_msg_priv.h |  42 ++-
 drivers/accel/amdxdna/aie2_pci.c      |  13 +
 drivers/accel/amdxdna/aie2_pci.h      |  29 ++
 drivers/accel/amdxdna/amdxdna_ctx.c   |   6 +-
 drivers/accel/amdxdna/amdxdna_ctx.h   |  11 +-
 drivers/accel/amdxdna/npu1_regs.c     |   6 +
 drivers/accel/amdxdna/npu2_regs.c     |   1 +
 drivers/accel/amdxdna/npu4_regs.c     |   6 +
 drivers/accel/amdxdna/npu5_regs.c     |   1 +
 drivers/accel/amdxdna/npu6_regs.c     |   1 +
 11 files changed, 392 insertions(+), 167 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 3a4c845d783a..4751a8aff0f7 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -27,6 +27,8 @@
 #define DECLARE_AIE2_MSG(name, op) \
        DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
 
+#define EXEC_MSG_OPS(xdna)     ((xdna)->dev_handle->exec_msg_ops)
+
 static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
                                   struct xdna_mailbox_msg *msg)
 {
@@ -479,177 +481,291 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx,
        return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
 }
 
-int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
-                int (*notify_cb)(void *, void __iomem *, size_t))
+static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
+                                size_t *size, u32 *msg_op)
 {
-       struct mailbox_channel *chann = hwctx->priv->mbox_chann;
-       struct amdxdna_dev *xdna = hwctx->client->xdna;
-       struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
-       union {
-               struct execute_buffer_req ebuf;
-               struct exec_dpu_req dpu;
-       } req;
-       struct xdna_mailbox_msg msg;
-       u32 payload_len;
-       void *payload;
-       int cu_idx;
-       int ret;
-       u32 op;
+       struct execute_buffer_req *cu_req = req;
+       u32 cmd_len;
+       void *cmd;
 
-       if (!chann)
-               return -ENODEV;
+       cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+       if (cmd_len > sizeof(cu_req->payload))
+               return -EINVAL;
 
-       payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
-       if (!payload) {
-               XDNA_ERR(xdna, "Invalid command, cannot get payload");
+       cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+       if (cu_req->cu_idx == INVALID_CU_IDX)
                return -EINVAL;
-       }
 
-       cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
-       if (cu_idx < 0) {
-               XDNA_DBG(xdna, "Invalid cu idx");
+       memcpy(cu_req->payload, cmd, cmd_len);
+
+       *size = sizeof(*cu_req);
+       *msg_op = MSG_OP_EXECUTE_BUFFER_CF;
+       return 0;
+}
+
+static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
+                                 size_t *size, u32 *msg_op)
+{
+       struct exec_dpu_req *dpu_req = req;
+       struct amdxdna_cmd_start_npu *sn;
+       u32 cmd_len;
+
+       sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+       if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload))
                return -EINVAL;
-       }
 
-       op = amdxdna_cmd_get_op(cmd_abo);
-       switch (op) {
-       case ERT_START_CU:
-               if (unlikely(payload_len > sizeof(req.ebuf.payload)))
-                       XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
-               req.ebuf.cu_idx = cu_idx;
-               memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
-               msg.send_size = sizeof(req.ebuf);
-               msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
-               break;
-       case ERT_START_NPU: {
-               struct amdxdna_cmd_start_npu *sn = payload;
-
-               if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
-                       XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
-               req.dpu.inst_buf_addr = sn->buffer;
-               req.dpu.inst_size = sn->buffer_size;
-               req.dpu.inst_prop_cnt = sn->prop_count;
-               req.dpu.cu_idx = cu_idx;
-               memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
-               msg.send_size = sizeof(req.dpu);
-               msg.opcode = MSG_OP_EXEC_DPU;
-               break;
-       }
-       default:
-               XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
+       dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+       if (dpu_req->cu_idx == INVALID_CU_IDX)
                return -EINVAL;
-       }
-       msg.handle = job;
-       msg.notify_cb = notify_cb;
-       msg.send_data = (u8 *)&req;
-       print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
-                            0x40, false);
 
-       ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
-       if (ret) {
-               XDNA_ERR(xdna, "Send message failed");
-               return ret;
-       }
+       dpu_req->inst_buf_addr = sn->buffer;
+       dpu_req->inst_size = sn->buffer_size;
+       dpu_req->inst_prop_cnt = sn->prop_count;
+       memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn));
 
+       *size = sizeof(*dpu_req);
+       *msg_op = MSG_OP_EXEC_DPU;
        return 0;
 }
 
+static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
+{
+       struct cmd_chain_req *chain_req = req;
+
+       chain_req->buf_addr = slot_addr;
+       chain_req->buf_size = size;
+       chain_req->count = cmd_cnt;
+}
+
+static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
+{
+       struct cmd_chain_npu_req *npu_chain_req = req;
+
+       npu_chain_req->flags = 0;
+       npu_chain_req->reserved = 0;
+       npu_chain_req->buf_addr = slot_addr;
+       npu_chain_req->buf_size = size;
+       npu_chain_req->count = cmd_cnt;
+}
+
 static int
-aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
-                             struct amdxdna_gem_obj *abo, u32 *size)
+aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
 {
-       struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
-       int cu_idx = amdxdna_cmd_get_cu_idx(abo);
-       u32 payload_len;
-       void *payload;
+       struct cmd_chain_slot_execbuf_cf *cf_slot = slot;
+       u32 cmd_len;
+       void *cmd;
 
-       if (cu_idx < 0)
+       cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+       if (*size < sizeof(*cf_slot) + cmd_len)
                return -EINVAL;
 
-       payload = amdxdna_cmd_get_payload(abo, &payload_len);
-       if (!payload)
+       cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+       if (cf_slot->cu_idx == INVALID_CU_IDX)
                return -EINVAL;
 
-       if (!slot_has_space(*buf, offset, payload_len))
-               return -ENOSPC;
-
-       buf->cu_idx = cu_idx;
-       buf->arg_cnt = payload_len / sizeof(u32);
-       memcpy(buf->args, payload, payload_len);
-       /* Accurate buf size to hint firmware to do necessary copy */
-       *size = sizeof(*buf) + payload_len;
+       cf_slot->arg_cnt = cmd_len / sizeof(u32);
+       memcpy(cf_slot->args, cmd, cmd_len);
+       /* Accurate slot size to hint firmware to do necessary copy */
+       *size = sizeof(*cf_slot) + cmd_len;
        return 0;
 }
 
 static int
-aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
-                              struct amdxdna_gem_obj *abo, u32 *size)
+aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
 {
-       struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
-       int cu_idx = amdxdna_cmd_get_cu_idx(abo);
+       struct cmd_chain_slot_dpu *dpu_slot = slot;
        struct amdxdna_cmd_start_npu *sn;
-       u32 payload_len;
-       void *payload;
+       u32 cmd_len;
        u32 arg_sz;
 
-       if (cu_idx < 0)
+       sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+       arg_sz = cmd_len - sizeof(*sn);
+       if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
                return -EINVAL;
 
-       payload = amdxdna_cmd_get_payload(abo, &payload_len);
-       if (!payload)
+       if (*size < sizeof(*dpu_slot) + arg_sz)
                return -EINVAL;
-       sn = payload;
-       arg_sz = payload_len - sizeof(*sn);
-       if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
+
+       dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+       if (dpu_slot->cu_idx == INVALID_CU_IDX)
                return -EINVAL;
 
-       if (!slot_has_space(*buf, offset, arg_sz))
-               return -ENOSPC;
+       dpu_slot->inst_buf_addr = sn->buffer;
+       dpu_slot->inst_size = sn->buffer_size;
+       dpu_slot->inst_prop_cnt = sn->prop_count;
+       dpu_slot->arg_cnt = arg_sz / sizeof(u32);
+       memcpy(dpu_slot->args, sn->prop_args, arg_sz);
+
+       /* Accurate slot size to hint firmware to do necessary copy */
+       *size = sizeof(*dpu_slot) + arg_sz;
+       return 0;
+}
+
+static u32 aie2_get_chain_msg_op(u32 cmd_op)
+{
+       switch (cmd_op) {
+       case ERT_START_CU:
+               return MSG_OP_CHAIN_EXEC_BUFFER_CF;
+       case ERT_START_NPU:
+               return MSG_OP_CHAIN_EXEC_DPU;
+       default:
+               break;
+       }
 
-       buf->inst_buf_addr = sn->buffer;
-       buf->inst_size = sn->buffer_size;
-       buf->inst_prop_cnt = sn->prop_count;
-       buf->cu_idx = cu_idx;
-       buf->arg_cnt = arg_sz / sizeof(u32);
-       memcpy(buf->args, sn->prop_args, arg_sz);
+       return MSG_OP_MAX_OPCODE;
+}
 
-       /* Accurate buf size to hint firmware to do necessary copy */
-       *size = sizeof(*buf) + arg_sz;
+static struct aie2_exec_msg_ops legacy_exec_message_ops = {
+       .init_cu_req = aie2_init_exec_cu_req,
+       .init_dpu_req = aie2_init_exec_dpu_req,
+       .init_chain_req = aie2_init_exec_chain_req,
+       .fill_cf_slot = aie2_cmdlist_fill_cf,
+       .fill_dpu_slot = aie2_cmdlist_fill_dpu,
+       .get_chain_msg_op = aie2_get_chain_msg_op,
+};
+
+static int
+aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+       struct cmd_chain_slot_npu *npu_slot = slot;
+       u32 cmd_len;
+       void *cmd;
+
+       cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+       if (*size < sizeof(*npu_slot) + cmd_len)
+               return -EINVAL;
+
+       memset(npu_slot, 0, sizeof(*npu_slot));
+       npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+       if (npu_slot->cu_idx == INVALID_CU_IDX)
+               return -EINVAL;
+
+       npu_slot->type = EXEC_NPU_TYPE_NON_ELF;
+       npu_slot->arg_cnt = cmd_len / sizeof(u32);
+       memcpy(npu_slot->args, cmd, cmd_len);
+
+       *size = sizeof(*npu_slot) + cmd_len;
        return 0;
 }
 
 static int
-aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
-                          struct amdxdna_gem_obj *abo, u32 *size)
+aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+       struct cmd_chain_slot_npu *npu_slot = slot;
+       struct amdxdna_cmd_start_npu *sn;
+       u32 cmd_len;
+       u32 arg_sz;
+
+       sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+       arg_sz = cmd_len - sizeof(*sn);
+       if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
+               return -EINVAL;
+
+       if (*size < sizeof(*npu_slot) + arg_sz)
+               return -EINVAL;
+
+       memset(npu_slot, 0, sizeof(*npu_slot));
+       npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+       if (npu_slot->cu_idx == INVALID_CU_IDX)
+               return -EINVAL;
+
+       npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
+       npu_slot->inst_buf_addr = sn->buffer;
+       npu_slot->inst_size = sn->buffer_size;
+       npu_slot->inst_prop_cnt = sn->prop_count;
+       npu_slot->arg_cnt = arg_sz / sizeof(u32);
+       memcpy(npu_slot->args, sn->prop_args, arg_sz);
+
+       *size = sizeof(*npu_slot) + arg_sz;
+       return 0;
+}
+
+static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
+{
+       return MSG_OP_CHAIN_EXEC_NPU;
+}
+
+static struct aie2_exec_msg_ops npu_exec_message_ops = {
+       .init_cu_req = aie2_init_exec_cu_req,
+       .init_dpu_req = aie2_init_exec_dpu_req,
+       .init_chain_req = aie2_init_npu_chain_req,
+       .fill_cf_slot = aie2_cmdlist_fill_npu_cf,
+       .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
+       .get_chain_msg_op = aie2_get_npu_chain_msg_op,
+};
+
+static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo,
+                             size_t *size, u32 *msg_op)
 {
-       u32 this_op = amdxdna_cmd_get_op(abo);
-       void *cmd_buf = cmdbuf_abo->mem.kva;
+       struct amdxdna_dev *xdna = cmd_abo->client->xdna;
        int ret;
+       u32 op;
 
-       if (this_op != op) {
-               ret = -EINVAL;
-               goto done;
-       }
 
+       op = amdxdna_cmd_get_op(cmd_abo);
        switch (op) {
        case ERT_START_CU:
-               ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
+               ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op);
+               if (ret) {
+                       XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
+                       return ret;
+               }
                break;
        case ERT_START_NPU:
-               ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
+               ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op);
+               if (ret) {
+                       XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
+                       return ret;
+               }
+
                break;
        default:
+               XDNA_INFO(xdna, "Unsupported op %d", op);
                ret = -EOPNOTSUPP;
+               break;
        }
 
-done:
-       if (ret) {
-               XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
-                        op, ret);
+       return ret;
+}
+
+static int
+aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
+                      size_t *size, u32 *cmd_op)
+{
+       struct amdxdna_dev *xdna = cmd_abo->client->xdna;
+       int ret;
+       u32 op;
+
+       op = amdxdna_cmd_get_op(cmd_abo);
+       if (*cmd_op == ERT_INVALID_CMD)
+               *cmd_op = op;
+       else if (op != *cmd_op)
+               return -EINVAL;
+
+       switch (op) {
+       case ERT_START_CU:
+               ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
+               break;
+       case ERT_START_NPU:
+               ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
+               break;
+       default:
+               XDNA_INFO(xdna, "Unsupported op %d", op);
+               ret = -EOPNOTSUPP;
+               break;
        }
+
        return ret;
 }
 
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
+{
+       if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND))
+               ndev->exec_msg_ops = &npu_exec_message_ops;
+       else
+               ndev->exec_msg_ops = &legacy_exec_message_ops;
+}
+
 static inline struct amdxdna_gem_obj *
 aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
 {
@@ -658,29 +774,36 @@ aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
        return job->hwctx->priv->cmd_buf[idx];
 }
 
-static void
-aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
-                            struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+                int (*notify_cb)(void *, void __iomem *, size_t))
 {
-       req->buf_addr = cmdbuf_abo->mem.dev_addr;
-       req->buf_size = size;
-       req->count = cnt;
-       drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
-       XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
-                req->buf_addr, size, cnt);
-}
+       struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+       struct amdxdna_dev *xdna = hwctx->client->xdna;
+       struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+       struct xdna_mailbox_msg msg;
+       union exec_req req;
+       int ret;
 
-static inline u32
-aie2_cmd_op_to_msg_op(u32 op)
-{
-       switch (op) {
-       case ERT_START_CU:
-               return MSG_OP_CHAIN_EXEC_BUFFER_CF;
-       case ERT_START_NPU:
-               return MSG_OP_CHAIN_EXEC_DPU;
-       default:
-               return MSG_OP_MAX_OPCODE;
+       if (!chann)
+               return -ENODEV;
+
+       ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode);
+       if (ret)
+               return ret;
+
+       msg.handle = job;
+       msg.notify_cb = notify_cb;
+       msg.send_data = (u8 *)&req;
+       print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
+                            0x40, false);
+
+       ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+       if (ret) {
+               XDNA_ERR(xdna, "Send message failed");
+               return ret;
        }
+
+       return 0;
 }
 
 int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
@@ -691,12 +814,13 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
        struct mailbox_channel *chann = hwctx->priv->mbox_chann;
        struct amdxdna_client *client = hwctx->client;
        struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+       struct amdxdna_dev *xdna = client->xdna;
        struct amdxdna_cmd_chain *payload;
        struct xdna_mailbox_msg msg;
-       struct cmd_chain_req req;
+       union exec_chain_req req;
        u32 payload_len;
        u32 offset = 0;
-       u32 size;
+       size_t size;
        int ret;
        u32 op;
        u32 i;
@@ -707,41 +831,42 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
            payload_len < struct_size(payload, data, payload->command_count))
                return -EINVAL;
 
+       op = ERT_INVALID_CMD;
        for (i = 0; i < payload->command_count; i++) {
                u32 boh = (u32)(payload->data[i]);
                struct amdxdna_gem_obj *abo;
 
                abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
                if (!abo) {
-                       XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
+                       XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
                        return -ENOENT;
                }
 
-               /* All sub-cmd should have same op, use the first one. */
-               if (i == 0)
-                       op = amdxdna_cmd_get_op(abo);
-
-               ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
+               size = cmdbuf_abo->mem.size - offset;
+               ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset,
+                                            abo, &size, &op);
                amdxdna_gem_put_obj(abo);
                if (ret)
-                       return -EINVAL;
+                       return ret;
 
                offset += size;
        }
+       msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
+       if (msg.opcode == MSG_OP_MAX_OPCODE)
+               return -EOPNOTSUPP;
 
        /* The offset is the accumulated total size of the cmd buffer */
-       aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
+       EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
+                                          offset, payload->command_count);
+       drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset);
 
-       msg.opcode = aie2_cmd_op_to_msg_op(op);
-       if (msg.opcode == MSG_OP_MAX_OPCODE)
-               return -EOPNOTSUPP;
        msg.handle = job;
        msg.notify_cb = notify_cb;
        msg.send_data = (u8 *)&req;
        msg.send_size = sizeof(req);
        ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
        if (ret) {
-               XDNA_ERR(hwctx->client->xdna, "Send message failed");
+               XDNA_ERR(xdna, "Send message failed");
                return ret;
        }
 
@@ -754,23 +879,27 @@ int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
 {
        struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
        struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+       struct amdxdna_dev *xdna = hwctx->client->xdna;
        struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
        struct xdna_mailbox_msg msg;
-       struct cmd_chain_req req;
-       u32 size;
+       union exec_chain_req req;
+       u32 op = ERT_INVALID_CMD;
+       size_t size;
        int ret;
-       u32 op;
 
-       op = amdxdna_cmd_get_op(cmd_abo);
-       ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
+       size = cmdbuf_abo->mem.size;
+       ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op);
        if (ret)
                return ret;
 
-       aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
-
-       msg.opcode = aie2_cmd_op_to_msg_op(op);
+       msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
        if (msg.opcode == MSG_OP_MAX_OPCODE)
                return -EOPNOTSUPP;
+
+       EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
+                                          size, 1);
+       drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
+
        msg.handle = job;
        msg.notify_cb = notify_cb;
        msg.send_data = (u8 *)&req;
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 2dbea1d09980..947daa63f064 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -20,6 +20,7 @@ enum aie2_msg_opcode {
        MSG_OP_CHAIN_EXEC_BUFFER_CF        = 0x12,
        MSG_OP_CHAIN_EXEC_DPU              = 0x13,
        MSG_OP_CONFIG_DEBUG_BO             = 0x14,
+       MSG_OP_CHAIN_EXEC_NPU              = 0x18,
        MSG_OP_MAX_XRT_OPCODE,
        MSG_OP_SUSPEND                     = 0x101,
        MSG_OP_RESUME                      = 0x102,
@@ -172,6 +173,16 @@ struct exec_dpu_req {
        __u32   payload[35];
 } __packed;
 
+enum exec_npu_type {
+       EXEC_NPU_TYPE_NON_ELF           = 0x1,
+       EXEC_NPU_TYPE_PARTIAL_ELF       = 0x2,
+};
+
+union exec_req {
+       struct execute_buffer_req ebuf;
+       struct exec_dpu_req dpu_req;
+};
+
 struct execute_buffer_resp {
        enum aie2_msg_status    status;
 } __packed;
@@ -343,9 +354,6 @@ struct async_event_msg_resp {
 } __packed;
 
 #define MAX_CHAIN_CMDBUF_SIZE SZ_4K
-#define slot_has_space(slot, offset, payload_size)             \
-       (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) +   \
-        sizeof(typeof(slot)))
 
 struct cmd_chain_slot_execbuf_cf {
        __u32 cu_idx;
@@ -363,12 +371,40 @@ struct cmd_chain_slot_dpu {
        __u32 args[] __counted_by(arg_cnt);
 };
 
+#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
+struct cmd_chain_slot_npu {
+       enum exec_npu_type type;
+       u64 inst_buf_addr;
+       u64 save_buf_addr;
+       u64 restore_buf_addr;
+       u32 inst_size;
+       u32 save_size;
+       u32 restore_size;
+       u32 inst_prop_cnt;
+       u32 cu_idx;
+       u32 arg_cnt;
+       u32 args[] __counted_by(arg_cnt);
+} __packed;
+
 struct cmd_chain_req {
        __u64 buf_addr;
        __u32 buf_size;
        __u32 count;
 } __packed;
 
+struct cmd_chain_npu_req {
+       u32 flags;
+       u32 reserved;
+       u64 buf_addr;
+       u32 buf_size;
+       u32 count;
+} __packed;
+
+union exec_chain_req {
+       struct cmd_chain_npu_req npu_req;
+       struct cmd_chain_req req;
+};
+
 struct cmd_chain_resp {
        enum aie2_msg_status    status;
        __u32                   fail_cmd_idx;
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 80313a2a98d4..d7ccbdaf47f5 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -55,6 +55,7 @@ struct mgmt_mbox_chann_info {
 
 static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
 {
+       const struct aie2_fw_feature_tbl *feature;
        struct amdxdna_dev *xdna = ndev->xdna;
 
        /*
@@ -78,6 +79,17 @@ static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 f
                XDNA_ERR(xdna, "Firmware minor version smaller than supported");
                return -EINVAL;
        }
+
+       for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor;
+            feature++) {
+               if (fw_minor < feature->min_minor)
+                       continue;
+               if (feature->max_minor > 0 && fw_minor > feature->max_minor)
+                       continue;
+
+               set_bit(feature->feature, &ndev->feature_mask);
+       }
+
        return 0;
 }
 
@@ -587,6 +599,7 @@ static int aie2_init(struct amdxdna_dev *xdna)
        }
 
        release_firmware(fw);
+       aie2_msg_init(ndev);
        amdxdna_pm_init(xdna);
        return 0;
 
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index cfe42b0d4242..d0a3cb1fe8be 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -156,6 +156,17 @@ enum aie2_dev_status {
        AIE2_DEV_START,
 };
 
+struct aie2_exec_msg_ops {
+       int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+                          size_t *size, u32 *msg_op);
+       int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+                           size_t *size, u32 *msg_op);
+       void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
+       int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+       int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+       u32 (*get_chain_msg_op)(u32 cmd_op);
+};
+
 struct amdxdna_dev_hdl {
        struct amdxdna_dev              *xdna;
        const struct amdxdna_dev_priv   *priv;
@@ -173,6 +184,8 @@ struct amdxdna_dev_hdl {
        u32                             total_col;
        struct aie_version              version;
        struct aie_metadata             metadata;
+       unsigned long                   feature_mask;
+       struct aie2_exec_msg_ops        *exec_msg_ops;
 
        /* power management and clock*/
        enum amdxdna_power_mode_type    pw_mode;
@@ -208,12 +221,26 @@ struct aie2_hw_ops {
        int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
 };
 
+enum aie2_fw_feature {
+       AIE2_NPU_COMMAND,
+       AIE2_FEATURE_MAX
+};
+
+struct aie2_fw_feature_tbl {
+       enum aie2_fw_feature feature;
+       u32 max_minor;
+       u32 min_minor;
+};
+
+#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask)
+
 struct amdxdna_dev_priv {
        const char                      *fw_path;
        u64                             protocol_major;
        u64                             protocol_minor;
        const struct rt_config          *rt_config;
        const struct dpm_clk_freq       *dpm_clk_tbl;
+       const struct aie2_fw_feature_tbl *fw_feature_tbl;
 
 #define COL_ALIGN_NONE   0
 #define COL_ALIGN_NATURE 1
@@ -239,6 +266,7 @@ extern const struct dpm_clk_freq npu1_dpm_clk_table[];
 extern const struct dpm_clk_freq npu4_dpm_clk_table[];
 extern const struct rt_config npu1_default_rt_cfg[];
 extern const struct rt_config npu4_default_rt_cfg[];
+extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
 
 /* aie2_smu.c */
 int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
@@ -263,6 +291,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
                               struct amdxdna_drm_get_array *args);
 
 /* aie2_message.c */
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
 int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
 int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
 int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index d18182c59668..878cc955f56d 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -113,14 +113,14 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
        return &cmd->data[num_masks];
 }
 
-int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
 {
        struct amdxdna_cmd *cmd = abo->mem.kva;
        u32 num_masks, i;
        u32 *cu_mask;
 
        if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
-               return -1;
+               return INVALID_CU_IDX;
 
        num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
        cu_mask = cmd->data;
@@ -129,7 +129,7 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
                        return ffs(cu_mask[i]) - 1;
        }
 
-       return -1;
+       return INVALID_CU_IDX;
 }
 
 /*
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index 919c654dfea6..1aa2b938e07b 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -13,9 +13,10 @@
 struct amdxdna_hwctx_priv;
 
 enum ert_cmd_opcode {
-       ERT_START_CU      = 0,
-       ERT_CMD_CHAIN     = 19,
-       ERT_START_NPU     = 20,
+       ERT_INVALID_CMD = ~0U,
+       ERT_START_CU = 0,
+       ERT_CMD_CHAIN = 19,
+       ERT_START_NPU = 20,
 };
 
 enum ert_cmd_state {
@@ -64,6 +65,8 @@ struct amdxdna_cmd {
        u32 data[];
 };
 
+#define INVALID_CU_IDX         (~0U)
+
 struct amdxdna_hwctx {
        struct amdxdna_client           *client;
        struct amdxdna_hwctx_priv       *priv;
@@ -150,7 +153,7 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
 }
 
 void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
-int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
 
 void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
 void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index 23feb5f6fad3..ffc2e7c7b523 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -63,12 +63,18 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
        { 0 }
 };
 
+static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = {
+       { .feature = AIE2_NPU_COMMAND, .min_minor = 8 },
+       { 0 }
+};
+
 static const struct amdxdna_dev_priv npu1_dev_priv = {
        .fw_path        = "amdnpu/1502_00/npu.sbin",
        .protocol_major = 0x5,
        .protocol_minor = 0x7,
        .rt_config      = npu1_default_rt_cfg,
        .dpm_clk_tbl    = npu1_dpm_clk_table,
+       .fw_feature_tbl = npu1_fw_feature_table,
        .col_align      = COL_ALIGN_NONE,
        .mbox_dev_addr  = NPU1_MBOX_BAR_BASE,
        .mbox_size      = 0, /* Use BAR size */
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
index 67c2ae931c62..5fbfdcc3762d 100644
--- a/drivers/accel/amdxdna/npu2_regs.c
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu2_dev_priv = {
        .protocol_minor = 0x6,
        .rt_config      = npu4_default_rt_cfg,
        .dpm_clk_tbl    = npu4_dpm_clk_table,
+       .fw_feature_tbl = npu4_fw_feature_table,
        .col_align      = COL_ALIGN_NATURE,
        .mbox_dev_addr  = NPU2_MBOX_BAR_BASE,
        .mbox_size      = 0, /* Use BAR size */
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index fac6c1b0b74b..79aba12acfde 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -83,12 +83,18 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
        { 0 }
 };
 
+const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
+       { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
+       { 0 }
+};
+
 static const struct amdxdna_dev_priv npu4_dev_priv = {
        .fw_path        = "amdnpu/17f0_10/npu.sbin",
        .protocol_major = 0x6,
        .protocol_minor = 12,
        .rt_config      = npu4_default_rt_cfg,
        .dpm_clk_tbl    = npu4_dpm_clk_table,
+       .fw_feature_tbl = npu4_fw_feature_table,
        .col_align      = COL_ALIGN_NATURE,
        .mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
        .mbox_size      = 0, /* Use BAR size */
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index c91e1fa76ff5..c5e259ab9f49 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
        .protocol_minor = 12,
        .rt_config      = npu4_default_rt_cfg,
        .dpm_clk_tbl    = npu4_dpm_clk_table,
+       .fw_feature_tbl = npu4_fw_feature_table,
        .col_align      = COL_ALIGN_NATURE,
        .mbox_dev_addr  = NPU5_MBOX_BAR_BASE,
        .mbox_size      = 0, /* Use BAR size */
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index 773f738915a7..2de63b44d6e7 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
        .protocol_minor = 12,
        .rt_config      = npu4_default_rt_cfg,
        .dpm_clk_tbl    = npu4_dpm_clk_table,
+       .fw_feature_tbl = npu4_fw_feature_table,
        .col_align      = COL_ALIGN_NATURE,
        .mbox_dev_addr  = NPU6_MBOX_BAR_BASE,
        .mbox_size      = 0, /* Use BAR size */
-- 
2.34.1

