On 2/24/2026 4:41 AM, Bjorn Andersson wrote:
> On Tue, Feb 24, 2026 at 12:39:00AM +0530, Ekansh Gupta wrote:
>> Introduce a per-device memory manager for the QDA driver that tracks
>> IOMMU-capable compute context-bank (CB) devices. Each CB device is
>> represented by a qda_iommu_device and registered with a central
>> qda_memory_manager instance owned by qda_dev.
>>
> The name makes me expect that this manages memory, but it seems to
> manage devices and context banks...
>
>> The memory manager maintains an xarray of devices and assigns a
>> unique ID to each CB. It also provides basic lifetime management
>> and a workqueue for deferred device removal. qda_cb_setup_device()
>> now allocates a qda_iommu_device for each CB and registers it with
>> the memory manager after DMA configuration succeeds.
>>
>> qda_init_device() is extended to allocate and initialize the memory
>> manager, while qda_deinit_device() will tear it down in later
>> patches.
> "in later patches" makes this extremely hard to review. I had to apply
> the series to try to navigate the code...
Thanks for highlighting. I'll update this.
>
>> This prepares the QDA driver for fine-grained memory and
>> IOMMU domain management tied to individual CB devices.
>>
>> Signed-off-by: Ekansh Gupta <[email protected]>
> [..]
>> obj-$(CONFIG_DRM_ACCEL_QDA_COMPUTE_BUS) += qda_compute_bus.o
>> diff --git a/drivers/accel/qda/qda_cb.c b/drivers/accel/qda/qda_cb.c
> [..]
>> @@ -46,6 +52,18 @@ static int qda_cb_setup_device(struct qda_dev *qdev,
>> struct device *cb_dev)
>> rc = dma_set_mask(cb_dev, DMA_BIT_MASK(pa_bits));
>> if (rc) {
>> qda_err(qdev, "%d bit DMA enable failed: %d\n", pa_bits, rc);
>> + kfree(iommu_dev);
>> + return rc;
>> + }
>> +
>> + iommu_dev->dev = cb_dev;
>> + iommu_dev->sid = sid;
>> + snprintf(iommu_dev->name, sizeof(iommu_dev->name), "qda_iommu_dev_%u",
>> sid);
> It's not easy to follow, when you have scattered the code across so many
> patches and so many files. But I don't think iommu_dev->name is ever
> used.
I'll remove this.
>
>> +
>> + rc = qda_memory_manager_register_device(qdev->iommu_mgr, iommu_dev);
>> + if (rc) {
>> + qda_err(qdev, "Failed to register IOMMU device: %d\n", rc);
>> + kfree(iommu_dev);
>> return rc;
>> }
>>
>> @@ -127,6 +145,8 @@ int qda_create_cb_device(struct qda_dev *qdev, struct
>> device_node *cb_node)
>> void qda_destroy_cb_device(struct device *cb_dev)
>> {
>> struct iommu_group *group;
>> + struct qda_iommu_device *iommu_dev;
>> + struct qda_dev *qdev;
>>
>> if (!cb_dev) {
>> qda_dbg(NULL, "NULL CB device passed to destroy\n");
>> @@ -135,6 +155,18 @@ void qda_destroy_cb_device(struct device *cb_dev)
>>
>> qda_dbg(NULL, "Destroying CB device %s\n", dev_name(cb_dev));
>>
>> + iommu_dev = dev_get_drvdata(cb_dev);
> I'm not sure, but I think cb_dev is the struct device allocated in
> qda_create_cb_device(), but I can not find a place where you set drvdata
> for this device.
It should be updated with iommu_dev in qda_cb_setup_device. I believe I missed
adding this and it didn't give me any functional failure. Thanks for
highlighting this,
I'll fix this in the next spin.
>
>> + if (iommu_dev) {
>> + if (cb_dev->parent) {
>> + qdev = dev_get_drvdata(cb_dev->parent);
>> + if (qdev && qdev->iommu_mgr) {
>> + qda_dbg(NULL, "Unregistering IOMMU device for
>> %s\n",
>> + dev_name(cb_dev));
>> +
>> qda_memory_manager_unregister_device(qdev->iommu_mgr, iommu_dev);
>> + }
>> + }
>> + }
>> +
>> group = iommu_group_get(cb_dev);
>> if (group) {
>> qda_dbg(NULL, "Removing %s from IOMMU group\n",
>> dev_name(cb_dev));
>> diff --git a/drivers/accel/qda/qda_drv.c b/drivers/accel/qda/qda_drv.c
> [..]
>> @@ -25,12 +37,46 @@ static void init_device_resources(struct qda_dev *qdev)
>> atomic_set(&qdev->removing, 0);
>> }
>>
>> +static int init_memory_manager(struct qda_dev *qdev)
>> +{
>> + int ret;
>> +
>> + qda_dbg(qdev, "Initializing IOMMU manager\n");
>> +
>> + qdev->iommu_mgr = kzalloc_obj(*qdev->iommu_mgr, GFP_KERNEL);
>> + if (!qdev->iommu_mgr)
>> + return -ENOMEM;
>> +
>> + ret = qda_memory_manager_init(qdev->iommu_mgr);
>> + if (ret) {
>> + qda_err(qdev, "Failed to initialize memory manager: %d\n", ret);
> qda_memory_manager_init() already logged 1 error and 1 debug prints if
> you get here.
ack.
>
>> + kfree(qdev->iommu_mgr);
>> + qdev->iommu_mgr = NULL;
> We're going to fail probe, you shouldn't have to clear this.
>
>> + return ret;
>> + }
>> +
>> + qda_dbg(qdev, "IOMMU manager initialized successfully\n");
>> + return 0;
>> +}
>> +
>> int qda_init_device(struct qda_dev *qdev)
>> {
>> + int ret;
>> +
>> init_device_resources(qdev);
>>
>> + ret = init_memory_manager(qdev);
>> + if (ret) {
>> + qda_err(qdev, "IOMMU manager initialization failed: %d\n", ret);
> And now we have 2 debug prints and two error prints in the log.
I'll clean the duplicate/unnecessary logs at at places
>
>> + goto err_cleanup_resources;
>> + }
>> +
>> qda_dbg(qdev, "QDA device initialized successfully\n");
> Or, if we get here, you have 8 debug prints.
>
> Please learn how to use kprobe/kretprobe instead of reimplementing it
> using printk().
ack.
>
>> return 0;
>> +
>> +err_cleanup_resources:
>> + cleanup_device_resources(qdev);
>> + return ret;
>> }
>>
>> static int __init qda_core_init(void)
>> diff --git a/drivers/accel/qda/qda_drv.h b/drivers/accel/qda/qda_drv.h
>> index eb732b7d8091..2cb97e4eafbf 100644
>> --- a/drivers/accel/qda/qda_drv.h
>> +++ b/drivers/accel/qda/qda_drv.h
>> @@ -11,6 +11,7 @@
>> #include <linux/mutex.h>
>> #include <linux/rpmsg.h>
>> #include <linux/xarray.h>
>> +#include "qda_memory_manager.h"
>>
>> /* Driver identification */
>> #define DRIVER_NAME "qda"
>> @@ -23,6 +24,8 @@ struct qda_dev {
>> struct device *dev;
>> /* Mutex protecting device state */
>> struct mutex lock;
>> + /* IOMMU/memory manager */
>> + struct qda_memory_manager *iommu_mgr;
>> /* Flag indicating device removal in progress */
>> atomic_t removing;
>> /* Name of the DSP (e.g., "cdsp", "adsp") */
>> diff --git a/drivers/accel/qda/qda_memory_manager.c
>> b/drivers/accel/qda/qda_memory_manager.c
> [..]
>> +int qda_memory_manager_register_device(struct qda_memory_manager *mem_mgr,
>> + struct qda_iommu_device *iommu_dev)
>> +{
>> + int ret;
>> + u32 id;
>> +
>> + if (!mem_mgr || !iommu_dev || !iommu_dev->dev) {
> How could this happen? You call this function from one place, that looks
> like this:
>
> iommu_dev->dev = cb_dev;
> iommu_dev->sid = sid;
> rc = qda_memory_manager_register_device(qdev->iommu_mgr, iommu_dev);
>
> You just allocated in filled out iommu_dev.
>
> Looking up the callstack, we're coming from qda_rpmsg_probe() which just
> did qda_init_device() which created the qsdev->iommu_mgr.
>
> In other words, these can't possibly be NULL.
I'll recheck this and remove redundant checks.
>
>> + qda_err(NULL, "Invalid parameters for device registration\n");
>> + return -EINVAL;
>> + }
>> +
>> + init_iommu_device_fields(iommu_dev, mem_mgr);
>> +
>> + ret = allocate_device_id(mem_mgr, iommu_dev, &id);
>> + if (ret) {
>> + qda_err(NULL, "Failed to allocate device ID: %d (sid=%u)\n",
>> ret, iommu_dev->sid);
>> + return ret;
>> + }
>> +
>> + iommu_dev->id = id;
>> +
>> + qda_dbg(NULL, "Registered device id=%u (sid=%u)\n", id, iommu_dev->sid);
>> +
>> + return 0;
>> +}
>> +
>> +void qda_memory_manager_unregister_device(struct qda_memory_manager
>> *mem_mgr,
>> + struct qda_iommu_device *iommu_dev)
>> +{
>> + if (!mem_mgr || !iommu_dev) {
> The one call to this function is wrapped in:
>
> if (iommu_dev) {
> if (qdev->iommu_mgr) {
> qda_dbg(NULL, ...);
> qda_memory_manager_unregister_device(qdev->iommu_mgr,
> iommu_dev);
> }
> }
>
>> + qda_err(NULL, "Attempted to unregister invalid
>> device/manager\n");
>> + return;
>> + }
>> +
>> + qda_dbg(NULL, "Unregistering device id=%u (refcount=%u)\n",
>> iommu_dev->id,
>> + refcount_read(&iommu_dev->refcount));
> And just before the call to qda_memory_manager_unregister_device() you
> print a debug log, saying you will call this function.
>
>> +
>> + if (refcount_read(&iommu_dev->refcount) == 0) {
>> + xa_erase(&mem_mgr->device_xa, iommu_dev->id);
>> + kfree(iommu_dev);
>> + return;
>> + }
>> +
>> + if (refcount_dec_and_test(&iommu_dev->refcount)) {
>> + qda_info(NULL, "Device id=%u refcount reached zero, queuing
>> removal\n",
>> + iommu_dev->id);
>> + queue_work(mem_mgr->wq, &iommu_dev->remove_work);
>> + }
>> +}
>> +
> [..]
>> diff --git a/drivers/accel/qda/qda_memory_manager.h
>> b/drivers/accel/qda/qda_memory_manager.h
> [..]
>> +
>> +/**
> This says "kernel-doc"
>
>> + * struct qda_iommu_device - IOMMU device instance for memory management
>> + *
>> + * This structure represents a single IOMMU-enabled device managed by the
>> + * memory manager. Each device can be assigned to a specific process.
>> + */
>> +struct qda_iommu_device {
>> + /* Unique identifier for this IOMMU device */
> But this doesn't follow kernel-doc style.
>
> At the end of the series,
>
> ./scripts/kernel-doc -none -vv -Wall drivers/accel/qda/
>
> reports 270 warnings.
I'll resolve the warnings in next version.
>
>> + u32 id;
>> + /* Pointer to the underlying device */
>> + struct device *dev;
>> + /* Name for the device */
>> + char name[32];
>> + /* Spinlock protecting concurrent access to device */
>> + spinlock_t lock;
>> + /* Reference counter for device */
>> + refcount_t refcount;
>> + /* Work structure for deferred device removal */
>> + struct work_struct remove_work;
>> + /* Stream ID for IOMMU transactions */
>> + u32 sid;
>> + /* Pointer to parent memory manager */
>> + struct qda_memory_manager *manager;
>> +};
> Regards,
> Bjorn