On 3/5/26 04:07, Srinivasan Shanmugam wrote:
> Introduce a per-drm_file eventfd manager to support render-node event
> subscriptions.
> 
> The manager is implemented in amdgpu_eventfd.[ch] and is owned by the
> drm_file (amdgpu_fpriv). It maps event_id -> eventfd_id object, where
> each eventfd_id can have multiple eventfds bound (fan-out).
> 
> The design is IRQ-safe for signaling: IRQ path takes the xarray lock
> (irqsave) and signals eventfds while still holding the lock.
> 
> This patch only adds the core manager
> 
> v4:
> - Use eventfd_ctx pointer as binding identity instead of fd number
> - Make duplicate (event_id, ctx) binds idempotent
> - Replace mgr lock with atomic bind limit
> - Add helper for xa get-or-create event_id
> 
> Cc: Alex Deucher <[email protected]>
> Suggested-by: Christian König <[email protected]>
> Signed-off-by: Srinivasan Shanmugam <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile         |   3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c | 279 ++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h |  59 +++++
>  3 files changed, 340 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 006d49d6b4af..30b1cf3c6cdf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -67,7 +67,8 @@ amdgpu-y += amdgpu_device.o amdgpu_reg_access.o 
> amdgpu_doorbell_mgr.o amdgpu_kms
>       amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>       amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>       amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
> amdgpu_dev_coredump.o \
> -     amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o
> +     amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o \
> +     amdgpu_eventfd.o
>  
>  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c
> new file mode 100644
> index 000000000000..9806ec515cfc
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c
> @@ -0,0 +1,279 @@
> +/*
> + * Copyright 2026 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +/*
> + * Render-node eventfd subscription infrastructure.
> + */
> +
> +#include <linux/slab.h>
> +#include <linux/err.h>
> +
> +#include "amdgpu_eventfd.h"
> +
> +#define AMDGPU_EVENTFD_MAX_BINDS 4096
> +
> +/*
> + * Helper: caller holds xa_lock_irqsave(&mgr->ids, flags).

It would be better if that function were called without holding a lock, and if it
also allocated the new entry.

> + *
> + * If id exists -> returns it, keeps new_id untouched (caller frees it).
> + * If id missing -> stores new_id and consumes it (sets *new_id = NULL).
> + */
> +static struct amdgpu_eventfd_id *
> +amdgpu_eventfd_get_or_create_id_locked(struct amdgpu_eventfd_mgr *mgr,
> +                                    u32 event_id,
> +                                    struct amdgpu_eventfd_id **new_id)
> +{
> +     struct amdgpu_eventfd_id *id;
> +

> +     id = xa_load(&mgr->ids, event_id);
> +     if (id)
> +             return id;
> +
> +     if (!*new_id)
> +             return NULL;
> +
> +     if (xa_err(__xa_store(&mgr->ids, event_id, *new_id, 

You can simplify this by using xa_insert(); that function returns -EBUSY when
there is already an entry at the specified location.


GFP_NOWAIT)))

That is incorrect. Since xa_store() can't drop and retake the lock, you would need
to use GFP_ATOMIC here.


> +             return NULL;
> +
> +     id = *new_id;
> +     *new_id = NULL;
> +     return id;
> +}
> +
> +static struct amdgpu_eventfd_id *amdgpu_eventfd_id_alloc(u32 event_id)
> +{
> +     struct amdgpu_eventfd_id *id;
> +
> +     id = kzalloc(sizeof(*id), GFP_KERNEL);
> +     if (!id)
> +             return NULL;
> +
> +     id->event_id = event_id;
> +     INIT_HLIST_HEAD(&id->entries);
> +     id->n_entries = 0;
> +     return id;
> +}
> +
> +/*
> + * mgr lifetime is tied to fpriv:
> + * - init at open
> + * - fini at postclose (after unregister / wait for ongoing IRQs if needed)
> + */
> +void amdgpu_eventfd_mgr_init(struct amdgpu_eventfd_mgr *mgr)
> +{
> +     xa_init_flags(&mgr->ids, XA_FLAGS_LOCK_IRQ);
> +     atomic_set(&mgr->bind_count, 0);
> +}
> +
> +void amdgpu_eventfd_mgr_fini(struct amdgpu_eventfd_mgr *mgr)
> +{
> +     unsigned long index;
> +     struct amdgpu_eventfd_id *id;
> +
> +     /*
> +      * Final teardown: keep xa locked while we remove ids and
> +      * drop all eventfd references (IRQ-safe).
> +      */

> +     xa_lock_irq(&mgr->ids);

That is only necessary if we didn't wait on IRQs beforehand, but I think we
should do exactly that in the next patch set.

> +     xa_for_each(&mgr->ids, index, id) {
> +             struct amdgpu_eventfd_entry *e;
> +             struct hlist_node *tmp;
> +


> +             __xa_erase(&mgr->ids, index);

That is superfluous; xa_destroy() takes care of removing all entries and is much
faster.

> +
> +             hlist_for_each_entry_safe(e, tmp, &id->entries, hnode) {
> +                     hlist_del(&e->hnode);
> +                     id->n_entries--;
> +                     eventfd_ctx_put(e->ctx);
> +                     kfree(e);
> +             }
> +
> +             kfree(id);
> +     }
> +     xa_unlock_irq(&mgr->ids);
> +
> +     xa_destroy(&mgr->ids);
> +}
> +
> +int amdgpu_eventfd_bind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int 
> eventfd)
> +{
> +     struct amdgpu_eventfd_id *id, *new_id = NULL;
> +     struct amdgpu_eventfd_entry *e = NULL;
> +     struct eventfd_ctx *ctx;
> +     unsigned long flags;
> +     bool found = false;
> +     int r;
> +
> +     if (!mgr || !event_id || eventfd < 0)
> +             return -EINVAL;
> +
> +     /* Enforce total bind limit (atomic, no mgr lock). */
> +     if (atomic_inc_return(&mgr->bind_count) > AMDGPU_EVENTFD_MAX_BINDS) {
> +             atomic_dec(&mgr->bind_count);
> +             return -ENOSPC;
> +     }
> +
> +     /*
> +      * Allocate objects first (can sleep). Take references later.
> +      * new_id is cheap even if unused.
> +      */
> +     new_id = amdgpu_eventfd_id_alloc(event_id);


> +     e = kzalloc(sizeof(*e), GFP_KERNEL);
Only allocate that after all the prerequisites have been checked, e.g. after ctx,
id, etc. have been allocated.


> +     if (!e) {
> +             r = -ENOMEM;
> +             goto err_dec_limit;
> +     }
> +
> +     ctx = eventfd_ctx_fdget(eventfd);
> +     if (IS_ERR(ctx)) {
> +             r = PTR_ERR(ctx);
> +             goto err_free_entry;
> +     }
> +     e->ctx = ctx;
> +
> +     xa_lock_irqsave(&mgr->ids, flags);
> +

> +     id = amdgpu_eventfd_get_or_create_id_locked(mgr, event_id, &new_id);

Move all of this functionality into amdgpu_eventfd_id_alloc(). It is not
problematic that we drop and retake the lock multiple times, as far as I can see.

Regards,
Christian.

> +     if (!id) {
> +             xa_unlock_irqrestore(&mgr->ids, flags);
> +             r = -ENOMEM;
> +             goto err_put_ctx_free_newid;
> +     }
> +
> +     /* Duplicate bind policy: idempotent no-op. Compare ctx pointers. */
> +     {
> +             struct amdgpu_eventfd_entry *it;
> +
> +             hlist_for_each_entry(it, &id->entries, hnode) {
> +                     if (it->ctx == ctx) {
> +                             found = true;
> +                             break;
> +                     }
> +             }
> +
> +             if (!found) {
> +                     hlist_add_head(&e->hnode, &id->entries);
> +                     id->n_entries++;
> +             }
> +     }
> +
> +     xa_unlock_irqrestore(&mgr->ids, flags);
> +
> +     /* If event_id already existed, drop unused allocation. */
> +     kfree(new_id);
> +
> +     if (found) {
> +             /* Drop the new reference + entry; keep existing binding. */
> +             eventfd_ctx_put(ctx);
> +             kfree(e);
> +             atomic_dec(&mgr->bind_count);
> +     }
> +
> +     return 0;
> +
> +err_put_ctx_free_newid:
> +     kfree(new_id);
> +     eventfd_ctx_put(ctx);
> +err_free_entry:
> +     kfree(e);
> +err_dec_limit:
> +     atomic_dec(&mgr->bind_count);
> +     return r;
> +}
> +
> +int amdgpu_eventfd_unbind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int 
> eventfd)
> +{
> +     struct amdgpu_eventfd_id *id;
> +     struct amdgpu_eventfd_entry *e;
> +     struct hlist_node *tmp;
> +     struct eventfd_ctx *ctx;
> +     unsigned long flags;
> +     bool removed = false;
> +
> +     if (!mgr || !event_id || eventfd < 0)
> +             return -EINVAL;
> +
> +     ctx = eventfd_ctx_fdget(eventfd);
> +     if (IS_ERR(ctx))
> +             return PTR_ERR(ctx);
> +
> +     xa_lock_irqsave(&mgr->ids, flags);
> +
> +     id = xa_load(&mgr->ids, event_id);
> +     if (!id)
> +             goto out_unlock;
> +
> +     hlist_for_each_entry_safe(e, tmp, &id->entries, hnode) {
> +             if (e->ctx == ctx) {
> +                     hlist_del(&e->hnode);
> +                     id->n_entries--;
> +                     removed = true;
> +
> +                     eventfd_ctx_put(e->ctx);
> +                     kfree(e);
> +
> +                     atomic_dec(&mgr->bind_count);
> +
> +                     if (id->n_entries == 0) {
> +                             __xa_erase(&mgr->ids, event_id);
> +                             kfree(id);
> +                     }
> +                     break;
> +             }
> +     }
> +
> +out_unlock:
> +     xa_unlock_irqrestore(&mgr->ids, flags);
> +
> +     /* Drop the temporary ref from fdget */
> +     eventfd_ctx_put(ctx);
> +
> +     return removed ? 0 : -ENOENT;
> +}
> +
> +void amdgpu_eventfd_signal(struct amdgpu_eventfd_mgr *mgr, u32 event_id)
> +{
> +     struct amdgpu_eventfd_id *id;
> +     struct amdgpu_eventfd_entry *e;
> +     unsigned long flags;
> +
> +     if (!mgr || !event_id)
> +             return;
> +
> +     /*
> +      * IRQ-safe: keep xa locked while signaling.
> +      * eventfd_signal(ctx) is IRQ-safe.
> +      */
> +     xa_lock_irqsave(&mgr->ids, flags);
> +
> +     id = xa_load(&mgr->ids, event_id);
> +     if (!id) {
> +             xa_unlock_irqrestore(&mgr->ids, flags);
> +             return;
> +     }
> +
> +     hlist_for_each_entry(e, &id->entries, hnode)
> +             eventfd_signal(e->ctx);
> +
> +     xa_unlock_irqrestore(&mgr->ids, flags);
> +}
> \ No newline at end of file
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h
> new file mode 100644
> index 000000000000..248afb1f2f14
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h
> @@ -0,0 +1,59 @@
> +/*
> + * Copyright 2026 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +/*
> + * Render-node eventfd subscription infrastructure.
> + */
> +
> +#ifndef __AMDGPU_EVENTFD_H__
> +#define __AMDGPU_EVENTFD_H__
> +
> +#include <linux/eventfd.h>
> +#include <linux/xarray.h>
> +#include <linux/atomic.h>
> +
> +struct amdgpu_eventfd_entry {
> +     struct eventfd_ctx *ctx;
> +     struct hlist_node hnode;
> +};
> +
> +struct amdgpu_eventfd_id {
> +     u32 event_id;
> +     struct hlist_head entries;
> +     u32 n_entries;
> +};
> +
> +struct amdgpu_eventfd_mgr {
> +     struct xarray ids;          /* event_id -> struct amdgpu_eventfd_id* */
> +     atomic_t bind_count;        /* total binds across all event_ids */
> +};
> +
> +void amdgpu_eventfd_mgr_init(struct amdgpu_eventfd_mgr *mgr);
> +void amdgpu_eventfd_mgr_fini(struct amdgpu_eventfd_mgr *mgr);
> +
> +int amdgpu_eventfd_bind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int 
> eventfd);
> +int amdgpu_eventfd_unbind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int 
> eventfd);
> +
> +void amdgpu_eventfd_signal(struct amdgpu_eventfd_mgr *mgr, u32 event_id);
> +
> +#endif /* __AMDGPU_EVENTFD_H__ */

Reply via email to