On Thu, 2025-11-13 at 15:51 +0100, Christian König wrote:
> Implement per-fence spinlocks, allowing implementations to not give an
> external spinlock to protect the fence internal statei. Instead a spinlock

s/statei/state

> embedded into the fence structure itself is used in this case.
> Shared spinlocks have the problem that implementations need to guarantee
> that the lock live at least as long all fences referencing them.

s/live/lives
s/them/it

> 
> Using a per-fence spinlock allows completely decoupling spinlock producer

nit: AFAIK in English it's "allows for"

> and consumer life times, simplifying the handling in most use cases.

Yup! That's a great improvement :)

> 
> v2: improve naming, coverage and function documentation
> 
> Signed-off-by: Christian König <[email protected]>
> ---
>  drivers/dma-buf/dma-fence.c              | 66 +++++++++++++-----------
>  drivers/dma-buf/sw_sync.c                | 14 ++---
>  drivers/dma-buf/sync_debug.h             |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   | 12 ++---
>  drivers/gpu/drm/drm_crtc.c               |  2 +-
>  drivers/gpu/drm/drm_writeback.c          |  2 +-
>  drivers/gpu/drm/nouveau/nouveau_drm.c    |  5 +-
>  drivers/gpu/drm/nouveau/nouveau_fence.c  |  3 +-
>  drivers/gpu/drm/qxl/qxl_release.c        |  3 +-
>  drivers/gpu/drm/scheduler/sched_fence.c  |  4 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_fence.c    |  3 +-
>  drivers/gpu/drm/xe/xe_hw_fence.c         |  3 +-
>  drivers/gpu/drm/xe/xe_sched_job.c        |  4 +-
>  include/linux/dma-fence.h                | 42 ++++++++++++++-
>  16 files changed, 111 insertions(+), 62 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 7074347f506d..9a5aa9e44e13 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -343,7 +343,6 @@ void __dma_fence_might_wait(void)
>  }
>  #endif
>  
> -
>  /**
>   * dma_fence_signal_timestamp_locked - signal completion of a fence
>   * @fence: the fence to signal
> @@ -368,7 +367,7 @@ int dma_fence_signal_timestamp_locked(struct dma_fence 
> *fence,
>       struct dma_fence_cb *cur, *tmp;
>       struct list_head cb_list;
>  
> -     lockdep_assert_held(fence->lock);
> +     lockdep_assert_held(dma_fence_spinlock(fence));

This accessor function could be a separate patch, couldn't it? One where
you just return fence->lock, and then in the actual inline-patch you
just add the check for the new flag.

Would be cleaner; but I guess doing it all at once is also no tragedy.

>  
>       if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>                                     &fence->flags)))
> @@ -421,9 +420,9 @@ int dma_fence_signal_timestamp(struct dma_fence *fence, 
> ktime_t timestamp)
>       if (WARN_ON(!fence))
>               return -EINVAL;
>  
> -     spin_lock_irqsave(fence->lock, flags);
> +     dma_fence_lock_irqsave(fence, flags);

Similar with this helper, could it be a separate patch?

>       ret = dma_fence_signal_timestamp_locked(fence, timestamp);
> -     spin_unlock_irqrestore(fence->lock, flags);
> +     dma_fence_unlock_irqrestore(fence, flags);
>  
>       return ret;
>  }
> 

[…]

>  
>       trace_dma_fence_init(fence);
> @@ -1071,7 +1069,7 @@ __dma_fence_init(struct dma_fence *fence, const struct 
> dma_fence_ops *ops,
>   * dma_fence_init - Initialize a custom fence.
>   * @fence: the fence to initialize
>   * @ops: the dma_fence_ops for operations on this fence
> - * @lock: the irqsafe spinlock to use for locking this fence
> + * @lock: optional irqsafe spinlock to use for locking this fence
>   * @context: the execution context this fence is run on
>   * @seqno: a linear increasing sequence number for this context
>   *
> @@ -1081,6 +1079,10 @@ __dma_fence_init(struct dma_fence *fence, const struct 
> dma_fence_ops *ops,
>   *
>   * context and seqno are used for easy comparison between fences, allowing
>   * to check which fence is later by simply using dma_fence_later().
> + *
> + * It is strongly discouraged to provide an external lock. This is only 
> allowed
> + * for legacy use cases when multiple fences need to be prevented from
> + * signaling out of order.

Mhhhmm, wait.
It's still one of the legendary dma_fence rules that they must (or:
should very much) signal in order.

And you agreed before that the lock doesn't actually help with
enforcing that, because you can take the lock and then still signal out
of order.

So that comment seems wrong to me


>   */
>  void
>  dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
> @@ -1094,7 +1096,7 @@ EXPORT_SYMBOL(dma_fence_init);
>   * dma_fence_init64 - Initialize a custom fence with 64-bit seqno support.
>   * @fence: the fence to initialize
>   * @ops: the dma_fence_ops for operations on this fence
> - * @lock: the irqsafe spinlock to use for locking this fence
> + * @lock: optional irqsafe spinlock to use for locking this fence
>   * @context: the execution context this fence is run on
>   * @seqno: a linear increasing sequence number for this context
>   *
> @@ -1104,6 +1106,10 @@ EXPORT_SYMBOL(dma_fence_init);
>   *
>   * Context and seqno are used for easy comparison between fences, allowing
>   * to check which fence is later by simply using dma_fence_later().
> + *
> + * It is strongly discouraged to provide an external lock. This is only 
> allowed
> + * for legacy use cases when multiple fences need to be prevented from
> + * signaling out of order.

Same.

>   */
>  void
>  dma_fence_init64(struct dma_fence *fence, const struct dma_fence_ops *ops,
> diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
> index 3c20f1d31cf5..956a3ad7a075 100644
> --- a/drivers/dma-buf/sw_sync.c
> +++ b/drivers/dma-buf/sw_sync.c
> @@ -155,12 +155,12 @@ static void timeline_fence_release(struct dma_fence 
> *fence)
>       struct sync_timeline *parent = dma_fence_parent(fence);
>       unsigned long flags;
>  
> -     spin_lock_irqsave(fence->lock, flags);
> +     dma_fence_lock_irqsave(fence, flags);
>       if (!list_empty(&pt->link)) {
>               list_del(&pt->link);
>               rb_erase(&pt->node, &parent->pt_tree);
>       }
> -     spin_unlock_irqrestore(fence->lock, flags);
> +     dma_fence_unlock_irqrestore(fence, flags);
>  
>       sync_timeline_put(parent);
>       dma_fence_free(fence);
> @@ -178,7 +178,7 @@ static void timeline_fence_set_deadline(struct dma_fence 
> *fence, ktime_t deadlin
>       struct sync_pt *pt = dma_fence_to_sync_pt(fence);
>       unsigned long flags;
>  
> -     spin_lock_irqsave(fence->lock, flags);
> +     dma_fence_lock_irqsave(fence, flags);
>       if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) {
>               if (ktime_before(deadline, pt->deadline))
>                       pt->deadline = deadline;
> @@ -186,7 +186,7 @@ static void timeline_fence_set_deadline(struct dma_fence 
> *fence, ktime_t deadlin
>               pt->deadline = deadline;
>               __set_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags);
>       }
> -     spin_unlock_irqrestore(fence->lock, flags);
> +     dma_fence_unlock_irqrestore(fence, flags);
>  }
> 

[…]

>  
> +/**
> + * dma_fence_spinlock - return pointer to the spinlock protecting the fence
> + * @fence: the fence to get the lock from
> + *
> + * Return either the pointer to the embedded or the external spin lock.
> + */
> +static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
> +{
> +     return test_bit(DMA_FENCE_FLAG_INLINE_LOCK_BIT, &fence->flags) ?
> +             &fence->inline_lock : fence->extern_lock;
> +}
> +
> +/**
> + * dma_fence_lock_irqsave - irqsave lock the fence
> + * @fence: the fence to lock
> + * @flags: where to store the CPU flags.
> + *
> + * Lock the fence, preventing it from changing to the signaled state.
> + */
> +#define dma_fence_lock_irqsave(fence, flags) \


I think we discussed that before (too many emails..).

The names are inconsistent. One is dma_fence_spinlock, the other
dma_fence_lock().


P.

> +     spin_lock_irqsave(dma_fence_spinlock(fence), flags)
> +
> +/**
> + * dma_fence_unlock_irqrestore - unlock the fence and irqrestore
> + * @fence: the fence to unlock
> + * @flags the CPU flags to restore
> + *
> + * Unlock the fence, allowing it to change it's state to signaled again.
> + */
> +#define dma_fence_unlock_irqrestore(fence, flags)    \
> +     spin_unlock_irqrestore(dma_fence_spinlock(fence), flags)
> +
>  #ifdef CONFIG_LOCKDEP
>  bool dma_fence_begin_signalling(void);
>  void dma_fence_end_signalling(bool cookie);

Reply via email to