Since commit 541c8f2468b9 ("dma-buf: detach fence ops on signal v3"),
fence->ops may be set to NULL via RCU when a fence signals and has no
release/wait ops.  ttm_bo_flush_all_fences() was not updated to handle
this and directly dereferences fence->ops->signaled, leading to a NULL
pointer dereference crash:

```
<7> [290.719359] i915 0000:4d:00.0: [drm:i915_gem_open [i915]]
<1> [291.602076] BUG: kernel NULL pointer dereference, address: 0000000000000018
...
<4> [291.602244] RIP: 0010:ttm_bo_release+0x1bc/0x330 [ttm]
...
<4> [291.602494] Call Trace:
<4> [291.602504]  <TASK>
<4> [291.602521]  ttm_bo_put+0x3c/0x70 [ttm]
<4> [291.602558]  ttm_bo_move_accel_cleanup+0xf6/0x3a0 [ttm]
<4> [291.602602]  i915_ttm_move+0x361/0x480 [i915]
<4> [291.603405]  ttm_bo_handle_move_mem+0xe8/0x1e0 [ttm]
<4> [291.603447]  ttm_bo_validate+0xcf/0x1c0 [ttm]
<4> [291.603485]  __i915_ttm_get_pages+0x73/0x290 [i915]
<4> [291.604208]  i915_ttm_get_pages+0x106/0x160 [i915]
<4> [291.604915]  ? lock_acquire+0xc4/0x2f0
<4> [291.604940]  ? eb_validate_vmas+0x6b/0xd30 [i915]
<4> [291.605633]  ____i915_gem_object_get_pages+0x3f/0x120 [i915]
<4> [291.606335]  __i915_gem_object_get_pages+0xa5/0x110 [i915]
<4> [291.607021]  i915_vma_get_pages+0xf9/0x300 [i915]
<4> [291.607774]  i915_vma_pin_ww+0xf5/0x1390 [i915]
<4> [291.608532]  eb_validate_vmas+0x209/0xd30 [i915]
<4> [291.609215]  ? eb_pin_engine+0x2f0/0x3b0 [i915]
<4> [291.609887]  i915_gem_do_execbuffer+0xda3/0x36e0 [i915]
<4> [291.610442]  ? lock_release+0xd0/0x2b0
<4> [291.610463]  ? kernel_text_address+0x139/0x150
<4> [291.610496]  ? __lock_acquire+0x43e/0x2790
<4> [291.610512]  ? find_held_lock+0x31/0x90
<4> [291.610527]  ? __create_object+0x68/0xc0
<4> [291.610551]  ? find_held_lock+0x31/0x90
<4> [291.610564]  ? __might_fault+0x53/0xb0
<4> [291.610589]  i915_gem_execbuffer2_ioctl+0x169/0x320 [i915]
<4> [291.611127]  ? __pfx_i915_gem_execbuffer2_ioctl+0x10/0x10 [i915]
<4> [291.611664]  drm_ioctl_kernel+0xb3/0x120
<4> [291.611686]  drm_ioctl+0x2d4/0x5a0
<4> [291.611699]  ? __pfx_i915_gem_execbuffer2_ioctl+0x10/0x10 [i915]
...
```

Fix this by reading fence->ops under the RCU read lock and skipping
dma_fence_enable_sw_signaling() when ops is NULL. A NULL ops pointer
means the fence has already signaled, so software signaling does not
need to be enabled.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15759
Fixes: 541c8f2468b9 ("dma-buf: detach fence ops on signal v3")
Cc: Christian König <[email protected]>
Signed-off-by: Sebastian Brzezinka <[email protected]>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index acb9197db879..293f3d423655 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -223,7 +223,13 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
 
        dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
        dma_resv_for_each_fence_unlocked(&cursor, fence) {
-               if (!fence->ops->signaled)
+               const struct dma_fence_ops *ops;
+
+               rcu_read_lock();
+               ops = rcu_dereference(fence->ops);
+               rcu_read_unlock();
+
+               if (ops && !ops->signaled)
                        dma_fence_enable_sw_signaling(fence);
        }
        dma_resv_iter_end(&cursor);
-- 
2.52.0

Reply via email to