Since commit 541c8f2468b9 ("dma-buf: detach fence ops on signal v3"),
fence->ops may be set to NULL via RCU when a fence signals and has no
release/wait ops. ttm_bo_flush_all_fences() was not updated to handle
this and directly dereferences fence->ops->signaled, leading to a NULL
pointer dereference crash:
```
<7> [290.719359] i915 0000:4d:00.0: [drm:i915_gem_open [i915]]
<1> [291.602076] BUG: kernel NULL pointer dereference, address: 0000000000000018
...
<4> [291.602244] RIP: 0010:ttm_bo_release+0x1bc/0x330 [ttm]
...
<4> [291.602494] Call Trace:
<4> [291.602504] <TASK>
<4> [291.602521] ttm_bo_put+0x3c/0x70 [ttm]
<4> [291.602558] ttm_bo_move_accel_cleanup+0xf6/0x3a0 [ttm]
<4> [291.602602] i915_ttm_move+0x361/0x480 [i915]
<4> [291.603405] ttm_bo_handle_move_mem+0xe8/0x1e0 [ttm]
<4> [291.603447] ttm_bo_validate+0xcf/0x1c0 [ttm]
<4> [291.603485] __i915_ttm_get_pages+0x73/0x290 [i915]
<4> [291.604208] i915_ttm_get_pages+0x106/0x160 [i915]
<4> [291.604915] ? lock_acquire+0xc4/0x2f0
<4> [291.604940] ? eb_validate_vmas+0x6b/0xd30 [i915]
<4> [291.605633] ____i915_gem_object_get_pages+0x3f/0x120 [i915]
<4> [291.606335] __i915_gem_object_get_pages+0xa5/0x110 [i915]
<4> [291.607021] i915_vma_get_pages+0xf9/0x300 [i915]
<4> [291.607774] i915_vma_pin_ww+0xf5/0x1390 [i915]
<4> [291.608532] eb_validate_vmas+0x209/0xd30 [i915]
<4> [291.609215] ? eb_pin_engine+0x2f0/0x3b0 [i915]
<4> [291.609887] i915_gem_do_execbuffer+0xda3/0x36e0 [i915]
<4> [291.610442] ? lock_release+0xd0/0x2b0
<4> [291.610463] ? kernel_text_address+0x139/0x150
<4> [291.610496] ? __lock_acquire+0x43e/0x2790
<4> [291.610512] ? find_held_lock+0x31/0x90
<4> [291.610527] ? __create_object+0x68/0xc0
<4> [291.610551] ? find_held_lock+0x31/0x90
<4> [291.610564] ? __might_fault+0x53/0xb0
<4> [291.610589] i915_gem_execbuffer2_ioctl+0x169/0x320 [i915]
<4> [291.611127] ? __pfx_i915_gem_execbuffer2_ioctl+0x10/0x10 [i915]
<4> [291.611664] drm_ioctl_kernel+0xb3/0x120
<4> [291.611686] drm_ioctl+0x2d4/0x5a0
<4> [291.611699] ? __pfx_i915_gem_execbuffer2_ioctl+0x10/0x10 [i915]
...
```
Fix this by reading fence->ops with rcu_dereference() inside an RCU
read-side critical section, and by skipping
dma_fence_enable_sw_signaling() when ops is NULL. A NULL ops pointer
means the fence is already signaled, so no software signaling is needed.
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15759
Fixes: 541c8f2468b9 ("dma-buf: detach fence ops on signal v3")
Cc: Christian König <[email protected]>
Signed-off-by: Sebastian Brzezinka <[email protected]>
---
drivers/gpu/drm/ttm/ttm_bo.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index acb9197db879..293f3d423655 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -223,7 +223,13 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
dma_resv_for_each_fence_unlocked(&cursor, fence) {
- if (!fence->ops->signaled)
+ const struct dma_fence_ops *ops;
+
+ rcu_read_lock();
+ ops = rcu_dereference(fence->ops);
+ rcu_read_unlock();
+
+ if (ops && !ops->signaled)
dma_fence_enable_sw_signaling(fence);
}
dma_resv_iter_end(&cursor);
--
2.52.0