Module: Mesa
Branch: main
Commit: d6d68ceda19a1fedfc0fc6d0532069708e3a7f46
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d6d68ceda19a1fedfc0fc6d0532069708e3a7f46

Author: Friedrich Vock <[email protected]>
Date:   Fri Dec  1 15:18:44 2023 +0100

radv: Enable compute dispatch tunneling

Compute tunneling can considerably lower the latency of high-priority
compute work. Enabling it is beneficial in cases where high-priority
work is dispatched while the GPU is already busy with other work (e.g.
rendering on GFX). This is the case in VR compositors that dispatch
latency-sensitive compositing work to ACE while GFX is busy rendering
the next frame.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462>

---

 src/amd/vulkan/radv_device.c   | 7 +++++++
 src/amd/vulkan/si_cmd_buffer.c | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index ec9d485fcf8..5bda530d369 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -988,6 +988,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
        */
       device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
    }
+   if (device->physical_device->rad_info.gfx_level >= GFX10) {
+      /* Enable asynchronous compute tunneling. The KMD restricts this feature
+       * to high-priority compute queues, so setting the bit on any other queue
+       * is a no-op. PAL always sets this bit as well.
+       */
+      device->dispatch_initiator |= S_00B800_TUNNEL_ENABLE(1);
+   }
 
    /* Disable partial preemption for task shaders.
     * The kernel may not support preemption, but PAL always sets this bit,
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 73f469c5bf6..caea3e1a10d 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -113,6 +113,8 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
       radeon_emit(cs, 0); /* R_00B894_COMPUTE_USER_ACCUM_1 */
       radeon_emit(cs, 0); /* R_00B898_COMPUTE_USER_ACCUM_2 */
       radeon_emit(cs, 0); /* R_00B89C_COMPUTE_USER_ACCUM_3 */
+
+      radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
    }
 
    /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID

Reply via email to