We should allow BV to run ahead of BR when there are multiple submits
from the same context. Per the Vulkan memory model this should be safe
because there are no implied execution dependencies between submits. In
particular, this should allow BV to run at least a frame ahead of BR when
applications render directly to the display (i.e. unredirected rendering).

We also shuffle around some of the synchronization in
a6xx_set_pagetable() to better match what the downstream driver does.
Previously it differed from downstream only because of the extra
synchronization emitted before the call to a6xx_set_pagetable().

Signed-off-by: Connor Abbott <[email protected]>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index b8f8ae940b55f5578abdbdec6bf1e90a53e721a5..794b79a6a4a1940c84709c32e895b62b97f1ac5a 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -216,15 +216,9 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
                return;
 
        if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
-               /* Wait for previous submit to complete before continuing: */
-               OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
-               OUT_RING(ring, 0);
-               OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
-               OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
-               OUT_RING(ring, submit->seqno - 1);
-
+               /* Sync both threads. */
                OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
-               OUT_RING(ring, CP_SET_THREAD_BOTH);
+               OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);
 
                /* Reset state used to synchronize BR and BV */
                OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
@@ -234,8 +228,21 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
                         CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
                         CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);
 
+               /*
+                * Toggle concurrent binning for pagetable switch and set the
+                * thread to BR since only it can execute the pagetable switch
+                * packets.
+                */
                OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
-               OUT_RING(ring, CP_SET_THREAD_BR);
+               OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
+
+               /* Wait for previous submit to complete before continuing: */
+               OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
+               OUT_RING(ring, 0);
+               OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
+               OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
+               OUT_RING(ring, submit->seqno - 1);
+
        }
 
        if (!sysprof) {
@@ -444,14 +451,13 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 
        adreno_check_and_reenable_stall(adreno_gpu);
 
+       a6xx_set_pagetable(a6xx_gpu, ring, submit);
+
        /*
-        * Toggle concurrent binning for pagetable switch and set the thread to
-        * BR since only it can execute the pagetable switch packets.
+        * Set pseudo register and get counters on BR.
         */
        OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
-       OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
-
-       a6xx_set_pagetable(a6xx_gpu, ring, submit);
+       OUT_RING(ring, CP_SET_THREAD_BR);
 
        /*
         * If preemption is enabled, then set the pseudo register for the save

---
base-commit: b5bad77e1e3c7249e4c0c88f98477e1ee7669b63
change-id: 20251027-msm-less-bv-sync-ab03721d0a3b

Best regards,
-- 
Connor Abbott <[email protected]>

Reply via email to