On 02.08.2016 12:27, Marek Olšák wrote:
From: Marek Olšák <[email protected]>

+23% Bioshock Infinite performance.
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 45 ++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 5e9d0b6..1a7bd7a 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -47,6 +47,12 @@ struct r600_multi_fence {
        struct pipe_reference reference;
        struct pipe_fence_handle *gfx;
        struct pipe_fence_handle *sdma;
+
+       /* If the context wasn't flushed at fence creation, this is non-NULL. */
+       struct {
+               struct r600_common_context *ctx;
+               unsigned ib_index;
+       } gfx_unflushed;
 };

 /*
@@ -262,6 +268,7 @@ static void r600_flush_from_st(struct pipe_context *ctx,
        unsigned rflags = 0;
        struct pipe_fence_handle *gfx_fence = NULL;
        struct pipe_fence_handle *sdma_fence = NULL;
+       bool deferred_fence = false;

        if (flags & PIPE_FLUSH_END_OF_FRAME)
                rflags |= RADEON_FLUSH_END_OF_FRAME;
@@ -271,7 +278,22 @@ static void r600_flush_from_st(struct pipe_context *ctx,
        if (rctx->dma.cs) {
                rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
        }
-       rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+
+       /* Instead of flushing, create a deferred fence. Constraints:
+        * - The state tracker must allow a deferred flush.
+        * - The state tracker must request a fence.
+        * - At most one API context can exist to ensure thread-safety.
+        *   (that's a weak contraint though, we must be careful about
+        *    how PIPE_FLUSH_DEFERRED is used)
+        */
+       if (flags & PIPE_FLUSH_DEFERRED && fence &&
+           /* internal aux_context + 1 API context */
+           rctx->screen->num_contexts <= 2) {
+               gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
+               deferred_fence = true;
+       } else {
+               rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+       }

        /* Both engines can signal out of order, so we need to keep both 
fences. */
        if (gfx_fence || sdma_fence) {
@@ -284,6 +306,11 @@ static void r600_flush_from_st(struct pipe_context *ctx,
                multi_fence->gfx = gfx_fence;
                multi_fence->sdma = sdma_fence;

+               if (deferred_fence) {
+                       multi_fence->gfx_unflushed.ctx = rctx;
+                       multi_fence->gfx_unflushed.ib_index = 
rctx->num_gfx_cs_flushes;
+               }
+
                screen->fence_reference(screen, fence, NULL);
                *fence = (struct pipe_fence_handle*)multi_fence;
        }
@@ -962,6 +989,7 @@ static boolean r600_fence_finish(struct pipe_screen *screen,
 {
        struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
        struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+       struct r600_common_context *rctx;
        int64_t abs_timeout = os_time_get_absolute_timeout(timeout);

        if (rfence->sdma) {
@@ -978,6 +1006,21 @@ static boolean r600_fence_finish(struct pipe_screen 
*screen,
        if (!rfence->gfx)
                return true;

+       /* Flush the gfx IB if it hasn't been flushed yet. */
+       rctx = rfence->gfx_unflushed.ctx;
+       if (rctx && rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) 
{
+               rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);

It seems like a good idea to clear rfence->gfx_unflushed.ctx here.

So, I was feeling a bit iffy about the multi-context handling here -- e.g., what if a second context was created after the fence was created but before it was waited on -- and I think we should go back and read the spec on this again :-)

In particular, I'm looking at section 4.1.2 of OpenGL 4.5 (Compatibility Profile), and it says: [...] if ClientWaitSync is called and all of the following are true:

- the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
- sync is unsignaled when ClientWaitSync is called,
- and the calls to ClientWaitSync and FenceSync were issued *from the same context*,

then the GL will behave as if the equivalent of Flush were inserted immediately after the creation of sync.

So I think the multi-context dance isn't needed. Instead, while most of this patch is fine as is, it seems like the proper solution is slightly different: have a fence_finish callback associated to pipe_context rather than pipe_screen, which optionally does the flush on unflushed fences, but *only if in the same context*. Then the state tracker would call that fence function instead.

At least that's how I'm reading that part of the spec...

Cheers,
Nicolai

+
+               if (!timeout)
+                       return false;
+
+               /* Recompute the timeout after all that. */
+               if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+                       int64_t time = os_time_get_nano();
+                       timeout = abs_timeout > time ? abs_timeout - time : 0;
+               }
+       }
+
        return rws->fence_wait(rws, rfence->gfx, timeout);
 }


_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to