From: Marek Olšák <marek.ol...@amd.com>

On CIK and later, make CP DMA buffer clears and copies go through TC L2,
so that TC L2 doesn't need to be flushed around them.

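The only problem is with index buffers, because VGT DMA index fetching
doesn't use TC L2.
As a simple solution, a buffer written through TC L2 by CP DMA is marked
with a new flag (TC_L2_dirty), and TC L2 is flushed before a draw call
that uses such a buffer as its index buffer.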
---
 src/gallium/drivers/radeon/r600_pipe_common.h | 12 ++++++++++
 src/gallium/drivers/radeonsi/si_descriptors.c | 32 ++++++++++++++++++---------
 src/gallium/drivers/radeonsi/si_state_draw.c  |  5 +++++
 3 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index a9416b6..60b8fae 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -138,6 +138,18 @@ struct r600_resource {
         * the unsynchronized map flag and expect the driver to figure it out.
          */
        struct util_range               valid_buffer_range;
+
+       /* For buffers only. This indicates that a write operation has been
+        * performed by TC L2, but the cache hasn't been flushed.
+        * Any hw block which doesn't use or bypasses TC L2 should check this
+        * flag and flush the cache before using the buffer.
+        *
+        * For example, TC L2 must be flushed if a buffer which has been
+        * modified by a shader store instruction is about to be used as
+        * an index buffer. The reason is that VGT DMA index fetching doesn't
+        * use TC L2.
+        */
+       bool                            TC_L2_dirty;
 };
 
 struct r600_transfer {
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index e2da476..454e12c 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1073,7 +1073,7 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
                            bool is_framebuffer)
 {
        struct si_context *sctx = (struct si_context*)ctx;
-       unsigned flush_flags;
+       unsigned flush_flags, tc_l2_flag;
 
        if (!size)
                return;
@@ -1098,19 +1098,22 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
        uint64_t va = r600_resource(dst)->gpu_address + offset;
 
        /* Flush the caches where the resource is bound. */
-       if (is_framebuffer)
+       if (is_framebuffer) {
                flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
-       else
+               tc_l2_flag = 0;
+       } else {
                flush_flags = SI_CONTEXT_INV_TC_L1 |
-                             SI_CONTEXT_INV_TC_L2 |
+                             (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 
: 0) |
                              SI_CONTEXT_INV_KCACHE;
+               tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+       }
 
        sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
                         flush_flags;
 
        while (size) {
                unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
-               unsigned dma_flags = 0;
+               unsigned dma_flags = tc_l2_flag;
 
                si_need_cs_space(sctx, 7 + (sctx->b.flags ? 
sctx->cache_flush.num_dw : 0),
                                 FALSE);
@@ -1141,6 +1144,9 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
        /* Flush the caches again in case the 3D engine has been prefetching
         * the resource. */
        sctx->b.flags |= flush_flags;
+
+       if (tc_l2_flag)
+               r600_resource(dst)->TC_L2_dirty = true;
 }
 
 void si_copy_buffer(struct si_context *sctx,
@@ -1148,7 +1154,7 @@ void si_copy_buffer(struct si_context *sctx,
                    uint64_t dst_offset, uint64_t src_offset, unsigned size,
                    bool is_framebuffer)
 {
-       unsigned flush_flags;
+       unsigned flush_flags, tc_l2_flag;
 
        if (!size)
                return;
@@ -1163,18 +1169,21 @@ void si_copy_buffer(struct si_context *sctx,
        src_offset += r600_resource(src)->gpu_address;
 
        /* Flush the caches where the resource is bound. */
-       if (is_framebuffer)
+       if (is_framebuffer) {
                flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
-       else
+               tc_l2_flag = 0;
+       } else {
                flush_flags = SI_CONTEXT_INV_TC_L1 |
-                             SI_CONTEXT_INV_TC_L2 |
+                             (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 
: 0) |
                              SI_CONTEXT_INV_KCACHE;
+               tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+       }
 
        sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
                         flush_flags;
 
        while (size) {
-               unsigned sync_flags = 0;
+               unsigned sync_flags = tc_l2_flag;
                unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 
                si_need_cs_space(sctx, 7 + (sctx->b.flags ? 
sctx->cache_flush.num_dw : 0), FALSE);
@@ -1206,6 +1215,9 @@ void si_copy_buffer(struct si_context *sctx,
        /* Flush the caches again in case the 3D engine has been prefetching
         * the resource. */
        sctx->b.flags |= flush_flags;
+
+       if (tc_l2_flag)
+               r600_resource(dst)->TC_L2_dirty = true;
 }
 
 /* INIT/DEINIT */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index ccc44d5..e6916c1 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -551,6 +551,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct 
pipe_draw_info *info)
                }
        }
 
+       if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) {
+               sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
+               r600_resource(ib.buffer)->TC_L2_dirty = false;
+       }
+
        /* Check flush flags. */
        if (sctx->b.flags)
                sctx->atoms.s.cache_flush->dirty = true;
-- 
2.1.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to