radeon: derive buffer placement and flags only once per buffer

Marek Olšák Thu, 18 Aug 2016 12:47:34 -0700

From: Marek Olšák <[email protected]>

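Buffer placement (domains) and flags are now derived only the first time
r600_init_resource is called for a resource and are cached in the resource,
so invalidated buffers that get reallocated don't have to derive them again.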
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 147 +++++++++++++-----------
 src/gallium/drivers/radeon/r600_pipe_common.h   |   2 +
 2 files changed, 80 insertions(+), 69 deletions(-)


diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 4480293..113a7dc 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -98,91 +98,108 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
        /* Setting the CS to NULL will prevent doing checks we have done already. */
        return ctx->ws->buffer_map(resource->buf, NULL, usage);
 }
 
 bool r600_init_resource(struct r600_common_screen *rscreen,
                        struct r600_resource *res,
                        uint64_t size, unsigned alignment)
 {
        struct r600_texture *rtex = (struct r600_texture*)res;
        struct pb_buffer *old_buf, *new_buf;
-       enum radeon_bo_flag flags = 0;
-
-       switch (res->b.b.usage) {
-       case PIPE_USAGE_STREAM:
-               flags = RADEON_FLAG_GTT_WC;
-               /* fall through */
-       case PIPE_USAGE_STAGING:
-               /* Transfers are likely to occur more often with these resources. */
-               res->domains = RADEON_DOMAIN_GTT;
-               break;
-       case PIPE_USAGE_DYNAMIC:
-               /* Older kernels didn't always flush the HDP cache before
-                * CS execution
-                */
-               if (rscreen->info.drm_major == 2 &&
-                   rscreen->info.drm_minor < 40) {
+
+       if (!res->initialized) {
+               res->flags = 0;
+
+               switch (res->b.b.usage) {
+               case PIPE_USAGE_STREAM:
+                       res->flags = RADEON_FLAG_GTT_WC;
+                       /* fall through */
+               case PIPE_USAGE_STAGING:
+                       /* Transfers are likely to occur more often with these
+                        * resources. */
                        res->domains = RADEON_DOMAIN_GTT;
-                       flags |= RADEON_FLAG_GTT_WC;
+                       break;
+               case PIPE_USAGE_DYNAMIC:
+                       /* Older kernels didn't always flush the HDP cache before
+                        * CS execution
+                        */
+                       if (rscreen->info.drm_major == 2 &&
+                           rscreen->info.drm_minor < 40) {
+                               res->domains = RADEON_DOMAIN_GTT;
+                               res->flags |= RADEON_FLAG_GTT_WC;
+                               break;
+                       }
+                       res->flags |= RADEON_FLAG_CPU_ACCESS;
+                       /* fall through */
+               case PIPE_USAGE_DEFAULT:
+               case PIPE_USAGE_IMMUTABLE:
+               default:
+                       /* Not listing GTT here improves performance in some
+                        * apps. */
+                       res->domains = RADEON_DOMAIN_VRAM;
+                       res->flags |= RADEON_FLAG_GTT_WC;
                        break;
                }
-               flags |= RADEON_FLAG_CPU_ACCESS;
-               /* fall through */
-       case PIPE_USAGE_DEFAULT:
-       case PIPE_USAGE_IMMUTABLE:
-       default:
-               /* Not listing GTT here improves performance in some apps. */
-               res->domains = RADEON_DOMAIN_VRAM;
-               flags |= RADEON_FLAG_GTT_WC;
-               break;
-       }
 
-       if (res->b.b.target == PIPE_BUFFER &&
-           res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
-                             PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
-               /* Use GTT for all persistent mappings with older kernels,
-                * because they didn't always flush the HDP cache before CS
-                * execution.
-                *
-                * Write-combined CPU mappings are fine, the kernel ensures all CPU
-                * writes finish before the GPU executes a command stream.
+               if (res->b.b.target == PIPE_BUFFER &&
+                   res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
+                                     PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
+                       /* Use GTT for all persistent mappings with older
+                        * kernels, because they didn't always flush the HDP
+                        * cache before CS execution.
+                        *
+                        * Write-combined CPU mappings are fine, the kernel
+                        * ensures all CPU writes finish before the GPU
+                        * executes a command stream.
+                        */
+                       if (rscreen->info.drm_major == 2 &&
+                           rscreen->info.drm_minor < 40)
+                               res->domains = RADEON_DOMAIN_GTT;
+                       else if (res->domains & RADEON_DOMAIN_VRAM)
+                               res->flags |= RADEON_FLAG_CPU_ACCESS;
+               }
+
+               /* Tiled textures are unmappable. Always put them in VRAM. */
+               if (res->b.b.target != PIPE_BUFFER &&
+                   rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
+                       res->domains = RADEON_DOMAIN_VRAM;
+                       res->flags &= ~RADEON_FLAG_CPU_ACCESS;
+                       res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
+                                RADEON_FLAG_GTT_WC;
+               }
+
+               /* If VRAM is just stolen system memory, allow both VRAM and
+                * GTT, whichever has free space. If a buffer is evicted from
+                * VRAM to GTT, it will stay there.
                 */
-               if (rscreen->info.drm_major == 2 &&
-                   rscreen->info.drm_minor < 40)
-                       res->domains = RADEON_DOMAIN_GTT;
-               else if (res->domains & RADEON_DOMAIN_VRAM)
-                       flags |= RADEON_FLAG_CPU_ACCESS;
-       }
+               if (!rscreen->info.has_dedicated_vram &&
+                   res->domains == RADEON_DOMAIN_VRAM)
+                       res->domains = RADEON_DOMAIN_VRAM_GTT;
 
-       /* Tiled textures are unmappable. Always put them in VRAM. */
-       if (res->b.b.target != PIPE_BUFFER &&
-           rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
-               res->domains = RADEON_DOMAIN_VRAM;
-               flags &= ~RADEON_FLAG_CPU_ACCESS;
-               flags |= RADEON_FLAG_NO_CPU_ACCESS |
-                        RADEON_FLAG_GTT_WC;
-       }
+               if (rscreen->debug_flags & DBG_NO_WC)
+                       res->flags &= ~RADEON_FLAG_GTT_WC;
 
-       /* If VRAM is just stolen system memory, allow both VRAM and GTT,
-        * whichever has free space. If a buffer is evicted from VRAM to GTT,
-        * it will stay there.
-        */
-       if (!rscreen->info.has_dedicated_vram &&
-           res->domains == RADEON_DOMAIN_VRAM)
-               res->domains = RADEON_DOMAIN_VRAM_GTT;
+               /* Set expected VRAM and GART usage for the buffer. */
+               res->vram_usage = 0;
+               res->gart_usage = 0;
 
-       if (rscreen->debug_flags & DBG_NO_WC)
-               flags &= ~RADEON_FLAG_GTT_WC;
+               if (res->domains & RADEON_DOMAIN_VRAM)
+                       res->vram_usage = size;
+               else if (res->domains & RADEON_DOMAIN_GTT)
+                       res->gart_usage = size;
+
+               res->initialized = true;
+       }
 
        /* Allocate a new resource. */
        new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
-                                            res->domains, flags);
+                                            res->domains, res->flags);
        if (!new_buf) {
                return false;
        }
 
        /* Replace the pointer such that if res->buf wasn't NULL, it won't be
         * NULL. This should prevent crashes with multiple contexts using
         * the same buffer where one of the contexts invalidates it while
         * the others are using it. */
        old_buf = res->buf;
        res->buf = new_buf; /* should be atomic */
@@ -190,29 +207,20 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
        if (rscreen->info.has_virtual_memory)
                res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
        else
                res->gpu_address = 0;
 
        pb_reference(&old_buf, NULL);
 
        util_range_set_empty(&res->valid_buffer_range);
        res->TC_L2_dirty = false;
 
-       /* Set expected VRAM and GART usage for the buffer. */
-       res->vram_usage = 0;
-       res->gart_usage = 0;
-
-       if (res->domains & RADEON_DOMAIN_VRAM)
-               res->vram_usage = size;
-       else if (res->domains & RADEON_DOMAIN_GTT)
-               res->gart_usage = size;
-
        /* Print debug information. */
        if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
                fprintf(stderr, "VM start=0x%"PRIX64"  end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
                        res->gpu_address, res->gpu_address + res->buf->size,
                        res->buf->size);
        }
        return true;
 }
 
 static void r600_buffer_destroy(struct pipe_screen *screen,
@@ -496,20 +504,21 @@ r600_alloc_buffer_struct(struct pipe_screen *screen,
 {
        struct r600_resource *rbuffer;
 
        rbuffer = MALLOC_STRUCT(r600_resource);
 
        rbuffer->b.b = *templ;
        pipe_reference_init(&rbuffer->b.b.reference, 1);
        rbuffer->b.b.screen = screen;
        rbuffer->b.vtbl = &r600_buffer_vtbl;
        rbuffer->buf = NULL;
+       rbuffer->initialized = false;
        rbuffer->TC_L2_dirty = false;
        rbuffer->is_shared = false;
        util_range_init(&rbuffer->valid_buffer_range);
        return rbuffer;
 }
 
 struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
                                         const struct pipe_resource *templ,
                                         unsigned alignment)
 {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 5375044..358d5f4 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -169,20 +169,22 @@ struct r600_resource {
 
        /* Winsys objects. */
        struct pb_buffer                *buf;
        uint64_t                        gpu_address;
        /* Memory usage if the buffer placement is optimal. */
        uint64_t                        vram_usage;
        uint64_t                        gart_usage;
 
        /* Resource state. */
        enum radeon_bo_domain           domains;
+       enum radeon_bo_flag             flags;
+       bool                            initialized;
 
        /* The buffer range which is initialized (with a write transfer,
         * streamout, DMA, or as a random access target). The rest of
         * the buffer is considered invalid and can be mapped unsynchronized.
         *
         * This allows unsychronized mapping of a buffer range which hasn't
         * been used yet. It's for applications which forget to use
         * the unsynchronized map flag and expect the driver to figure it out.
          */
        struct util_range               valid_buffer_range;
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 8/8] gallium/radeon: derive buffer placement and flags only once per buffer

Reply via email to