From: Marek Olšák <marek.ol...@amd.com>

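Store the GPU address of the uploaded descriptor list in si_descriptors
instead of a buffer offset. This allows us to change the pointer arbitrarily.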
---
 src/gallium/drivers/radeonsi/si_cp_dma.c      |  3 ++-
 src/gallium/drivers/radeonsi/si_descriptors.c | 27 +++++++++++++++------------
 src/gallium/drivers/radeonsi/si_state.h       |  2 +-
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 97adc27..9a3971a 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -453,21 +453,22 @@ static void cik_prefetch_shader_async(struct si_context *sctx,
 
        cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
 }
 
 static void cik_prefetch_VBO_descriptors(struct si_context *sctx)
 {
        if (!sctx->vertex_elements)
                return;
 
        cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
-                                sctx->vertex_buffers.buffer_offset,
+                                sctx->vertex_buffers.gpu_address -
+                                sctx->vertex_buffers.buffer->gpu_address,
                                 sctx->vertex_elements->desc_list_byte_size);
 }
 
 void cik_emit_prefetch_L2(struct si_context *sctx)
 {
        /* Prefetch shaders and VBO descriptors to TC L2. */
        if (sctx->b.chip_class >= GFX9) {
                /* Choose the right spot for the VBO prefetch. */
                if (sctx->tes_shader.cso) {
                        if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 3c33e85..0c1fca8 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -142,37 +142,41 @@ static bool si_upload_descriptors(struct si_context *sctx,
        unsigned upload_size = desc->num_active_slots * slot_size;
 
        /* Skip the upload if no shader is using the descriptors. dirty_mask
         * will stay dirty and the descriptors will be uploaded when there is
         * a shader using them.
         */
        if (!upload_size)
                return true;
 
        uint32_t *ptr;
+       int buffer_offset;
        u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size,
                       si_optimal_tcc_alignment(sctx, upload_size),
-                      (unsigned*)&desc->buffer_offset,
+                      (unsigned*)&buffer_offset,
                       (struct pipe_resource**)&desc->buffer,
                       (void**)&ptr);
-       if (!desc->buffer)
+       if (!desc->buffer) {
+               desc->gpu_address = 0;
                return false; /* skip the draw call */
+       }
 
        util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
                                upload_size);
        desc->gpu_list = ptr - first_slot_offset / 4;
 
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
                             RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 
        /* The shader pointer should point to slot 0. */
-       desc->buffer_offset -= first_slot_offset;
+       buffer_offset -= first_slot_offset;
+       desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
 
        si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
        return true;
 }
 
 static void
 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
 {
        if (!desc->buffer)
                return;
@@ -990,28 +994,32 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
        if (!count)
                return true;
 
        desc_list_byte_size = velems->desc_list_byte_size;
        first_vb_use_mask = velems->first_vb_use_mask;
 
        /* Vertex buffer descriptors are the only ones which are uploaded
         * directly through a staging buffer and don't go through
         * the fine-grained upload path.
         */
+       unsigned buffer_offset = 0;
        u_upload_alloc(sctx->b.b.const_uploader, 0,
                       desc_list_byte_size,
                       si_optimal_tcc_alignment(sctx, desc_list_byte_size),
-                      (unsigned*)&desc->buffer_offset,
+                      &buffer_offset,
                       (struct pipe_resource**)&desc->buffer, (void**)&ptr);
-       if (!desc->buffer)
+       if (!desc->buffer) {
+               desc->gpu_address = 0;
                return false;
+       }
 
+       desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
        desc->list = ptr;
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                              desc->buffer, RADEON_USAGE_READ,
                              RADEON_PRIO_DESCRIPTORS);
 
        assert(count <= SI_MAX_ATTRIBS);
 
        for (i = 0; i < count; i++) {
                struct pipe_vertex_buffer *vb;
                struct r600_resource *rbuffer;
@@ -1707,23 +1715,21 @@ static void si_upload_bindless_descriptor(struct si_context *sctx,
                                          unsigned desc_slot,
                                          unsigned num_dwords)
 {
        struct si_descriptors *desc = &sctx->bindless_descriptors;
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
        unsigned desc_slot_offset = desc_slot * 16;
        uint32_t *data;
        uint64_t va;
 
        data = desc->list + desc_slot_offset;
-
-       va = desc->buffer->gpu_address + desc->buffer_offset +
-            desc_slot_offset * 4;
+       va = desc->gpu_address + desc_slot_offset * 4;
 
        radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0));
        radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) |
                    S_370_WR_CONFIRM(1) |
                    S_370_ENGINE_SEL(V_370_ME));
        radeon_emit(cs, va);
        radeon_emit(cs, va >> 32);
        radeon_emit_array(cs, data, num_dwords);
 }
 
@@ -1960,24 +1966,21 @@ static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs,
                                        unsigned sh_base,
                                        unsigned pointer_count)
 {
        radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0));
        radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
 }
 
 static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs,
                                        struct si_descriptors *desc)
 {
-       uint64_t va = 0;
-
-       if (desc->buffer)
-               va = desc->buffer->gpu_address + desc->buffer_offset;
+       uint64_t va = desc->gpu_address;
 
        radeon_emit(cs, va);
        radeon_emit(cs, va >> 32);
 }
 
 static void si_emit_shader_pointer(struct si_context *sctx,
                                   struct si_descriptors *desc,
                                   unsigned sh_base)
 {
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 8ce3cdb..eb1901b 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -256,21 +256,21 @@ enum {
  * image resources, and sampler states.
  */
 struct si_descriptors {
        /* The list of descriptors in malloc'd memory. */
        uint32_t *list;
        /* The list in mapped GPU memory. */
        uint32_t *gpu_list;
 
        /* The buffer where the descriptors have been uploaded. */
        struct r600_resource *buffer;
-       int buffer_offset; /* can be negative if not using lower slots */
+       uint64_t gpu_address;
 
        /* The maximum number of descriptors. */
        uint32_t num_elements;
 
        /* Slots that are used by currently-bound shaders.
         * It determines which slots are uploaded.
         */
        uint32_t first_active_slot;
        uint32_t num_active_slots;
 
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to