From: Marek Olšák <marek.ol...@amd.com>

This allows us to change the pointer arbitrarily.
---
 src/gallium/drivers/radeonsi/si_cp_dma.c      |  3 ++-
 src/gallium/drivers/radeonsi/si_descriptors.c | 27 +++++++++++++++------------
 src/gallium/drivers/radeonsi/si_state.h       |  2 +-
 3 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 97adc27..9a3971a 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -453,21 +453,22 @@ static void cik_prefetch_shader_async(struct si_context *sctx, cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0); } static void cik_prefetch_VBO_descriptors(struct si_context *sctx) { if (!sctx->vertex_elements) return; cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b, - sctx->vertex_buffers.buffer_offset, + sctx->vertex_buffers.gpu_address - + sctx->vertex_buffers.buffer->gpu_address, sctx->vertex_elements->desc_list_byte_size); } void cik_emit_prefetch_L2(struct si_context *sctx) { /* Prefetch shaders and VBO descriptors to TC L2. */ if (sctx->b.chip_class >= GFX9) { /* Choose the right spot for the VBO prefetch. */ if (sctx->tes_shader.cso) { if (sctx->prefetch_L2_mask & SI_PREFETCH_HS) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 3c33e85..0c1fca8 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -142,37 +142,41 @@ static bool si_upload_descriptors(struct si_context *sctx, unsigned upload_size = desc->num_active_slots * slot_size; /* Skip the upload if no shader is using the descriptors. dirty_mask * will stay dirty and the descriptors will be uploaded when there is * a shader using them. 
*/ if (!upload_size) return true; uint32_t *ptr; + int buffer_offset; u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size, si_optimal_tcc_alignment(sctx, upload_size), - (unsigned*)&desc->buffer_offset, + (unsigned*)&buffer_offset, (struct pipe_resource**)&desc->buffer, (void**)&ptr); - if (!desc->buffer) + if (!desc->buffer) { + desc->gpu_address = 0; return false; /* skip the draw call */ + } util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset, upload_size); desc->gpu_list = ptr - first_slot_offset / 4; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); /* The shader pointer should point to slot 0. */ - desc->buffer_offset -= first_slot_offset; + buffer_offset -= first_slot_offset; + desc->gpu_address = desc->buffer->gpu_address + buffer_offset; si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); return true; } static void si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc) { if (!desc->buffer) return; @@ -990,28 +994,32 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) if (!count) return true; desc_list_byte_size = velems->desc_list_byte_size; first_vb_use_mask = velems->first_vb_use_mask; /* Vertex buffer descriptors are the only ones which are uploaded * directly through a staging buffer and don't go through * the fine-grained upload path. 
*/ + unsigned buffer_offset = 0; u_upload_alloc(sctx->b.b.const_uploader, 0, desc_list_byte_size, si_optimal_tcc_alignment(sctx, desc_list_byte_size), - (unsigned*)&desc->buffer_offset, + &buffer_offset, (struct pipe_resource**)&desc->buffer, (void**)&ptr); - if (!desc->buffer) + if (!desc->buffer) { + desc->gpu_address = 0; return false; + } + desc->gpu_address = desc->buffer->gpu_address + buffer_offset; desc->list = ptr; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); assert(count <= SI_MAX_ATTRIBS); for (i = 0; i < count; i++) { struct pipe_vertex_buffer *vb; struct r600_resource *rbuffer; @@ -1707,23 +1715,21 @@ static void si_upload_bindless_descriptor(struct si_context *sctx, unsigned desc_slot, unsigned num_dwords) { struct si_descriptors *desc = &sctx->bindless_descriptors; struct radeon_winsys_cs *cs = sctx->b.gfx.cs; unsigned desc_slot_offset = desc_slot * 16; uint32_t *data; uint64_t va; data = desc->list + desc_slot_offset; - - va = desc->buffer->gpu_address + desc->buffer_offset + - desc_slot_offset * 4; + va = desc->gpu_address + desc_slot_offset * 4; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0)); radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit_array(cs, data, num_dwords); } @@ -1960,24 +1966,21 @@ static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs, unsigned sh_base, unsigned pointer_count) { radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0)); radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2); } static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs, struct si_descriptors *desc) { - uint64_t va = 0; - - if (desc->buffer) - va = desc->buffer->gpu_address + desc->buffer_offset; + uint64_t va = desc->gpu_address; radeon_emit(cs, va); radeon_emit(cs, va >> 32); } static void si_emit_shader_pointer(struct 
si_context *sctx, struct si_descriptors *desc, unsigned sh_base) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 8ce3cdb..eb1901b 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -256,21 +256,21 @@ enum { * image resources, and sampler states. */ struct si_descriptors { /* The list of descriptors in malloc'd memory. */ uint32_t *list; /* The list in mapped GPU memory. */ uint32_t *gpu_list; /* The buffer where the descriptors have been uploaded. */ struct r600_resource *buffer; - int buffer_offset; /* can be negative if not using lower slots */ + uint64_t gpu_address; /* The maximum number of descriptors. */ uint32_t num_elements; /* Slots that are used by currently-bound shaders. * It determines which slots are uploaded. */ uint32_t first_active_slot; uint32_t num_active_slots; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev