From: Marek Olšák <[email protected]>

---
 src/gallium/drivers/radeonsi/si_descriptors.c | 13 ++++++++----
 src/gallium/drivers/radeonsi/si_shader.c      | 29 +++++++++++----------------
 src/gallium/drivers/radeonsi/si_shader.h      |  3 +++
 3 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index f04ed87..5b7298e 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1919,31 +1919,36 @@ void si_emit_graphics_shader_userdata(struct si_context *sctx,
        uint32_t *sh_base = sctx->shader_userdata.sh_base;
        struct si_descriptors *descs;
 
        descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
 
        if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
                si_emit_shader_pointer(sctx, descs,
                                       R_00B030_SPI_SHADER_USER_DATA_PS_0);
                si_emit_shader_pointer(sctx, descs,
                                       R_00B130_SPI_SHADER_USER_DATA_VS_0);
-               si_emit_shader_pointer(sctx, descs,
-                                      R_00B330_SPI_SHADER_USER_DATA_ES_0);
 
-               /* GFX9 merged LS-HS and ES-GS. Only set RW_BUFFERS for ES and LS. */
                if (sctx->b.chip_class >= GFX9) {
+                       /* GFX9 merged LS-HS and ES-GS.
+                        * Set RW_BUFFERS in the special registers, so that
+                        * it's preloaded into s[0:1] instead of s[8:9].
+                        */
                        si_emit_shader_pointer(sctx, descs,
-                                              R_00B430_SPI_SHADER_USER_DATA_LS_0);
+                                              R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS);
+                       si_emit_shader_pointer(sctx, descs,
+                                              R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS);
                } else {
                        si_emit_shader_pointer(sctx, descs,
                                               R_00B230_SPI_SHADER_USER_DATA_GS_0);
                        si_emit_shader_pointer(sctx, descs,
+                                              R_00B330_SPI_SHADER_USER_DATA_ES_0);
+                       si_emit_shader_pointer(sctx, descs,
                                               R_00B430_SPI_SHADER_USER_DATA_HS_0);
                }
        }
 
        mask = sctx->shader_pointers_dirty &
               u_bit_consecutive(SI_DESCS_FIRST_SHADER,
                                 SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);
 
        while (mask) {
                unsigned i = u_bit_scan(&mask);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 9c5dd5e..9e51622 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2672,32 +2672,31 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
        LLVMValueRef tf_soffset;
        unsigned vgpr;
 
        offchip_layout = LLVMGetParam(ctx->main_fn,
                                      ctx->param_tcs_offchip_layout);
        offchip_soffset = LLVMGetParam(ctx->main_fn,
                                       ctx->param_tcs_offchip_offset);
        tf_soffset = LLVMGetParam(ctx->main_fn,
                                  ctx->param_tcs_factor_offset);
 
+       ret = si_insert_input_ptr_as_2xi32(ctx, ret,
+                                          ctx->param_rw_buffers, 0);
+
        if (ctx->screen->b.chip_class >= GFX9) {
-               ret = si_insert_input_ptr_as_2xi32(ctx, ret,
-                                                  ctx->param_rw_buffers, 8);
                ret = LLVMBuildInsertValue(builder, ret, offchip_layout,
                                           8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT, "");
                /* Tess offchip and tess factor offsets are at the beginning. */
                ret = LLVMBuildInsertValue(builder, ret, offchip_soffset, 2, "");
                ret = LLVMBuildInsertValue(builder, ret, tf_soffset, 4, "");
                vgpr = 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT + 1;
        } else {
-               ret = si_insert_input_ptr_as_2xi32(ctx, ret,
-                                                  ctx->param_rw_buffers, 0);
                ret = LLVMBuildInsertValue(builder, ret, offchip_layout,
                                           GFX6_SGPR_TCS_OFFCHIP_LAYOUT, "");
                /* Tess offchip and tess factor offsets are after user SGPRs. */
                ret = LLVMBuildInsertValue(builder, ret, offchip_soffset,
                                           GFX6_TCS_NUM_USER_SGPR, "");
                ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
                                           GFX6_TCS_NUM_USER_SGPR + 1, "");
                vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
        }
 
@@ -2710,28 +2709,26 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
        ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
        ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
        ctx->return_value = ret;
 }
 
 /* Pass TCS inputs from LS to TCS on GFX9. */
 static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
 {
        LLVMValueRef ret = ctx->return_value;
 
+       ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
        ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
        ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
 
-       ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers,
-                                          8 + SI_SGPR_RW_BUFFERS);
-
        ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
                                  8 + SI_SGPR_VS_STATE_BITS);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
                                  8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
                                  8 + GFX9_SGPR_TCS_OUT_OFFSETS);
        ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout,
                                  8 + GFX9_SGPR_TCS_OUT_LAYOUT);
 
        unsigned desc_param = ctx->param_tcs_out_lds_layout + 2;
@@ -5858,31 +5855,31 @@ static void create_function(struct si_shader_context *ctx)
                 * placed after the user SGPRs.
                 */
                for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
                        returns[num_returns++] = ctx->i32; /* SGPRs */
                for (i = 0; i < 3; i++)
                        returns[num_returns++] = ctx->f32; /* VGPRs */
                break;
 
        case SI_SHADER_MERGED_VERTEX_TESSCTRL:
                /* Merged stages have 8 system SGPRs at the beginning. */
-               params[num_params++] = ctx->i32; /* unused */
-               params[num_params++] = ctx->i32; /* unused */
+               params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
+                       const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
                params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
                params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
                params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
                params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32;
                params[num_params++] = ctx->i32; /* unused */
                params[num_params++] = ctx->i32; /* unused */
 
-               params[ctx->param_rw_buffers = num_params++] =
-                       const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
+               params[num_params++] = ctx->i32; /* unused */
+               params[num_params++] = ctx->i32; /* unused */
                declare_per_stage_desc_pointers(ctx, params, &num_params,
                                                ctx->type == PIPE_SHADER_VERTEX);
                declare_vs_specific_input_sgprs(ctx, params, &num_params);
 
                params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
                params[ctx->param_tcs_out_lds_offsets = num_params++] = ctx->i32;
                params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32;
                params[num_params++] = ctx->i32; /* unused */
 
                declare_per_stage_desc_pointers(ctx, params, &num_params,
@@ -8459,33 +8456,31 @@ static bool si_shader_select_tes_parts(struct si_screen *sscreen,
 static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
                                         union si_shader_part_key *key)
 {
        struct gallivm_state *gallivm = &ctx->gallivm;
        struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
        LLVMTypeRef params[32];
        LLVMValueRef func;
        int last_sgpr, num_params = 0;
 
        /* Declare inputs. Only RW_BUFFERS and TESS_FACTOR_OFFSET are used. */
+       params[ctx->param_rw_buffers = num_params++] =
+               const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
+
        if (ctx->screen->b.chip_class >= GFX9) {
-               params[num_params++] = ctx->i32;
-               params[num_params++] = ctx->i32;
                params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
                params[num_params++] = ctx->i32; /* wave info */
                params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
                params[num_params++] = ctx->i32;
                params[num_params++] = ctx->i32;
                params[num_params++] = ctx->i32;
-       }
-       params[ctx->param_rw_buffers = num_params++] =
-               const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
-       if (ctx->screen->b.chip_class >= GFX9) {
+               params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i32;
                params[num_params++] = ctx->i32;
                params[num_params++] = ctx->i32;
                params[num_params++] = ctx->i32;
                params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index e24b8b8..39eee86 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -73,20 +73,23 @@
 #include "tgsi/tgsi_scan.h"
 #include "util/u_queue.h"
 #include "si_state.h"
 
 struct ac_shader_binary;
 
 #define SI_MAX_VS_OUTPUTS      40
 
 /* SGPR user data indices */
 enum {
+       /* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs,
+        * and these two are used for other purposes.
+        */
        SI_SGPR_RW_BUFFERS,  /* rings (& stream-out, VS only) */
        SI_SGPR_RW_BUFFERS_HI,
        SI_SGPR_CONST_BUFFERS,
        SI_SGPR_CONST_BUFFERS_HI,
        SI_SGPR_SAMPLERS,  /* images & sampler states interleaved */
        SI_SGPR_SAMPLERS_HI,
        SI_SGPR_IMAGES,
        SI_SGPR_IMAGES_HI,
        SI_SGPR_SHADER_BUFFERS,
        SI_SGPR_SHADER_BUFFERS_HI,
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to