From: Marek Olšák <marek.ol...@amd.com> Let's use the same user data SGPRs in all stages. (for SPI_SHADER_USER_DATA_COMMON_0) --- src/gallium/drivers/radeonsi/si_descriptors.c | 24 ++++----------------- src/gallium/drivers/radeonsi/si_shader.c | 31 +++++++++++---------------- 2 files changed, 16 insertions(+), 39 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index cda7d94..9ba8df6 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1973,47 +1973,31 @@ static void si_emit_shader_pointer(struct si_context *sctx, radeon_emit(cs, va >> 32); } static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs) { si_emit_shader_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0); si_emit_shader_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0); + si_emit_shader_pointer(sctx, descs, + R_00B330_SPI_SHADER_USER_DATA_ES_0); if (sctx->b.chip_class >= GFX9) { /* GFX9 merged LS-HS and ES-GS. */ - if (descs == &sctx->descriptors[SI_DESCS_RW_BUFFERS]) { - /* Set RW_BUFFERS in the special registers, so that - * it's preloaded into s[0:1] instead of s[8:9]. - */ - si_emit_shader_pointer(sctx, descs, - R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS); - si_emit_shader_pointer(sctx, descs, - R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS); - } else { - /* Set BINDLESS_SAMPLERS_AND_IMAGES into s[10:11], - * s[8:9] remains unused for now. - */ - assert(descs == &sctx->bindless_descriptors); - si_emit_shader_pointer(sctx, descs, - R_00B330_SPI_SHADER_USER_DATA_ES_0); - si_emit_shader_pointer(sctx, descs, - R_00B430_SPI_SHADER_USER_DATA_LS_0); - } + si_emit_shader_pointer(sctx, descs, + R_00B430_SPI_SHADER_USER_DATA_LS_0); } else { si_emit_shader_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0); si_emit_shader_pointer(sctx, descs, - R_00B330_SPI_SHADER_USER_DATA_ES_0); - si_emit_shader_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0); si_emit_shader_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_LS_0); } } void si_emit_graphics_shader_pointers(struct si_context *sctx, struct r600_atom *atom) { unsigned mask; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 4456c3d..62a056d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3024,25 +3024,27 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); } ctx->return_value = ret; } /* Pass TCS inputs from LS to TCS on GFX9. */ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) { LLVMValueRef ret = ctx->return_value; - ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); + + ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, + 8 + SI_SGPR_RW_BUFFERS); ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_bindless_samplers_and_images, 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS); @@ -3065,25 +3067,26 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) ret = si_insert_input_ret_float(ctx, ret, ctx->param_tcs_rel_ids, vgpr++); ctx->return_value = ret; } /* Pass GS inputs from ES to GS on GFX9. */ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) { LLVMValueRef ret = ctx->return_value; - ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0); ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); - ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); + + ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, + 8 + SI_SGPR_RW_BUFFERS); ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_bindless_samplers_and_images, 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); unsigned desc_param = ctx->param_vs_state_bits + 1; ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, 8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS); ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES); @@ -4430,35 +4433,30 @@ static void create_function(struct si_shader_context *ctx) * placed after the user SGPRs. */ for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 11; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ break; case SI_SHADER_MERGED_VERTEX_TESSCTRL: /* Merged stages have 8 system SGPRs at the beginning. */ - ctx->param_rw_buffers = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */ - add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS)); + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* SPI_SHADER_USER_DATA_ADDR_LO_HS */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* SPI_SHADER_USER_DATA_ADDR_HI_HS */ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - - ctx->param_bindless_samplers_and_images = - add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v8i32, 0)); - + declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, ctx->type == PIPE_SHADER_VERTEX); declare_vs_specific_input_sgprs(ctx, &fninfo); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ @@ -4488,35 +4486,30 @@ static void create_function(struct si_shader_context *ctx) */ for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 11; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ } break; case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY: /* Merged stages have 8 system SGPRs at the beginning. */ - ctx->param_rw_buffers = /* SPI_SHADER_USER_DATA_ADDR_LO_GS */ - add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS)); + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_USER_DATA_ADDR_LO_GS) */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_USER_DATA_ADDR_HI_GS) */ ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - - ctx->param_bindless_samplers_and_images = - add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v8i32, 0)); - + declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, (ctx->type == PIPE_SHADER_VERTEX || ctx->type == PIPE_SHADER_TESS_EVAL)); if (ctx->type == PIPE_SHADER_VERTEX) { declare_vs_specific_input_sgprs(ctx, &fninfo); } else { /* TESS_EVAL (and also GEOMETRY): * Declare as many input SGPRs as the VS has. */ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev