Re: [Mesa-dev] [PATCH 08/10] radeonsi: count and report temp arrays in scratch separately

Nicolai Hähnle Tue, 29 Nov 2016 11:42:06 -0800

Maybe only do this when debug printing is enabled?

Nicolai


On 28.11.2016 12:17, Marek Olšák wrote:

From: Marek Olšák <[email protected]>

---
 src/gallium/drivers/radeonsi/si_shader.c | 40 ++++++++++++++++++++++++++++----
 src/gallium/drivers/radeonsi/si_shader.h |  1 +
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 20f4a1d..f4c6e9c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5341,20 +5341,23 @@ static unsigned llvm_get_type_size(LLVMTypeRef type)
        switch (kind) {
        case LLVMIntegerTypeKind:
                return LLVMGetIntTypeWidth(type) / 8;
        case LLVMFloatTypeKind:
                return 4;
        case LLVMPointerTypeKind:
                return 8;
        case LLVMVectorTypeKind:
                return LLVMGetVectorSize(type) *
                       llvm_get_type_size(LLVMGetElementType(type));
+       case LLVMArrayTypeKind:
+               return LLVMGetArrayLength(type) *
+                      llvm_get_type_size(LLVMGetElementType(type));
        default:
                assert(0);
                return 0;
        }
 }

 static void declare_tess_lds(struct si_shader_context *ctx)
 {
        struct gallivm_state *gallivm = &ctx->gallivm;
        struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
@@ -5989,39 +5992,41 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
                                "SPI_PS_INPUT_ADDR = 0x%04x\n"
                                "SPI_PS_INPUT_ENA  = 0x%04x\n",
                                conf->spi_ps_input_addr, conf->spi_ps_input_ena);
                }

                fprintf(file, "*** SHADER STATS ***\n"
                        "SGPRS: %d\n"
                        "VGPRS: %d\n"
                        "Spilled SGPRs: %d\n"
                        "Spilled VGPRs: %d\n"
+                       "Private memory VGPRs: %d\n"
                        "Code Size: %d bytes\n"
                        "LDS: %d blocks\n"
                        "Scratch: %d bytes per wave\n"
                        "Max Waves: %d\n"
                        "********************\n\n\n",
                        conf->num_sgprs, conf->num_vgprs,
-                       conf->spilled_sgprs, conf->spilled_vgprs, code_size,
+                       conf->spilled_sgprs, conf->spilled_vgprs,
+                       conf->private_mem_vgprs, code_size,
                        conf->lds_size, conf->scratch_bytes_per_wave,
                        max_simd_waves);
        }

        pipe_debug_message(debug, SHADER_INFO,
                           "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
                           "LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "
-                          "Spilled VGPRs: %d",
+                          "Spilled VGPRs: %d PrivMem VGPRs: %d",
                           conf->num_sgprs, conf->num_vgprs, code_size,
                           conf->lds_size, conf->scratch_bytes_per_wave,
                           max_simd_waves, conf->spilled_sgprs,
-                          conf->spilled_vgprs);
+                          conf->spilled_vgprs, conf->private_mem_vgprs);
 }

 static const char *si_get_shader_name(struct si_shader *shader,
                                      unsigned processor)
 {
        switch (processor) {
        case PIPE_SHADER_VERTEX:
                if (shader->key.as_es)
                        return "Vertex Shader as ES";
                else if (shader->key.as_ls)
@@ -6564,20 +6569,46 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
                                                            V_008DFC_SQ_EXP_PARAM + new_count, 0));
                                shader->info.vs_output_param_offset[out] = new_count;
                                new_count++;
                                break;
                        }
                }
                shader->info.nr_param_exports = new_count;
        }
 }

+static void si_count_scratch_private_memory(struct si_shader_context *ctx)
+{
+       ctx->shader->config.private_mem_vgprs = 0;
+
+       /* Process all LLVM instructions. */
+       LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(ctx->main_fn);
+       while (bb) {
+               LLVMValueRef next = LLVMGetFirstInstruction(bb);
+
+               while (next) {
+                       LLVMValueRef inst = next;
+                       next = LLVMGetNextInstruction(next);
+
+                       if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
+                               continue;
+
+                       LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
+                       /* No idea why LLVM aligns allocas to 4 elements. */
+                       unsigned alignment = LLVMGetAlignment(inst);
+                       unsigned dw_size = align(llvm_get_type_size(type) / 4, alignment);
+                       ctx->shader->config.private_mem_vgprs += dw_size;
+               }
+               bb = LLVMGetNextBasicBlock(bb);
+       }
+}
+
 static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                                 struct si_shader *shader)
 {
        struct si_shader_selector *sel = shader->selector;
        struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;

        switch (ctx->type) {
        case PIPE_SHADER_VERTEX:
                ctx->load_input = declare_input_vs;
                if (shader->key.as_ls)
@@ -7220,22 +7251,23 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
        mod = bld_base->base.gallivm->module;

        /* Dump LLVM IR before any optimization passes */
        if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
            r600_can_dump_shader(&sscreen->b, ctx.type))
                LLVMDumpModule(mod);

        si_llvm_finalize_module(&ctx,
                                    r600_extra_shader_checks(&sscreen->b, ctx.type));

-       /* Post-optimization transformations. */
+       /* Post-optimization transformations and analysis. */
        si_eliminate_const_vs_outputs(&ctx);
+       si_count_scratch_private_memory(&ctx);

        /* Compile to bytecode. */
        r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
                            mod, debug, ctx.type, "TGSI shader");
        si_llvm_dispose(&ctx);
        if (r) {
                fprintf(stderr, "LLVM failed to compile shader\n");
                return r;
        }

diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index aa37676..d4bc47b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -441,20 +441,21 @@ struct si_shader_key {
                        unsigned        clip_disable:1;
                } hw_vs; /* HW VS (it can be VS, TES, GS) */
        } opt;
 };

 struct si_shader_config {
        unsigned                        num_sgprs;
        unsigned                        num_vgprs;
        unsigned                        spilled_sgprs;
        unsigned                        spilled_vgprs;
+       unsigned                        private_mem_vgprs;
        unsigned                        lds_size;
        unsigned                        spi_ps_input_ena;
        unsigned                        spi_ps_input_addr;
        unsigned                        float_mode;
        unsigned                        scratch_bytes_per_wave;
        unsigned                        rsrc1;
        unsigned                        rsrc2;
 };

 enum {

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 08/10] radeonsi: count and report temp arrays in scratch separately

Reply via email to