Maybe only do this when debug printing is enabled?

Nicolai
On 28.11.2016 12:17, Marek Olšák wrote:
From: Marek Olšák <[email protected]> --- src/gallium/drivers/radeonsi/si_shader.c | 40 ++++++++++++++++++++++++++++---- src/gallium/drivers/radeonsi/si_shader.h | 1 + 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 20f4a1d..f4c6e9c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5341,20 +5341,23 @@ static unsigned llvm_get_type_size(LLVMTypeRef type) switch (kind) { case LLVMIntegerTypeKind: return LLVMGetIntTypeWidth(type) / 8; case LLVMFloatTypeKind: return 4; case LLVMPointerTypeKind: return 8; case LLVMVectorTypeKind: return LLVMGetVectorSize(type) * llvm_get_type_size(LLVMGetElementType(type)); + case LLVMArrayTypeKind: + return LLVMGetArrayLength(type) * + llvm_get_type_size(LLVMGetElementType(type)); default: assert(0); return 0; } } static void declare_tess_lds(struct si_shader_context *ctx) { struct gallivm_state *gallivm = &ctx->gallivm; struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; @@ -5989,39 +5992,41 @@ static void si_shader_dump_stats(struct si_screen *sscreen, "SPI_PS_INPUT_ADDR = 0x%04x\n" "SPI_PS_INPUT_ENA = 0x%04x\n", conf->spi_ps_input_addr, conf->spi_ps_input_ena); } fprintf(file, "*** SHADER STATS ***\n" "SGPRS: %d\n" "VGPRS: %d\n" "Spilled SGPRs: %d\n" "Spilled VGPRs: %d\n" + "Private memory VGPRs: %d\n" "Code Size: %d bytes\n" "LDS: %d blocks\n" "Scratch: %d bytes per wave\n" "Max Waves: %d\n" "********************\n\n\n", conf->num_sgprs, conf->num_vgprs, - conf->spilled_sgprs, conf->spilled_vgprs, code_size, + conf->spilled_sgprs, conf->spilled_vgprs, + conf->private_mem_vgprs, code_size, conf->lds_size, conf->scratch_bytes_per_wave, max_simd_waves); } pipe_debug_message(debug, SHADER_INFO, "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d " "LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d " - "Spilled VGPRs: %d", + "Spilled VGPRs: %d PrivMem VGPRs: %d", 
conf->num_sgprs, conf->num_vgprs, code_size, conf->lds_size, conf->scratch_bytes_per_wave, max_simd_waves, conf->spilled_sgprs, - conf->spilled_vgprs); + conf->spilled_vgprs, conf->private_mem_vgprs); } static const char *si_get_shader_name(struct si_shader *shader, unsigned processor) { switch (processor) { case PIPE_SHADER_VERTEX: if (shader->key.as_es) return "Vertex Shader as ES"; else if (shader->key.as_ls) @@ -6564,20 +6569,46 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx) V_008DFC_SQ_EXP_PARAM + new_count, 0)); shader->info.vs_output_param_offset[out] = new_count; new_count++; break; } } shader->info.nr_param_exports = new_count; } } +static void si_count_scratch_private_memory(struct si_shader_context *ctx) +{ + ctx->shader->config.private_mem_vgprs = 0; + + /* Process all LLVM instructions. */ + LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(ctx->main_fn); + while (bb) { + LLVMValueRef next = LLVMGetFirstInstruction(bb); + + while (next) { + LLVMValueRef inst = next; + next = LLVMGetNextInstruction(next); + + if (LLVMGetInstructionOpcode(inst) != LLVMAlloca) + continue; + + LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); + /* No idea why LLVM aligns allocas to 4 elements. 
*/ + unsigned alignment = LLVMGetAlignment(inst); + unsigned dw_size = align(llvm_get_type_size(type) / 4, alignment); + ctx->shader->config.private_mem_vgprs += dw_size; + } + bb = LLVMGetNextBasicBlock(bb); + } +} + static bool si_compile_tgsi_main(struct si_shader_context *ctx, struct si_shader *shader) { struct si_shader_selector *sel = shader->selector; struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; switch (ctx->type) { case PIPE_SHADER_VERTEX: ctx->load_input = declare_input_vs; if (shader->key.as_ls) @@ -7220,22 +7251,23 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, mod = bld_base->base.gallivm->module; /* Dump LLVM IR before any optimization passes */ if (sscreen->b.debug_flags & DBG_PREOPT_IR && r600_can_dump_shader(&sscreen->b, ctx.type)) LLVMDumpModule(mod); si_llvm_finalize_module(&ctx, r600_extra_shader_checks(&sscreen->b, ctx.type)); - /* Post-optimization transformations. */ + /* Post-optimization transformations and analysis. */ si_eliminate_const_vs_outputs(&ctx); + si_count_scratch_private_memory(&ctx); /* Compile to bytecode. */ r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm, mod, debug, ctx.type, "TGSI shader"); si_llvm_dispose(&ctx); if (r) { fprintf(stderr, "LLVM failed to compile shader\n"); return r; } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index aa37676..d4bc47b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -441,20 +441,21 @@ struct si_shader_key { unsigned clip_disable:1; } hw_vs; /* HW VS (it can be VS, TES, GS) */ } opt; }; struct si_shader_config { unsigned num_sgprs; unsigned num_vgprs; unsigned spilled_sgprs; unsigned spilled_vgprs; + unsigned private_mem_vgprs; unsigned lds_size; unsigned spi_ps_input_ena; unsigned spi_ps_input_addr; unsigned float_mode; unsigned scratch_bytes_per_wave; unsigned rsrc1; unsigned rsrc2; }; enum {
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
