Hi Marek,

Tested-by: Edmondo Tommasina <[email protected]>

I tested the patch with Witcher 2. Thanks edmondo On Tue, Oct 18, 2016 at 6:28 PM, Marek Olšák <[email protected]> wrote: > From: Marek Olšák <[email protected]> > > These constant value VS PARAM exports: > - 0,0,0,0 > - 0,0,0,1 > - 1,1,1,0 > - 1,1,1,1 > can be loaded into PS inputs using the DEFAULT_VAL field, and the VS exports > can be removed from the IR to save export & parameter memory. > > After LLVM optimizations, analyze the IR to see which exports are equal to > the ones listed above (or undef) and remove them if they are. > > Targeted use cases: > - All DX9 eON ports always clear 10 VS outputs to 0.0 even if most of them > are unused by PS (such as Witcher 2 below). > - VS output arrays with unused elements that the GLSL compiler can't > eliminate (such as Batman below). > > The shader-db deltas are quite interesting: > (not from upstream si-report.py, it won't be upstreamed) > > PERCENTAGE DELTAS Shaders PARAM exports (affected only) > batman_arkham_origins 589 -67.17 % > bioshock-infinite 1769 -0.47 % > dirt-showdown 548 -2.68 % > dota2 1747 -3.36 % > f1-2015 776 -4.94 % > left_4_dead_2 1762 -0.07 % > metro_2033_redux 2670 -0.43 % > portal 474 -0.22 % > talos_principle 324 -3.63 % > warsow 176 -2.20 % > witcher2 1040 -73.78 % > ---------------------------------------- > All affected 991 -65.37 % ... 9681 -> 3353 > ---------------------------------------- > Total 26725 -10.82 % ... 
58490 -> 52162 > --- > src/gallium/drivers/radeonsi/si_shader.c | 154 > ++++++++++++++++++++++++ > src/gallium/drivers/radeonsi/si_shader.h | 11 ++ > src/gallium/drivers/radeonsi/si_state_shaders.c | 17 ++- > 3 files changed, 180 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index a361418..7fc1df4 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -6593,20 +6593,167 @@ static void si_init_shader_ctx(struct > si_shader_context *ctx, > bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex; > bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = > si_llvm_emit_primitive; > bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier; > > bld_base->op_actions[TGSI_OPCODE_MAX].emit = > build_tgsi_intrinsic_nomem; > bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32"; > bld_base->op_actions[TGSI_OPCODE_MIN].emit = > build_tgsi_intrinsic_nomem; > bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32"; > } > > +/* Return true if the PARAM export has been eliminated. */ > +static bool si_eliminate_const_output(struct si_shader_context *ctx, > + LLVMValueRef inst, unsigned offset) > +{ > + struct si_shader *shader = ctx->shader; > + unsigned num_outputs = shader->selector->info.num_outputs; > + double v[4]; > + unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ > + > + for (i = 0; i < 4; i++) { > + LLVMBool loses_info; > + LLVMValueRef p = LLVMGetOperand(inst, 5 + i); > + if (!LLVMIsConstant(p)) > + return false; > + > + /* It's a constant expression. Undef outputs are eliminated > too. */ > + if (LLVMIsUndef(p)) > + v[i] = 0; > + else > + v[i] = LLVMConstRealGetDouble(p, &loses_info); > + > + if (v[i] != 0 && v[i] != 1) > + return false; > + } > + > + /* Only certain combinations of 0 and 1 can be eliminated. 
*/ > + if (v[0] == 0 && v[1] == 0 && v[2] == 0) > + default_val = v[3] == 0 ? 0 : 1; > + else if (v[0] == 1 && v[1] == 1 && v[2] == 1) > + default_val = v[3] == 0 ? 2 : 3; > + else > + return false; > + > + /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ > + LLVMInstructionEraseFromParent(inst); > + > + /* Change OFFSET to DEFAULT_VAL. */ > + for (i = 0; i < num_outputs; i++) { > + if (shader->info.vs_output_param_offset[i] == offset) { > + shader->info.vs_output_param_offset[i] = > + EXP_PARAM_DEFAULT_VAL_0000 + default_val; > + break; > + } > + } > + return true; > +} > + > +struct si_vs_exports { > + unsigned num; > + unsigned offset[SI_MAX_VS_OUTPUTS]; > + LLVMValueRef inst[SI_MAX_VS_OUTPUTS]; > +}; > + > +static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx) > +{ > + struct si_shader *shader = ctx->shader; > + struct tgsi_shader_info *info = &shader->selector->info; > + LLVMBasicBlockRef bb; > + struct si_vs_exports exports; > + bool removed_any = false; > + > + exports.num = 0; > + > + if ((ctx->type == PIPE_SHADER_VERTEX && > + (shader->key.vs.as_es || shader->key.vs.as_ls)) || > + (ctx->type == PIPE_SHADER_TESS_EVAL && shader->key.tes.as_es)) > + return; > + > + /* Process all LLVM instructions. */ > + bb = LLVMGetFirstBasicBlock(ctx->radeon_bld.main_fn); > + while (bb) { > + LLVMValueRef inst = LLVMGetFirstInstruction(bb); > + > + while (inst) { > + LLVMValueRef cur = inst; > + inst = LLVMGetNextInstruction(inst); > + > + if (LLVMGetInstructionOpcode(cur) != LLVMCall) > + continue; > + > + LLVMValueRef callee = LLVMGetCalledValue(cur); > + LLVMValueKind kind = LLVMGetValueKind(callee); > + > + if (kind != LLVMFunctionValueKind) > + continue; > + > + const char *name = LLVMGetValueName(callee); > + unsigned num_args = LLVMCountParams(callee); > + > + /* Check if this is an export instruction. 
*/ > + if (num_args != 9 || strcmp(name, "llvm.SI.export")) > + continue; > + > + LLVMValueRef arg = LLVMGetOperand(cur, 3); > + unsigned target = LLVMConstIntGetZExtValue(arg); > + > + if (target < V_008DFC_SQ_EXP_PARAM) > + continue; > + > + target -= V_008DFC_SQ_EXP_PARAM; > + > + /* Eliminate constant value PARAM exports. */ > + if (si_eliminate_const_output(ctx, cur, target)) { > + removed_any = true; > + } else { > + exports.offset[exports.num] = target; > + exports.inst[exports.num] = cur; > + exports.num++; > + } > + } > + bb = LLVMGetNextBasicBlock(bb); > + } > + > + /* Remove holes in export memory due to removed PARAM exports. > + * This is done by renumbering all PARAM exports. > + */ > + if (removed_any) { > + ubyte current_offset[SI_MAX_VS_OUTPUTS]; > + unsigned new_count = 0; > + unsigned out, i; > + > + /* Make a copy of the offsets. We need the old version while > + * we are modifying some of them. */ > + assert(sizeof(current_offset) == > + sizeof(shader->info.vs_output_param_offset)); > + memcpy(current_offset, shader->info.vs_output_param_offset, > + sizeof(current_offset)); > + > + for (i = 0; i < exports.num; i++) { > + unsigned offset = exports.offset[i]; > + > + for (out = 0; out < info->num_outputs; out++) { > + if (current_offset[out] != offset) > + continue; > + > + LLVMSetOperand(exports.inst[i], 3, > + LLVMConstInt(ctx->i32, > + > V_008DFC_SQ_EXP_PARAM + new_count, 0)); > + shader->info.vs_output_param_offset[out] = > new_count; > + new_count++; > + break; > + } > + } > + shader->info.nr_param_exports = new_count; > + } > +} > + > int si_compile_tgsi_shader(struct si_screen *sscreen, > LLVMTargetMachineRef tm, > struct si_shader *shader, > bool is_monolithic, > struct pipe_debug_callback *debug) > { > struct si_shader_selector *sel = shader->selector; > struct si_shader_context ctx; > struct lp_build_tgsi_context *bld_base; > LLVMModuleRef mod; > @@ -6616,20 +6763,23 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, > * 
conversion fails. */ > if (r600_can_dump_shader(&sscreen->b, sel->info.processor) && > !(sscreen->b.debug_flags & DBG_NO_TGSI)) { > tgsi_dump(sel->tokens, 0); > si_dump_streamout(&sel->so); > } > > si_init_shader_ctx(&ctx, sscreen, shader, tm); > ctx.is_monolithic = is_monolithic; > > + memset(shader->info.vs_output_param_offset, 0xff, > + sizeof(shader->info.vs_output_param_offset)); > + > shader->info.uses_instanceid = sel->info.uses_instanceid; > > bld_base = &ctx.radeon_bld.soa.bld_base; > ctx.radeon_bld.load_system_value = declare_system_value; > > switch (ctx.type) { > case PIPE_SHADER_VERTEX: > ctx.radeon_bld.load_input = declare_input_vs; > if (shader->key.vs.as_ls) > bld_base->emit_epilogue = si_llvm_emit_ls_epilogue; > @@ -6701,20 +6851,24 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, > > /* Dump LLVM IR before any optimization passes */ > if (sscreen->b.debug_flags & DBG_PREOPT_IR && > r600_can_dump_shader(&sscreen->b, ctx.type)) > LLVMDumpModule(mod); > > radeon_llvm_finalize_module( > &ctx.radeon_bld, > r600_extra_shader_checks(&sscreen->b, ctx.type)); > > + /* Post-optimization transformations. */ > + si_eliminate_const_vs_outputs(&ctx); > + > + /* Compile to bytecode. */ > r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm, > mod, debug, ctx.type, "TGSI shader"); > if (r) { > fprintf(stderr, "LLVM failed to compile shader\n"); > goto out; > } > > radeon_llvm_dispose(&ctx.radeon_bld); > > /* Validate SGPR and VGPR usage for compute to detect compiler bugs. 
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h > b/src/gallium/drivers/radeonsi/si_shader.h > index b07210c..6c7a05f 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.h > +++ b/src/gallium/drivers/radeonsi/si_shader.h > @@ -408,20 +408,31 @@ struct si_shader_config { > unsigned spilled_vgprs; > unsigned lds_size; > unsigned spi_ps_input_ena; > unsigned spi_ps_input_addr; > unsigned float_mode; > unsigned scratch_bytes_per_wave; > unsigned rsrc1; > unsigned rsrc2; > }; > > +enum { > + /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */ > + EXP_PARAM_OFFSET_0 = 0, > + EXP_PARAM_OFFSET_31 = 31, > + /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */ > + EXP_PARAM_DEFAULT_VAL_0000 = 64, > + EXP_PARAM_DEFAULT_VAL_0001, > + EXP_PARAM_DEFAULT_VAL_1110, > + EXP_PARAM_DEFAULT_VAL_1111, > +}; > + > /* GCN-specific shader info. */ > struct si_shader_info { > ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS]; > ubyte num_input_sgprs; > ubyte num_input_vgprs; > char face_vgpr_index; > bool uses_instanceid; > ubyte nr_pos_exports; > ubyte nr_param_exports; > }; > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c > b/src/gallium/drivers/radeonsi/si_state_shaders.c > index d339b84..c030ed9 100644 > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > @@ -1562,36 +1562,49 @@ static void si_delete_shader_selector(struct > pipe_context *ctx, void *state) > pipe_mutex_destroy(sel->mutex); > free(sel->tokens); > free(sel); > } > > static unsigned si_get_ps_input_cntl(struct si_context *sctx, > struct si_shader *vs, unsigned name, > unsigned index, unsigned interpolate) > { > struct tgsi_shader_info *vsinfo = &vs->selector->info; > - unsigned j, ps_input_cntl = 0; > + unsigned j, offset, ps_input_cntl = 0; > > if (interpolate == TGSI_INTERPOLATE_CONSTANT || > (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade)) > ps_input_cntl |= S_028644_FLAT_SHADE(1); > > if (name == TGSI_SEMANTIC_PCOORD || > (name == 
TGSI_SEMANTIC_TEXCOORD && > sctx->sprite_coord_enable & (1 << index))) { > ps_input_cntl |= S_028644_PT_SPRITE_TEX(1); > } > > for (j = 0; j < vsinfo->num_outputs; j++) { > if (name == vsinfo->output_semantic_name[j] && > index == vsinfo->output_semantic_index[j]) { > - ps_input_cntl |= > S_028644_OFFSET(vs->info.vs_output_param_offset[j]); > + offset = vs->info.vs_output_param_offset[j]; > + > + if (offset <= EXP_PARAM_OFFSET_31) { > + /* The input is loaded from parameter memory. > */ > + ps_input_cntl |= S_028644_OFFSET(offset); > + } else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) { > + /* The input is a DEFAULT_VAL constant. */ > + assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 && > + offset <= EXP_PARAM_DEFAULT_VAL_1111); > + > + offset -= EXP_PARAM_DEFAULT_VAL_0000; > + ps_input_cntl = S_028644_OFFSET(0x20) | > + S_028644_DEFAULT_VAL(offset); > + } > break; > } > } > > if (name == TGSI_SEMANTIC_PRIMID) > /* PrimID is written after the last output. */ > ps_input_cntl |= > S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]); > else if (j == vsinfo->num_outputs && > !G_028644_PT_SPRITE_TEX(ps_input_cntl)) { > /* No corresponding output found, load defaults into input. > * Don't set any other bits. > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
