On October 9, 2018 11:50:45 Ian Romanick <i...@freedesktop.org> wrote:
On 10/08/2018 01:34 PM, Thomas Helland wrote:Den tor. 30. aug. 2018 kl. 07:37 skrev Ian Romanick <i...@freedesktop.org>:From: Ian Romanick <ian.d.roman...@intel.com> On some GPUs, especially older Intel GPUs, some math instructions are very expensive. On those architectures, don't reduce flow control to a csel if one of the branches contains one of these expensive math instructions. This prevents a bunch of cycle count regressions on pre-Gen6 platforms with a later patch (intel/compiler: More peephole select for pre-Gen6). Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> --- src/amd/vulkan/radv_shader.c | 2 +- src/broadcom/compiler/nir_to_vir.c | 2 +- src/compiler/nir/nir.h | 2 +- src/compiler/nir/nir_opt_peephole_select.c | 46 +++++++++++++++++++++++----- src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 +- src/gallium/drivers/radeonsi/si_shader_nir.c | 2 +- src/gallium/drivers/vc4/vc4_program.c | 2 +- src/intel/compiler/brw_nir.c | 4 +-- src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +- 9 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 632512db09b..c8d502a9e3a 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c@@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)NIR_PASS(progress, shader, nir_opt_if); NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); - NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true);+ NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); NIR_PASS(progress, shader, nir_opt_undef);diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.cindex 0d23cea4d5b..ec0ff4b907a 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1210,7 +1210,7 @@ v3d_optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); - NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); + NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 67fa46d5557..feb69be6b59 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3003,7 +3003,7 @@ bool nir_opt_move_comparisons(nir_shader *shader); bool nir_opt_move_load_ubo(nir_shader *shader); bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, - bool indirect_load_ok); + bool indirect_load_ok, bool expensive_alu_ok); bool nir_opt_remove_phis_impl(nir_function_impl *impl); bool nir_opt_remove_phis(nir_shader *shader);diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.cindex 6808d3eda6c..09b55f3739e 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -59,7 +59,8 @@ static bool block_check_for_allowed_instrs(nir_block *block, unsigned *count, - bool alu_ok, bool indirect_load_ok) + bool alu_ok, bool indirect_load_ok, + bool expensive_alu_ok) { nir_foreach_instr(instr, block) { switch (instr->type) {@@ -117,6 +118,25 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,case nir_op_vec3: case nir_op_vec4: break; + + case nir_op_fcos: + case nir_op_fdiv: + case nir_op_fexp2: + case nir_op_flog2: + case nir_op_fmod: + case nir_op_fpow: + case nir_op_frcp: + case nir_op_frem: + case nir_op_frsq: + case nir_op_fsin: + case nir_op_idiv: + case nir_op_irem: + case nir_op_udiv: + if (!alu_ok || !expensive_alu_ok) + return false; + + break; + default: if (!alu_ok) { /* It must be a move-like operation. */@@ -160,7 +180,8 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,static bool nir_opt_peephole_select_block(nir_block *block, nir_shader *shader, - unsigned limit, bool indirect_load_ok) + unsigned limit, bool indirect_load_ok, + bool expensive_alu_ok) { if (nir_cf_node_is_first(&block->cf_node)) return false;@@ -180,10 +201,17 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,/* ... and those blocks must only contain "allowed" instructions. */ unsigned count = 0; +#if 1 if (!block_check_for_allowed_instrs(then_block, &count, limit != 0, - indirect_load_ok) || + indirect_load_ok, expensive_alu_ok) || !block_check_for_allowed_instrs(else_block, &count, limit != 0, - indirect_load_ok)) + indirect_load_ok, expensive_alu_ok)) +#else + if (!block_check_for_allowed_instrs(then_block, &count, shader->info.stage,+ limit != 0, indirect_load_ok, expensive_alu_ok) ||+ !block_check_for_allowed_instrs(else_block, &count, shader->info.stage,+ limit != 0, indirect_load_ok, expensive_alu_ok))+#endifLeftover testing stuff?Yes. :) I was experimenting with doing things differently for the scalar and vector units on older Intel GPUs.I like the idea of hiding expensive instructions in a branch. However, I'm wondering if it might be an idea to let drivers provide a callback with what instructions they want to allow? If no other driver plan on doing something like this then I guess this is OK. I would expect the "expensive" instructions to be mostly the same subset on most architectures too. Might want to get some input from others, but with the debug stuff above removed this is.There are a few things that could be done with this pass or a related pass. Before going to far down the expensive instruction path, I'd want to experiment with a pass that could rearrange things like: if (condition) x = sin(a); else x = sin(b); I'd also like experiment with taking into consideration whether or not 'condition' is uniform or not. Some architectures may want to make different flattening choices for uniform vs. non-uniform. Unfortunately, shader-db is not a good tool for measuring the effects of this.Reviewed-by: Thomas Helland<thomashellan...@gmail.com>return false; if (count > limit)@@ -250,14 +278,15 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,static bool nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit, - bool indirect_load_ok) + bool indirect_load_ok, bool expensive_alu_ok) { nir_shader *shader = impl->function->shader; bool progress = false; nir_foreach_block_safe(block, impl) { progress |= nir_opt_peephole_select_block(block, shader, limit, - indirect_load_ok); + indirect_load_ok, + expensive_alu_ok); } if (progress)@@ -268,14 +297,15 @@ nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, - bool indirect_load_ok) + bool indirect_load_ok, bool expensive_alu_ok) { bool progress = false; nir_foreach_function(function, shader) { if (function->impl) progress |= nir_opt_peephole_select_impl(function->impl, limit, - indirect_load_ok); + indirect_load_ok, + expensive_alu_ok); } return progress;diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.cindex 5f66ef5d170..bb3bb73644a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c @@ -107,7 +107,7 @@ ir3_optimize_loop(nir_shader *s) progress |= OPT(s, nir_opt_gcm, true); else if (gcm == 2) progress |= OPT(s, nir_opt_gcm, false); - progress |= OPT(s, nir_opt_peephole_select, 16, true); + progress |= OPT(s, nir_opt_peephole_select, 16, true, true); progress |= OPT(s, nir_opt_intrinsics); progress |= OPT(s, nir_opt_algebraic); progress |= OPT(s, nir_opt_constant_folding);diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.cindex 9a7a8264283..cb1e208be8f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -813,7 +813,7 @@ si_lower_nir(struct si_shader_selector* sel) NIR_PASS(progress, sel->nir, nir_opt_if); NIR_PASS(progress, sel->nir, nir_opt_dead_cf); NIR_PASS(progress, sel->nir, nir_opt_cse); - NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true);+ NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true, true);/* Needed for algebraic lowering */ NIR_PASS(progress, sel->nir, nir_opt_algebraic);diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.cindex 39f7db9148c..fa2a32922f4 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1580,7 +1580,7 @@ vc4_optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); - NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); + NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 1d65107a93d..04d399fd0b4 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c@@ -587,9 +587,9 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,const bool is_vec4_tessellation = !is_scalar && (nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL); - OPT(nir_opt_peephole_select, 0, is_vec4_tessellation); + OPT(nir_opt_peephole_select, 0, is_vec4_tessellation, false); if (compiler->devinfo->gen >= 6) - OPT(nir_opt_peephole_select, 1, is_vec4_tessellation); + OPT(nir_opt_peephole_select, 1, is_vec4_tessellation, true); OPT(nir_opt_intrinsics); OPT(nir_opt_algebraic);diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cppindex bfcef3a293f..098a1e98833 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -344,7 +344,7 @@ st_nir_opts(nir_shader *nir, bool scalar) NIR_PASS(progress, nir, nir_opt_if); NIR_PASS(progress, nir, nir_opt_dead_cf); NIR_PASS(progress, nir, nir_opt_cse); - NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true); + NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, nir, nir_opt_algebraic); NIR_PASS(progress, nir, nir_opt_constant_folding); -- 2.14.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev