Drive-by comment: At some point, maybe we just want to replace all these booleans with a function pointer that takes an instruction and returns a bool. That way the driver can define "expensive". Do with that what you will.

On October 9, 2018 11:50:45 Ian Romanick <i...@freedesktop.org> wrote:

On 10/08/2018 01:34 PM, Thomas Helland wrote:
Den tor. 30. aug. 2018 kl. 07:37 skrev Ian Romanick <i...@freedesktop.org>:

From: Ian Romanick <ian.d.roman...@intel.com>

On some GPUs, especially older Intel GPUs, some math instructions are
very expensive.  On those architectures, don't reduce flow control to a
csel if one of the branches contains one of these expensive math
instructions.

This prevents a bunch of cycle count regressions on pre-Gen6 platforms
with a later patch (intel/compiler: More peephole select for pre-Gen6).

Signed-off-by: Ian Romanick <ian.d.roman...@intel.com>
---
 src/amd/vulkan/radv_shader.c                 |  2 +-
 src/broadcom/compiler/nir_to_vir.c           |  2 +-
 src/compiler/nir/nir.h                       |  2 +-
 src/compiler/nir/nir_opt_peephole_select.c   | 46 +++++++++++++++++++++++-----
 src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  2 +-
 src/gallium/drivers/radeonsi/si_shader_nir.c |  2 +-
 src/gallium/drivers/vc4/vc4_program.c        |  2 +-
 src/intel/compiler/brw_nir.c                 |  4 +--
 src/mesa/state_tracker/st_glsl_to_nir.cpp    |  2 +-
 9 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 632512db09b..c8d502a9e3a 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
                 NIR_PASS(progress, shader, nir_opt_if);
                 NIR_PASS(progress, shader, nir_opt_dead_cf);
                 NIR_PASS(progress, shader, nir_opt_cse);
-                NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true);
+ NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
                 NIR_PASS(progress, shader, nir_opt_algebraic);
                 NIR_PASS(progress, shader, nir_opt_constant_folding);
                 NIR_PASS(progress, shader, nir_opt_undef);
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 0d23cea4d5b..ec0ff4b907a 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1210,7 +1210,7 @@ v3d_optimize_nir(struct nir_shader *s)
                 NIR_PASS(progress, s, nir_opt_dce);
                 NIR_PASS(progress, s, nir_opt_dead_cf);
                 NIR_PASS(progress, s, nir_opt_cse);
-                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
+                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
                 NIR_PASS(progress, s, nir_opt_algebraic);
                 NIR_PASS(progress, s, nir_opt_constant_folding);
                 NIR_PASS(progress, s, nir_opt_undef);
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 67fa46d5557..feb69be6b59 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3003,7 +3003,7 @@ bool nir_opt_move_comparisons(nir_shader *shader);
 bool nir_opt_move_load_ubo(nir_shader *shader);

 bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
-                             bool indirect_load_ok);
+                             bool indirect_load_ok, bool expensive_alu_ok);

 bool nir_opt_remove_phis_impl(nir_function_impl *impl);
 bool nir_opt_remove_phis(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c
index 6808d3eda6c..09b55f3739e 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -59,7 +59,8 @@

 static bool
 block_check_for_allowed_instrs(nir_block *block, unsigned *count,
-                               bool alu_ok, bool indirect_load_ok)
+                               bool alu_ok, bool indirect_load_ok,
+                               bool expensive_alu_ok)
 {
    nir_foreach_instr(instr, block) {
       switch (instr->type) {
@@ -117,6 +118,25 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,
          case nir_op_vec3:
          case nir_op_vec4:
             break;
+
+         case nir_op_fcos:
+         case nir_op_fdiv:
+         case nir_op_fexp2:
+         case nir_op_flog2:
+         case nir_op_fmod:
+         case nir_op_fpow:
+         case nir_op_frcp:
+         case nir_op_frem:
+         case nir_op_frsq:
+         case nir_op_fsin:
+         case nir_op_idiv:
+         case nir_op_irem:
+         case nir_op_udiv:
+            if (!alu_ok || !expensive_alu_ok)
+               return false;
+
+            break;
+
          default:
             if (!alu_ok) {
                /* It must be a move-like operation. */
@@ -160,7 +180,8 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,

 static bool
 nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
-                              unsigned limit, bool indirect_load_ok)
+                              unsigned limit, bool indirect_load_ok,
+                              bool expensive_alu_ok)
 {
    if (nir_cf_node_is_first(&block->cf_node))
       return false;
@@ -180,10 +201,17 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,

    /* ... and those blocks must only contain "allowed" instructions. */
    unsigned count = 0;
+#if 1
    if (!block_check_for_allowed_instrs(then_block, &count, limit != 0,
-                                       indirect_load_ok) ||
+                                       indirect_load_ok, expensive_alu_ok) ||
        !block_check_for_allowed_instrs(else_block, &count, limit != 0,
-                                       indirect_load_ok))
+                                       indirect_load_ok, expensive_alu_ok))
+#else
+   if (!block_check_for_allowed_instrs(then_block, &count, shader->info.stage,
+ limit != 0, indirect_load_ok, expensive_alu_ok) ||
+       !block_check_for_allowed_instrs(else_block, &count, shader->info.stage,
+ limit != 0, indirect_load_ok, expensive_alu_ok))
+#endif

Leftover testing stuff?

Yes. :)  I was experimenting with doing things differently for the
scalar and vector units on older Intel GPUs.

I like the idea of hiding expensive instructions in a branch.
However, I'm wondering if it might be an idea to let drivers
provide a callback with what instructions they want to allow?
If no other driver plan on doing something like this then I
guess this is OK. I would expect the "expensive" instructions
to be mostly the same subset on most architectures too.
Might want to get some input from others, but with the debug
stuff above removed this is.

There are a few things that could be done with this pass or a related
pass.  Before going to far down the expensive instruction path, I'd want
to experiment with a pass that could rearrange things like:

        if (condition)
                x = sin(a);
        else
                x = sin(b);

I'd also like experiment with taking into consideration whether or not
'condition' is uniform or not.  Some architectures may want to make
different flattening choices for uniform vs. non-uniform.

Unfortunately, shader-db is not a good tool for measuring the effects of
this.

Reviewed-by: Thomas Helland<thomashellan...@gmail.com>

       return false;

    if (count > limit)
@@ -250,14 +278,15 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,

 static bool
 nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
-                             bool indirect_load_ok)
+                             bool indirect_load_ok, bool expensive_alu_ok)
 {
    nir_shader *shader = impl->function->shader;
    bool progress = false;

    nir_foreach_block_safe(block, impl) {
       progress |= nir_opt_peephole_select_block(block, shader, limit,
-                                                indirect_load_ok);
+                                                indirect_load_ok,
+                                                expensive_alu_ok);
    }

    if (progress)
@@ -268,14 +297,15 @@ nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,

 bool
 nir_opt_peephole_select(nir_shader *shader, unsigned limit,
-                        bool indirect_load_ok)
+                        bool indirect_load_ok, bool expensive_alu_ok)
 {
    bool progress = false;

    nir_foreach_function(function, shader) {
       if (function->impl)
          progress |= nir_opt_peephole_select_impl(function->impl, limit,
-                                                  indirect_load_ok);
+                                                  indirect_load_ok,
+                                                  expensive_alu_ok);
    }

    return progress;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 5f66ef5d170..bb3bb73644a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -107,7 +107,7 @@ ir3_optimize_loop(nir_shader *s)
                        progress |= OPT(s, nir_opt_gcm, true);
                else if (gcm == 2)
                        progress |= OPT(s, nir_opt_gcm, false);
-               progress |= OPT(s, nir_opt_peephole_select, 16, true);
+               progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
                progress |= OPT(s, nir_opt_intrinsics);
                progress |= OPT(s, nir_opt_algebraic);
                progress |= OPT(s, nir_opt_constant_folding);
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 9a7a8264283..cb1e208be8f 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -813,7 +813,7 @@ si_lower_nir(struct si_shader_selector* sel)
                NIR_PASS(progress, sel->nir, nir_opt_if);
                NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
                NIR_PASS(progress, sel->nir, nir_opt_cse);
-               NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true);
+ NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true, true);

                /* Needed for algebraic lowering */
                NIR_PASS(progress, sel->nir, nir_opt_algebraic);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 39f7db9148c..fa2a32922f4 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1580,7 +1580,7 @@ vc4_optimize_nir(struct nir_shader *s)
                 NIR_PASS(progress, s, nir_opt_dce);
                 NIR_PASS(progress, s, nir_opt_dead_cf);
                 NIR_PASS(progress, s, nir_opt_cse);
-                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
+                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
                 NIR_PASS(progress, s, nir_opt_algebraic);
                 NIR_PASS(progress, s, nir_opt_constant_folding);
                 NIR_PASS(progress, s, nir_opt_undef);
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 1d65107a93d..04d399fd0b4 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -587,9 +587,9 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
       const bool is_vec4_tessellation = !is_scalar &&
          (nir->info.stage == MESA_SHADER_TESS_CTRL ||
           nir->info.stage == MESA_SHADER_TESS_EVAL);
-      OPT(nir_opt_peephole_select, 0, is_vec4_tessellation);
+      OPT(nir_opt_peephole_select, 0, is_vec4_tessellation, false);
       if (compiler->devinfo->gen >= 6)
-         OPT(nir_opt_peephole_select, 1, is_vec4_tessellation);
+         OPT(nir_opt_peephole_select, 1, is_vec4_tessellation, true);

       OPT(nir_opt_intrinsics);
       OPT(nir_opt_algebraic);
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index bfcef3a293f..098a1e98833 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -344,7 +344,7 @@ st_nir_opts(nir_shader *nir, bool scalar)
       NIR_PASS(progress, nir, nir_opt_if);
       NIR_PASS(progress, nir, nir_opt_dead_cf);
       NIR_PASS(progress, nir, nir_opt_cse);
-      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true);
+      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

       NIR_PASS(progress, nir, nir_opt_algebraic);
       NIR_PASS(progress, nir, nir_opt_constant_folding);
--
2.14.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to