The register merging pass after GLSL-to-TGSI translation previously used
simple register usage tracking routines that were unable to work inside
loops (instead, all variables were assumed to be needed throughout the
whole loop). This is obviously suboptimal for complex programs and the
lack of register merging causes translation failures due to running out
of registers.

This patch enables usage tracking inside arbitrarily nested combinations
of loops and conditionals, allowing the merging of registers that are
known not to preserve their value between loop iterations.

For simplicity, an unsigned long is used as a bit mask for recording
register usage in nested loops. This means that beyond 32 levels of
loop nesting (or more if unsigned long is wider) the code falls back to
treating the deeper levels as black boxes.

Signed-off-by: Tomáš Trnka <[email protected]>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 136 +++++++++++++++++++++++------
 1 file changed, 108 insertions(+), 28 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index cac1e0f..8dd4003 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3447,25 +3447,14 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
 int
 glsl_to_tgsi_visitor::get_first_temp_write(int index)
 {
-   int depth = 0; /* loop depth */
-   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
    int i = 0;
    
    foreach_list(node, &this->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
       
       if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
-         return (depth == 0) ? i : loop_start;
-      }
-      
-      if (inst->op == TGSI_OPCODE_BGNLOOP) {
-         if(depth++ == 0)
-            loop_start = i;
-      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
-         if (--depth == 0)
-            loop_start = -1;
+         return i;
       }
-      assert(depth >= 0);
       
       i++;
    }
@@ -3476,33 +3465,124 @@ glsl_to_tgsi_visitor::get_first_temp_write(int index)
 int
 glsl_to_tgsi_visitor::get_last_temp_read(int index)
 {
-   int depth = 0; /* loop depth */
+   int loop_depth = 0; /* current loop depth, corresponding to loop_depth_bit */
+   int cond_depth = 0; /* current conditional depth */
    int last = -1; /* index of last instruction that reads the temporary */
-   unsigned i = 0, j;
-   
+   int write_cond_depth = -1; /* lowest known conditional depth of a write */
+   int i = -1;
+   unsigned j;
+   unsigned long loop_depth_bit, write_loop_mask = 0; /* bitmask of loop depths where the temp was written */
+   int max_loop_depth = 8 * sizeof(loop_depth_bit);
+
    foreach_list(node, &this->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *) node;
+      i++;
+
+      /* treat loops as a kind of conditionals, too, as anything within the
+       * loop might not execute due to early loop exit */
+      if (inst->op == TGSI_OPCODE_IF
+         || inst->op == TGSI_OPCODE_UIF
+         || inst->op == TGSI_OPCODE_BGNLOOP) {
+         cond_depth++;
+      } else if (inst->op == TGSI_OPCODE_ENDIF
+         || inst->op == TGSI_OPCODE_ENDLOOP) {
+         cond_depth--;
+         /* leaving the conditional block where temp was known written
+          * - forget about the write */
+         if (cond_depth < write_cond_depth) {
+            write_cond_depth = -1;
+         }
+         assert(cond_depth >= 0);
+      }
+
       
+      if (inst->op == TGSI_OPCODE_ENDLOOP) {
+         --loop_depth;
+         if (loop_depth > 0) {
+            /* if loop_depth > max_loop_depth, we're not tracking writes anymore
+             * loop_depth_bit == 0 in that case */
+            if (loop_depth <= max_loop_depth) {
+               loop_depth_bit = 1L << (loop_depth - 1);
+               if (loop_depth < max_loop_depth) {
+                  /* forget about writes on any deeper level we've just left */
+                  write_loop_mask &= (loop_depth_bit << 1) - 1;
+               }
+            }
+         } else {
+            /* reached top level, forget everything */
+            loop_depth_bit = 0;
+            write_loop_mask = 0;
+         }
+
+         if (last == -2
+            && (loop_depth == 0
+               || write_loop_mask & loop_depth_bit
+               || loop_depth == (max_loop_depth + 1))) {
+            /* we're leaving a loop level that has a write preceding a read we've
+             * found somewhere deeper (that either had no write on its level or we
+             * were unable to track it due to being below max_loop_depth
+             * - we don't have to protect the temp any longer, anything past this
+             * point will not influence the workings inside the preceding loops */
+            last = i;
+         }
+
+         assert(loop_depth >= 0);
+         continue;
+      } else if (inst->op == TGSI_OPCODE_BGNLOOP) {
+         loop_depth++;
+         if (loop_depth <= max_loop_depth) {
+            loop_depth_bit = 1L << (loop_depth - 1);
+         } else {
+            /* stop tracking writes on this and deeper levels, mask is not long enough */
+            loop_depth_bit = 0;
+         }
+         continue;
+      }
+
+      if (last == -2) {
+         continue;
+      }
+
       for (j=0; j < num_inst_src_regs(inst->op); j++) {
          if (inst->src[j].file == PROGRAM_TEMPORARY && 
              inst->src[j].index == index) {
-            last = (depth == 0) ? i : -2;
+            if (loop_depth == 0
+               || (write_loop_mask & loop_depth_bit
+                  && write_cond_depth <= cond_depth)) {
+               /* this read is either outside of any loop or has a preceding
+                * write at this loop_depth that is guaranteed to execute (not in a
+                * conditional) */
+               last = i;
+            } else {
+               /* this read is in a loop without any write preceding it
+                * it might be relying on the value from previous iteration
+                * - protect the temp during the whole loop (and any enclosing
+                * loops that don't contain a preceding write either) */
+               last = -2;
+            }
          }
       }
+
       for (j=0; j < inst->tex_offset_num_offset; j++) {
-          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
-              inst->tex_offsets[j].index == index)
-              last = (depth == 0) ? i : -2;
+         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
+            inst->tex_offsets[j].index == index) {
+            if (loop_depth == 0
+               || (write_loop_mask & loop_depth_bit
+                  && write_cond_depth <= cond_depth)) {
+               last = i;
+            } else {
+               last = -2;
+            }
+         }
+      }
+
+      if (last != -2 &&
+          inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
+         write_loop_mask |= loop_depth_bit;
+         if (write_cond_depth == -1) {
+            write_cond_depth = cond_depth;
+         }
       }
-      
-      if (inst->op == TGSI_OPCODE_BGNLOOP)
-         depth++;
-      else if (inst->op == TGSI_OPCODE_ENDLOOP)
-         if (--depth == 0 && last == -2)
-            last = i;
-      assert(depth >= 0);
-      
-      i++;
    }
    
    assert(last >= -1);
-- 
1.9.3

Attachment: smime.p7s
Description: S/MIME cryptographic signature

_______________________________________________
mesa-dev mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to