https://gcc.gnu.org/g:987a7b58bf2a00936ee2f418ace7c596450a17cb

commit r14-10773-g987a7b58bf2a00936ee2f418ace7c596450a17cb
Author: Richard Biener <rguent...@suse.de>
Date:   Tue Aug 6 14:56:26 2024 +0200

    tree-optimization/116166 - forward jump-threading going wild
    
    Currently the forward threader isn't limited as to the search space
    it explores and with it now using path-ranger for simplifying
    conditions it runs into it became pretty slow for degenerate cases
    like compiling insn-emit.cc for RISC-V esp. when compiling for
    a host with LOGICAL_OP_NON_SHORT_CIRCUIT disabled.
    
    The following makes the forward threader honor the search space
    limit I introduced for the backward threader.  This reduces
    compile-time from minutes to seconds for the testcase in PR116166.
    
    Note this wasn't necessary before we had ranger but with ranger
    the work we do is quadatic in the length of the threading path
    we build up (the same is true for the backwards threader).
    
            PR tree-optimization/116166
            * tree-ssa-threadedge.h (jump_threader::thread_around_empty_blocks):
            Add limit parameter.
            (jump_threader::thread_through_normal_block): Likewise.
            * tree-ssa-threadedge.cc 
(jump_threader::thread_around_empty_blocks):
            Honor and decrement limit parameter.
            (jump_threader::thread_through_normal_block): Likewise.
            (jump_threader::thread_across_edge): Initialize limit from
            param_max_jump_thread_paths and pass it down to workers.
    
    (cherry picked from commit 2cf89ae83225f932b226cd57ef2d083a59bcf8a3)

Diff:
---
 gcc/tree-ssa-threadedge.cc | 30 ++++++++++++++++++++++--------
 gcc/tree-ssa-threadedge.h  |  4 ++--
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-ssa-threadedge.cc b/gcc/tree-ssa-threadedge.cc
index 2bae82a074bf..50d49092f1c0 100644
--- a/gcc/tree-ssa-threadedge.cc
+++ b/gcc/tree-ssa-threadedge.cc
@@ -786,13 +786,17 @@ propagate_threaded_block_debug_into (basic_block dest, 
basic_block src)
 bool
 jump_threader::thread_around_empty_blocks (vec<jump_thread_edge *> *path,
                                           edge taken_edge,
-                                          bitmap visited)
+                                          bitmap visited, unsigned &limit)
 {
   basic_block bb = taken_edge->dest;
   gimple_stmt_iterator gsi;
   gimple *stmt;
   tree cond;
 
+  if (limit == 0)
+    return false;
+  --limit;
+
   /* The key property of these blocks is that they need not be duplicated
      when threading.  Thus they cannot have visible side effects such
      as PHI nodes.  */
@@ -830,7 +834,8 @@ jump_threader::thread_around_empty_blocks 
(vec<jump_thread_edge *> *path,
              m_registry->push_edge (path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
              m_state->append_path (taken_edge->dest);
              bitmap_set_bit (visited, taken_edge->dest->index);
-             return thread_around_empty_blocks (path, taken_edge, visited);
+             return thread_around_empty_blocks (path, taken_edge, visited,
+                                                limit);
            }
        }
 
@@ -872,7 +877,7 @@ jump_threader::thread_around_empty_blocks 
(vec<jump_thread_edge *> *path,
       m_registry->push_edge (path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
       m_state->append_path (taken_edge->dest);
 
-      thread_around_empty_blocks (path, taken_edge, visited);
+      thread_around_empty_blocks (path, taken_edge, visited, limit);
       return true;
     }
 
@@ -899,8 +904,13 @@ jump_threader::thread_around_empty_blocks 
(vec<jump_thread_edge *> *path,
 
 int
 jump_threader::thread_through_normal_block (vec<jump_thread_edge *> *path,
-                                           edge e, bitmap visited)
+                                           edge e, bitmap visited,
+                                           unsigned &limit)
 {
+  if (limit == 0)
+    return 0;
+  limit--;
+
   m_state->register_equivs_edge (e);
 
   /* PHIs create temporary equivalences.
@@ -989,7 +999,7 @@ jump_threader::thread_through_normal_block 
(vec<jump_thread_edge *> *path,
             visited.  This may be overly conservative.  */
          bitmap_set_bit (visited, dest->index);
          bitmap_set_bit (visited, e->dest->index);
-         thread_around_empty_blocks (path, taken_edge, visited);
+         thread_around_empty_blocks (path, taken_edge, visited, limit);
          return 1;
        }
     }
@@ -1075,9 +1085,12 @@ jump_threader::thread_across_edge (edge e)
   bitmap_set_bit (visited, e->src->index);
   bitmap_set_bit (visited, e->dest->index);
 
+  /* Limit search space.  */
+  unsigned limit = param_max_jump_thread_paths;
+
   int threaded = 0;
   if ((e->flags & EDGE_DFS_BACK) == 0)
-    threaded = thread_through_normal_block (path, e, visited);
+    threaded = thread_through_normal_block (path, e, visited, limit);
 
   if (threaded > 0)
     {
@@ -1148,11 +1161,12 @@ jump_threader::thread_across_edge (edge e)
        m_registry->push_edge (path, e, EDGE_START_JUMP_THREAD);
        m_registry->push_edge (path, taken_edge, EDGE_COPY_SRC_JOINER_BLOCK);
 
-       found = thread_around_empty_blocks (path, taken_edge, visited);
+       found = thread_around_empty_blocks (path, taken_edge, visited, limit);
 
        if (!found)
          found = thread_through_normal_block (path,
-                                              path->last ()->e, visited) > 0;
+                                              path->last ()->e, visited,
+                                              limit) > 0;
 
        /* If we were able to thread through a successor of E->dest, then
           record the jump threading opportunity.  */
diff --git a/gcc/tree-ssa-threadedge.h b/gcc/tree-ssa-threadedge.h
index 9f6cbfe93307..245b3506a550 100644
--- a/gcc/tree-ssa-threadedge.h
+++ b/gcc/tree-ssa-threadedge.h
@@ -101,9 +101,9 @@ private:
                                          unsigned limit);
 
   bool thread_around_empty_blocks (vec<class jump_thread_edge *> *path,
-                                  edge, bitmap visited);
+                                  edge, bitmap visited, unsigned &limit);
   int thread_through_normal_block (vec<jump_thread_edge *> *path,
-                                  edge, bitmap visited);
+                                  edge, bitmap visited, unsigned &limit);
   void thread_across_edge (edge);
   bool record_temporary_equivalences_from_phis (edge);
   gimple *record_temporary_equivalences_from_stmts_at_dest (edge);

Reply via email to