https://gcc.gnu.org/g:df9635322ab8f1297f3774d38a59e13cee4ae79b

commit r16-3202-gdf9635322ab8f1297f3774d38a59e13cee4ae79b
Author: Andrew Pinski <andrew.pin...@oss.qualcomm.com>
Date:   Tue Aug 12 23:31:15 2025 -0700

    forwprop: Limit alias walk in some cases [PR121474]
    
    https://gcc.gnu.org/pipermail/gcc-patches/2025-August/692091.html
    pointed out:
    '''
    Oh, as we now do alias walks in forwprop maybe we should make this
    conditional and do
    this not for all pass instances, since it makes forwprop possibly a lot 
slower?
    '''
    
    This patch limits the walk in a few different ways.
    First, a full walk is only allowed in the first 2 forwprop passes (the one
    before inlining and the one after inlining).  The other 2 forwprop passes are
    less likely to find any extra zero prop, so they are limited to doing no walk.
    
    There is an exception to the rule, though: clobbers can still be skipped over,
    since they will not make the walk take long, and when looking at benchmarks
    they were the only place where forwprop3/4 would produce a zero prop.
    
    The other change is to allow a full walk only if flag_expensive_optimizations
    is true.  This limits the walk for -O1, since flag_expensive_optimizations is
    turned on at -O2+.
    
    Bootstrapped and tested on x86_64-linux-gnu.
    
            PR tree-optimization/121474
    gcc/ChangeLog:
    
            * passes.def: Update forwprop1/2 to have full_walk be true.
            * tree-ssa-forwprop.cc (optimize_aggr_zeroprop): Add new argument
            full_walk.  Take full_walk into account when setting the limit, and
            allow clobbers to be walked past even after the limit is hit.
            (simplify_builtin_call): Add new argument, full_walk.
            Update call to optimize_aggr_zeroprop.
            (pass_forwprop): Add m_full_walk field.
            (pass_forwprop::set_pass_param): Update for m_full_walk.
            (pass_forwprop::execute): Update call to simplify_builtin_call
            and optimize_aggr_zeroprop.
    
    Signed-off-by: Andrew Pinski <andrew.pin...@oss.qualcomm.com>

Diff:
---
 gcc/passes.def           |  8 ++++----
 gcc/tree-ssa-forwprop.cc | 42 ++++++++++++++++++++++++++++++------------
 2 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index d528a0477d9a..68ce53baa0f1 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -83,7 +83,7 @@ along with GCC; see the file COPYING3.  If not see
          NEXT_PASS (pass_ccp, false /* nonzero_p */);
          /* After CCP we rewrite no longer addressed locals into SSA
             form if possible.  */
-         NEXT_PASS (pass_forwprop, /*last=*/false);
+         NEXT_PASS (pass_forwprop, /*full_walk=*/true);
           NEXT_PASS (pass_early_thread_jumps, /*first=*/true);
          NEXT_PASS (pass_sra_early);
          /* pass_build_ealias is a dummy pass that ensures that we
@@ -221,7 +221,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_complete_unrolli);
       NEXT_PASS (pass_backprop);
       NEXT_PASS (pass_phiprop);
-      NEXT_PASS (pass_forwprop, /*last=*/false);
+      NEXT_PASS (pass_forwprop, /*full_walk=*/true);
       /* pass_build_alias is a dummy pass that ensures that we
         execute TODO_rebuild_alias at this point.  */
       NEXT_PASS (pass_build_alias);
@@ -261,7 +261,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_isolate_erroneous_paths);
       NEXT_PASS (pass_reassoc, true /* early_p */);
       NEXT_PASS (pass_dce);
-      NEXT_PASS (pass_forwprop, /*last=*/false);
+      NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_phiopt, false /* early_p */);
       NEXT_PASS (pass_ccp, true /* nonzero_p */);
       /* After CCP we rewrite no longer addressed locals into SSA
@@ -363,7 +363,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_dce, true /* update_address_taken_p */, true /* 
remove_unused_locals */);
       /* After late DCE we rewrite no longer addressed locals into SSA
         form if possible.  */
-      NEXT_PASS (pass_forwprop, /*last=*/true);
+      NEXT_PASS (pass_forwprop, /*full_walk=*/false, /*last=*/true);
       NEXT_PASS (pass_sink_code, true /* unsplit edges */);
       NEXT_PASS (pass_phiopt, false /* early_p */);
       NEXT_PASS (pass_fold_builtins);
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index ec4fbeb9e540..ebf625f9c7ea 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -1299,7 +1299,7 @@ optimize_aggr_zeroprop_1 (gimple *defstmt, gimple *stmt,
    and/or memcpy (&b, &a, sizeof (a)); instead of b = a;  */
 
 static bool
-optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
+optimize_aggr_zeroprop (gimple_stmt_iterator *gsip, bool full_walk)
 {
   ao_ref read;
   gimple *stmt = gsi_stmt (*gsip);
@@ -1383,7 +1383,7 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
 
   /* Setup the worklist.  */
   auto_vec<std::pair<tree, unsigned>> worklist;
-  unsigned limit = param_sccvn_max_alias_queries_per_access;
+  unsigned limit = full_walk ? param_sccvn_max_alias_queries_per_access : 0;
   worklist.safe_push (std::make_pair (gimple_vdef (stmt), limit));
 
   while (!worklist.is_empty ())
@@ -1400,13 +1400,17 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
            continue;
 
          /* If this statement does not clobber add the vdef stmt to the
-            worklist.  */
-         if (limit != 0
+            worklist.
+            After hitting the limit, allow clobbers to able to pass through.  
*/
+         if ((limit != 0 || gimple_clobber_p (use_stmt))
              && gimple_vdef (use_stmt)
              && !stmt_may_clobber_ref_p_1 (use_stmt, &read,
                                           /* tbaa_p = */ can_use_tbba))
-           worklist.safe_push (std::make_pair (gimple_vdef (use_stmt),
-                                               limit - 1));
+             {
+               unsigned new_limit = limit == 0 ? 0 : limit - 1;
+               worklist.safe_push (std::make_pair (gimple_vdef (use_stmt),
+                                                   new_limit));
+             }
 
          if (optimize_aggr_zeroprop_1 (stmt, use_stmt, dest_base, offset,
                                         val, wi::to_poly_offset (len)))
@@ -1591,7 +1595,7 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip)
    to __atomic_fetch_op (p, x, y) when possible (also __sync).  */
 
 static bool
-simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
+simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2, bool 
full_walk)
 {
   gimple *stmt1, *stmt2 = gsi_stmt (*gsi_p);
   enum built_in_function other_atomic = END_BUILTINS;
@@ -1670,7 +1674,7 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree 
callee2)
        {
          /* Try to prop the zeroing/value of the memset to memcpy
             if the dest is an address and the value is a constant. */
-         if (optimize_aggr_zeroprop (gsi_p))
+         if (optimize_aggr_zeroprop (gsi_p, full_walk))
            return true;
        }
       if (gimple_call_num_args (stmt2) != 3
@@ -4460,8 +4464,17 @@ public:
   opt_pass * clone () final override { return new pass_forwprop (m_ctxt); }
   void set_pass_param (unsigned int n, bool param) final override
     {
-      gcc_assert (n == 0);
-      last_p = param;
+      switch (n)
+       {
+         case 0:
+           m_full_walk = param;
+           break;
+         case 1:
+           last_p = param;
+           break;
+         default:
+         gcc_unreachable();
+       }
     }
   bool gate (function *) final override { return flag_tree_forwprop; }
   unsigned int execute (function *) final override;
@@ -4469,12 +4482,17 @@ public:
  private:
   /* Determines whether the pass instance should set PROP_last_full_fold.  */
   bool last_p;
+
+  /* True if the aggregate props are doing a full walk or not.  */
+  bool m_full_walk = false;
 }; // class pass_forwprop
 
 unsigned int
 pass_forwprop::execute (function *fun)
 {
   unsigned int todoflags = 0;
+  /* Handle a full walk only when expensive optimizations are on.  */
+  bool full_walk = m_full_walk && flag_expensive_optimizations;
 
   cfg_changed = false;
   if (last_p)
@@ -4991,7 +5009,7 @@ pass_forwprop::execute (function *fun)
                  {
                    tree rhs1 = gimple_assign_rhs1 (stmt);
                    enum tree_code code = gimple_assign_rhs_code (stmt);
-                   if (gimple_store_p (stmt) && optimize_aggr_zeroprop (&gsi))
+                   if (gimple_store_p (stmt) && optimize_aggr_zeroprop (&gsi, 
full_walk))
                      {
                        changed = true;
                        break;
@@ -5051,7 +5069,7 @@ pass_forwprop::execute (function *fun)
                    tree callee = gimple_call_fndecl (stmt);
                    if (callee != NULL_TREE
                        && fndecl_built_in_p (callee, BUILT_IN_NORMAL))
-                     changed |= simplify_builtin_call (&gsi, callee);
+                     changed |= simplify_builtin_call (&gsi, callee, 
full_walk);
                    break;
                  }

Reply via email to