> Am 13.08.2025 um 19:04 schrieb Andrew Pinski <andrew.pin...@oss.qualcomm.com>:
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2025-August/692091.html
> pointed out:
> '''
> Oh, as we now do alias walks in forwprop maybe we should make this
> conditional and do
> this not for all pass instances, since it makes forwprop possibly a lot 
> slower?
> '''
> 
> This patch limits the walk in a few different ways.
> First, only allow a full walk in the first 2 forwprop passes (the one
> before inlining and the one after inlining).  The other 2 forwprop
> passes are less likely to find any extra zero prop, so limit them so
> there is no walk.
> 
> There is an exception to the rule though: clobbers may still be skipped
> over, since those will not take a long time to walk and, from looking at
> benchmarks, they are the only place where forwprop3/4 would find a
> zero prop.
> 
> The other thing is to allow a full walk only if
> flag_expensive_optimizations is true.  This limits the walk for -O1,
> since flag_expensive_optimizations is turned on at -O2+.
> 
> Bootstrapped and tested on x86_64-linux-gnu.

Ok

Thanks,
Richard 

> gcc/ChangeLog:
> 
>    * passes.def: Update forwprop1/2 to set full_walk to true.
>    * tree-ssa-forwprop.cc (optimize_aggr_zeroprop): Add new argument
>    full_walk.  Take full_walk into account; clobbers can still be
>    walked through even after the limit has been reached.
>    (simplify_builtin_call): Add new argument, full_walk.
>    Update call to optimize_aggr_zeroprop.
>    (pass_forwprop): Add m_full_walk field.
>    (pass_forwprop::set_pass_param): Update for m_full_walk.
>    (pass_forwprop::execute): Update call to simplify_builtin_call
>    and optimize_aggr_zeroprop.
> 
> Signed-off-by: Andrew Pinski <andrew.pin...@oss.qualcomm.com>
> ---
> gcc/passes.def           |  8 ++++----
> gcc/tree-ssa-forwprop.cc | 42 ++++++++++++++++++++++++++++------------
> 2 files changed, 34 insertions(+), 16 deletions(-)
> 
> diff --git a/gcc/passes.def b/gcc/passes.def
> index d528a0477d9..68ce53baa0f 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -83,7 +83,7 @@ along with GCC; see the file COPYING3.  If not see
>      NEXT_PASS (pass_ccp, false /* nonzero_p */);
>      /* After CCP we rewrite no longer addressed locals into SSA
>         form if possible.  */
> -      NEXT_PASS (pass_forwprop, /*last=*/false);
> +      NEXT_PASS (pass_forwprop, /*full_walk=*/true);
>           NEXT_PASS (pass_early_thread_jumps, /*first=*/true);
>      NEXT_PASS (pass_sra_early);
>      /* pass_build_ealias is a dummy pass that ensures that we
> @@ -221,7 +221,7 @@ along with GCC; see the file COPYING3.  If not see
>       NEXT_PASS (pass_complete_unrolli);
>       NEXT_PASS (pass_backprop);
>       NEXT_PASS (pass_phiprop);
> -      NEXT_PASS (pass_forwprop, /*last=*/false);
> +      NEXT_PASS (pass_forwprop, /*full_walk=*/true);
>       /* pass_build_alias is a dummy pass that ensures that we
>     execute TODO_rebuild_alias at this point.  */
>       NEXT_PASS (pass_build_alias);
> @@ -261,7 +261,7 @@ along with GCC; see the file COPYING3.  If not see
>       NEXT_PASS (pass_isolate_erroneous_paths);
>       NEXT_PASS (pass_reassoc, true /* early_p */);
>       NEXT_PASS (pass_dce);
> -      NEXT_PASS (pass_forwprop, /*last=*/false);
> +      NEXT_PASS (pass_forwprop);
>       NEXT_PASS (pass_phiopt, false /* early_p */);
>       NEXT_PASS (pass_ccp, true /* nonzero_p */);
>       /* After CCP we rewrite no longer addressed locals into SSA
> @@ -363,7 +363,7 @@ along with GCC; see the file COPYING3.  If not see
>       NEXT_PASS (pass_dce, true /* update_address_taken_p */, true /* 
> remove_unused_locals */);
>       /* After late DCE we rewrite no longer addressed locals into SSA
>     form if possible.  */
> -      NEXT_PASS (pass_forwprop, /*last=*/true);
> +      NEXT_PASS (pass_forwprop, /*full_walk=*/false, /*last=*/true);
>       NEXT_PASS (pass_sink_code, true /* unsplit edges */);
>       NEXT_PASS (pass_phiopt, false /* early_p */);
>       NEXT_PASS (pass_fold_builtins);
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index ec4fbeb9e54..ebf625f9c7e 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1299,7 +1299,7 @@ optimize_aggr_zeroprop_1 (gimple *defstmt, gimple *stmt,
>    and/or memcpy (&b, &a, sizeof (a)); instead of b = a;  */
> 
> static bool
> -optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
> +optimize_aggr_zeroprop (gimple_stmt_iterator *gsip, bool full_walk)
> {
>   ao_ref read;
>   gimple *stmt = gsi_stmt (*gsip);
> @@ -1383,7 +1383,7 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
> 
>   /* Setup the worklist.  */
>   auto_vec<std::pair<tree, unsigned>> worklist;
> -  unsigned limit = param_sccvn_max_alias_queries_per_access;
> +  unsigned limit = full_walk ? param_sccvn_max_alias_queries_per_access : 0;
>   worklist.safe_push (std::make_pair (gimple_vdef (stmt), limit));
> 
>   while (!worklist.is_empty ())
> @@ -1400,13 +1400,17 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
>        continue;
> 
>      /* If this statement does not clobber add the vdef stmt to the
> -         worklist.  */
> -      if (limit != 0
> +         worklist.
> +         After hitting the limit, allow clobbers to able to pass through.  */
> +      if ((limit != 0 || gimple_clobber_p (use_stmt))
>          && gimple_vdef (use_stmt)
>          && !stmt_may_clobber_ref_p_1 (use_stmt, &read,
>                       /* tbaa_p = */ can_use_tbba))
> -        worklist.safe_push (std::make_pair (gimple_vdef (use_stmt),
> -                        limit - 1));
> +          {
> +        unsigned new_limit = limit == 0 ? 0 : limit - 1;
> +        worklist.safe_push (std::make_pair (gimple_vdef (use_stmt),
> +                            new_limit));
> +          }
> 
>      if (optimize_aggr_zeroprop_1 (stmt, use_stmt, dest_base, offset,
>                     val, wi::to_poly_offset (len)))
> @@ -1591,7 +1595,7 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip)
>    to __atomic_fetch_op (p, x, y) when possible (also __sync).  */
> 
> static bool
> -simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
> +simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2, bool 
> full_walk)
> {
>   gimple *stmt1, *stmt2 = gsi_stmt (*gsi_p);
>   enum built_in_function other_atomic = END_BUILTINS;
> @@ -1670,7 +1674,7 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, 
> tree callee2)
>    {
>      /* Try to prop the zeroing/value of the memset to memcpy
>         if the dest is an address and the value is a constant. */
> -      if (optimize_aggr_zeroprop (gsi_p))
> +      if (optimize_aggr_zeroprop (gsi_p, full_walk))
>        return true;
>    }
>       if (gimple_call_num_args (stmt2) != 3
> @@ -4460,8 +4464,17 @@ public:
>   opt_pass * clone () final override { return new pass_forwprop (m_ctxt); }
>   void set_pass_param (unsigned int n, bool param) final override
>     {
> -      gcc_assert (n == 0);
> -      last_p = param;
> +      switch (n)
> +    {
> +      case 0:
> +        m_full_walk = param;
> +        break;
> +      case 1:
> +        last_p = param;
> +        break;
> +      default:
> +      gcc_unreachable();
> +    }
>     }
>   bool gate (function *) final override { return flag_tree_forwprop; }
>   unsigned int execute (function *) final override;
> @@ -4469,12 +4482,17 @@ public:
>  private:
>   /* Determines whether the pass instance should set PROP_last_full_fold.  */
>   bool last_p;
> +
> +  /* True if the aggregate props are doing a full walk or not.  */
> +  bool m_full_walk = false;
> }; // class pass_forwprop
> 
> unsigned int
> pass_forwprop::execute (function *fun)
> {
>   unsigned int todoflags = 0;
> +  /* Handle a full walk only when expensive optimizations are on.  */
> +  bool full_walk = m_full_walk && flag_expensive_optimizations;
> 
>   cfg_changed = false;
>   if (last_p)
> @@ -4991,7 +5009,7 @@ pass_forwprop::execute (function *fun)
>          {
>            tree rhs1 = gimple_assign_rhs1 (stmt);
>            enum tree_code code = gimple_assign_rhs_code (stmt);
> -            if (gimple_store_p (stmt) && optimize_aggr_zeroprop (&gsi))
> +            if (gimple_store_p (stmt) && optimize_aggr_zeroprop (&gsi, 
> full_walk))
>              {
>            changed = true;
>            break;
> @@ -5051,7 +5069,7 @@ pass_forwprop::execute (function *fun)
>            tree callee = gimple_call_fndecl (stmt);
>            if (callee != NULL_TREE
>            && fndecl_built_in_p (callee, BUILT_IN_NORMAL))
> -              changed |= simplify_builtin_call (&gsi, callee);
> +              changed |= simplify_builtin_call (&gsi, callee, full_walk);
>            break;
>          }
> 
> --
> 2.43.0
> 

Reply via email to