On Fri, Oct 10, 2025 at 7:08 AM Andrew Pinski <[email protected]> wrote: > > After copy propagation for aggregates patches we might end up with > now: > ``` > tmp = a; > b = a; // was b = tmp; > tmp = {CLOBBER}; > ``` > To help out ESRA, it would be a good idea to remove the `tmp = a` statement as > there is no DSE between frowprop and ESRA. copy-prop-aggregate-sra-1.c is an > example > where the removal of the copy helps ESRA. > > This adds a simple DSE which is only designed to remove the `tmp = a` > statement. > This shows up a few times in many C++ code including the code from the > javascript > interpreter in ladybird, and in the "fake" testcase in PR 108653 and in the > aarch64 > specific PR 89967. > > This is disabled for -Og as we don't do dse there either. > intent_optimize_10.f90 testcase needed to be updated as the constant > shows up in a debug statement now. > > Changes since v1: > * v2: Add much more comments in the code instead of just relying on the > commit message. > Count the maybe_use towards the aliasing lookup limit (increase the > non-full walk limit to 4 > to account for that). > Use direct comparison instead of operand_equal_p since we are comparing > against a DECL. > > Bootstrapped and tested on x86_64-linux-gnu.
OK. > gcc/ChangeLog: > > * tree-ssa-forwprop.cc (do_simple_agr_dse): New function. > (pass_forwprop::execute): Call do_simple_agr_dse for clobbers. > > gcc/testsuite/ChangeLog: > > * gfortran.dg/intent_optimize_10.f90: Update so -g won't fail. > * gcc.dg/tree-ssa/copy-prop-aggregate-sra-1.c: New testcase. > > Signed-off-by: Andrew Pinski <[email protected]> > --- > .../tree-ssa/copy-prop-aggregate-sra-1.c | 33 ++++++ > .../gfortran.dg/intent_optimize_10.f90 | 2 +- > gcc/tree-ssa-forwprop.cc | 106 +++++++++++++++++- > 3 files changed, 139 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-sra-1.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-sra-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-sra-1.c > new file mode 100644 > index 00000000000..baabecf5d27 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-sra-1.c > @@ -0,0 +1,33 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fdump-tree-esra-details > " } */ > + > +struct s1 > +{ > + int t[1024]; > +}; > + > +struct s1 f(void); > + > +void g(int a, int b, int ); > +void p(struct s1); > +static inline void p1(struct s1 inner) > +{ > + p(inner); > +} > + > +void h(struct s1 outer) > +{ > + p1(outer); > + g(outer.t[0], outer.t[1], outer.t[2]); > +} > +/* Forwprop should be able to copy prop the copy of `inner = outer` to the > call of p. > + Also remove this copy. */ > + > +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "Removing dead store stmt inner = > outer" 1 "forwprop1" } } */ > + > +/* The extra copy that was done by inlining is removed so SRA should not > decide to cause > + inner nor outer to be scalarized even for the 3 elements accessed > afterwards. */ > +/* { dg-final { scan-tree-dump-times "Disqualifying inner" 2 "esra" } } */ > +/* { dg-final { scan-tree-dump-times "Disqualifying outer" 1 "esra" } } */ > + > diff --git a/gcc/testsuite/gfortran.dg/intent_optimize_10.f90 > b/gcc/testsuite/gfortran.dg/intent_optimize_10.f90 > index d8bc1bb3b7b..214f04cf70d 100644 > --- a/gcc/testsuite/gfortran.dg/intent_optimize_10.f90 > +++ b/gcc/testsuite/gfortran.dg/intent_optimize_10.f90 > @@ -63,4 +63,4 @@ end program main > > ! There is a clobber for tc, so we should manage to optimize away the > associated initialization constant (but not other > ! initialization constants). > -! { dg-final { scan-tree-dump-not "123456789" "optimized" { target > __OPTIMIZE__ } } } > +! { dg-final { scan-tree-dump-not "= 123456789" "optimized" { target > __OPTIMIZE__ } } } > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 749708f05a2..68e80baa46c 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -1769,6 +1769,108 @@ optimize_agr_copyprop (gimple *stmt) > } > } > > +/* Simple DSE of the lhs from a clobber STMT. > + This is used mostly to clean up from optimize_agr_copyprop and > + to remove (exactly one) extra copy that might later on confuse SRA. > + An example is: > + ;; write to a and such. > + b = a; // This statement is to be removed > + b = {CLOBBER}; > + SRA will totally scalarize b (which means also a) here for the extra copy > + which is not something welcomed. So removing the copy will > + allow SRA to move the scalarization of a further down or not at all. > + */ > +static void > +do_simple_agr_dse (gassign *stmt, bool full_walk) > +{ > + /* Don't do this while in -Og as we want to keep around the copy > + for debuggability. */ > + if (optimize_debug) > + return; > + ao_ref read; > + basic_block bb = gimple_bb (stmt); > + tree lhs = gimple_assign_lhs (stmt); > + /* Only handle clobbers of a full decl. */ > + if (!DECL_P (lhs)) > + return; > + ao_ref_init (&read, lhs); > + tree vuse = gimple_vuse (stmt); > + unsigned limit = full_walk ? param_sccvn_max_alias_queries_per_access : 4; > + while (limit) > + { > + gimple *ostmt = SSA_NAME_DEF_STMT (vuse); > + /* Don't handle phis, just declare to be done. */ > + if (is_a<gphi*>(ostmt) || gimple_nop_p (ostmt)) > + break; > + basic_block obb = gimple_bb (ostmt); > + /* If the clobber is not fully dominating the statement define, > + then it is not "simple" to detect if the define is fully clobbered. > */ > + if (obb != bb && !dominated_by_p (CDI_DOMINATORS, bb, obb)) > + return; > + gimple *use_stmt; > + imm_use_iterator iter; > + FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_vdef (ostmt)) > + { > + basic_block ubb = gimple_bb (use_stmt); > + if (stmt == use_stmt) > + continue; > + /* The use needs to be dominating the clobber. */ > + if ((ubb != bb && !dominated_by_p (CDI_DOMINATORS, bb, ubb)) > + || ref_maybe_used_by_stmt_p (use_stmt, &read, false)) > + return; > + /* Count the above alias lookup towards the limit. */ > + limit--; > + if (limit == 0) > + return; > + } > + vuse = gimple_vuse (ostmt); > + > + /* This an assignment store to the clobbered decl, > + then maybe remove it. A call is not handled here as > + the rhs will not make a difference for SRA. */ > + if (is_a <gassign*>(ostmt) > + && gimple_store_p (ostmt) > + && !gimple_clobber_p (ostmt) > + && lhs == gimple_assign_lhs (ostmt)) > + { > + /* Don't remove stores/statements that are needed for non-call > + eh to work. */ > + if (stmt_unremovable_because_of_non_call_eh_p (cfun, ostmt)) > + return; > + /* If we delete a stmt that could throw, mark the block > + in to_purge to cleanup afterwards. */ > + if (stmt_could_throw_p (cfun, ostmt)) > + bitmap_set_bit (to_purge, obb->index); > + gimple_stmt_iterator gsi = gsi_for_stmt (ostmt); > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "Removing dead store stmt "); > + print_gimple_stmt (dump_file, ostmt, 0); > + fprintf (dump_file, "\n"); > + } > + unlink_stmt_vdef (ostmt); > + release_defs (ostmt); > + gsi_remove (&gsi, true); > + statistics_counter_event (cfun, "delete dead store", 1); > + /* Only remove the first store previous statement. */ > + return; > + } > + /* If the statement uses or maybe writes to the decl, > + then nothing is to be removed. Don't know if the write > + to the decl is partial write or a full one so the need > + to stop. > + e.g. > + b.c = a; > + Easier to stop here rather than do a full partial > + dse of this statement. > + b = {CLOBBER}; */ > + if (stmt_may_clobber_ref_p_1 (ostmt, &read, false) > + || ref_maybe_used_by_stmt_p (ostmt, &read, false)) > + return; > + limit--; > + } > +} > + > /* Optimizes builtin memcmps for small constant sizes. > GSI_P is the GSI for the call. STMT is the call itself. > */ > @@ -5495,7 +5597,9 @@ pass_forwprop::execute (function *fun) > { > tree rhs1 = gimple_assign_rhs1 (stmt); > enum tree_code code = gimple_assign_rhs_code (stmt); > - if (gimple_store_p (stmt)) > + if (gimple_clobber_p (stmt)) > + do_simple_agr_dse (as_a<gassign*>(stmt), full_walk); > + else if (gimple_store_p (stmt)) > { > optimize_aggr_zeroprop (stmt, full_walk); > if (gimple_assign_load_p (stmt)) > -- > 2.43.0 >
