On Fri, Jun 6, 2025 at 11:50 AM Andrew Pinski <quic_apin...@quicinc.com> wrote: > > This improves copy prop for aggregates by working over statements that don't > modify the access > just like how it is done for copying zeros. > To speed up things, we should only have one loop back on the vuse instead of > doing it twice for > each load/store assignments.
This needs one minor testsuite change for aarch64 (gcc.target/aarch64/vld2-1.c) which I will do if this patch gets approved. Thanks, Andrew > > PR tree-optimization/14295 > > gcc/ChangeLog: > > * tree-ssa-forwprop.cc (find_clobbering_stmt): New function split out > from optimize_memcpy_to_memset. > (optimize_memcpy_to_memset): Split rewriting part out into ... > (optimize_memcpy_to_memset_1): This function. > (optimize_agr_copyprop): Call find_clobbering_stmt. > Call optimize_memcpy_to_memset_1. Check that the src > variable is not clobbered between the 2 statements. > (pass_forwprop::execute): Call optimize_agr_copyprop only instead of > optimize_memcpy_to_memset too. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/copy-prop-arg-1.c: New test. > * gcc.dg/tree-ssa/copy-prop-arg-2.c: New test. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > .../gcc.dg/tree-ssa/copy-prop-arg-1.c | 37 ++++ > .../gcc.dg/tree-ssa/copy-prop-arg-2.c | 35 ++++ > gcc/tree-ssa-forwprop.cc | 158 ++++++++++++------ > 3 files changed, 180 insertions(+), 50 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c > new file mode 100644 > index 00000000000..876556fc464 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c > @@ -0,0 +1,37 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized > -fdump-tree-forwprop1-details" } */ > + > +/* PR tree-optimization/14295 */ > + > +/* Check for copyprop on structs. */ > + > +struct s > +{ > + char d; > + int a, b; > + double m; > + int t[1024]; > +}; > + > +void g(); > +struct s foo (struct s r) > +{ > + struct s temp_struct1; > + struct s temp_struct2; > + struct s temp_struct3; > + temp_struct1 = r; > + temp_struct2 = temp_struct1; > + temp_struct3 = temp_struct2; > + /* The call here should not make > + a difference in removing all of the copies, > + this is true as we end up with r = r; */ > + g(); > + r = temp_struct3; > + return r; > +} > + > +/* There should be no references to any of "temp_struct*" > + temporaries. */ > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ > +/* Also check that forwprop pass did the copy prop. */ > +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c > new file mode 100644 > index 00000000000..b1a76b48953 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c > @@ -0,0 +1,35 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized > -fdump-tree-forwprop1-details" } */ > + > +/* PR tree-optimization/14295 */ > + > +/* Check for copyprop on structs. */ > + > +struct s > +{ > + char d; > + int a, b; > + double m; > + int t[1024]; > +}; > + > +void g(); > +struct s foo (struct s r) > +{ > + struct s temp_struct1; > + struct s temp_struct2; > + struct s temp_struct3; > + temp_struct1 = r; > + temp_struct2 = temp_struct1; > + temp_struct3 = temp_struct2; > + /* The call here should not make > + a difference in removing all of the copies. */ > + g(); > + return temp_struct3; > +} > + > +/* There should be no references to any of "temp_struct*" > + temporaries. */ > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ > +/* Also check that forwprop pass did the copy prop. */ > +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */ > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 27197bbc076..7a9a6357f7f 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -1190,56 +1190,21 @@ constant_pointer_difference (tree p1, tree p2) > return NULL_TREE; > } > > - > -/* Optimize > - a = {}; > - b = a; > - into > - a = {}; > - b = {}; > - Similarly for memset (&a, ..., sizeof (a)); instead of a = {}; > - and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */ > +/* Tries to prop a zero store from DEFSTMT into STMT (GSIP). > + Returns true if the proping happened. */ > > static bool > -optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, > tree len) > +optimize_memcpy_to_memset_1 (gimple_stmt_iterator *gsip, tree dest, tree src, > + tree len, gimple *stmt, gimple *defstmt) > { > - ao_ref read; > - gimple *stmt = gsi_stmt (*gsip); > - if (gimple_has_volatile_ops (stmt)) > - return false; > - > - tree src2 = NULL_TREE, len2 = NULL_TREE; > poly_int64 offset, offset2; > tree val = integer_zero_node; > - bool len_was_null = len == NULL_TREE; > - if (len == NULL_TREE) > - len = (TREE_CODE (src) == COMPONENT_REF > - ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1)) > - : TYPE_SIZE_UNIT (TREE_TYPE (src))); > + tree len2 = NULL_TREE; > + tree src2 = NULL_TREE; > if (len == NULL_TREE > || !poly_int_tree_p (len)) > return false; > > - ao_ref_init (&read, src); > - tree vuse = gimple_vuse (stmt); > - gimple *defstmt; > - unsigned limit = param_sccvn_max_alias_queries_per_access; > - do { > - /* If the vuse is the default definition, then there is no stores > beforhand. */ > - if (SSA_NAME_IS_DEFAULT_DEF (vuse)) > - return false; > - defstmt = SSA_NAME_DEF_STMT (vuse); > - if (is_a <gphi*>(defstmt)) > - return false; > - if (limit-- == 0) > - return false; > - /* If the len was null, then we can use TBBA. */ > - if (stmt_may_clobber_ref_p_1 (defstmt, &read, > - /* tbaa_p = */ len_was_null)) > - break; > - vuse = gimple_vuse (defstmt); > - } while (true); > - > if (gimple_store_p (defstmt) > && gimple_assign_single_p (defstmt) > && TREE_CODE (gimple_assign_rhs1 (defstmt)) == STRING_CST > @@ -1343,6 +1308,64 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, > tree dest, tree src, tree > } > return true; > } > + > + > +/* Finds the first statement starting at STMT that clobbers SRC. > + Returns false if there is none (Due to PHI or otherwise). */ > + > +static bool > +find_clobbering_stmt (tree src, gimple *stmt, gimple **clobber_stmt) > +{ > + gimple *defstmt; > + ao_ref read; > + ao_ref_init (&read, src); > + tree vuse = gimple_vuse (stmt); > + unsigned limit = param_sccvn_max_alias_queries_per_access; > + do { > + /* If the vuse is the default definition, then there is no stores > beforhand. */ > + if (SSA_NAME_IS_DEFAULT_DEF (vuse)) > + return false; > + defstmt = SSA_NAME_DEF_STMT (vuse); > + if (is_a <gphi*>(defstmt)) > + return false; > + if (limit-- == 0) > + return false; > + if (stmt_may_clobber_ref_p_1 (defstmt, &read, true)) > + break; > + vuse = gimple_vuse (defstmt); > + } while (true); > + *clobber_stmt = defstmt; > + return true; > +} > + > + > +/* Optimize > + a = {}; > + memcpy (&b, &a, sizeof (a)); > + into > + a = {}; > + b = {}; > + Similarly for memset (&a, ..., sizeof (a)); instead of a = {}; */ > + > +static bool > +optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, > tree len) > +{ > + gcc_assert (len); > + gimple *stmt = gsi_stmt (*gsip); > + if (gimple_has_volatile_ops (stmt)) > + return false; > + > + if (!poly_int_tree_p (len)) > + return false; > + > + gimple *defstmt; > + if (!find_clobbering_stmt (src, stmt, &defstmt)) > + return false; > + > + return optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt); > +} > + > + > /* Optimizes > a = c; > b = a; > @@ -1351,6 +1374,10 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, > tree dest, tree src, tree > b = c; > GSIP is the second statement and SRC is the common > between the statements. > + Also handles if store to a is: > + a = {}; > + or > + memset (&a, ..., sizeof (a)); > */ > static bool > optimize_agr_copyprop (gimple_stmt_iterator *gsip) > @@ -1369,7 +1396,19 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip) > /* If the vuse is the default definition, then there is no store > beforehand. */ > if (SSA_NAME_IS_DEFAULT_DEF (vuse)) > return false; > - gimple *defstmt = SSA_NAME_DEF_STMT (vuse); > + > + gimple *defstmt; > + if (!find_clobbering_stmt (src, stmt, &defstmt)) > + return false; > + > + /* See if the first store is a storing of zero, then > + optimize stmt that way. */ > + tree len = (TREE_CODE (src) == COMPONENT_REF > + ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1)) > + : TYPE_SIZE_UNIT (TREE_TYPE (src))); > + if (optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt)) > + return true; > + > if (!gimple_assign_load_p (defstmt) > || !gimple_store_p (defstmt)) > return false; > @@ -1385,6 +1424,33 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip) > if (!operand_equal_p (src, dest2, 0)) > return false; > > + /* Check that the src2 has not changed. */ > + ao_ref read; > + ao_ref_init (&read, src2); > + vuse = gimple_vuse (stmt); > + unsigned limit = param_sccvn_max_alias_queries_per_access; > + gimple *defstmt1; > + do { > + /* If the vuse is the default definition, then there is no stores > beforhand. */ > + if (SSA_NAME_IS_DEFAULT_DEF (vuse)) > + return false; > + defstmt1 = SSA_NAME_DEF_STMT (vuse); > + if (defstmt == defstmt1) > + break; > + if (is_a <gphi*>(defstmt1)) > + return false; > + if (limit-- == 0) > + return false; > + /* Even though this is moving a load past the statement, this needs to > + be treated like moving a store. */ > + if (stmt_may_clobber_ref_p_1 (defstmt1, &read, false)) > + { > + if (defstmt != defstmt1) > + return false; > + break; > + } > + vuse = gimple_vuse (defstmt1); > + } while (true); > > /* For 2 memory refences and using a temporary to do the copy, > don't remove the temporary as the 2 memory references might overlap. > @@ -4798,14 +4864,6 @@ pass_forwprop::execute (function *fun) > enum tree_code code = gimple_assign_rhs_code (stmt); > if (gimple_assign_load_p (stmt) && gimple_store_p (stmt)) > { > - if (optimize_memcpy_to_memset (&gsi, > - gimple_assign_lhs > (stmt), > - gimple_assign_rhs1 > (stmt), > - /* len = */NULL_TREE)) > - { > - changed = true; > - break; > - } > if (optimize_agr_copyprop (&gsi)) > { > changed = true; > -- > 2.43.0 >