On Mon, Apr 21, 2025 at 9:52 AM Andrew Pinski <quic_apin...@quicinc.com> wrote: > > This implements a simple copy propagation for aggregates in a similar > fashion as we already do for copy prop of zeroing. > > Right now this only looks at the previous vdef statement but this allows us > to catch a lot of cases that show up in C++ code. > > Also adds a variant of pr22237.c which was found while working on this patch.
Please ignore this patch, I am going to move this and the other memcpy/memset optimization that is already done in fold_stmt to forwprop. I decided that based on the review at https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681507.html Thanks, Andrew Pinski > > PR tree-optimization/14295 > PR tree-optimization/108358 > PR tree-optimization/114169 > > gcc/ChangeLog: > > * gimple-fold.cc (optimize_agr_copyprop): New function. > (fold_stmt_1): Call optimize_agr_copyprop for load/store statements. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/20031106-6.c: Un-xfail. Add scan for forwprop1. > * g++.dg/opt/pr66119.C: Disable forwprop and vrp since that does > the copy prop now. > * gcc.dg/tree-ssa/pr108358-a.c: New test. > * gcc.dg/tree-ssa/pr114169-1.c: New test. > * gcc.c-torture/execute/builtins/pr22237-1-lib.c: New test. > * gcc.c-torture/execute/builtins/pr22237-1.c: New test. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > gcc/gimple-fold.cc | 73 +++++++++++++++++++ > gcc/testsuite/g++.dg/opt/pr66119.C | 2 +- > .../execute/builtins/pr22237-1-lib.c | 27 +++++++ > .../execute/builtins/pr22237-1.c | 57 +++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c | 8 +- > gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c | 33 +++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c | 39 ++++++++++ > 7 files changed, 236 insertions(+), 3 deletions(-) > create mode 100644 > gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c > create mode 100644 gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c > > diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc > index 94d5a1ebbd7..c0b5046359c 100644 > --- a/gcc/gimple-fold.cc > +++ b/gcc/gimple-fold.cc > @@ -1043,6 +1043,73 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, > tree dest, tree src, tree > return true; > } > > +/* Optimizes > + a = c; > + b = a; > 
+ into > + a = c; > + b = c; > + GSIP is the second statement and SRC is the common > + between the statements. > +*/ > +static bool > +optimize_agr_copyprop (gimple_stmt_iterator *gsip, tree dest, tree src) > +{ > + gimple *stmt = gsi_stmt (*gsip); > + if (gimple_has_volatile_ops (stmt)) > + return false; > + > + tree vuse = gimple_vuse (stmt); > + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME) > + return false; > + > + gimple *defstmt = SSA_NAME_DEF_STMT (vuse); > + if (!gimple_assign_load_p (defstmt) > + || !gimple_store_p (defstmt)) > + return false; > + if (gimple_has_volatile_ops (defstmt)) > + return false; > + > + tree dest2 = gimple_assign_lhs (defstmt); > + tree src2 = gimple_assign_rhs1 (defstmt); > + if (!operand_equal_p (src, dest2, 0)) > + return false; > + /* If replacing with the same thing, just skip it. */ > + if (operand_equal_p (src, src2, 0)) > + return false; > + > + /* For 2 memory refences and using a temporary to do the copy, > + don't remove the temporary as the 2 memory references might overlap. > + Note t does not need to be decl as it could be field. > + See PR 22237 for full details. > + E.g. > + t = *a; > + *b = t; > + Cannot be convert into > + t = *a; > + *b = *a; > + */ > + if (!DECL_P (dest) && !DECL_P (src2)) > + return false; > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "Simplified\n "); > + print_gimple_stmt (dump_file, stmt, 0, dump_flags); > + fprintf (dump_file, "after previous\n "); > + print_gimple_stmt (dump_file, defstmt, 0, dump_flags); > + } > + gimple_assign_set_rhs_from_tree (gsip, unshare_expr (src2)); > + update_stmt (stmt); > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "into\n "); > + print_gimple_stmt (dump_file, stmt, 0, dump_flags); > + } > + return true; > +} > + > /* Fold function call to builtin mem{{,p}cpy,move}. Try to detect and > diagnose (otherwise undefined) overlapping copies without preventing > folding. 
When folded, GCC guarantees that overlapping memcpy has > @@ -6696,6 +6763,12 @@ fold_stmt_1 (gimple_stmt_iterator *gsi, bool inplace, > tree (*valueize) (tree), > changed = true; > break; > } > + if (optimize_agr_copyprop (gsi, gimple_assign_lhs (stmt), > + gimple_assign_rhs1 (stmt))) > + { > + changed = true; > + break; > + } > } > /* Try to canonicalize for boolean-typed X the comparisons > X == 0, X == 1, X != 0, and X != 1. */ > diff --git a/gcc/testsuite/g++.dg/opt/pr66119.C > b/gcc/testsuite/g++.dg/opt/pr66119.C > index d1b1845a258..3f1dee7f69a 100644 > --- a/gcc/testsuite/g++.dg/opt/pr66119.C > +++ b/gcc/testsuite/g++.dg/opt/pr66119.C > @@ -3,7 +3,7 @@ > the value of MOVE_RATIO now is. */ > > /* { dg-do compile { target { { i?86-*-* x86_64-*-* } && c++11 } } } */ > -/* { dg-options "-O3 -mavx -fdump-tree-sra -march=slm -mtune=slm > -fno-early-inlining" } */ > +/* { dg-options "-O3 -mavx -fdump-tree-sra -fno-tree-forwprop -fno-tree-vrp > -march=slm -mtune=slm -fno-early-inlining" } */ > // { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! 
hostedlib > } } > > #include <immintrin.h> > diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c > b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c > new file mode 100644 > index 00000000000..44032357405 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c > @@ -0,0 +1,27 @@ > +extern void abort (void); > + > +void * > +memcpy (void *dst, const void *src, __SIZE_TYPE__ n) > +{ > + const char *srcp; > + char *dstp; > + > + srcp = src; > + dstp = dst; > + > + if (dst < src) > + { > + if (dst + n > src) > + abort (); > + } > + else > + { > + if (src + n > dst) > + abort (); > + } > + > + while (n-- != 0) > + *dstp++ = *srcp++; > + > + return dst; > +} > diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c > b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c > new file mode 100644 > index 00000000000..0a12b0fc9a1 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c > @@ -0,0 +1,57 @@ > +extern void abort (void); > +extern void exit (int); > +struct s { unsigned char a[256]; }; > +union u { struct { struct s b; int c; } d; struct { int c; struct s b; } e; > }; > +static union u v; > +static union u v0; > +static struct s *p = &v.d.b; > +static struct s *q = &v.e.b; > + > +struct outers > +{ > + struct s inner; > +}; > + > +static inline struct s rp (void) { return *p; } > +static inline struct s rq (void) { return *q; } > +static void pq (void) > +{ > + struct outers o = {rq () }; > + *p = o.inner; > +} > +static void qp (void) > +{ > + struct outers o = {rp () }; > + *q = o.inner; > +} > + > +static void > +init (struct s *sp) > +{ > + int i; > + for (i = 0; i < 256; i++) > + sp->a[i] = i; > +} > + > +static void > +check (struct s *sp) > +{ > + int i; > + for (i = 0; i < 256; i++) > + if (sp->a[i] != i) > + abort (); > +} > + > +void > +main_test (void) > +{ > + v = v0; > + init (p); > + qp (); > + check (q); > + v = v0; > + init (q); > + pq (); > + 
check (p); > + exit (0); > +} > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c > b/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c > index 56d1887bd78..c7e00887c16 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c > @@ -1,5 +1,7 @@ > /* { dg-do compile } */ > -/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized" } */ > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized > -fdump-tree-forwprop1-details" } */ > + > +/* PR tree-optimization/14295 */ > > extern void link_error (void); > > @@ -25,4 +27,6 @@ struct s foo (struct s r) > > /* There should be no references to any of "temp_struct*" > temporaries. */ > -/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" { xfail > *-*-* } } } */ > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ > +/* Also check that forwprop pass did the copy prop. */ > +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c > new file mode 100644 > index 00000000000..342e1c1a5c2 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c > @@ -0,0 +1,33 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -fdump-tree-optimized" } */ > + > +/* PR tree-optimization/108358 */ > + > +struct a { > + int b; > + int c; > + short d; > + int e; > + int f; > +}; > +struct g { > + struct a f; > + struct a h; > +}; > +int i; > +void foo(); > +void bar31_(void); > +int main() { > + struct g j, l = {2, 1, 6, 1, 1, 7, 5, 1, 0, 1}; > + for (; i; ++i) > + bar31_(); > + j = l; > + struct g m = j; > + struct g k = m; > + if (k.h.b) > + ; > + else > + foo(); > +} > +/* The call to foo should be optimized away. 
*/ > +/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c > new file mode 100644 > index 00000000000..37766fbe296 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c > @@ -0,0 +1,39 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-forwprop-details -fdump-tree-optimized" } */ > + > + > +/* PR tree-optimization/114169 */ > + > +#include <stdint.h> > + > +struct S1 { > + uint32_t f0; > + uint8_t f1; > + uint64_t f2; > + uint64_t f3; > + int32_t f4; > +}; > + > +union U8 { > + struct S1 f0; > + int32_t f1; > + int64_t f2; > + uint8_t f3; > + const int64_t f4; > +}; > + > +/* --- GLOBAL VARIABLES --- */ > +struct S1 g_16 = {4294967293UL,1UL,1UL,0xA9C1C73B017290B1LL,0x5ADF851FL}; > +union U8 g_37 = {{1UL,1UL,0x2361AE7D51263067LL,0xEEFD7F9B64A47447LL,0L}}; > +struct S1 g_50 = > {0x0CFC2012L,1UL,0x43E1243B3BE7B8BBLL,0x03C5CEC10C1A6FE1LL,1L}; > + > + > +/* --- FORWARD DECLARATIONS --- */ > + > +void func_32(union U8 e) { > + e.f3 = e.f0.f4; > + g_16 = e.f0 = g_50; > +} > +/* The union e should not make a difference here. */ > +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump "g_16 = g_50;" "optimized" } } */ > -- > 2.43.0 >