This implements a simple copy propagation for aggregates, in a similar fashion to what we already do for copy prop of zeroing.
Right now this only looks at the previous vdef statement but this allows us to catch a lot of cases that show up in C++ code. Also adds a variant of pr22237.c which was found while working on this patch. PR tree-optimization/14295 PR tree-optimization/108358 PR tree-optimization/114169 gcc/ChangeLog: * gimple-fold.cc (optimize_agr_copyprop): New function. (fold_stmt_1): Call optimize_agr_copyprop for load/store statements. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/20031106-6.c: Un-xfail. Add scan for forwprop1. * g++.dg/opt/pr66119.C: Disable forwprop and vrp since that does the copy prop now. * gcc.dg/tree-ssa/pr108358-a.c: New test. * gcc.dg/tree-ssa/pr114169-1.c: New test. * gcc.c-torture/execute/builtins/pr22237-1-lib.c: New test. * gcc.c-torture/execute/builtins/pr22237-1.c: New test. Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> --- gcc/gimple-fold.cc | 73 +++++++++++++++++++ gcc/testsuite/g++.dg/opt/pr66119.C | 2 +- .../execute/builtins/pr22237-1-lib.c | 27 +++++++ .../execute/builtins/pr22237-1.c | 57 +++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c | 8 +- gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c | 33 +++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c | 39 ++++++++++ 7 files changed, 236 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 94d5a1ebbd7..c0b5046359c 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -1043,6 +1043,73 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, tree return true; } +/* Optimizes + a = c; + b = a; + into + a = c; + b = c; + GSIP is the second statement and SRC is the common + between the statements. 
+*/ +static bool +optimize_agr_copyprop (gimple_stmt_iterator *gsip, tree dest, tree src) +{ + gimple *stmt = gsi_stmt (*gsip); + if (gimple_has_volatile_ops (stmt)) + return false; + + tree vuse = gimple_vuse (stmt); + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME) + return false; + + gimple *defstmt = SSA_NAME_DEF_STMT (vuse); + if (!gimple_assign_load_p (defstmt) + || !gimple_store_p (defstmt)) + return false; + if (gimple_has_volatile_ops (defstmt)) + return false; + + tree dest2 = gimple_assign_lhs (defstmt); + tree src2 = gimple_assign_rhs1 (defstmt); + if (!operand_equal_p (src, dest2, 0)) + return false; + /* If replacing with the same thing, just skip it. */ + if (operand_equal_p (src, src2, 0)) + return false; + + /* For 2 memory references and using a temporary to do the copy, + don't remove the temporary as the 2 memory references might overlap. + Note t does not need to be a decl as it could be a field. + See PR 22237 for full details. + E.g. + t = *a; + *b = t; + Cannot be converted into + t = *a; + *b = *a; + */ + if (!DECL_P (dest) && !DECL_P (src2)) + return false; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Simplified\n "); + print_gimple_stmt (dump_file, stmt, 0, dump_flags); + fprintf (dump_file, "after previous\n "); + print_gimple_stmt (dump_file, defstmt, 0, dump_flags); + } + gimple_assign_set_rhs_from_tree (gsip, unshare_expr (src2)); + update_stmt (stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "into\n "); + print_gimple_stmt (dump_file, stmt, 0, dump_flags); + } + return true; +} + /* Fold function call to builtin mem{{,p}cpy,move}. Try to detect and diagnose (otherwise undefined) overlapping copies without preventing folding. 
When folded, GCC guarantees that overlapping memcpy has @@ -6696,6 +6763,12 @@ fold_stmt_1 (gimple_stmt_iterator *gsi, bool inplace, tree (*valueize) (tree), changed = true; break; } + if (optimize_agr_copyprop (gsi, gimple_assign_lhs (stmt), + gimple_assign_rhs1 (stmt))) + { + changed = true; + break; + } } /* Try to canonicalize for boolean-typed X the comparisons X == 0, X == 1, X != 0, and X != 1. */ diff --git a/gcc/testsuite/g++.dg/opt/pr66119.C b/gcc/testsuite/g++.dg/opt/pr66119.C index d1b1845a258..3f1dee7f69a 100644 --- a/gcc/testsuite/g++.dg/opt/pr66119.C +++ b/gcc/testsuite/g++.dg/opt/pr66119.C @@ -3,7 +3,7 @@ the value of MOVE_RATIO now is. */ /* { dg-do compile { target { { i?86-*-* x86_64-*-* } && c++11 } } } */ -/* { dg-options "-O3 -mavx -fdump-tree-sra -march=slm -mtune=slm -fno-early-inlining" } */ +/* { dg-options "-O3 -mavx -fdump-tree-sra -fno-tree-forwprop -fno-tree-vrp -march=slm -mtune=slm -fno-early-inlining" } */ // { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! 
hostedlib } } #include <immintrin.h> diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c new file mode 100644 index 00000000000..44032357405 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c @@ -0,0 +1,27 @@ +extern void abort (void); + +void * +memcpy (void *dst, const void *src, __SIZE_TYPE__ n) +{ + const char *srcp; + char *dstp; + + srcp = src; + dstp = dst; + + if (dst < src) + { + if (dst + n > src) + abort (); + } + else + { + if (src + n > dst) + abort (); + } + + while (n-- != 0) + *dstp++ = *srcp++; + + return dst; +} diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c new file mode 100644 index 00000000000..0a12b0fc9a1 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c @@ -0,0 +1,57 @@ +extern void abort (void); +extern void exit (int); +struct s { unsigned char a[256]; }; +union u { struct { struct s b; int c; } d; struct { int c; struct s b; } e; }; +static union u v; +static union u v0; +static struct s *p = &v.d.b; +static struct s *q = &v.e.b; + +struct outers +{ + struct s inner; +}; + +static inline struct s rp (void) { return *p; } +static inline struct s rq (void) { return *q; } +static void pq (void) +{ + struct outers o = {rq () }; + *p = o.inner; +} +static void qp (void) +{ + struct outers o = {rp () }; + *q = o.inner; +} + +static void +init (struct s *sp) +{ + int i; + for (i = 0; i < 256; i++) + sp->a[i] = i; +} + +static void +check (struct s *sp) +{ + int i; + for (i = 0; i < 256; i++) + if (sp->a[i] != i) + abort (); +} + +void +main_test (void) +{ + v = v0; + init (p); + qp (); + check (q); + v = v0; + init (q); + pq (); + check (p); + exit (0); +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c b/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c index 56d1887bd78..c7e00887c16 100644 --- 
a/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c @@ -1,5 +1,7 @@ /* { dg-do compile } */ -/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized" } */ +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized -fdump-tree-forwprop1-details" } */ + +/* PR tree-optimization/14295 */ extern void link_error (void); @@ -25,4 +27,6 @@ struct s foo (struct s r) /* There should be no references to any of "temp_struct*" temporaries. */ -/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ +/* Also check that forwprop pass did the copy prop. */ +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c b/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c new file mode 100644 index 00000000000..342e1c1a5c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-Os -fdump-tree-optimized" } */ + +/* PR tree-optimization/108358 */ + +struct a { + int b; + int c; + short d; + int e; + int f; +}; +struct g { + struct a f; + struct a h; +}; +int i; +void foo(); +void bar31_(void); +int main() { + struct g j, l = {2, 1, 6, 1, 1, 7, 5, 1, 0, 1}; + for (; i; ++i) + bar31_(); + j = l; + struct g m = j; + struct g k = m; + if (k.h.b) + ; + else + foo(); +} +/* The call to foo should be optimized away. 
*/ +/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c new file mode 100644 index 00000000000..37766fbe296 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-forwprop-details -fdump-tree-optimized" } */ + + +/* PR tree-optimization/114169 */ + +#include <stdint.h> + +struct S1 { + uint32_t f0; + uint8_t f1; + uint64_t f2; + uint64_t f3; + int32_t f4; +}; + +union U8 { + struct S1 f0; + int32_t f1; + int64_t f2; + uint8_t f3; + const int64_t f4; +}; + +/* --- GLOBAL VARIABLES --- */ +struct S1 g_16 = {4294967293UL,1UL,1UL,0xA9C1C73B017290B1LL,0x5ADF851FL}; +union U8 g_37 = {{1UL,1UL,0x2361AE7D51263067LL,0xEEFD7F9B64A47447LL,0L}}; +struct S1 g_50 = {0x0CFC2012L,1UL,0x43E1243B3BE7B8BBLL,0x03C5CEC10C1A6FE1LL,1L}; + + +/* --- FORWARD DECLARATIONS --- */ + +void func_32(union U8 e) { + e.f3 = e.f0.f4; + g_16 = e.f0 = g_50; +} +/* The union e should not make a difference here. */ +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */ +/* { dg-final { scan-tree-dump "g_16 = g_50;" "optimized" } } */ -- 2.43.0