On Mon, Apr 21, 2025 at 9:52 AM Andrew Pinski <quic_apin...@quicinc.com> wrote:
>
> This implements a simple copy propagation for aggregates in a similar
> fashion to what we already do for copy prop of zeroing.
>
> Right now this only looks at the previous vdef statement but this allows us
> to catch a lot of cases that show up in C++ code.
>
> Also adds a variant of pr22237.c which was found while working on this patch.

Please ignore this patch; I am going to move this and the other
memcpy/memset optimization that is already done in fold_stmt to
forwprop.
I decided this based on the review at
https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681507.html

Thanks,
Andrew Pinski

>
>         PR tree-optimization/14295
>         PR tree-optimization/108358
>         PR tree-optimization/114169
>
> gcc/ChangeLog:
>
>         * gimple-fold.cc (optimize_agr_copyprop): New function.
>         (fold_stmt_1): Call optimize_agr_copyprop for load/store statements.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/20031106-6.c: Un-xfail. Add scan for forwprop1.
>         * g++.dg/opt/pr66119.C: Disable forwprop and vrp since that does
>         the copy prop now.
>         * gcc.dg/tree-ssa/pr108358-a.c: New test.
>         * gcc.dg/tree-ssa/pr114169-1.c: New test.
>         * gcc.c-torture/execute/builtins/pr22237-1-lib.c: New test.
>         * gcc.c-torture/execute/builtins/pr22237-1.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> ---
>  gcc/gimple-fold.cc                            | 73 +++++++++++++++++++
>  gcc/testsuite/g++.dg/opt/pr66119.C            |  2 +-
>  .../execute/builtins/pr22237-1-lib.c          | 27 +++++++
>  .../execute/builtins/pr22237-1.c              | 57 +++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c    |  8 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c    | 33 +++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c    | 39 ++++++++++
>  7 files changed, 236 insertions(+), 3 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c
>  create mode 100644 gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c
>
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index 94d5a1ebbd7..c0b5046359c 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -1043,6 +1043,73 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, 
> tree dest, tree src, tree
>    return true;
>  }
>
> +/* Optimizes
> +   a = c;
> +   b = a;
> +   into
> +   a = c;
> +   b = c;
> +   GSIP is the second statement and SRC is the common
> +   operand between the statements.
> +*/
> +static bool
> +optimize_agr_copyprop (gimple_stmt_iterator *gsip, tree dest, tree src)
> +{
> +  gimple *stmt = gsi_stmt (*gsip);
> +  if (gimple_has_volatile_ops (stmt))
> +    return false;
> +
> +  tree vuse = gimple_vuse (stmt);
> +  if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME)
> +    return false;
> +
> +  gimple *defstmt = SSA_NAME_DEF_STMT (vuse);
> +  if (!gimple_assign_load_p (defstmt)
> +      || !gimple_store_p (defstmt))
> +    return false;
> +  if (gimple_has_volatile_ops (defstmt))
> +    return false;
> +
> +  tree dest2 = gimple_assign_lhs (defstmt);
> +  tree src2 = gimple_assign_rhs1 (defstmt);
> +  if (!operand_equal_p (src, dest2, 0))
> +    return false;
> +  /* If replacing with the same thing, just skip it. */
> +  if (operand_equal_p (src, src2, 0))
> +    return false;
> +
> +  /* For 2 memory references and using a temporary to do the copy,
> +     don't remove the temporary as the 2 memory references might overlap.
> +     Note t does not need to be a decl as it could be a field.
> +     See PR 22237 for full details.
> +     E.g.
> +     t = *a;
> +     *b = t;
> +     Cannot be converted into
> +     t = *a;
> +     *b = *a;
> +  */
> +  if (!DECL_P (dest) && !DECL_P (src2))
> +    return false;
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "Simplified\n  ");
> +      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
> +      fprintf (dump_file, "after previous\n  ");
> +      print_gimple_stmt (dump_file, defstmt, 0, dump_flags);
> +    }
> +  gimple_assign_set_rhs_from_tree (gsip, unshare_expr (src2));
> +  update_stmt (stmt);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "into\n  ");
> +      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
> +    }
> +  return true;
> +}
> +
>  /* Fold function call to builtin mem{{,p}cpy,move}.  Try to detect and
>     diagnose (otherwise undefined) overlapping copies without preventing
>     folding.  When folded, GCC guarantees that overlapping memcpy has
> @@ -6696,6 +6763,12 @@ fold_stmt_1 (gimple_stmt_iterator *gsi, bool inplace, 
> tree (*valueize) (tree),
>                 changed = true;
>                 break;
>               }
> +           if (optimize_agr_copyprop (gsi, gimple_assign_lhs (stmt),
> +                                      gimple_assign_rhs1 (stmt)))
> +             {
> +               changed = true;
> +               break;
> +             }
>           }
>         /* Try to canonicalize for boolean-typed X the comparisons
>            X == 0, X == 1, X != 0, and X != 1.  */
> diff --git a/gcc/testsuite/g++.dg/opt/pr66119.C 
> b/gcc/testsuite/g++.dg/opt/pr66119.C
> index d1b1845a258..3f1dee7f69a 100644
> --- a/gcc/testsuite/g++.dg/opt/pr66119.C
> +++ b/gcc/testsuite/g++.dg/opt/pr66119.C
> @@ -3,7 +3,7 @@
>     the value of MOVE_RATIO now is.  */
>
>  /* { dg-do compile  { target { { i?86-*-* x86_64-*-* } && c++11 } }  }  */
> -/* { dg-options "-O3 -mavx -fdump-tree-sra -march=slm -mtune=slm 
> -fno-early-inlining" } */
> +/* { dg-options "-O3 -mavx -fdump-tree-sra -fno-tree-forwprop -fno-tree-vrp 
> -march=slm -mtune=slm -fno-early-inlining" } */
>  // { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib 
> } }
>
>  #include <immintrin.h>
> diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c 
> b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c
> new file mode 100644
> index 00000000000..44032357405
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c
> @@ -0,0 +1,27 @@
> +extern void abort (void);
> +
> +void *
> +memcpy (void *dst, const void *src, __SIZE_TYPE__ n)
> +{
> +  const char *srcp;
> +  char *dstp;
> +
> +  srcp = src;
> +  dstp = dst;
> +
> +  if (dst < src)
> +    {
> +      if (dst + n > src)
> +       abort ();
> +    }
> +  else
> +    {
> +      if (src + n > dst)
> +       abort ();
> +    }
> +
> +  while (n-- != 0)
> +    *dstp++ = *srcp++;
> +
> +  return dst;
> +}
> diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c 
> b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c
> new file mode 100644
> index 00000000000..0a12b0fc9a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c
> @@ -0,0 +1,57 @@
> +extern void abort (void);
> +extern void exit (int);
> +struct s { unsigned char a[256]; };
> +union u { struct { struct s b; int c; } d; struct { int c; struct s b; } e; 
> };
> +static union u v;
> +static union u v0;
> +static struct s *p = &v.d.b;
> +static struct s *q = &v.e.b;
> +
> +struct outers
> +{
> +  struct s inner;
> +};
> +
> +static inline struct s rp (void) { return *p; }
> +static inline struct s rq (void) { return *q; }
> +static void pq (void)
> +{
> +  struct outers o = {rq () };
> +  *p = o.inner;
> +}
> +static void qp (void)
> +{
> +  struct outers o = {rp () };
> +  *q  = o.inner;
> +}
> +
> +static void
> +init (struct s *sp)
> +{
> +  int i;
> +  for (i = 0; i < 256; i++)
> +    sp->a[i] = i;
> +}
> +
> +static void
> +check (struct s *sp)
> +{
> +  int i;
> +  for (i = 0; i < 256; i++)
> +    if (sp->a[i] != i)
> +      abort ();
> +}
> +
> +void
> +main_test (void)
> +{
> +  v = v0;
> +  init (p);
> +  qp ();
> +  check (q);
> +  v = v0;
> +  init (q);
> +  pq ();
> +  check (p);
> +  exit (0);
> +}
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c
> index 56d1887bd78..c7e00887c16 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c
> @@ -1,5 +1,7 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized" } */
> +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized 
> -fdump-tree-forwprop1-details" } */
> +
> +/* PR tree-optimization/14295 */
>
>  extern void link_error (void);
>
> @@ -25,4 +27,6 @@ struct s foo (struct s r)
>
>  /* There should be no references to any of "temp_struct*"
>     temporaries.  */
> -/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" { xfail 
> *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> +/* Also check that forwprop pass did the copy prop. */
> +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c
> new file mode 100644
> index 00000000000..342e1c1a5c2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Os -fdump-tree-optimized" } */
> +
> +/* PR tree-optimization/108358 */
> +
> +struct a {
> +  int b;
> +  int c;
> +  short d;
> +  int e;
> +  int f;
> +};
> +struct g {
> +  struct a f;
> +  struct a h;
> +};
> +int i;
> +void foo();
> +void bar31_(void);
> +int main() {
> +  struct g j, l = {2, 1, 6, 1, 1, 7, 5, 1, 0, 1};
> +  for (; i; ++i)
> +    bar31_();
> +  j = l;
> +  struct g m = j;
> +  struct g k = m;
> +  if (k.h.b)
> +    ;
> +  else
> +    foo();
> +}
> +/* The call to foo should be optimized away. */
> +/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c
> new file mode 100644
> index 00000000000..37766fbe296
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-forwprop-details -fdump-tree-optimized" } */
> +
> +
> +/* PR tree-optimization/114169 */
> +
> +#include <stdint.h>
> +
> +struct S1 {
> +   uint32_t  f0;
> +   uint8_t  f1;
> +   uint64_t  f2;
> +   uint64_t  f3;
> +   int32_t  f4;
> +};
> +
> +union U8 {
> +   struct S1  f0;
> +   int32_t  f1;
> +   int64_t  f2;
> +   uint8_t  f3;
> +   const int64_t  f4;
> +};
> +
> +/* --- GLOBAL VARIABLES --- */
> +struct S1 g_16 = {4294967293UL,1UL,1UL,0xA9C1C73B017290B1LL,0x5ADF851FL};
> +union U8 g_37 = {{1UL,1UL,0x2361AE7D51263067LL,0xEEFD7F9B64A47447LL,0L}};
> +struct S1 g_50 = 
> {0x0CFC2012L,1UL,0x43E1243B3BE7B8BBLL,0x03C5CEC10C1A6FE1LL,1L};
> +
> +
> +/* --- FORWARD DECLARATIONS --- */
> +
> +void func_32(union U8 e) {
> +  e.f3 = e.f0.f4;
> +  g_16 = e.f0 = g_50;
> +}
> +/* The union e should not make a difference here.  */
> +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump "g_16 = g_50;" "optimized" } } */
> --
> 2.43.0
>

Reply via email to