On Mon, Jun 2, 2025 at 8:31 AM Andrew Pinski <quic_apin...@quicinc.com> wrote:
>
> This implements a simple copy propagation for aggregates in the similar
> fashion as we already do for copy prop of zeroing.
>
> Right now this only looks at the previous vdef statement but this allows us
> to catch a lot of cases that show up in C++ code.
>
> This used to deleted aggregate copies that are to the same location (PR57361)
> But that was found to delete statements that are needed for aliasing markers 
> reason.
> So we need to keep them around until that is solved. Note DSE will delete the 
> statements
> anyways so there is no testcase added since we expose the latent bug in the 
> same way.
> See https://gcc.gnu.org/pipermail/gcc-patches/2025-May/685003.html for the 
> testcase and
> explaintation there.
>
> Also adds a variant of pr22237.c which was found while working on this patch.

OK.

Thanks,
Richard.

> Changes since v1:
> * v2: change check for vuse to use default definition.
>       Remove dest/src arguments for optimize_agr_copyprop
>       Changed dump messages slightly.
>       Added stats
>       Don't delete `a = a` until aliasing markers are added.
>
>         PR tree-optimization/14295
>         PR tree-optimization/108358
>         PR tree-optimization/114169
>
> gcc/ChangeLog:
>
>         * tree-ssa-forwprop.cc (optimize_agr_copyprop): New function.
>         (pass_forwprop::execute): Call optimize_agr_copyprop for load/store 
> statements.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/20031106-6.c: Un-xfail. Add scan for forwprop1.
>         * g++.dg/opt/pr66119.C: Disable forwprop since that does
>         the copy prop now.
>         * gcc.dg/tree-ssa/pr108358-a.c: New test.
>         * gcc.dg/tree-ssa/pr114169-1.c: New test.
>         * gcc.c-torture/execute/builtins/pr22237-1-lib.c: New test.
>         * gcc.c-torture/execute/builtins/pr22237-1.c: New test.
>         * gcc.dg/tree-ssa/pr57361.c: Disable forwprop1.
>         * gcc.dg/tree-ssa/pr57361-1.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> ---
>  gcc/testsuite/g++.dg/opt/pr66119.C            |  2 +-
>  .../execute/builtins/pr22237-1-lib.c          | 27 ++++++
>  .../execute/builtins/pr22237-1.c              | 57 ++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c    |  8 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c    | 33 +++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c    | 39 +++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr57361-1.c     | 10 +++
>  gcc/testsuite/gcc.dg/tree-ssa/pr57361.c       |  2 +-
>  gcc/tree-ssa-forwprop.cc                      | 87 +++++++++++++++++++
>  9 files changed, 261 insertions(+), 4 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c
>  create mode 100644 gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr57361-1.c
>
> diff --git a/gcc/testsuite/g++.dg/opt/pr66119.C 
> b/gcc/testsuite/g++.dg/opt/pr66119.C
> index d1b1845a258..52362e44434 100644
> --- a/gcc/testsuite/g++.dg/opt/pr66119.C
> +++ b/gcc/testsuite/g++.dg/opt/pr66119.C
> @@ -3,7 +3,7 @@
>     the value of MOVE_RATIO now is.  */
>
>  /* { dg-do compile  { target { { i?86-*-* x86_64-*-* } && c++11 } }  }  */
> -/* { dg-options "-O3 -mavx -fdump-tree-sra -march=slm -mtune=slm 
> -fno-early-inlining" } */
> +/* { dg-options "-O3 -mavx -fdump-tree-sra -fno-tree-forwprop -march=slm 
> -mtune=slm -fno-early-inlining" } */
>  // { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib 
> } }
>
>  #include <immintrin.h>
> diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c 
> b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c
> new file mode 100644
> index 00000000000..44032357405
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1-lib.c
> @@ -0,0 +1,27 @@
> +extern void abort (void);
> +
> +void *
> +memcpy (void *dst, const void *src, __SIZE_TYPE__ n)
> +{
> +  const char *srcp;
> +  char *dstp;
> +
> +  srcp = src;
> +  dstp = dst;
> +
> +  if (dst < src)
> +    {
> +      if (dst + n > src)
> +       abort ();
> +    }
> +  else
> +    {
> +      if (src + n > dst)
> +       abort ();
> +    }
> +
> +  while (n-- != 0)
> +    *dstp++ = *srcp++;
> +
> +  return dst;
> +}
> diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c 
> b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c
> new file mode 100644
> index 00000000000..0a12b0fc9a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/pr22237-1.c
> @@ -0,0 +1,57 @@
> +extern void abort (void);
> +extern void exit (int);
> +struct s { unsigned char a[256]; };
> +union u { struct { struct s b; int c; } d; struct { int c; struct s b; } e; 
> };
> +static union u v;
> +static union u v0;
> +static struct s *p = &v.d.b;
> +static struct s *q = &v.e.b;
> +
> +struct outers
> +{
> +  struct s inner;
> +};
> +
> +static inline struct s rp (void) { return *p; }
> +static inline struct s rq (void) { return *q; }
> +static void pq (void)
> +{
> +  struct outers o = {rq () };
> +  *p = o.inner;
> +}
> +static void qp (void)
> +{
> +  struct outers o = {rp () };
> +  *q  = o.inner;
> +}
> +
> +static void
> +init (struct s *sp)
> +{
> +  int i;
> +  for (i = 0; i < 256; i++)
> +    sp->a[i] = i;
> +}
> +
> +static void
> +check (struct s *sp)
> +{
> +  int i;
> +  for (i = 0; i < 256; i++)
> +    if (sp->a[i] != i)
> +      abort ();
> +}
> +
> +void
> +main_test (void)
> +{
> +  v = v0;
> +  init (p);
> +  qp ();
> +  check (q);
> +  v = v0;
> +  init (q);
> +  pq ();
> +  check (p);
> +  exit (0);
> +}
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c
> index 56d1887bd78..c7e00887c16 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/20031106-6.c
> @@ -1,5 +1,7 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized" } */
> +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized 
> -fdump-tree-forwprop1-details" } */
> +
> +/* PR tree-optimization/14295 */
>
>  extern void link_error (void);
>
> @@ -25,4 +27,6 @@ struct s foo (struct s r)
>
>  /* There should be no references to any of "temp_struct*"
>     temporaries.  */
> -/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" { xfail 
> *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> +/* Also check that forwprop pass did the copy prop. */
> +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c
> new file mode 100644
> index 00000000000..342e1c1a5c2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr108358-a.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Os -fdump-tree-optimized" } */
> +
> +/* PR tree-optimization/108358 */
> +
> +struct a {
> +  int b;
> +  int c;
> +  short d;
> +  int e;
> +  int f;
> +};
> +struct g {
> +  struct a f;
> +  struct a h;
> +};
> +int i;
> +void foo();
> +void bar31_(void);
> +int main() {
> +  struct g j, l = {2, 1, 6, 1, 1, 7, 5, 1, 0, 1};
> +  for (; i; ++i)
> +    bar31_();
> +  j = l;
> +  struct g m = j;
> +  struct g k = m;
> +  if (k.h.b)
> +    ;
> +  else
> +    foo();
> +}
> +/* The call to foo should be optimized away. */
> +/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c
> new file mode 100644
> index 00000000000..37766fbe296
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114169-1.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-forwprop-details -fdump-tree-optimized" } */
> +
> +
> +/* PR tree-optimization/114169 */
> +
> +#include <stdint.h>
> +
> +struct S1 {
> +   uint32_t  f0;
> +   uint8_t  f1;
> +   uint64_t  f2;
> +   uint64_t  f3;
> +   int32_t  f4;
> +};
> +
> +union U8 {
> +   struct S1  f0;
> +   int32_t  f1;
> +   int64_t  f2;
> +   uint8_t  f3;
> +   const int64_t  f4;
> +};
> +
> +/* --- GLOBAL VARIABLES --- */
> +struct S1 g_16 = {4294967293UL,1UL,1UL,0xA9C1C73B017290B1LL,0x5ADF851FL};
> +union U8 g_37 = {{1UL,1UL,0x2361AE7D51263067LL,0xEEFD7F9B64A47447LL,0L}};
> +struct S1 g_50 = 
> {0x0CFC2012L,1UL,0x43E1243B3BE7B8BBLL,0x03C5CEC10C1A6FE1LL,1L};
> +
> +
> +/* --- FORWARD DECLARATIONS --- */
> +
> +void func_32(union U8 e) {
> +  e.f3 = e.f0.f4;
> +  g_16 = e.f0 = g_50;
> +}
> +/* The union e should not make a difference here.  */
> +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump "g_16 = g_50;" "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr57361-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr57361-1.c
> new file mode 100644
> index 00000000000..dc4fadbae41
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr57361-1.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-forwprop1-details" } */
> +
> +struct A { int x; double y; };
> +void f (struct A *a) {
> +  *a = *a;
> +}
> +
> +/* xfailed until figuring out the best way to handle aliasing barriers. */
> +/* { dg-final { scan-tree-dump "into a NOP" "forwprop1" { xfail *-*-* } } } 
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr57361.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr57361.c
> index 81f27b3cd1f..7e273dbe2e1 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr57361.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr57361.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O -fdump-tree-dse1-details" } */
> +/* { dg-options "-O -fdump-tree-dse1-details -fno-tree-forwprop" } */
>
>  struct A { int x; double y; };
>  void f (struct A *a) {
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index 81ea7d4195e..6a2a8dcce84 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1344,6 +1344,88 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, 
> tree dest, tree src, tree
>      }
>    return true;
>  }
> +/* Optimizes
> +   a = c;
> +   b = a;
> +   into
> +   a = c;
> +   b = c;
> +   GSIP is the second statement and SRC is the common
> +   between the statements.
> +*/
> +static bool
> +optimize_agr_copyprop (gimple_stmt_iterator *gsip)
> +{
> +  gimple *stmt = gsi_stmt (*gsip);
> +  if (gimple_has_volatile_ops (stmt))
> +    return false;
> +
> +  tree dest = gimple_assign_lhs (stmt);
> +  tree src = gimple_assign_rhs1 (stmt);
> +  /* If the statement is `src = src;` then ignore it. */
> +  if (operand_equal_p (dest, src, 0))
> +    return false;
> +
> +  tree vuse = gimple_vuse (stmt);
> +  /* If the vuse is the default definition, then there is no store 
> beforehand.  */
> +  if (SSA_NAME_IS_DEFAULT_DEF (vuse))
> +    return false;
> +  gimple *defstmt = SSA_NAME_DEF_STMT (vuse);
> +  if (!gimple_assign_load_p (defstmt)
> +      || !gimple_store_p (defstmt))
> +    return false;
> +  if (gimple_has_volatile_ops (defstmt))
> +    return false;
> +
> +  tree dest2 = gimple_assign_lhs (defstmt);
> +  tree src2 = gimple_assign_rhs1 (defstmt);
> +
> +  /* If the original store is `src2 = src2;` skip over it. */
> +  if (operand_equal_p (src2, dest2, 0))
> +    return false;
> +  if (!operand_equal_p (src, dest2, 0))
> +    return false;
> +
> +
> +  /* For 2 memory refences and using a temporary to do the copy,
> +     don't remove the temporary as the 2 memory references might overlap.
> +     Note t does not need to be decl as it could be field.
> +     See PR 22237 for full details.
> +     E.g.
> +     t = *a;
> +     *b = t;
> +     Cannot be convert into
> +     t = *a;
> +     *b = *a;
> +     Though the following is allowed to be done:
> +     t = *a;
> +     *a = t;
> +     And convert it into:
> +     t = *a;
> +     *a = *a;
> +  */
> +  if (!operand_equal_p (src2, dest, 0)
> +      && !DECL_P (dest) && !DECL_P (src2))
> +    return false;
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "Simplified\n  ");
> +      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
> +      fprintf (dump_file, "after previous\n  ");
> +      print_gimple_stmt (dump_file, defstmt, 0, dump_flags);
> +    }
> +  gimple_assign_set_rhs_from_tree (gsip, unshare_expr (src2));
> +  update_stmt (stmt);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "into\n  ");
> +      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
> +    }
> +  statistics_counter_event (cfun, "copy prop for aggregate", 1);
> +  return true;
> +}
>
>  /* *GSI_P is a GIMPLE_CALL to a builtin function.
>     Optimize
> @@ -4720,6 +4802,11 @@ pass_forwprop::execute (function *fun)
>                             changed = true;
>                             break;
>                           }
> +                       if (optimize_agr_copyprop (&gsi))
> +                         {
> +                           changed = true;
> +                           break;
> +                         }
>                       }
>
>                     if (TREE_CODE_CLASS (code) == tcc_comparison)
> --
> 2.43.0
>

Reply via email to