https://gcc.gnu.org/g:899e7284bfa0b736165c3d9d5c18d5d883c5cbfb

commit r16-3094-g899e7284bfa0b736165c3d9d5c18d5d883c5cbfb
Author: Andrew Pinski <quic_apin...@quicinc.com>
Date:   Sun Jun 8 23:13:23 2025 -0700

    forwprop: Change proping memset into memcpy into a forwprop rather than a 
backwalk
    
    One thing I noticed while working on copy prop for aggregates is that we 
start with
    a memcpy like statement and then walk backwards. This means we could have a 
few walks
    backwards to see there was no statement for zeroing. Instead this changes 
the walk
    backwards into a true forwprop. In the future we can expand to forwprop the 
zeroing
    into say an function argument or something more than memcpy like statement.
    
    This should speed up slightly the compile time performance since there will 
be less
    memsets like statements than memcpy and there is only one walk forwards for 
memset like
    staments instead of multiple walk backwards to find the memset.
    
    Note this does add one extra improvement, the memcpy now does not need to 
have an address
    as its dest argument; this could have been done before too but it was even 
more noticable
    now because of the variable became only set so it was removed and the check 
was removed
    as well.
    
    There is also a fix on how ao_ref for the memset/memcpy is done, before it 
was just using
    ao_ref_init which is wrong since it should instead of used 
ao_ref_init_from_ptr_and_size.
    This part fixes PR 121422.
    
    Changes since v1:
    * v2: Add back limit on the walk which was missed in v1.
          Move the call to get_addr_base_and_unit_offset outside
            of the vuse loop.
    * v3: Remove extra check before the call to optimize_aggr_zeroprop_1.
          Fix setting up of ao_ref for memset (PR121422).
    
            PR tree-optimization/118946
            PR tree-optimization/121422
    
    gcc/ChangeLog:
    
            * tree-ssa-forwprop.cc (optimize_memcpy_to_memset): Remove.
            (optimize_aggr_zeroprop_1): New function.
            (optimize_aggr_zeroprop): New function.
            (simplify_builtin_call): Don't call optimize_memcpy_to_memset
            for memcpy but call optimize_aggr_zeroprop for memset.
            (pass_forwprop::execute): Don't call optimize_memcpy_to_memset
            for aggregate copies but rather call optimize_aggr_zeroprop
            for aggregate stores.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.dg/pr118946-1.c: New test.
            * gcc.dg/torture/pr121422-1.c: New test.
            * gcc.dg/torture/pr121422-2.c: New test.
    
    Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>

Diff:
---
 gcc/testsuite/gcc.dg/pr118946-1.c         |  15 ++
 gcc/testsuite/gcc.dg/torture/pr121422-1.c |  35 ++++
 gcc/testsuite/gcc.dg/torture/pr121422-2.c |  36 ++++
 gcc/tree-ssa-forwprop.cc                  | 300 +++++++++++++++++-------------
 4 files changed, 258 insertions(+), 128 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr118946-1.c 
b/gcc/testsuite/gcc.dg/pr118946-1.c
new file mode 100644
index 000000000000..6cf2661f2864
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr118946-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-forwprop1-details" } */
+
+/* PR tree-optimization/118946 */
+
+void f(char *a)
+{
+  char t[1024] = {};
+  __builtin_memcpy(a, t, 10);
+}
+
+/* We should be able to optimize the memcpy into a memset here. */
+/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1"} } */
+/* { dg-final { scan-tree-dump-times "memset " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "memcpy " "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/torture/pr121422-1.c 
b/gcc/testsuite/gcc.dg/torture/pr121422-1.c
new file mode 100644
index 000000000000..136f80d3ead1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr121422-1.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* PR tree-optimization/121422 */
+
+struct s1
+{
+  char a[4];
+};
+struct s1 b;
+char t[4];
+
+/* if both t and b startout zero initialized before this function,
+   t should end up being:
+   {0, 0, 1, 0}
+   while b.a should end up being:
+   {0, 0, 0, 1} 
+*/
+__attribute__((noipa,noinline))
+void f(void)
+{
+  b = (struct s1){};
+  b.a[3] = 1;
+  /* This memcpy should stay a memcpy and not become memset. */
+  __builtin_memcpy(&t[0], &b.a[1], 3*sizeof(t[0]));
+}
+
+
+int main()
+{
+  f();
+  for(int i = 0; i < 4; i++)
+  {
+        if (t[i] != (i == 2 ? 1 : 0))
+          __builtin_abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr121422-2.c 
b/gcc/testsuite/gcc.dg/torture/pr121422-2.c
new file mode 100644
index 000000000000..570559c6c734
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr121422-2.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* PR tree-optimization/121422 */
+
+struct s1
+{
+  char a[4];
+};
+struct s1 b;
+char t[4];
+
+/* if both t and b startout zero initialized before this function,
+   t should end up being:
+   {0, 0, 1, 0}
+   while b.a should end up being:
+   {0, 0, 0, 1}
+*/
+__attribute__((noipa,noinline))
+void f(void)
+{
+  __builtin_memset(&b.a[1], 0, 2);
+  b.a[3] = 1;
+  /* This memcpy should stay a memcpy and not become memset. */
+  __builtin_memcpy(&t[0], &b.a[1], 3);
+}
+
+
+int main()
+{
+  f();
+  for(int i = 0; i < 4; i++)
+  {
+        if (t[i] != (i == 2 ? 1 : 0))
+          __builtin_abort();
+  }
+}
+
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 2dc77ccba1d7..156ea3228676 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -1190,117 +1190,55 @@ constant_pointer_difference (tree p1, tree p2)
   return NULL_TREE;
 }
 
-
-/* Optimize
-   a = {};
-   b = a;
-   into
-   a = {};
-   b = {};
-   Similarly for memset (&a, ..., sizeof (a)); instead of a = {};
-   and/or memcpy (&b, &a, sizeof (a)); instead of b = a;  */
-
+/* Helper function for optimize_aggr_zeroprop.
+   Props the zeroing (memset, VAL) that was done in DEST+OFFSET:LEN
+   (DEFSTMT) into the STMT.  Returns true if the STMT was updated.  */
 static bool
-optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, 
tree len)
+optimize_aggr_zeroprop_1 (gimple *defstmt, gimple *stmt,
+                         tree dest, poly_int64 offset, tree val,
+                         poly_offset_int len)
 {
-  ao_ref read;
-  gimple *stmt = gsi_stmt (*gsip);
-  if (gimple_has_volatile_ops (stmt))
-    return false;
-
-  tree src2 = NULL_TREE, len2 = NULL_TREE;
-  poly_int64 offset, offset2;
-  tree val = integer_zero_node;
-  bool len_was_null = len == NULL_TREE;
-  if (len == NULL_TREE)
-    len = (TREE_CODE (src) == COMPONENT_REF
-          ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1))
-          : TYPE_SIZE_UNIT (TREE_TYPE (src)));
-  if (len == NULL_TREE
-      || !poly_int_tree_p (len))
-    return false;
+  tree src2;
+  tree len2 = NULL_TREE;
+  poly_int64 offset2;
 
-  ao_ref_init (&read, src);
-  tree vuse = gimple_vuse (stmt);
-  gimple *defstmt;
-  unsigned limit = param_sccvn_max_alias_queries_per_access;
-  do {
-    /* If the vuse is the default definition, then there is no stores 
beforhand. */
-    if (SSA_NAME_IS_DEFAULT_DEF (vuse))
-      return false;
-    defstmt = SSA_NAME_DEF_STMT (vuse);
-    if (is_a <gphi*>(defstmt))
-      return false;
-    if (limit-- == 0)
-      return false;
-    /* If the len was null, then we can use TBBA. */
-    if (stmt_may_clobber_ref_p_1 (defstmt, &read,
-                                 /* tbaa_p = */ len_was_null))
-      break;
-    vuse = gimple_vuse (defstmt);
-  } while (true);
-
-  if (gimple_store_p (defstmt)
-      && gimple_assign_single_p (defstmt)
-      && TREE_CODE (gimple_assign_rhs1 (defstmt)) == STRING_CST
-      && !gimple_clobber_p (defstmt))
+  if (gimple_call_builtin_p (stmt, BUILT_IN_MEMCPY)
+      && TREE_CODE (gimple_call_arg (stmt, 1)) == ADDR_EXPR
+      && poly_int_tree_p (gimple_call_arg (stmt, 2)))
     {
-      tree str = gimple_assign_rhs1 (defstmt);
-      src2 = gimple_assign_lhs (defstmt);
-      /* The string must contain all null char's for now.  */
-      for (int i = 0; i < TREE_STRING_LENGTH (str); i++)
-       {
-         if (TREE_STRING_POINTER (str)[i] != 0)
-           {
-             src2 = NULL_TREE;
-             break;
-           }
-       }
-    }
-  else if (gimple_store_p (defstmt)
-      && gimple_assign_single_p (defstmt)
-      && TREE_CODE (gimple_assign_rhs1 (defstmt)) == CONSTRUCTOR
-      && !gimple_clobber_p (defstmt))
-    src2 = gimple_assign_lhs (defstmt);
-  else if (gimple_call_builtin_p (defstmt, BUILT_IN_MEMSET)
-          && TREE_CODE (gimple_call_arg (defstmt, 0)) == ADDR_EXPR
-          && TREE_CODE (gimple_call_arg (defstmt, 1)) == INTEGER_CST)
-    {
-      src2 = TREE_OPERAND (gimple_call_arg (defstmt, 0), 0);
-      len2 = gimple_call_arg (defstmt, 2);
-      val = gimple_call_arg (defstmt, 1);
-      /* For non-0 val, we'd have to transform stmt from assignment
-        into memset (only if dest is addressable).  */
-      if (!integer_zerop (val) && is_gimple_assign (stmt))
-       src2 = NULL_TREE;
+      src2 = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
+      len2 = gimple_call_arg (stmt, 2);
     }
+   else if (gimple_assign_load_p (stmt) && gimple_store_p (stmt))
+     {
+       src2 = gimple_assign_rhs1 (stmt);
+       len2 = (TREE_CODE (src2) == COMPONENT_REF
+               ? DECL_SIZE_UNIT (TREE_OPERAND (src2, 1))
+               : TYPE_SIZE_UNIT (TREE_TYPE (src2)));
+       /* Can only handle zero memsets. */
+       if (!integer_zerop (val))
+         return false;
+     }
+   else
+     return false;
 
-  if (src2 == NULL_TREE)
-    return false;
-
-  if (len2 == NULL_TREE)
-    len2 = (TREE_CODE (src2) == COMPONENT_REF
-           ? DECL_SIZE_UNIT (TREE_OPERAND (src2, 1))
-           : TYPE_SIZE_UNIT (TREE_TYPE (src2)));
   if (len2 == NULL_TREE
       || !poly_int_tree_p (len2))
     return false;
 
-  src = get_addr_base_and_unit_offset (src, &offset);
   src2 = get_addr_base_and_unit_offset (src2, &offset2);
-  if (src == NULL_TREE
-      || src2 == NULL_TREE
-      || maybe_lt (offset, offset2))
+  if (src2 == NULL_TREE
+      || maybe_lt (offset2, offset))
     return false;
 
-  if (!operand_equal_p (src, src2, 0))
+  if (!operand_equal_p (dest, src2, 0))
     return false;
 
-  /* [ src + offset2, src + offset2 + len2 - 1 ] is set to val.
+  /* [ dest + offset, dest + offset + len - 1 ] is set to val.
      Make sure that
-     [ src + offset, src + offset + len - 1 ] is a subset of that.  */
-  if (maybe_gt (wi::to_poly_offset (len) + (offset - offset2),
-               wi::to_poly_offset (len2)))
+     [ dest + offset2, dest + offset2 + len2 - 1 ] is a subset of that.  */
+  if (maybe_gt (wi::to_poly_offset (len2) + (offset2 - offset),
+               len))
     return false;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -1310,7 +1248,7 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, 
tree dest, tree src, tree
       fprintf (dump_file, "after previous\n  ");
       print_gimple_stmt (dump_file, defstmt, 0, dump_flags);
     }
-
+  gimple *orig_stmt = stmt;
   /* For simplicity, don't change the kind of the stmt,
      turn dest = src; into dest = {}; and memcpy (&dest, &src, len);
      into memset (&dest, val, len);
@@ -1320,8 +1258,10 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, 
tree dest, tree src, tree
      of dest, dest isn't volatile.  */
   if (is_gimple_assign (stmt))
     {
-      tree ctor = build_constructor (TREE_TYPE (dest), NULL);
-      gimple_assign_set_rhs_from_tree (gsip, ctor);
+      tree ctor_type = TREE_TYPE (gimple_assign_lhs (stmt));
+      tree ctor = build_constructor (ctor_type, NULL);
+      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+      gimple_assign_set_rhs_from_tree (&gsi, ctor);
       update_stmt (stmt);
       statistics_counter_event (cfun, "copy zeroing propagation of aggregate", 
1);
     }
@@ -1341,8 +1281,126 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, 
tree dest, tree src, tree
       fprintf (dump_file, "into\n  ");
       print_gimple_stmt (dump_file, stmt, 0, dump_flags);
     }
+
+  /* Mark the bb for eh cleanup if needed.  */
+  if (maybe_clean_or_replace_eh_stmt (orig_stmt, stmt))
+    bitmap_set_bit (to_purge, gimple_bb (stmt)->index);
+
   return true;
 }
+
+/* Optimize
+   a = {}; // DEST = value ;; LEN(nullptr)
+   b = a;
+   into
+   a = {};
+   b = {};
+   Similarly for memset (&a, ..., sizeof (a)); instead of a = {};
+   and/or memcpy (&b, &a, sizeof (a)); instead of b = a;  */
+
+static bool
+optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
+{
+  ao_ref read;
+  gimple *stmt = gsi_stmt (*gsip);
+  if (gimple_has_volatile_ops (stmt))
+    return false;
+
+  tree dest = NULL_TREE;
+  tree val = integer_zero_node;
+  tree len = NULL_TREE;
+  bool can_use_tbba = true;
+  bool changed = false;
+
+  if (gimple_call_builtin_p (stmt, BUILT_IN_MEMSET)
+      && TREE_CODE (gimple_call_arg (stmt, 0)) == ADDR_EXPR
+      && TREE_CODE (gimple_call_arg (stmt, 1)) == INTEGER_CST
+      && poly_int_tree_p (gimple_call_arg (stmt, 2)))
+    {
+      dest = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
+      len = gimple_call_arg (stmt, 2);
+      val = gimple_call_arg (stmt, 1);
+      ao_ref_init_from_ptr_and_size (&read, gimple_call_arg (stmt, 0), len);
+      can_use_tbba = false;
+    }
+  else if (gimple_store_p (stmt)
+          && gimple_assign_single_p (stmt)
+          && TREE_CODE (gimple_assign_rhs1 (stmt)) == STRING_CST)
+    {
+      tree str = gimple_assign_rhs1 (stmt);
+      dest = gimple_assign_lhs (stmt);
+      ao_ref_init (&read, dest);
+      /* The string must contain all null char's for now.  */
+      for (int i = 0; i < TREE_STRING_LENGTH (str); i++)
+       {
+         if (TREE_STRING_POINTER (str)[i] != 0)
+           {
+             dest = NULL_TREE;
+             break;
+           }
+       }
+    }
+  else if (gimple_store_p (stmt)
+          && gimple_assign_single_p (stmt)
+          && TREE_CODE (gimple_assign_rhs1 (stmt)) == CONSTRUCTOR
+          && !gimple_clobber_p (stmt))
+    {
+      dest = gimple_assign_lhs (stmt);
+      ao_ref_init (&read, dest);
+    }
+
+  if (dest == NULL_TREE)
+    return false;
+
+  if (len == NULL_TREE)
+    len = (TREE_CODE (dest) == COMPONENT_REF
+          ? DECL_SIZE_UNIT (TREE_OPERAND (dest, 1))
+          : TYPE_SIZE_UNIT (TREE_TYPE (dest)));
+  if (len == NULL_TREE
+      || !poly_int_tree_p (len))
+    return false;
+
+  /* This store needs to be on the byte boundary and pointing to an object.  */
+  poly_int64 offset;
+  tree dest_base = get_addr_base_and_unit_offset (dest, &offset);
+  if (dest_base == NULL_TREE)
+    return false;
+
+  /* Setup the worklist.  */
+  auto_vec<std::pair<tree, unsigned>> worklist;
+  unsigned limit = param_sccvn_max_alias_queries_per_access;
+  worklist.safe_push (std::make_pair (gimple_vdef (stmt), limit));
+
+  while (!worklist.is_empty ())
+    {
+      std::pair<tree, unsigned> top = worklist.pop ();
+      tree vdef = top.first;
+      limit = top.second;
+      gimple *use_stmt;
+      imm_use_iterator iter;
+      FOR_EACH_IMM_USE_STMT (use_stmt, iter, vdef)
+       {
+         /* Handling PHI nodes might not be worth it so don't.  */
+         if (is_a <gphi*> (use_stmt))
+           continue;
+
+         /* If this statement does not clobber add the vdef stmt to the
+            worklist.  */
+         if (gimple_vdef (use_stmt)
+             && !stmt_may_clobber_ref_p_1 (use_stmt, &read,
+                                          /* tbaa_p = */ can_use_tbba)
+             && limit != 0)
+           worklist.safe_push (std::make_pair (gimple_vdef (use_stmt),
+                                               limit - 1));
+
+         if (optimize_aggr_zeroprop_1 (stmt, use_stmt, dest_base, offset,
+                                        val, wi::to_poly_offset (len)))
+          changed = true;
+       }
+    }
+
+  return changed;
+}
 /* Optimizes
    DEST = SRC;
    DEST2 = DEST; # DEST2 = SRC2;
@@ -1462,22 +1520,6 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree 
callee2)
 
   switch (DECL_FUNCTION_CODE (callee2))
     {
-    case BUILT_IN_MEMCPY:
-      if (gimple_call_num_args (stmt2) == 3)
-       {
-         tree dest = gimple_call_arg (stmt2, 0);
-         tree src = gimple_call_arg (stmt2, 1);
-         tree len = gimple_call_arg (stmt2, 2);
-         /* Try to optimize the memcpy to memset if src
-            and dest are addresses. */
-         if (TREE_CODE (dest) == ADDR_EXPR
-             && TREE_CODE (src) == ADDR_EXPR
-             && TREE_CODE (len) == INTEGER_CST
-             && optimize_memcpy_to_memset (gsi_p, TREE_OPERAND (dest, 0),
-                                           TREE_OPERAND (src, 0), len))
-           return true;
-       }
-    break;
     case BUILT_IN_MEMCHR:
       if (gimple_call_num_args (stmt2) == 3
          && (res = gimple_call_lhs (stmt2)) != nullptr
@@ -1539,6 +1581,13 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree 
callee2)
       break;
 
     case BUILT_IN_MEMSET:
+      if (gimple_call_num_args (stmt2) == 3)
+       {
+         /* Try to prop the zeroing/value of the memset to memcpy
+            if the dest is an address and the value is a constant. */
+         if (optimize_aggr_zeroprop (gsi_p))
+           return true;
+       }
       if (gimple_call_num_args (stmt2) != 3
          || gimple_call_lhs (stmt2)
          || CHAR_BIT != 8
@@ -4857,21 +4906,16 @@ pass_forwprop::execute (function *fun)
                  {
                    tree rhs1 = gimple_assign_rhs1 (stmt);
                    enum tree_code code = gimple_assign_rhs_code (stmt);
-                   if (gimple_assign_load_p (stmt) && gimple_store_p (stmt))
+                   if (gimple_store_p (stmt) && optimize_aggr_zeroprop (&gsi))
                      {
-                       if (optimize_memcpy_to_memset (&gsi,
-                                                      gimple_assign_lhs (stmt),
-                                                      gimple_assign_rhs1 
(stmt),
-                                                      /* len = */NULL_TREE))
-                         {
-                           changed = true;
-                           break;
-                         }
-                       if (optimize_agr_copyprop (&gsi))
-                         {
-                           changed = true;
-                           break;
-                         }
+                       changed = true;
+                       break;
+                     }
+                   if (gimple_assign_load_p (stmt) && gimple_store_p (stmt)
+                       && optimize_agr_copyprop (&gsi))
+                     {
+                       changed = true;
+                       break;
                      }
 
                    if (TREE_CODE_CLASS (code) == tcc_comparison)

Reply via email to