The patch improves memset optimization when the length is known to be 0 or 1.
It uses Ranger information to recognize such cases, shrink-wraps the call so
that it is skipped when the length is zero, and replaces the one-byte case
with a direct byte store.
gimple_fold_builtin_memset itself is unchanged: on the guarded path the length
is pinned to the constant 1 so that the existing folder emits the byte store.

PR tree-optimization/102202

gcc/ChangeLog:
        * tree-call-cdce.cc: Include "tree-ssanames.h" and "gimple-fold.h".
        (memset_len_boolean_range_p): New function.
        (can_shrink_wrap_memset_p): New function.
        (gen_memset_conditions): New function.
        (shrink_wrap_one_memset_call): New function.
        (shrink_wrap_conditional_dead_built_in_calls): Dispatch to
        shrink_wrap_one_memset_call for memset calls eligible for the [0, 1]
        length transform, ahead of the generic LHS and range-test paths.
        (pass_call_cdce::execute): Collect memset calls satisfying
        can_shrink_wrap_memset_p as shrink-wrap candidates.

gcc/testsuite/ChangeLog:
        * gcc.dg/pr102202-1.c: New test.
        * gcc.dg/pr102202.c: New test.
        * gcc.target/aarch64/pr100518.c: Add dg-warning for the new
        "writing 1 byte into a region of size 0" diagnostic.

Signed-off-by: Naveen <[email protected]>
---
 gcc/testsuite/gcc.dg/pr102202-1.c           |  16 +++
 gcc/testsuite/gcc.dg/pr102202.c             |  54 ++++++++++
 gcc/testsuite/gcc.target/aarch64/pr100518.c |   2 +-
 gcc/tree-call-cdce.cc                       | 108 +++++++++++++++++++-
 4 files changed, 174 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr102202-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr102202.c

diff --git a/gcc/testsuite/gcc.dg/pr102202-1.c b/gcc/testsuite/gcc.dg/pr102202-1.c
new file mode 100644
index 00000000000..480bf749ef0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr102202-1.c
@@ -0,0 +1,16 @@
+/* PR tree-optimization/102202 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* A [0, 2] length is not a boolean range, so the call must be left alone.  */
+
+void
+g1 (int a, int c, char *d)
+{
+  if (a < 0 || a > 2)
+    __builtin_unreachable ();
+
+  __builtin_memset (d, c, a);
+}
+
+/* { dg-final { scan-tree-dump "__builtin_memset" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/pr102202.c b/gcc/testsuite/gcc.dg/pr102202.c
new file mode 100644
index 00000000000..7e6673e168d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr102202.c
@@ -0,0 +1,54 @@
+/* PR tree-optimization/102202 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void
+g1 (int a, char *d)
+{
+  if (a < 0 || a > 1)
+    __builtin_unreachable ();
+
+  __builtin_memset (d, 0, a);
+}
+
+char *
+g2 (unsigned a, char *d)
+{
+  return __builtin_memset (d, 1, a & 1);
+}
+
+void
+g3 (int a, int c, char *d)
+{
+  if (a < 0 || a > 1)
+    __builtin_unreachable ();
+
+  __builtin_memset (d, c, a);
+}
+
+void
+g4 (int a, int c, char *d)
+{
+  if (a < 0 || a > 1)
+    return;
+
+  __builtin_memset (d, c, a);
+}
+
+void *
+g5 (char *d, int c)
+{
+  return __builtin_memset (d, c, 1);
+}
+
+void
+g6 (char *d, int c)
+{
+  __builtin_memset (d, c, 1);
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_memset" "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MEM[^;\n\r]*= 0;} 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MEM[^;\n\r]*= 1;} 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {\(unsigned char\) c_[0-9]+\(D\)} 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MEM[^;\n\r]*= _[0-9]+;} 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr100518.c b/gcc/testsuite/gcc.target/aarch64/pr100518.c
index 177991cfb22..eb8d947185a 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr100518.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr100518.c
@@ -5,5 +5,5 @@ int unsigned_range_min, unsigned_range_max, a11___trans_tmp_1;
 
 void a11() {
   a11___trans_tmp_1 = unsigned_range_max < unsigned_range_min;
-  __builtin_memset((char *)1, 0, a11___trans_tmp_1);
+  __builtin_memset((char *)1, 0, a11___trans_tmp_1); /* { dg-warning "writing 1 byte into a region of size 0" } */
 }
diff --git a/gcc/tree-call-cdce.cc b/gcc/tree-call-cdce.cc
index 2be891a7222..0c0255eb022 100644
--- a/gcc/tree-call-cdce.cc
+++ b/gcc/tree-call-cdce.cc
@@ -37,7 +37,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "tree-dfa.h"
 #include "tree-eh.h"
-
+#include "tree-ssanames.h"
+#include "gimple-fold.h"
+
 
 /* This pass serves two closely-related purposes:
 
@@ -1255,6 +1257,96 @@ use_internal_fn (gcall *call)
                                            is_arg_conds ? new_call : NULL);
 }
 
+/* Return true if LEN is an SSA_NAME known to have a boolean range, i.e. its
+   value is provably in [0, 1].  */
+
+static bool
+memset_len_boolean_range_p (tree len, gimple *stmt)
+{
+  if (TREE_CODE (len) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (len)))
+    return false;
+  return ssa_name_has_boolean_range (len, stmt);
+}
+
+/* Return true if CALL is a memset that may be shrink-wrapped because its
+   length argument is an SSA_NAME known to have a boolean range.  */
+
+static bool
+can_shrink_wrap_memset_p (gcall *call)
+{
+  tree fndecl = gimple_call_fndecl (call);
+
+  if (!fndecl || !fndecl_built_in_p (fndecl, BUILT_IN_MEMSET))
+    return false;
+
+  if (gimple_call_num_args (call) != 3)
+    return false;
+
+  /* memset should not be declared as pure/const.  */
+  if (!gimple_vdef (call))
+    return false;
+
+  return memset_len_boolean_range_p (gimple_call_arg (call, 2), call);
+}
+
+/* Generate the condition vector that guards a memset whose length is known
+   to be in [0, 1]: skip the call when the length is zero.  */
+
+static void
+gen_memset_conditions (gcall *call, vec<gimple *> &conds, unsigned *nconds)
+{
+  tree len = gimple_call_arg (call, 2);
+  tree zero = build_zero_cst (TREE_TYPE (len));
+
+  gcc_assert (nconds);
+  conds.quick_push (gimple_build_cond (EQ_EXPR, len, zero,
+                                      NULL_TREE, NULL_TREE));
+  *nconds = 1;
+}
+
+/* Shrink wrap a memset call whose length is known to be in [0, 1].  On the
+   guarded path the length is one, so pin it to a constant and let the generic
+   memset folder turn the call into a single byte store, instead of
+   open-coding the store here.  */
+
+static void
+shrink_wrap_one_memset_call (gcall *call)
+{
+  tree lhs = gimple_call_lhs (call);
+
+  /* The memset return value is always the destination pointer, so define it
+     on both the guarded and the bypass path before wrapping.  */
+  if (lhs)
+    {
+      tree dest = gimple_call_arg (call, 0);
+      location_t loc = gimple_location (call);
+      gimple_stmt_iterator gsi = gsi_for_stmt (call);
+
+      dest = gimple_convert (&gsi, true, GSI_SAME_STMT, loc,
+                            TREE_TYPE (lhs), dest);
+      gassign *stmt = gimple_build_assign (lhs, dest);
+      gimple_set_location (stmt, loc);
+      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+
+      gimple_call_set_lhs (call, NULL_TREE);
+      SSA_NAME_DEF_STMT (lhs) = stmt;
+    }
+
+  unsigned nconds = 0;
+  auto_vec<gimple *, 1> conds;
+  gen_memset_conditions (call, conds, &nconds);
+  gcc_assert (nconds != 0);
+
+  shrink_wrap_one_built_in_call_with_conds (call, conds, nconds);
+
+  /* On the guarded path the length is one.  Pin it to a constant so the
+     generic memset folder can replace the call with a single byte store.  */
+  tree len = gimple_call_arg (call, 2);
+  gimple_call_set_arg (call, 2, build_one_cst (TREE_TYPE (len)));
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  fold_stmt (&gsi);
+}
+
 /* The top level function for conditional dead code shrink
    wrapping transformation.  */
 
@@ -1267,8 +1359,13 @@ shrink_wrap_conditional_dead_built_in_calls (const vec<gcall *> &calls)
   for (; i < n ; i++)
     {
       gcall *bi_call = calls[i];
-      if (gimple_call_lhs (bi_call))
+
+      /* Use the memset specific transform for the [0, 1] length case.  */
+      if (can_shrink_wrap_memset_p (bi_call))
+       shrink_wrap_one_memset_call (bi_call);
+      else if (gimple_call_lhs (bi_call))
        use_internal_fn (bi_call);
+      /* Other eligible calls are shrink wrapped by the generic path.  */
       else
        shrink_wrap_one_built_in_call (bi_call);
     }
@@ -1328,9 +1425,10 @@ pass_call_cdce::execute (function *fun)
          gcall *stmt = dyn_cast <gcall *> (gsi_stmt (i));
           if (stmt
              && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
-             && (gimple_call_lhs (stmt)
-                 ? can_use_internal_fn (stmt)
-                 : can_test_argument_range (stmt))
+             && (can_shrink_wrap_memset_p (stmt)
+                 || (gimple_call_lhs (stmt)
+                     ? can_use_internal_fn (stmt)
+                     : can_test_argument_range (stmt)))
              && can_guard_call_p (stmt))
             {
               if (dump_file && (dump_flags & TDF_DETAILS))
-- 
2.34.1

Reply via email to