This patch improves memset optimization when the length is known to be 0 or 1.
It uses Ranger information to recognize such cases, shrink-wraps the call so
that it is skipped in the zero-length case, and replaces the one-byte case with
a direct byte store.  It also extends gimple_fold_builtin_memset to handle
Ranger-proven singleton lengths, not just integer constants.
PR tree-optimization/102202
gcc/ChangeLog:
* tree-call-cdce.cc: Include "tree-ssanames.h" and "gimple-fold.h".
(memset_len_boolean_range_p): New function.
(can_shrink_wrap_memset_p): New function.
(gen_memset_conditions): New function.
(shrink_wrap_one_memset_call): New function.
(shrink_wrap_conditional_dead_built_in_calls): Dispatch to
shrink_wrap_one_memset_call for memset calls eligible for the [0, 1]
length transform, ahead of the generic LHS and range-test paths.
(pass_call_cdce::execute): Collect memset calls satisfying
can_shrink_wrap_memset_p as shrink-wrap candidates.
gcc/testsuite/ChangeLog:
* gcc.dg/pr102202-1.c: New test.
* gcc.dg/pr102202.c: New test.
* gcc.target/aarch64/pr100518.c: Add dg-warning for the "writing 1 byte
into a region of size 0" diagnostic now issued for the memset call.
Signed-off-by: Naveen <[email protected]>
---
gcc/testsuite/gcc.dg/pr102202-1.c | 16 +++
gcc/testsuite/gcc.dg/pr102202.c | 54 ++++++++++
gcc/testsuite/gcc.target/aarch64/pr100518.c | 2 +-
gcc/tree-call-cdce.cc | 108 +++++++++++++++++++-
4 files changed, 174 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/pr102202-1.c
create mode 100644 gcc/testsuite/gcc.dg/pr102202.c
diff --git a/gcc/testsuite/gcc.dg/pr102202-1.c b/gcc/testsuite/gcc.dg/pr102202-1.c
new file mode 100644
index 00000000000..480bf749ef0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr102202-1.c
@@ -0,0 +1,16 @@
+/* PR tree-optimization/102202 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* A [0, 2] length is not a boolean range, so the call must be left alone. */
+
+void
+g1 (int a, int c, char *d)
+{
+ if (a < 0 || a > 2)
+ __builtin_unreachable ();
+
+ __builtin_memset (d, c, a);
+}
+
+/* { dg-final { scan-tree-dump "__builtin_memset" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/pr102202.c b/gcc/testsuite/gcc.dg/pr102202.c
new file mode 100644
index 00000000000..7e6673e168d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr102202.c
@@ -0,0 +1,54 @@
+/* PR tree-optimization/102202 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void
+g1 (int a, char *d)
+{
+ if (a < 0 || a > 1)
+ __builtin_unreachable ();
+
+ __builtin_memset (d, 0, a);
+}
+
+char *
+g2 (unsigned a, char *d)
+{
+ return __builtin_memset (d, 1, a & 1);
+}
+
+void
+g3 (int a, int c, char *d)
+{
+ if (a < 0 || a > 1)
+ __builtin_unreachable ();
+
+ __builtin_memset (d, c, a);
+}
+
+void
+g4 (int a, int c, char *d)
+{
+ if (a < 0 || a > 1)
+ return;
+
+ __builtin_memset (d, c, a);
+}
+
+void *
+g5 (char *d, int c)
+{
+ return __builtin_memset (d, c, 1);
+}
+
+void
+g6 (char *d, int c)
+{
+ __builtin_memset (d, c, 1);
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_memset" "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MEM[^;\n\r]*= 0;} 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MEM[^;\n\r]*= 1;} 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {\(unsigned char\) c_[0-9]+\(D\)} 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {MEM[^;\n\r]*= _[0-9]+;} 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr100518.c b/gcc/testsuite/gcc.target/aarch64/pr100518.c
index 177991cfb22..eb8d947185a 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr100518.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr100518.c
@@ -5,5 +5,5 @@ int unsigned_range_min, unsigned_range_max, a11___trans_tmp_1;
void a11() {
a11___trans_tmp_1 = unsigned_range_max < unsigned_range_min;
- __builtin_memset((char *)1, 0, a11___trans_tmp_1);
+ __builtin_memset((char *)1, 0, a11___trans_tmp_1); /* { dg-warning "writing 1 byte into a region of size 0" } */
}
diff --git a/gcc/tree-call-cdce.cc b/gcc/tree-call-cdce.cc
index 2be891a7222..0c0255eb022 100644
--- a/gcc/tree-call-cdce.cc
+++ b/gcc/tree-call-cdce.cc
@@ -37,7 +37,9 @@ along with GCC; see the file COPYING3. If not see
#include "internal-fn.h"
#include "tree-dfa.h"
#include "tree-eh.h"
-
+#include "tree-ssanames.h"
+#include "gimple-fold.h"
+
/* This pass serves two closely-related purposes:
@@ -1255,6 +1257,96 @@ use_internal_fn (gcall *call)
is_arg_conds ? new_call : NULL);
}
+/* Return true if LEN is an SSA_NAME known to have a boolean range, i.e. its
+ value is provably in [0, 1]. */
+
+static bool
+memset_len_boolean_range_p (tree len, gimple *stmt)
+{
+ if (TREE_CODE (len) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (len)))
+ return false;
+ return ssa_name_has_boolean_range (len, stmt);
+}
+
+/* Return true if CALL is a memset that may be shrink-wrapped because its
+ length argument is an SSA_NAME known to have a boolean range. */
+
+static bool
+can_shrink_wrap_memset_p (gcall *call)
+{
+ tree fndecl = gimple_call_fndecl (call);
+
+ if (!fndecl || !fndecl_built_in_p (fndecl, BUILT_IN_MEMSET))
+ return false;
+
+ if (gimple_call_num_args (call) != 3)
+ return false;
+
+ /* memset should not be declared as pure/const. */
+ if (!gimple_vdef (call))
+ return false;
+
+ return memset_len_boolean_range_p (gimple_call_arg (call, 2), call);
+}
+
+/* Generate the condition vector that guards a memset whose length is known
+ to be in [0, 1]: skip the call when the length is zero. */
+
+static void
+gen_memset_conditions (gcall *call, vec<gimple *> &conds, unsigned *nconds)
+{
+ tree len = gimple_call_arg (call, 2);
+ tree zero = build_zero_cst (TREE_TYPE (len));
+
+ gcc_assert (nconds);
+ conds.quick_push (gimple_build_cond (EQ_EXPR, len, zero,
+ NULL_TREE, NULL_TREE));
+ *nconds = 1;
+}
+
+/* Shrink wrap a memset call whose length is known to be in [0, 1]. On the
+ guarded path the length is one, so pin it to a constant and let the generic
+ memset folder turn the call into a single byte store, instead of
+ open-coding the store here. */
+
+static void
+shrink_wrap_one_memset_call (gcall *call)
+{
+ tree lhs = gimple_call_lhs (call);
+
+ /* The memset return value is always the destination pointer, so define it
+ on both the guarded and the bypass path before wrapping. */
+ if (lhs)
+ {
+ tree dest = gimple_call_arg (call, 0);
+ location_t loc = gimple_location (call);
+ gimple_stmt_iterator gsi = gsi_for_stmt (call);
+
+ dest = gimple_convert (&gsi, true, GSI_SAME_STMT, loc,
+ TREE_TYPE (lhs), dest);
+ gassign *stmt = gimple_build_assign (lhs, dest);
+ gimple_set_location (stmt, loc);
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+
+ gimple_call_set_lhs (call, NULL_TREE);
+ SSA_NAME_DEF_STMT (lhs) = stmt;
+ }
+
+ unsigned nconds = 0;
+ auto_vec<gimple *, 1> conds;
+ gen_memset_conditions (call, conds, &nconds);
+ gcc_assert (nconds != 0);
+
+ shrink_wrap_one_built_in_call_with_conds (call, conds, nconds);
+
+ /* On the guarded path the length is one. Pin it to a constant so the
+ generic memset folder can replace the call with a single byte store. */
+ tree len = gimple_call_arg (call, 2);
+ gimple_call_set_arg (call, 2, build_one_cst (TREE_TYPE (len)));
+ gimple_stmt_iterator gsi = gsi_for_stmt (call);
+ fold_stmt (&gsi);
+}
+
/* The top level function for conditional dead code shrink
wrapping transformation. */
@@ -1267,8 +1359,13 @@ shrink_wrap_conditional_dead_built_in_calls (const vec<gcall *> &calls)
for (; i < n ; i++)
{
gcall *bi_call = calls[i];
- if (gimple_call_lhs (bi_call))
+
+ /* Use the memset specific transform for the [0, 1] length case. */
+ if (can_shrink_wrap_memset_p (bi_call))
+ shrink_wrap_one_memset_call (bi_call);
+ else if (gimple_call_lhs (bi_call))
use_internal_fn (bi_call);
+ /* Other eligible calls are shrink wrapped by the generic path. */
else
shrink_wrap_one_built_in_call (bi_call);
}
@@ -1328,9 +1425,10 @@ pass_call_cdce::execute (function *fun)
gcall *stmt = dyn_cast <gcall *> (gsi_stmt (i));
if (stmt
&& gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
- && (gimple_call_lhs (stmt)
- ? can_use_internal_fn (stmt)
- : can_test_argument_range (stmt))
+ && (can_shrink_wrap_memset_p (stmt)
+ || (gimple_call_lhs (stmt)
+ ? can_use_internal_fn (stmt)
+ : can_test_argument_range (stmt)))
&& can_guard_call_p (stmt))
{
if (dump_file && (dump_flags & TDF_DETAILS))
--
2.34.1