Hi,
When niter runs after the copy-header pass it sometimes fails to
simplify assumptions in a ctz loop.
As the assumption is a simple nonzero test here and we can have
ranger get us the range of the shifted expression, then verify that
this range is nonzero.
This helps recognize a ctz loop in 502.gcc's compute_transp.
Changes from v1:
- Use ranger to verify assumption.
Bootstrapped on x86 and power10, regtested on rv64gcvb_zvl512b and aarch64.
Regards
Robin
PR/tree-optimization 122207
gcc/ChangeLog:
* tree-ssa-loop-niter.cc (number_of_iterations_cltz): Use
ranger to verify assumption.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/ctz-ch.c: New test.
---
gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c | 23 ++++++++
gcc/tree-ssa-loop-niter.cc | 81 ++++++++++++++++++++++++--
2 files changed, 100 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
new file mode 100644
index 00000000000..d7a17c7464f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+typedef unsigned long BITMAP_WORD;
+
+bool
+bmp_iter_set (BITMAP_WORD bits, unsigned *bit_no)
+{
+ /* If our current word is nonzero, it contains the bit we want. */
+ if (bits)
+ {
+ while (!(bits & 1))
+ {
+ bits >>= 1;
+ *bit_no += 1;
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } }
*/
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 6e130862549..1be3d8c1538 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2335,6 +2335,8 @@ is_rshift_by_1 (gassign *stmt)
*/
+#include "gimple-range-path.h"
+
static bool
number_of_iterations_cltz (loop_p loop, edge exit,
enum tree_code code,
@@ -2438,6 +2440,8 @@ number_of_iterations_cltz (loop_p loop, edge exit,
tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
int src_precision = TYPE_PRECISION (TREE_TYPE (src));
+ tree unshifted_src = src;
+
/* Apply any needed preprocessing to src. */
int num_ignored_bits;
if (left_shift)
@@ -2448,7 +2452,7 @@ number_of_iterations_cltz (loop_p loop, edge exit,
if (modify_before_test)
num_ignored_bits++;
- if (num_ignored_bits != 0)
+ if (num_ignored_bits)
src = fold_build2 (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR,
TREE_TYPE (src), src,
build_int_cst (integer_type_node, num_ignored_bits));
@@ -2463,10 +2467,76 @@ number_of_iterations_cltz (loop_p loop, edge exit,
expr = fold_convert (unsigned_type_node, expr);
- tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
- build_zero_cst (TREE_TYPE (src)));
+ /* If the copy-header (ch) pass peeled one iteration we're shifting
+ SRC by preprocessing it above.
+
+ A loop like
+ if (bits)
+ {
+ while (!(bits & 1))
+ {
+ bits >>= 1;
+ cnt += 1;
+ }
+ return cnt;
+ }
+ ch (roughly) transforms into:
+ if (bits)
+ {
+ if (!(bits & 1)
+ {
+ do
+ {
+ bits >>= 1;
+ cnt += 1;
+ } while (!(bits & 1));
+ }
+ else
+ cnt = 1;
+ return cnt;
+ }
- niter->assumptions = simplify_using_initial_conditions (loop, assumptions);
+ Then, our preprocessed SRC (that is used for c[tl]z computation)
+ will be bits >> 1, and the assumption is bits >> 1 != 0. */
+ tree assumptions = boolean_false_node;
+ int_range_max r (TREE_TYPE (unshifted_src));
+
+ /* As we don't have a gimple statement to query, use the range of the
+ unshifted SRC, then apply the shift manually and check if the
+ resulting range is nonzero. */
+ if (get_range_query (cfun)->range_on_edge
+ (r, loop_preheader_edge (loop), unshifted_src)
+ && !r.varying_p ()
+ && !r.undefined_p ())
+ {
+ if (num_ignored_bits)
+ {
+ range_op_handler op (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR);
+ int_range_max shifted_range (TREE_TYPE (unshifted_src));
+ wide_int shift_count = wi::shwi (num_ignored_bits,
+ TYPE_PRECISION (TREE_TYPE (src)));
+ int_range_max shift_amount
+ (TREE_TYPE (unshifted_src), shift_count, shift_count);
+
+ if (op.fold_range (shifted_range, TREE_TYPE (unshifted_src), r,
+ shift_amount))
+ r = shifted_range;
+ }
+
+ /* If the range is non-zero we are good. */
+ if (!r.nonzero_p ())
+ assumptions = boolean_true_node;
+ }
+
+ /* If that didn't work use simplify_using_initial_conditions. */
+ if (assumptions == boolean_false_node)
+ {
+ assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
+ build_zero_cst (TREE_TYPE (src)));
+ assumptions = simplify_using_initial_conditions (loop, assumptions);
+ }
+
+ niter->assumptions = assumptions;
niter->may_be_zero = boolean_false_node;
niter->niter = simplify_using_initial_conditions (loop, expr);
@@ -5146,9 +5216,12 @@ estimate_numbers_of_iterations (function *fn)
loop iteration estimates. */
fold_defer_overflow_warnings ();
+ enable_ranger (fn);
+
for (auto loop : loops_list (fn, 0))
estimate_numbers_of_iterations (loop);
+ disable_ranger (fn);
fold_undefer_and_ignore_overflow_warnings ();
}
--
2.51.0