https://gcc.gnu.org/g:c4ab45ce4765a482c61af6c61dfb045b769ab7d1

commit c4ab45ce4765a482c61af6c61dfb045b769ab7d1
Author: Robin Dapp <[email protected]>
Date:   Fri Oct 17 11:07:17 2025 +0200

    niter: Use ranger to query ctz range.
    
    When niter runs after the copy-header pass it sometimes fails to
    simplify assumptions in a ctz loop.
    As the assumption is a simple nonzero test here we can have
    ranger get us the range of the shifted expression, then verify that
    this range is nonzero.
    
    This helps recognize a ctz loop in 502.gcc's compute_transp.
    
            PR/tree-optimization 122207
    
    gcc/ChangeLog:
    
            * tree-ssa-loop-niter.cc (shifted_range_nonzero_p): New
            function.
            (number_of_iterations_cltz): Call new function.
            * tree-ssa-loop.cc (pass_scev_cprop::execute): Enable ranger.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.dg/tree-ssa/ctz-char.c: Remove -fno-tree-ch.
            * gcc.dg/tree-ssa/ctz-complement-char.c: Ditto.
            * gcc.dg/tree-ssa/ctz-complement-int.c: Ditto.
            * gcc.dg/tree-ssa/ctz-complement-long-long.c: Ditto.
            * gcc.dg/tree-ssa/ctz-complement-long.c: Ditto.
            * gcc.dg/tree-ssa/ctz-int.c: Ditto.
            * gcc.dg/tree-ssa/ctz-long-long.c: Ditto.
            * gcc.dg/tree-ssa/ctz-long.c: Ditto.
            * gcc.dg/tree-ssa/ctz-ch.c: New test.
            * gcc.dg/pr41488.c: Add -fno-tree-scev-cprop.
    
    (cherry picked from commit 0919526efcb156a17947a2fc68d7aaa487e273ae)

Diff:
---
 gcc/testsuite/gcc.dg/pr41488.c                     |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c             | 23 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c           |  2 +-
 .../gcc.dg/tree-ssa/ctz-complement-char.c          |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c |  2 +-
 .../gcc.dg/tree-ssa/ctz-complement-long-long.c     |  2 +-
 .../gcc.dg/tree-ssa/ctz-complement-long.c          |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c            |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c      |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c           |  2 +-
 gcc/tree-ssa-loop-niter.cc                         | 93 +++++++++++++++++++++-
 gcc/tree-ssa-loop.cc                               |  5 ++
 12 files changed, 127 insertions(+), 12 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr41488.c b/gcc/testsuite/gcc.dg/pr41488.c
index 1e4bf19c7da9..a7ba3672efef 100644
--- a/gcc/testsuite/gcc.dg/pr41488.c
+++ b/gcc/testsuite/gcc.dg/pr41488.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-ivcanon-scev" } */
+/* { dg-options "-O2 -fno-tree-scev-cprop -fdump-tree-ivcanon-scev" } */
 
 struct struct_t
 {
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
new file mode 100644
index 000000000000..5d725979971b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+typedef unsigned long BITMAP_WORD;
+
+bool
+bmp_iter_set (BITMAP_WORD bits, unsigned *bit_no)
+{
+  /* If our current word is nonzero, it contains the bit we want.  */
+  if (bits)
+    {
+      while (!(bits & 1))
+       {
+         bits >>= 1;
+         *bit_no += 1;
+       }
+      return true;
+    }
+
+  return false;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } 
*/
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c
index 3cd166acbd46..fa8b7f39de4b 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctz } */
-/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__)
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-char.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-char.c
index b9afe8852d8f..5ebc32131692 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-char.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-char.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctz } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__)
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c
index d2702a65daf3..0ce4b6beaa7d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctz } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long-long.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long-long.c
index 1ea0d5d7d9f8..f98bec039b35 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long-long.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long-long.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctzll } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__)
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long.c
index 80fb02dcfa68..8edb3728131c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctzl } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__ * __SIZEOF_LONG__)
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c
index 7f63493eb738..2bf3ae69b938 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctz } */
-/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c
index 924f61b76f01..2e159485cb98 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctzll } */
-/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__)
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c
index 178945daa8a2..2e3be652a0bc 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target ctzl } */
-/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 #define PREC (__CHAR_BIT__ * __SIZEOF_LONG__)
 
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 6e1308625491..cc763839edcd 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2321,6 +2321,48 @@ is_rshift_by_1 (gassign *stmt)
   return false;
 }
 
+/* Helper for number_of_iterations_cltz that uses ranger to determine
+   if SRC's range, shifted left (when LEFT_SHIFT is true) or right
+   by NUM_IGNORED_BITS, is guaranteed to be != 0 on LOOP's preheader
+   edge.
+   Return true if so or false otherwise.  */
+
+static bool
+shifted_range_nonzero_p (loop_p loop, tree src,
+                        bool left_shift, int num_ignored_bits)
+{
+  int_range_max r (TREE_TYPE (src));
+  gcc_assert (num_ignored_bits >= 0);
+
+  if (get_range_query (cfun)->range_on_edge
+      (r, loop_preheader_edge (loop), src)
+      && !r.varying_p ()
+      && !r.undefined_p ())
+    {
+      if (num_ignored_bits)
+       {
+         range_op_handler op (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR);
+         int_range_max shifted_range (TREE_TYPE (src));
+         wide_int shift_count = wi::shwi (num_ignored_bits,
+                                          TYPE_PRECISION (TREE_TYPE
+                                                          (src)));
+         int_range_max shift_amount
+           (TREE_TYPE (src), shift_count, shift_count);
+
+         if (op.fold_range (shifted_range, TREE_TYPE (src), r,
+                            shift_amount))
+           r = shifted_range;
+       }
+
+      /* If the range does not contain zero we are good.  */
+      if (!range_includes_zero_p (r))
+       return true;
+    }
+
+  return false;
+}
+
+
 /* See comment below for number_of_iterations_bitcount.
    For c[lt]z, we have:
 
@@ -2438,6 +2480,9 @@ number_of_iterations_cltz (loop_p loop, edge exit,
   tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
   int src_precision = TYPE_PRECISION (TREE_TYPE (src));
 
+  /* Save the original SSA name before preprocessing for ranger queries.  */
+  tree unshifted_src = src;
+
   /* Apply any needed preprocessing to src.  */
   int num_ignored_bits;
   if (left_shift)
@@ -2463,10 +2508,52 @@ number_of_iterations_cltz (loop_p loop, edge exit,
 
   expr = fold_convert (unsigned_type_node, expr);
 
-  tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
-                                 build_zero_cst (TREE_TYPE (src)));
+  /* If the copy-header (ch) pass peeled one iteration we're shifting
+     SRC by preprocessing it above.
+
+     A loop like
+      if (bits)
+       {
+         while (!(bits & 1))
+           {
+             bits >>= 1;
+             cnt += 1;
+           }
+         return cnt;
+       }
+     ch (roughly) transforms into:
+      if (bits)
+       {
+         if (!(bits & 1)
+           {
+             do
+               {
+                 bits >>= 1;
+                 cnt += 1;
+               } while (!(bits & 1));
+           }
+          else
+            cnt = 1;
+         return cnt;
+       }
+
+     Then, our preprocessed SRC (that is used for c[tl]z computation)
+     will be bits >> 1, and the assumption is bits >> 1 != 0.  */
+
+  tree assumptions;
+  if (shifted_range_nonzero_p (loop, unshifted_src,
+                              left_shift, num_ignored_bits))
+    assumptions = boolean_true_node;
+  else
+    {
+      /* If ranger couldn't prove the assumption, try
+        simplify_using_initial_conditions.  */
+      assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
+                                build_zero_cst (TREE_TYPE (src)));
+      assumptions = simplify_using_initial_conditions (loop, assumptions);
+    }
 
-  niter->assumptions = simplify_using_initial_conditions (loop, assumptions);
+  niter->assumptions = assumptions;
   niter->may_be_zero = boolean_false_node;
   niter->niter = simplify_using_initial_conditions (loop, expr);
 
diff --git a/gcc/tree-ssa-loop.cc b/gcc/tree-ssa-loop.cc
index 5629524afb2f..dc4b560e9245 100644
--- a/gcc/tree-ssa-loop.cc
+++ b/gcc/tree-ssa-loop.cc
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm_p.h"
 #include "fold-const.h"
 #include "gimple-iterator.h"
+#include "gimple-range.h"
 #include "tree-ssa-loop-ivopts.h"
 #include "tree-ssa-loop-manip.h"
 #include "tree-ssa-loop-niter.h"
@@ -404,11 +405,15 @@ pass_scev_cprop::execute (function *)
 {
   bool any = false;
 
+  enable_ranger (cfun);
+
   /* Perform final value replacement in loops, in case the replacement
      expressions are cheap.  */
   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
     any |= final_value_replacement_loop (loop);
 
+  disable_ranger (cfun);
+
   return any ? TODO_cleanup_cfg | TODO_update_ssa_only_virtuals : 0;
 }

Reply via email to