On Wed, Oct 22, 2025 at 9:17 PM Robin Dapp <[email protected]> wrote:
>
> Hi,
>
> When niter runs after the copy-header pass it sometimes fails to
> simplify assumptions in a ctz loop.
> As the assumption is a simple nonzero test here and we can have
> ranger get us the range of the shifted expression, then verify that
> this range is nonzero.
>
> This helps recognize a ctz loop in 502.gcc's compute_transp.

CCing Andrew for a look on the ranger API usage below

> Changes from v1:
>  - Use ranger to verify assumption.
>
> Bootstrapped on x86 and power10, regtested on rv64gcvb_zvl512b and aarch64.
>
> Regards
>  Robin
>
>         PR/tree-optimization 122207
>
> gcc/ChangeLog:
>
>         * tree-ssa-loop-niter.cc (number_of_iterations_cltz): Use
>         ranger to verify assumption.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/ctz-ch.c: New test.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c | 23 ++++++++
>  gcc/tree-ssa-loop-niter.cc             | 81 ++++++++++++++++++++++++--
>  2 files changed, 100 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
> new file mode 100644
> index 00000000000..d7a17c7464f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-ch.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +
> +typedef unsigned long BITMAP_WORD;
> +
> +bool
> +bmp_iter_set (BITMAP_WORD bits, unsigned *bit_no)
> +{
> +  /* If our current word is nonzero, it contains the bit we want.  */
> +  if (bits)
> +    {
> +      while (!(bits & 1))
> +       {
> +         bits >>= 1;
> +         *bit_no += 1;
> +       }
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } 
> } */
> diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
> index 6e130862549..1be3d8c1538 100644
> --- a/gcc/tree-ssa-loop-niter.cc
> +++ b/gcc/tree-ssa-loop-niter.cc
> @@ -2335,6 +2335,8 @@ is_rshift_by_1 (gassign *stmt)
>
>   */
>
> +#include "gimple-range-path.h"
> +

I think gimple-range.h should be enough, you are not using path ranger?

>  static bool
>  number_of_iterations_cltz (loop_p loop, edge exit,
>                                enum tree_code code,
> @@ -2438,6 +2440,8 @@ number_of_iterations_cltz (loop_p loop, edge exit,
>    tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
>    int src_precision = TYPE_PRECISION (TREE_TYPE (src));
>
> +  tree unshifted_src = src;
> +
>    /* Apply any needed preprocessing to src.  */
>    int num_ignored_bits;
>    if (left_shift)
> @@ -2448,7 +2452,7 @@ number_of_iterations_cltz (loop_p loop, edge exit,
>    if (modify_before_test)
>      num_ignored_bits++;
> -  if (num_ignored_bits != 0)
> +  if (num_ignored_bits)
>      src = fold_build2 (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR,
>                        TREE_TYPE (src), src,
>                        build_int_cst (integer_type_node, num_ignored_bits));
> @@ -2463,10 +2467,76 @@ number_of_iterations_cltz (loop_p loop, edge exit,
>
>    expr = fold_convert (unsigned_type_node, expr);
>
> -  tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
> -                                 build_zero_cst (TREE_TYPE (src)));
> +  /* If the copy-header (ch) pass peeled one iteration we're shifting
> +     SRC by preprocessing it above.
> +
> +     A loop like
> +      if (bits)
> +       {
> +         while (!(bits & 1))
> +           {
> +             bits >>= 1;
> +             cnt += 1;
> +           }
> +         return cnt;
> +       }
> +     ch (roughly) transforms into:
> +      if (bits)
> +       {
> +         if (!(bits & 1)
> +           {
> +             do
> +               {
> +                 bits >>= 1;
> +                 cnt += 1;
> +               } while (!(bits & 1));
> +           }
> +          else
> +            cnt = 1;
> +         return cnt;
> +       }
>
> -  niter->assumptions = simplify_using_initial_conditions (loop, assumptions);
> +     Then, our preprocessed SRC (that is used for c[tl]z computation)
> +     will be bits >> 1, and the assumption is bits >> 1 != 0.  */
> +  tree assumptions = boolean_false_node;
> +  int_range_max r (TREE_TYPE (unshifted_src));
> +
> +  /* As we don't have a gimple statement to query, use the range of the
> +     unshifted SRC, then apply the shift manually and check if the
> +     resulting range is nonzero.  */
> +  if (get_range_query (cfun)->range_on_edge
> +      (r, loop_preheader_edge (loop), unshifted_src)
> +      && !r.varying_p ()
> +      && !r.undefined_p ())
> +    {
> +      if (num_ignored_bits)
> +       {
> +         range_op_handler op (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR);
> +         int_range_max shifted_range (TREE_TYPE (unshifted_src));
> +         wide_int shift_count = wi::shwi (num_ignored_bits,
> +                                          TYPE_PRECISION (TREE_TYPE (src)));
> +         int_range_max shift_amount
> +           (TREE_TYPE (unshifted_src), shift_count, shift_count);
> +
> +         if (op.fold_range (shifted_range, TREE_TYPE (unshifted_src), r,
> +                            shift_amount))
> +           r = shifted_range;

(*) 'else'

> +       }
> +
> +      /* If the range is non-zero we are good.  */
> +      if (!r.nonzero_p ())

Hmm, code does not match comment?  I think you want r.nonzero_p ().
But that also means that at (*) we cannot simply leave 'r' to be the
range of the unshifted src.

> +       assumptions = boolean_true_node;
> +    }
> +
> +  /* If that didn't work use simplify_using_initial_conditions.  */
> +  if (assumptions == boolean_false_node)

Overall the flow is far from obvious ;)  A helper function allowing

  int_range_max r (...);
  if (compute_shifted_range (r, unshifted_src, ...)
      && r.nonzero_p ())
    assumptions = boolean_true_node;
  else
     {
 ... old code ...

would make that more obvious.


> +    {
> +      assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
> +                                build_zero_cst (TREE_TYPE (src)));
> +      assumptions = simplify_using_initial_conditions (loop, assumptions);
> +    }
> +
> +  niter->assumptions = assumptions;
>    niter->may_be_zero = boolean_false_node;
>    niter->niter = simplify_using_initial_conditions (loop, expr);
>
> @@ -5146,9 +5216,12 @@ estimate_numbers_of_iterations (function *fn)
>       loop iteration estimates.  */
>    fold_defer_overflow_warnings ();
>
> +  enable_ranger (fn);
> +
>    for (auto loop : loops_list (fn, 0))
>      estimate_numbers_of_iterations (loop);
>
> +  disable_ranger (fn);
>    fold_undefer_and_ignore_overflow_warnings ();
>  }
>
> --
> 2.51.0
>

Reply via email to