On Fri, Dec 6, 2024 at 2:56 PM Feng Xue OS <f...@os.amperecomputing.com> wrote:
>
> This patch refactors the procedure in tree-scalar-evolution.cc in order to 
> partially export its functionality to other module, so decomposes it to 
> several relatively independent utility functions.
>
> Thanks,
> Feng
> ---
> gcc/
>         PR tree-optimization/90594
>         * tree-scalar-evolution.cc (simple_scev_final_value): New function.
>         (apply_scev_final_value_replacement): Likewise.
>         (final_value_replacement_loop): Call new functions.
> ---
>  gcc/tree-scalar-evolution.cc | 288 ++++++++++++++++++++---------------
>  1 file changed, 165 insertions(+), 123 deletions(-)
>
> diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
> index abb2bad7773..5733632aa78 100644
> --- a/gcc/tree-scalar-evolution.cc
> +++ b/gcc/tree-scalar-evolution.cc
> @@ -3775,13 +3775,17 @@ analyze_and_compute_bitop_with_inv_effect (class 
> loop* loop, tree phidef,
>    return fold_build2 (code1, type, inv, match_op[0]);
>  }
>
> -/* Do final value replacement for LOOP, return true if we did anything.  */
> +/* For induction VALUE of LOOP, return true if its SCEV is simple enough that
> +   its final value at loop exit could be directly calculated based on the
> +   initial value and loop niter, and this value is recorded in FINAL_VALUE,
> +   also set REWRITE_OVERFLOW to true in the case that we need to rewrite the
> +   final value to avoid overflow UB when replacement would really happen
> +   later.  */
>
> -bool
> -final_value_replacement_loop (class loop *loop)
> +static bool
> +simple_scev_final_value (class loop *loop, tree value, tree *final_value,
> +                        bool *rewrite_overflow)
>  {
> -  /* If we do not know exact number of iterations of the loop, we cannot
> -     replace the final value.  */
>    edge exit = single_exit (loop);
>    if (!exit)
>      return false;
> @@ -3790,100 +3794,170 @@ final_value_replacement_loop (class loop *loop)
>    if (niter == chrec_dont_know)
>      return false;
>
> -  /* Ensure that it is possible to insert new statements somewhere.  */
> -  if (!single_pred_p (exit->dest))
> -    split_loop_exit_edge (exit);
> -
> -  /* Set stmt insertion pointer.  All stmts are inserted before this point.  
> */
> +  /* TODO: allow float value for fast math.  */
> +  if (!POINTER_TYPE_P (TREE_TYPE (value))
> +       && !INTEGRAL_TYPE_P (TREE_TYPE (value)))
> +    return false;
>
>    class loop *ex_loop
> -    = superloop_at_depth (loop,
> -                         loop_depth (exit->dest->loop_father) + 1);
> +    = superloop_at_depth (loop, loop_depth (exit->dest->loop_father) + 1);
>
> -  bool any = false;
> -  gphi_iterator psi;
> -  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
> +  bool folded_casts;
> +  tree def = analyze_scalar_evolution_in_loop (ex_loop, loop, value,
> +                                              &folded_casts);
> +  tree bitinv_def, bit_def;
> +  unsigned HOST_WIDE_INT niter_num;
> +
> +  if (def != chrec_dont_know)
> +    def = compute_overall_effect_of_inner_loop (ex_loop, def);
> +
> +  /* Handle bitop with invariant induction expression.
> +
> +     .i.e
> +     for (int i =0 ;i < 32; i++)
> +       tmp &= bit2;
> +     if bit2 is an invariant in loop which could simple to tmp &= bit2.  */
> +  else if ((bitinv_def
> +               = analyze_and_compute_bitop_with_inv_effect (loop,
> +                                                            value, niter)))
> +    def = bitinv_def;
> +
> +  /* Handle bitwise induction expression.
> +
> +     .i.e.
> +     for (int i = 0; i != 64; i+=3)
> +       res &= ~(1UL << i);
> +
> +     RES can't be analyzed out by SCEV because it is not polynomially
> +     expressible, but in fact final value of RES can be replaced by
> +     RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
> +     being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR.  */
> +  else if (tree_fits_uhwi_p (niter)
> +          && (niter_num = tree_to_uhwi (niter)) != 0
> +          && niter_num < TYPE_PRECISION (TREE_TYPE (value))
> +          && (bit_def
> +                  = analyze_and_compute_bitwise_induction_effect (loop, 
> value,
> +                                                                  
> niter_num)))
> +    def = bit_def;
> +
> +  bool cond_overflow_p;
> +  if (!tree_does_not_contain_chrecs (def)
> +      || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
> +      /* Moving the computation from the loop may prolong life range
> +        of some ssa names, which may cause problems if they appear
> +        on abnormal edges.  */
> +      || contains_abnormal_ssa_name_p (def)
> +      /* Do not emit expensive expressions.  The rationale is that
> +        when someone writes a code like
> +
> +        while (n > 45) n -= 45;
> +
> +        he probably knows that n is not large, and does not want it
> +        to be turned into n %= 45.  */
> +      || expression_expensive_p (def, &cond_overflow_p))
> +    return false;
> +
> +  *final_value = def;
> +
> +  if ((folded_casts
> +       && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
> +       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
> +      || cond_overflow_p)
> +    *rewrite_overflow = true;
> +  else
> +    *rewrite_overflow = false;
> +
> +  return true;
> +}
> +
> +/* Given a loop closed PHI, replace it with a new assignment from its
> +   FINAL_VALUE at loop exit. The flag REWRITE_OVERFLOW tells if we need to
> +   rewrite expressions in FINAL_VALUE to avoid overflow UB.  When FINAL_VALUE
> +   is constant, we could just propagate the constant, however, sometimes we
> +   have to leave an unused copy assignment around, if so, ALWAYS_KEEP is set
> +   to true.  */
> +
> +static void
> +apply_scev_final_value_replacement (gphi *phi, tree final_value,
> +                                   bool rewrite_overflow, bool always_keep)
> +{
> +  tree rslt = gimple_phi_result (phi);
> +  gphi_iterator psi = gsi_for_phi (phi);
> +  gimple_stmt_iterator gsi;
> +  location_t loc = gimple_phi_arg_location (phi, 0);
> +  basic_block bb = gimple_bb (phi);
> +  gimple_seq stmts;
> +
> +  /* This is a degenerate PHI with only one argument.  */
> +  gcc_assert (gimple_phi_num_args (phi) == 1);
> +
> +  remove_phi_node (&psi, false);
> +
> +  /* Propagate constants immediately.  */
> +  if (CONSTANT_CLASS_P (final_value)
> +      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
>      {
> -      gphi *phi = psi.phi ();
> -      tree rslt = PHI_RESULT (phi);
> -      tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit);
> -      tree def = phidef;
> -      if (virtual_operand_p (def))
> -       {
> -         gsi_next (&psi);
> -         continue;
> -       }
> +      replace_uses_by (rslt, final_value);
> +
> +      /* Sometimes, we need to leave an unused initialization around to avoid
> +        invalidating the SCEV cache.  */
> +      if (!always_keep)
> +       return;
> +    }
> +
> +  tree def = force_gimple_operand (final_value, &stmts, false, NULL_TREE);
> +  gassign *ass = gimple_build_assign (rslt, def);
> +
> +  gimple_set_location (ass, loc);
> +  gimple_seq_add_stmt (&stmts, ass);
>
> -      if (!POINTER_TYPE_P (TREE_TYPE (def))
> -         && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
> +  /* If def's type has undefined overflow and there were folded casts, 
> rewrite
> +     all stmts added for def into arithmetics with defined overflow
> +     behavior.  */
> +  if (rewrite_overflow)
> +    {
> +      gsi = gsi_start (stmts);
> +      while (!gsi_end_p (gsi))
>         {
> -         gsi_next (&psi);
> -         continue;
> +         gimple *stmt = gsi_stmt (gsi);
> +         if (is_gimple_assign (stmt)
> +             && arith_code_with_undefined_signed_overflow
> +                               (gimple_assign_rhs_code (stmt)))
> +           rewrite_to_defined_overflow (&gsi);
> +         gsi_next (&gsi);
>         }
> +    }
>
> -      bool folded_casts;
> -      def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
> -                                             &folded_casts);
> +  gsi = gsi_after_labels (bb);
> +  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
> +}
>
> -      tree bitinv_def, bit_def;
> -      unsigned HOST_WIDE_INT niter_num;
> +/* Do final value replacement for LOOP, return true if we did anything.  */
> +
> +bool
> +final_value_replacement_loop (class loop *loop)
> +{
> +  /* If we do not know exact number of iterations of the loop, we cannot
> +     replace the final value.  */
> +  edge exit = single_exit (loop);
> +  if (!exit || number_of_latch_executions (loop) == chrec_dont_know)
> +    return false;
>
> -      if (def != chrec_dont_know)
> -       def = compute_overall_effect_of_inner_loop (ex_loop, def);
> +  /* Ensure that it is possible to insert new statements somewhere.  */
> +  if (!single_pred_p (exit->dest))
> +    split_loop_exit_edge (exit);
>
> -      /* Handle bitop with invariant induction expression.
> +  bool any = false;
> +  gphi_iterator psi;
> +  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
> +    {
> +      gphi *phi = psi.phi ();
> +      tree rslt = PHI_RESULT (phi);
> +      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
> +      bool rewrite_overflow;
>
> -       .i.e
> -       for (int i =0 ;i < 32; i++)
> -         tmp &= bit2;
> -       if bit2 is an invariant in loop which could simple to
> -       tmp &= bit2.  */
> -      else if ((bitinv_def
> -               = analyze_and_compute_bitop_with_inv_effect (loop,
> -                                                            phidef, niter)))
> -       def = bitinv_def;
> -
> -      /* Handle bitwise induction expression.
> -
> -        .i.e.
> -        for (int i = 0; i != 64; i+=3)
> -          res &= ~(1UL << i);
> -
> -        RES can't be analyzed out by SCEV because it is not polynomially
> -        expressible, but in fact final value of RES can be replaced by
> -        RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
> -        being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR.  */
> -      else if (tree_fits_uhwi_p (niter)
> -              && (niter_num = tree_to_uhwi (niter)) != 0
> -              && niter_num < TYPE_PRECISION (TREE_TYPE (phidef))
> -              && (bit_def
> -                  = analyze_and_compute_bitwise_induction_effect (loop,
> -                                                                  phidef,
> -                                                                  
> niter_num)))
> -       def = bit_def;
> -
> -      bool cond_overflow_p;
> -      if (!tree_does_not_contain_chrecs (def)
> -         || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
> -         /* Moving the computation from the loop may prolong life range
> -            of some ssa names, which may cause problems if they appear
> -            on abnormal edges.  */
> -         || contains_abnormal_ssa_name_p (def)
> -         /* Do not emit expensive expressions.  The rationale is that
> -            when someone writes a code like
> -
> -            while (n > 45) n -= 45;
> -
> -            he probably knows that n is not large, and does not want it
> -            to be turned into n %= 45.  */
> -         || expression_expensive_p (def, &cond_overflow_p))
> +      if (!simple_scev_final_value (loop, def, &def, &rewrite_overflow))
>         {
> -         if (dump_file && (dump_flags & TDF_DETAILS))
> -           {
> -             fprintf (dump_file, "not replacing:\n  ");
> -             print_gimple_stmt (dump_file, phi, 0);
> -             fprintf (dump_file, "\n");
> -           }

Can you keep this dump please?

Otherwise OK.

Thanks,
Richard.

>           gsi_next (&psi);
>           continue;
>         }
> @@ -3904,43 +3978,11 @@ final_value_replacement_loop (class loop *loop)
>          to a GIMPLE sequence or to a statement list (keeping this a
>          GENERIC interface).  */
>        def = unshare_expr (def);
> -      auto loc = gimple_phi_arg_location (phi, exit->dest_idx);
> -      remove_phi_node (&psi, false);
> -
> -      /* Propagate constants immediately, but leave an unused initialization
> -        around to avoid invalidating the SCEV cache.  */
> -      if (CONSTANT_CLASS_P (def) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
> -       replace_uses_by (rslt, def);
> -
> -      /* Create the replacement statements.  */
> -      gimple_seq stmts;
> -      def = force_gimple_operand (def, &stmts, false, NULL_TREE);
> -      gassign *ass = gimple_build_assign (rslt, def);
> -      gimple_set_location (ass, loc);
> -      gimple_seq_add_stmt (&stmts, ass);
> -
> -      /* If def's type has undefined overflow and there were folded
> -        casts, rewrite all stmts added for def into arithmetics
> -        with defined overflow behavior.  */
> -      if ((folded_casts
> -          && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
> -          && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
> -         || cond_overflow_p)
> -       {
> -         gimple_stmt_iterator gsi2;
> -         gsi2 = gsi_start (stmts);
> -         while (!gsi_end_p (gsi2))
> -           {
> -             gimple *stmt = gsi_stmt (gsi2);
> -             if (is_gimple_assign (stmt)
> -                 && arith_code_with_undefined_signed_overflow
> -                      (gimple_assign_rhs_code (stmt)))
> -               rewrite_to_defined_overflow (&gsi2);
> -             gsi_next (&gsi2);
> -           }
> -       }
> -      gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
> -      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
> +
> +      /* Advance iterator before the PHI is removed.  */
> +      gsi_next (&psi);
> +      apply_scev_final_value_replacement (phi, def, rewrite_overflow, true);
> +
>        if (dump_file)
>         {
>           fprintf (dump_file, " final stmt:\n  ");
> --
> 2.17.1

Reply via email to