On Thu, Sep 18, 2025 at 10:31 AM Avinash Jayakar <[email protected]> wrote:
>
> Hi,
>
> Following is version 2 of the patch proposed for master aiming to fix
> PR104116. This has been bootstrapped and regtested on powerpc64le with
> regression failures.
> Kindly review.
>
> Just had one question.
> If I have to implement similar thing for {CEIL, ROUND}_{MOD,DIV} can I
> create a separate patch, or should I follow it up with this patch like
> [PATCH v2 1/3] and so on.

If the patches are independent you can post them separately, otherwise
it helps when they are in a series.

Richard.

>
> Thanks and regards,
> Avinash Jayakar
>
> Changes from v1:
>         - Added new tests for checking vectorization of FLOOR_{DIV.MOD}
>           for multiple paths.
>         - Incorporated review comments to use proper vector masks and
>           checks for if the target supports generated code.
>
> vect: Add vectorization logic for FLOOR_{MOD,DIV}[PR104116]
>
> Add logic in tree-vectorizer for FLOOR_MOD_EXPR and FLOOR_DIV_EXPR. As
> mentioned in PR104116 the logic for
> FLOOR_MOD_EXPR:
>         r = x %[fl] y; is
>         r = x % y; if (r && (x ^ y) < 0) r += y;
> FLOOR_DIV_EXPR:
>         d = x /[fl] y; is
>         r = x % y; d = x / y; if (r && (x ^ y) < 0) --d;
>
> Added a new helper function "add_code_for_floor_divmod" in
> tree-vect-patterns.cc for adding compensating code for floor mod and
> floor div. This function checks if target supports all required
> operations required to implement floor_{div,mod} and generates
> vectorized code for the respective operations. A pseudocode of generated
> code is given below:
>         v0 = x^y
>         v1 = -r
>         v2 = r | -r (if r!=0, then v2 < 0)
>         v3 = v0 & v2
>         v4 = v3 < 0 (equivalent to (r && (x ^ y) < 0))
>         if floor_mod
>           v5 = v4 ? y : 0
>           v6 = r + v5  (final result)
>         else if floor_div
>           v5 = v4 ? 1 : 0
>           v6 = d - 1  (final result)
>
> Added tests to check vectorization in all paths
> 1. If operand1 == 2
> 2. If operand1 == power of 2
> 3. If operand1 != power of 2
>
> 2025-09-18  Avinash Jayakar  <[email protected]>
>
> gcc/ChangeLog:
>         PR vect/104116
>         * tree-vect-patterns.cc (add_code_for_floor_divmod): Helper to
>         generate vect code for floor_divmod.
>         (vect_recog_divmod_pattern): Added patterns for
>         floor_{div,mod}_expr.
>
> gcc/testsuite/ChangeLog:
>         PR vect/104116
>         * gcc.dg/vect/pr104116-floor-divmod.c: New test.
> ---
>  .../gcc.dg/vect/pr104116-floor-divmod.c       |  58 +++++++
>  gcc/tree-vect-patterns.cc                     | 148 ++++++++++++++++--
>  2 files changed, 193 insertions(+), 13 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c 
> b/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
> new file mode 100644
> index 00000000000..507c1d1e33a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
> @@ -0,0 +1,58 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-fgimple -fdump-tree-optimized -mcpu=power8" } */

You cannot add -mcpu=power8 generally, instead add a separate

/* { dg-additional-options "-mcpu=power8" { target ppc64*-*-* } } */

> +/* { dg-require-effective-target vect_int} */
> +/* { dg-require-effective-target vect_condition} */
> +/* { dg-require-effective-target vect_shift} */
> +
> +#define TEST_FN(OP, CONST, NAME) void __GIMPLE (ssa,guessed_local(10737416)) 
> \
> +NAME (int * a) \
> +{ \
> +  int i; \
> +  long unsigned int _1; \
> +  long unsigned int _2; \
> +  int * _3; \
> +  int _4; \
> +  int _5; \
> +  unsigned int _12; \
> +  unsigned int _13; \
> + \
> +  __BB(2,guessed_local(10737416)): \
> +  goto __BB3(precise(134217728)); \
> + \
> +  __BB(3,loop_header(1),guessed_local(1063004408)): \
> +  i_14 = __PHI (__BB5: i_11, __BB2: 0); \
> +  _13 = __PHI (__BB5: _12, __BB2: 512u); \
> +  _1 = (long unsigned int) i_14; \
> +  _2 = _1 * 4ul; \
> +  _3 = a_9(D) + _2; \
> +  _4 = __MEM <int> (_3); \
> +  _5 = _4 OP CONST; \
> +  __MEM <int> (_3) = _5; \
> +  i_11 = i_14 + 2; \
> +  _12 = _13 - 1u; \
> +  if (_12 != 0u) \
> +    goto __BB5(guessed(132861994)); \
> +  else \
> +    goto __BB4(guessed(1355734)); \
> + \
> +  __BB(5,guessed_local(1052266995)): \
> +  goto __BB3(precise(134217728)); \
> + \
> +  __BB(4,guessed_local(10737416)): \
> +  return; \
> +} \
> +
> +TEST_FN(%, 2, trunc_mod_2)
> +TEST_FN(__FLOOR_MOD, 2, floor_mod_2)
> +TEST_FN(__FLOOR_DIV, 2, floor_div_2)
> +
> +TEST_FN(%, 4, trunc_mod_pow2)
> +TEST_FN(__FLOOR_MOD, 4, floor_mod_pow2)
> +TEST_FN(__FLOOR_DIV, 4, floor_div_pow2)
> +
> +TEST_FN(%, 5, trunc_mod)
> +TEST_FN(__FLOOR_MOD, 5, floor_mod)
> +TEST_FN(__FLOOR_DIV, 5, floor_div)

Can you make this a runtime testcase as well?  You can add a plain C
main() exercising the GIMPLE FN above.

> +
> +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 9 "vect" } 
> } */
> +
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 70bf768d339..72b8b2768ef 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -4833,6 +4833,88 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, 
> stmt_vec_info stmt_vinfo,
>    return NULL;
>  }
>
> +gimple *
> +add_code_for_floor_divmod(tree vectype, vec_info* vinfo,
> +  stmt_vec_info stmt_vinfo, enum tree_code rhs_code,
> +  tree q, tree r, tree oprnd0, tree oprnd1, tree itype)

The last two lines should be indented to the '(', and before '('
should be a space.
The function should be static and it lacks a function level comment
explaining what
it does.

> +{
> +  gimple *def_stmt;
> +  tree mask_vectype = truth_type_for (vectype);
> +  if (!mask_vectype)
> +    return NULL;
> +  if (!target_has_vecop_for_code(NEGATE_EXPR, vectype)
> +      || !target_has_vecop_for_code(BIT_XOR_EXPR, vectype)
> +      || !target_has_vecop_for_code(BIT_IOR_EXPR, vectype)
> +      || !target_has_vecop_for_code(PLUS_EXPR, vectype)
> +      || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
> +      || !expand_vec_cond_expr_p (vectype, mask_vectype)
> +    )

The right brace should go to the previous line.

> +    return NULL;
> +
> +
> +  // r = x %[fl] y;
> +  // is
> +  // r = x % y; if (r && (x ^ y) < 0) r += y;
> +  // Produce following sequence
> +  // v0 = x^y
> +  // v1 = -r
> +  // v2 = r | -r
> +  // v3 = v0 & v2
> +  // v4 = v3 < 0 (equivalent to (r && (x ^ y) < 0))
> +  // v5 = v4 ? y : 0
> +  // v6 = r + v5  (final result)
> +  tree cond_reg = vect_recog_temp_ssa_var(itype, NULL);
> +  def_stmt = gimple_build_assign(cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> +  // -r
> +  tree negate_r = vect_recog_temp_ssa_var(itype, NULL);
> +  def_stmt = gimple_build_assign(negate_r, NEGATE_EXPR, r);
> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> +  // r | -r , sign bit is set if r!=0
> +  tree r_or_negr = vect_recog_temp_ssa_var(itype, NULL);
> +  def_stmt = gimple_build_assign(r_or_negr, BIT_IOR_EXPR, r, negate_r);
> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> +  // (x^y) & (r|-r)
> +  tree r_or_negr_and_xor = vect_recog_temp_ssa_var(itype, NULL);
> +  def_stmt = gimple_build_assign(r_or_negr_and_xor, BIT_AND_EXPR, r_or_negr,
> +      cond_reg);
> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> +  // (x^y) & (r|-r) < 0 which is equivalent to  (x^y < 0 && r!=0)
> +  tree bool_cond = vect_recog_temp_ssa_var(boolean_type_node,NULL);
> +  def_stmt = gimple_build_assign(bool_cond, LT_EXPR, r_or_negr_and_xor,
> +    build_int_cst(itype, 0));
> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
> +      mask_vectype, itype);
> +
> +  if (rhs_code == FLOOR_MOD_EXPR) {

The brace goes to the next line.

> +    //  (x^y < 0 && r) ? y : 0
> +    tree extr_cond = vect_recog_temp_ssa_var(itype, NULL);
> +    def_stmt = gimple_build_assign(extr_cond, COND_EXPR, bool_cond, oprnd1,
> +      build_int_cst(itype, 0));
> +    append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> +    // r += (x ^ y < 0 && r) ? y : 0
> +    tree floor_mod_r = vect_recog_temp_ssa_var(itype, NULL);
> +    return gimple_build_assign(floor_mod_r, PLUS_EXPR, r, extr_cond);
> +  } else if (rhs_code == FLOOR_DIV_EXPR) {
> +    //  (x^y < 0 && r) ? 1 : 0
> +    tree extr_cond = vect_recog_temp_ssa_var(itype, NULL);
> +    def_stmt = gimple_build_assign(extr_cond, COND_EXPR, bool_cond, 
> build_int_cst(itype, 1),
> +      build_int_cst(itype, 0));
> +    append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> +    // q -= (x ^ y < 0 && r) ? 1 : 0
> +    tree floor_mod_r = vect_recog_temp_ssa_var(itype, NULL);
> +    return gimple_build_assign(floor_mod_r, MINUS_EXPR, q, extr_cond);
> +  } else {
> +    return NULL;
> +  }
> +}
> +
>  /* Detect a signed division by a constant that wouldn't be
>     otherwise vectorized:
>
> @@ -4894,6 +4976,8 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>      case TRUNC_DIV_EXPR:
>      case EXACT_DIV_EXPR:
>      case TRUNC_MOD_EXPR:
> +    case FLOOR_MOD_EXPR:
> +    case FLOOR_DIV_EXPR:
>        break;
>      default:
>        return NULL;
> @@ -4945,17 +5029,27 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>           gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, 
> shift);
>           gimple_call_set_lhs (div_stmt, var_div);
>
> -         if (rhs_code == TRUNC_MOD_EXPR)
> +         if (rhs_code == TRUNC_MOD_EXPR || rhs_code == FLOOR_MOD_EXPR
> +        || rhs_code == FLOOR_DIV_EXPR)

All OR predicates should be vertically aligned

>             {
>               append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
> +        tree t1 = vect_recog_temp_ssa_var (itype, NULL);
>               def_stmt
> -               = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> -                                      LSHIFT_EXPR, var_div, shift);
> +               = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
>               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
>               pattern_stmt
>                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> -                                      MINUS_EXPR, oprnd0,
> -                                      gimple_assign_lhs (def_stmt));
> +                                      MINUS_EXPR, oprnd0, t1);
> +        if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
> +        {
> +          append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
> +          pattern_stmt = add_code_for_floor_divmod(vectype, vinfo, 
> stmt_vinfo,

space before the (

> +            rhs_code, var_div, t1, oprnd0, oprnd1,
> +            itype);
> +          if (pattern_stmt == NULL)
> +            return NULL;
> +        }
> +
>             }
>           else
>             pattern_stmt = div_stmt;
> @@ -4969,8 +5063,10 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>                                       build_int_cst (itype, 0));
>        append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
>                               truth_type_for (vectype), itype);
> +      tree div_result = NULL_TREE;
>        if (rhs_code == TRUNC_DIV_EXPR
> -         || rhs_code == EXACT_DIV_EXPR)
> +         || rhs_code == EXACT_DIV_EXPR
> +    || rhs_code == FLOOR_DIV_EXPR)

sth is wrong with the indent

Otherwise this now looks better.

Richard.

>         {
>           tree var = vect_recog_temp_ssa_var (itype, NULL);
>           tree shift;
> @@ -4987,12 +5083,18 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
>
>           shift = build_int_cst (itype, tree_log2 (oprnd1));
> +    div_result = vect_recog_temp_ssa_var (itype, NULL);
>           pattern_stmt
> -           = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> +           = gimple_build_assign (div_result,
>                                    RSHIFT_EXPR, var, shift);
>         }
> -      else
> +      if (rhs_code == TRUNC_MOD_EXPR
> +      || rhs_code == FLOOR_MOD_EXPR
> +      || rhs_code == FLOOR_DIV_EXPR)
>         {
> +    if (rhs_code == FLOOR_DIV_EXPR)
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
> +
>           tree signmask;
>           if (compare_tree_int (oprnd1, 2) == 0)
>             {
> @@ -5037,10 +5139,19 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>                                                 build_int_cst (itype, 1)));
>           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
>
> +    tree r = vect_recog_temp_ssa_var (itype, NULL);
>           pattern_stmt
> -           = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> -                                  MINUS_EXPR, gimple_assign_lhs (def_stmt),
> +           = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs 
> (def_stmt),
>                                    signmask);
> +    if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
> +    {
> +      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
> +      pattern_stmt = add_code_for_floor_divmod(vectype, vinfo, stmt_vinfo,
> +        rhs_code, div_result, r, oprnd0, oprnd1,
> +        itype);
> +      if (pattern_stmt == NULL)
> +        return NULL;
> +    }
>         }
>
>        return pattern_stmt;
> @@ -5347,13 +5458,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>         }
>      }
>
> -  if (rhs_code == TRUNC_MOD_EXPR)
> +  if (rhs_code == TRUNC_MOD_EXPR
> +      || rhs_code == FLOOR_MOD_EXPR
> +      || rhs_code == FLOOR_DIV_EXPR)
>      {
>        tree r, t1;
>
>        /* We divided.  Now finish by:
> -        t1 = q * oprnd1;
> -        r = oprnd0 - t1;  */
> +      t1 = q * oprnd1;
> +      r = oprnd0 - t1;  */
>        append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
>
>        t1 = vect_recog_temp_ssa_var (itype, NULL);
> @@ -5362,6 +5475,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>
>        r = vect_recog_temp_ssa_var (itype, NULL);
>        pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
> +
> +      if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
> +      {
> +        append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
> +        pattern_stmt = add_code_for_floor_divmod(vectype, vinfo, stmt_vinfo, 
> rhs_code,
> +          q, r, oprnd0, oprnd1, itype);
> +        if (pattern_stmt == NULL)
> +          return NULL;
> +      }
>      }
>
>    /* Pattern detected.  */
> --
> 2.47.3
>

Reply via email to