On Thu, Sep 18, 2025 at 10:31 AM Avinash Jayakar <[email protected]> wrote:
>
> Hi,
>
> Following is version 2 of the patch proposed for master aiming to fix
> PR104116. This has been bootstrapped and regtested on powerpc64le with
> no regression failures.
> Kindly review.
>
> Just had one question.
> If I have to implement similar thing for {CEIL, ROUND}_{MOD,DIV} can I
> create a separate patch, or should I follow it up with this patch like
> [PATCH v2 1/3] and so on.
If the patches are independent you can post them separately, otherwise
it helps when they are in a series.
Richard.
>
> Thanks and regards,
> Avinash Jayakar
>
> Changes from v1:
> - Added new tests for checking vectorization of FLOOR_{DIV,MOD}
> for multiple paths.
> - Incorporated review comments to use proper vector masks and
> checks for whether the target supports the generated code.
>
> vect: Add vectorization logic for FLOOR_{MOD,DIV}[PR104116]
>
> Add logic in tree-vectorizer for FLOOR_MOD_EXPR and FLOOR_DIV_EXPR. As
> mentioned in PR104116 the logic for
> FLOOR_MOD_EXPR:
> r = x %[fl] y; is
> r = x % y; if (r && (x ^ y) < 0) r += y;
> FLOOR_DIV_EXPR:
> d = x /[fl] y; is
> r = x % y; d = x / y; if (r && (x ^ y) < 0) --d;
>
> Added a new helper function "add_code_for_floor_divmod" in
> tree-vect-patterns.cc for adding compensating code for floor mod and
> floor div. This function checks whether the target supports all
> operations required to implement floor_{div,mod} and generates
> vectorized code for the respective operations. A pseudocode of generated
> code is given below:
> v0 = x^y
> v1 = -r
> v2 = r | -r (if r!=0, then v2 < 0)
> v3 = v0 & v2
> v4 = v3 < 0 (equivalent to (r && (x ^ y) < 0))
> if floor_mod
> v5 = v4 ? y : 0
> v6 = r + v5 (final result)
> else if floor_div
> v5 = v4 ? 1 : 0
> v6 = d - v5 (final result)
>
> Added tests to check vectorization in all paths
> 1. If operand1 == 2
> 2. If operand1 == power of 2
> 3. If operand1 != power of 2
>
> 2025-09-18 Avinash Jayakar <[email protected]>
>
> gcc/ChangeLog:
> PR vect/104116
> * tree-vect-patterns.cc (add_code_for_floor_divmod): Helper to
> generate vect code for floor_divmod.
> (vect_recog_divmod_pattern): Added patterns for
> floor_{div,mod}_expr.
>
> gcc/testsuite/ChangeLog:
> PR vect/104116
> * gcc.dg/vect/pr104116-floor-divmod.c: New test.
> ---
> .../gcc.dg/vect/pr104116-floor-divmod.c | 58 +++++++
> gcc/tree-vect-patterns.cc | 148 ++++++++++++++++--
> 2 files changed, 193 insertions(+), 13 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
> b/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
> new file mode 100644
> index 00000000000..507c1d1e33a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
> @@ -0,0 +1,58 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-fgimple -fdump-tree-optimized -mcpu=power8" } */
You cannot add -mcpu=power8 generally, instead add a separate
/* { dg-additional-options "-mcpu=power8" { target ppc64*-*-* } } */
> +/* { dg-require-effective-target vect_int} */
> +/* { dg-require-effective-target vect_condition} */
> +/* { dg-require-effective-target vect_shift} */
> +
> +#define TEST_FN(OP, CONST, NAME) void __GIMPLE (ssa,guessed_local(10737416))
> \
> +NAME (int * a) \
> +{ \
> + int i; \
> + long unsigned int _1; \
> + long unsigned int _2; \
> + int * _3; \
> + int _4; \
> + int _5; \
> + unsigned int _12; \
> + unsigned int _13; \
> + \
> + __BB(2,guessed_local(10737416)): \
> + goto __BB3(precise(134217728)); \
> + \
> + __BB(3,loop_header(1),guessed_local(1063004408)): \
> + i_14 = __PHI (__BB5: i_11, __BB2: 0); \
> + _13 = __PHI (__BB5: _12, __BB2: 512u); \
> + _1 = (long unsigned int) i_14; \
> + _2 = _1 * 4ul; \
> + _3 = a_9(D) + _2; \
> + _4 = __MEM <int> (_3); \
> + _5 = _4 OP CONST; \
> + __MEM <int> (_3) = _5; \
> + i_11 = i_14 + 2; \
> + _12 = _13 - 1u; \
> + if (_12 != 0u) \
> + goto __BB5(guessed(132861994)); \
> + else \
> + goto __BB4(guessed(1355734)); \
> + \
> + __BB(5,guessed_local(1052266995)): \
> + goto __BB3(precise(134217728)); \
> + \
> + __BB(4,guessed_local(10737416)): \
> + return; \
> +} \
> +
> +TEST_FN(%, 2, trunc_mod_2)
> +TEST_FN(__FLOOR_MOD, 2, floor_mod_2)
> +TEST_FN(__FLOOR_DIV, 2, floor_div_2)
> +
> +TEST_FN(%, 4, trunc_mod_pow2)
> +TEST_FN(__FLOOR_MOD, 4, floor_mod_pow2)
> +TEST_FN(__FLOOR_DIV, 4, floor_div_pow2)
> +
> +TEST_FN(%, 5, trunc_mod)
> +TEST_FN(__FLOOR_MOD, 5, floor_mod)
> +TEST_FN(__FLOOR_DIV, 5, floor_div)
Can you make this a runtime testcase as well? You can add a plain C
main() exercising the GIMPLE FN above.
> +
> +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 9 "vect" }
> } */
> +
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 70bf768d339..72b8b2768ef 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -4833,6 +4833,88 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo,
> stmt_vec_info stmt_vinfo,
> return NULL;
> }
>
> +gimple *
> +add_code_for_floor_divmod(tree vectype, vec_info* vinfo,
> + stmt_vec_info stmt_vinfo, enum tree_code rhs_code,
> + tree q, tree r, tree oprnd0, tree oprnd1, tree itype)
The last two lines should be indented to the '(', and before '('
should be a space.
The function should be static and it lacks a function level comment
explaining what
it does.
> +{
> + gimple *def_stmt;
> + tree mask_vectype = truth_type_for (vectype);
> + if (!mask_vectype)
> + return NULL;
> + if (!target_has_vecop_for_code(NEGATE_EXPR, vectype)
> + || !target_has_vecop_for_code(BIT_XOR_EXPR, vectype)
> + || !target_has_vecop_for_code(BIT_IOR_EXPR, vectype)
> + || !target_has_vecop_for_code(PLUS_EXPR, vectype)
> + || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
> + || !expand_vec_cond_expr_p (vectype, mask_vectype)
> + )
The right brace should go to the previous line.
> + return NULL;
> +
> +
> + // r = x %[fl] y;
> + // is
> + // r = x % y; if (r && (x ^ y) < 0) r += y;
> + // Produce following sequence
> + // v0 = x^y
> + // v1 = -r
> + // v2 = r | -r
> + // v3 = v0 & v2
> + // v4 = v3 < 0 (equivalent to (r && (x ^ y) < 0))
> + // v5 = v4 ? y : 0
> + // v6 = r + v5 (final result)
> + tree cond_reg = vect_recog_temp_ssa_var(itype, NULL);
> + def_stmt = gimple_build_assign(cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> + // -r
> + tree negate_r = vect_recog_temp_ssa_var(itype, NULL);
> + def_stmt = gimple_build_assign(negate_r, NEGATE_EXPR, r);
> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> + // r | -r , sign bit is set if r!=0
> + tree r_or_negr = vect_recog_temp_ssa_var(itype, NULL);
> + def_stmt = gimple_build_assign(r_or_negr, BIT_IOR_EXPR, r, negate_r);
> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> + // (x^y) & (r|-r)
> + tree r_or_negr_and_xor = vect_recog_temp_ssa_var(itype, NULL);
> + def_stmt = gimple_build_assign(r_or_negr_and_xor, BIT_AND_EXPR, r_or_negr,
> + cond_reg);
> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> + // (x^y) & (r|-r) < 0 which is equivalent to (x^y < 0 && r!=0)
> + tree bool_cond = vect_recog_temp_ssa_var(boolean_type_node,NULL);
> + def_stmt = gimple_build_assign(bool_cond, LT_EXPR, r_or_negr_and_xor,
> + build_int_cst(itype, 0));
> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
> + mask_vectype, itype);
> +
> + if (rhs_code == FLOOR_MOD_EXPR) {
The brace goes to the next line.
> + // (x^y < 0 && r) ? y : 0
> + tree extr_cond = vect_recog_temp_ssa_var(itype, NULL);
> + def_stmt = gimple_build_assign(extr_cond, COND_EXPR, bool_cond, oprnd1,
> + build_int_cst(itype, 0));
> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> + // r += (x ^ y < 0 && r) ? y : 0
> + tree floor_mod_r = vect_recog_temp_ssa_var(itype, NULL);
> + return gimple_build_assign(floor_mod_r, PLUS_EXPR, r, extr_cond);
> + } else if (rhs_code == FLOOR_DIV_EXPR) {
> + // (x^y < 0 && r) ? 1 : 0
> + tree extr_cond = vect_recog_temp_ssa_var(itype, NULL);
> + def_stmt = gimple_build_assign(extr_cond, COND_EXPR, bool_cond,
> build_int_cst(itype, 1),
> + build_int_cst(itype, 0));
> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> +
> + // q -= (x ^ y < 0 && r) ? 1 : 0
> + tree floor_mod_r = vect_recog_temp_ssa_var(itype, NULL);
> + return gimple_build_assign(floor_mod_r, MINUS_EXPR, q, extr_cond);
> + } else {
> + return NULL;
> + }
> +}
> +
> /* Detect a signed division by a constant that wouldn't be
> otherwise vectorized:
>
> @@ -4894,6 +4976,8 @@ vect_recog_divmod_pattern (vec_info *vinfo,
> case TRUNC_DIV_EXPR:
> case EXACT_DIV_EXPR:
> case TRUNC_MOD_EXPR:
> + case FLOOR_MOD_EXPR:
> + case FLOOR_DIV_EXPR:
> break;
> default:
> return NULL;
> @@ -4945,17 +5029,27 @@ vect_recog_divmod_pattern (vec_info *vinfo,
> gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0,
> shift);
> gimple_call_set_lhs (div_stmt, var_div);
>
> - if (rhs_code == TRUNC_MOD_EXPR)
> + if (rhs_code == TRUNC_MOD_EXPR || rhs_code == FLOOR_MOD_EXPR
> + || rhs_code == FLOOR_DIV_EXPR)
All OR predicates should be vertically aligned
> {
> append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
> + tree t1 = vect_recog_temp_ssa_var (itype, NULL);
> def_stmt
> - = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> - LSHIFT_EXPR, var_div, shift);
> + = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
> append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
> pattern_stmt
> = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> - MINUS_EXPR, oprnd0,
> - gimple_assign_lhs (def_stmt));
> + MINUS_EXPR, oprnd0, t1);
> + if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
> + {
> + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
> + pattern_stmt = add_code_for_floor_divmod(vectype, vinfo,
> stmt_vinfo,
space before the (
> + rhs_code, var_div, t1, oprnd0, oprnd1,
> + itype);
> + if (pattern_stmt == NULL)
> + return NULL;
> + }
> +
> }
> else
> pattern_stmt = div_stmt;
> @@ -4969,8 +5063,10 @@ vect_recog_divmod_pattern (vec_info *vinfo,
> build_int_cst (itype, 0));
> append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
> truth_type_for (vectype), itype);
> + tree div_result = NULL_TREE;
> if (rhs_code == TRUNC_DIV_EXPR
> - || rhs_code == EXACT_DIV_EXPR)
> + || rhs_code == EXACT_DIV_EXPR
> + || rhs_code == FLOOR_DIV_EXPR)
sth is wrong with the indent
Otherwise this now looks better.
Richard.
> {
> tree var = vect_recog_temp_ssa_var (itype, NULL);
> tree shift;
> @@ -4987,12 +5083,18 @@ vect_recog_divmod_pattern (vec_info *vinfo,
> append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
>
> shift = build_int_cst (itype, tree_log2 (oprnd1));
> + div_result = vect_recog_temp_ssa_var (itype, NULL);
> pattern_stmt
> - = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> + = gimple_build_assign (div_result,
> RSHIFT_EXPR, var, shift);
> }
> - else
> + if (rhs_code == TRUNC_MOD_EXPR
> + || rhs_code == FLOOR_MOD_EXPR
> + || rhs_code == FLOOR_DIV_EXPR)
> {
> + if (rhs_code == FLOOR_DIV_EXPR)
> + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
> +
> tree signmask;
> if (compare_tree_int (oprnd1, 2) == 0)
> {
> @@ -5037,10 +5139,19 @@ vect_recog_divmod_pattern (vec_info *vinfo,
> build_int_cst (itype, 1)));
> append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
>
> + tree r = vect_recog_temp_ssa_var (itype, NULL);
> pattern_stmt
> - = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> - MINUS_EXPR, gimple_assign_lhs (def_stmt),
> + = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs
> (def_stmt),
> signmask);
> + if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
> + {
> + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
> + pattern_stmt = add_code_for_floor_divmod(vectype, vinfo, stmt_vinfo,
> + rhs_code, div_result, r, oprnd0, oprnd1,
> + itype);
> + if (pattern_stmt == NULL)
> + return NULL;
> + }
> }
>
> return pattern_stmt;
> @@ -5347,13 +5458,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
> }
> }
>
> - if (rhs_code == TRUNC_MOD_EXPR)
> + if (rhs_code == TRUNC_MOD_EXPR
> + || rhs_code == FLOOR_MOD_EXPR
> + || rhs_code == FLOOR_DIV_EXPR)
> {
> tree r, t1;
>
> /* We divided. Now finish by:
> - t1 = q * oprnd1;
> - r = oprnd0 - t1; */
> + t1 = q * oprnd1;
> + r = oprnd0 - t1; */
> append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
>
> t1 = vect_recog_temp_ssa_var (itype, NULL);
> @@ -5362,6 +5475,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>
> r = vect_recog_temp_ssa_var (itype, NULL);
> pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
> +
> + if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
> + {
> + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
> + pattern_stmt = add_code_for_floor_divmod(vectype, vinfo, stmt_vinfo,
> rhs_code,
> + q, r, oprnd0, oprnd1, itype);
> + if (pattern_stmt == NULL)
> + return NULL;
> + }
> }
>
> /* Pattern detected. */
> --
> 2.47.3
>