https://gcc.gnu.org/g:9bbad3685131ec95d970f81bf75f9556d4d92742
commit r15-3082-g9bbad3685131ec95d970f81bf75f9556d4d92742
Author: Jennifer Schmitz <jschm...@nvidia.com>
Date:   Wed Aug 7 08:56:45 2024 -0700

    PR tree-optimization/101390: Vectorize modulo operator

    This patch adds a new vectorization pattern that detects the modulo
    operation where the second operand is a variable.
    It replaces the statement by division, multiplication, and subtraction.

    The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
    Ok for mainline?

    Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>

    gcc/
            PR tree-optimization/101390
            * tree-vect-patterns.cc (vect_recog_mod_var_pattern): Add new pattern.

    gcc/testsuite/
            PR tree-optimization/101390
            * gcc.dg/vect/vect-mod-var.c: New test.
            * gcc.target/aarch64/sve/mod_1.c: Likewise.
            * lib/target-supports.exp: New selector expression.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-mod-var.c     | 37 ++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/sve/mod_1.c | 28 ++++++++++++
 gcc/testsuite/lib/target-supports.exp        |  5 +++
 gcc/tree-vect-patterns.cc                    | 66 ++++++++++++++++++++++++++++
 4 files changed, 136 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-mod-var.c b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
new file mode 100644
index 000000000000..eeed318c62b0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
@@ -0,0 +1,37 @@
+#include "tree-vect.h"
+
+#define N 64
+/* Compute c[i] = a[i] % b[i]; every b[i] must be nonzero.  */
+__attribute__ ((noinline)) void
+f (int *restrict a, int *restrict b, int *restrict c)
+{
+  for (int i = 0; i < N; ++i)
+    c[i] = a[i] % b[i];
+}
+
+#define BASE1 -126
+#define BASE2 117 /* Odd, so b[i] = BASE2 - i * 4 is never zero.  */
+
+int
+main (void)
+{
+  check_vect ();
+
+  int a[N], b[N], c[N];
+
+  for (int i = 0; i < N; ++i)
+    {
+      a[i] = BASE1 + i * 5;
+      b[i] = BASE2 - i * 4;
+      __asm__ volatile ("");
+    }
+
+  f (a, b, c);
+
+#pragma GCC novector
+  for (int i = 0; i < N; ++i)
+    if (c[i] != a[i] % b[i])
+      __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_mod_var_pattern: detected" "vect" { target vect_int_div } } } */
diff --git
a/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
new file mode 100644
index 000000000000..eb37f1e36360
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
@@ -0,0 +1,28 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-Ofast -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+/* Modulo by a variable should be vectorized as [SU]DIV followed by MSB.  */
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE)						\
+void __attribute__ ((noipa))					\
+mod_##TYPE (TYPE *restrict dst, TYPE *restrict src1,		\
+	    TYPE *restrict src2, int count)			\
+{								\
+  for (int i = 0; i < count; ++i)				\
+    dst[i] = src1[i] % src2[i];					\
+}
+
+#define TEST_ALL(T) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t)
+
+TEST_ALL (DEF_LOOP)
+/* Per element size: one SDIV and one UDIV, each paired with an MSB.  */
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 91995bff65f7..3501ce44b761 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4234,6 +4234,11 @@ proc check_effective_target_vect_int { } {
 	}}]
 }
 
+# Return 1 if the target supports vector integer division, 0 otherwise.
+proc check_effective_target_vect_int_div { } {
+    return [check_effective_target_aarch64_sve]
+}
+
 # Return 1 if the target supports vectorization of early breaks,
 # 0 otherwise.
 #
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index f52de2b6972d..18b322c63b8e 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -5264,6 +5264,71 @@ vect_recog_divmod_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Detects pattern with a modulo operation (S1) where both arguments
+   are variables of integral type.
+   The statement is replaced by division, multiplication, and subtraction.
+   S2 and S3 are appended to the pattern definition sequence of STMT_VINFO.
+   The last statement (S4) is returned and *TYPE_OUT is set to the vector
+   type; NULL is returned when the pattern does not apply.
+
+   Example: S1 c_t = a_t % b_t;
+   is replaced by
+   S2 x_t = a_t / b_t;
+   S3 y_t = x_t * b_t;
+   S4 z_t = a_t - y_t;  */
+
+static gimple *
+vect_recog_mod_var_pattern (vec_info *vinfo,
+			    stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
+  tree oprnd0, oprnd1, vectype, itype;
+  gimple *pattern_stmt, *def_stmt;
+  enum tree_code rhs_code;
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  rhs_code = gimple_assign_rhs_code (last_stmt);
+  if (rhs_code != TRUNC_MOD_EXPR)
+    return NULL;
+
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  itype = TREE_TYPE (oprnd0);
+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TREE_CODE (oprnd1) != SSA_NAME
+      || TREE_CODE (itype) != INTEGER_TYPE)
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (vinfo, itype);
+  /* Use this only if the target lacks vector modulo but has the pieces.  */
+  if (!vectype
+      || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
+      || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
+      || !target_has_vecop_for_code (MULT_EXPR, vectype)
+      || !target_has_vecop_for_code (MINUS_EXPR, vectype))
+    return NULL;
+  /* Build q = oprnd0 / oprnd1, tmp = q * oprnd1 and r = oprnd0 - tmp.  */
+  tree q, tmp, r;
+  q = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
+
+  tmp = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
+
+  r = vect_recog_temp_ssa_var (itype, NULL);
+  pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
+
+  /* Pattern detected.  */
+  *type_out = vectype;
+  vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
+
+  return pattern_stmt;
+}
+
 /* Function vect_recog_mixed_size_cond_pattern
 
    Try to find the following pattern:
@@ -7343,6 +7408,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_rotate_pattern, "rotate" },
   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
   { vect_recog_divmod_pattern, "divmod" },
+  { vect_recog_mod_var_pattern, "modvar" },
   { vect_recog_mult_pattern, "mult" },
   { vect_recog_sat_add_pattern, "sat_add" },
   { vect_recog_sat_sub_pattern, "sat_sub" },