[gcc r15-3082] PR tree-optimization/101390: Vectorize modulo operator

Jennifer Schmitz via Gcc-cvs Thu, 22 Aug 2024 00:12:54 -0700

https://gcc.gnu.org/g:9bbad3685131ec95d970f81bf75f9556d4d92742


commit r15-3082-g9bbad3685131ec95d970f81bf75f9556d4d92742
Author: Jennifer Schmitz <jschm...@nvidia.com>
Date:   Wed Aug 7 08:56:45 2024 -0700

    PR tree-optimization/101390: Vectorize modulo operator
    
    This patch adds a new vectorization pattern that detects the modulo
    operation where the second operand is a variable.
    It replaces the statement by division, multiplication, and subtraction.
    
    The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
    Ok for mainline?
    
    Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
    
    gcc/
            PR tree-optimization/101390
            * tree-vect-patterns.cc (vect_recog_mod_var_pattern): Add new pattern.
    
    gcc/testsuite/
            PR tree-optimization/101390
            * gcc.dg/vect/vect-mod-var.c: New test.
            * gcc.target/aarch64/sve/mod_1.c: Likewise.
            * lib/target-supports.exp: New selector expression.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-mod-var.c     | 37 ++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/sve/mod_1.c | 28 ++++++++++++
 gcc/testsuite/lib/target-supports.exp        |  5 +++
 gcc/tree-vect-patterns.cc                    | 66 ++++++++++++++++++++++++++++
 4 files changed, 136 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-mod-var.c b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
new file mode 100644
index 000000000000..eeed318c62b0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
@@ -0,0 +1,37 @@
+#include "tree-vect.h"
+
+#define N 64
+
+__attribute__ ((noinline)) int
+f (int *restrict a, int *restrict b, int *restrict c)
+{
+  for (int i = 0; i < N; ++i)
+    c[i] = a[i] % b[i];
+}
+
+#define BASE1 -126
+#define BASE2 116
+
+int
+main (void)
+{
+  check_vect ();
+
+  int a[N], b[N], c[N];
+
+  for (int i = 0; i < N; ++i)
+    {
+      a[i] = BASE1 + i * 5;
+      b[i] = BASE2 - i * 4;
+      __asm__ volatile ("");
+    }
+
+  f (a, b, c);
+
+#pragma GCC novector
+  for (int i = 0; i < N; ++i)
+    if (c[i] != a[i] % b[i])
+      __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_mod_var_pattern: detected" "vect" { target vect_int_div } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
new file mode 100644
index 000000000000..eb37f1e36360
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
@@ -0,0 +1,28 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-Ofast -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE)                                 \
+void __attribute__ ((noipa))                           \
+mod_##TYPE (TYPE *restrict dst, TYPE *restrict src1,   \
+           TYPE *restrict src2, int count)             \
+{                                                      \
+  for (int i = 0; i < count; ++i)                      \
+    dst[i] = src1[i] % src2[i];                                \
+}
+
+#define TEST_ALL(T) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 91995bff65f7..3501ce44b761 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4234,6 +4234,11 @@ proc check_effective_target_vect_int { } {
        }}]
 }
 
+# Return 1 if the target supports vector integer division, 0 otherwise.
+proc check_effective_target_vect_int_div { } {
+    return [check_effective_target_aarch64_sve]
+}
+
 # Return 1 if the target supports vectorization of early breaks,
 # 0 otherwise.
 #
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index f52de2b6972d..18b322c63b8e 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -5264,6 +5264,71 @@ vect_recog_divmod_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Detects pattern with a modulo operation (S1) where both arguments
+   are variables of integral type.
+   The statement is replaced by division, multiplication, and subtraction.
+   The last statement (S4) is returned.
+
+   Example:
+   S1 c_t = a_t % b_t;
+
+   is replaced by
+   S2 x_t = a_t / b_t;
+   S3 y_t = x_t * b_t;
+   S4 z_t = a_t - y_t;  */
+
+static gimple *
+vect_recog_mod_var_pattern (vec_info *vinfo,
+                           stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
+  tree oprnd0, oprnd1, vectype, itype;
+  gimple *pattern_stmt, *def_stmt;
+  enum tree_code rhs_code;
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  rhs_code = gimple_assign_rhs_code (last_stmt);
+  if (rhs_code != TRUNC_MOD_EXPR)
+    return NULL;
+
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  itype = TREE_TYPE (oprnd0);
+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TREE_CODE (oprnd1) != SSA_NAME
+      || TREE_CODE (itype) != INTEGER_TYPE)
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (vinfo, itype);
+
+  if (!vectype
+      || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
+      || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
+      || !target_has_vecop_for_code (MULT_EXPR, vectype)
+      || !target_has_vecop_for_code (MINUS_EXPR, vectype))
+    return NULL;
+
+  tree q, tmp, r;
+  q = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
+
+  tmp = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
+
+  r = vect_recog_temp_ssa_var (itype, NULL);
+  pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
+
+  /* Pattern detected.  */
+  *type_out = vectype;
+  vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
+
+  return pattern_stmt;
+}
+
 /* Function vect_recog_mixed_size_cond_pattern
 
    Try to find the following pattern:
@@ -7343,6 +7408,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_rotate_pattern, "rotate" },
   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
   { vect_recog_divmod_pattern, "divmod" },
+  { vect_recog_mod_var_pattern, "modvar" },
   { vect_recog_mult_pattern, "mult" },
   { vect_recog_sat_add_pattern, "sat_add" },
   { vect_recog_sat_sub_pattern, "sat_sub" },

[gcc r15-3082] PR tree-optimization/101390: Vectorize modulo operator

Reply via email to