The following teaches vectorizable_simd_clone_call to handle
integer mode masks.  The tricky bit is to infer the number of
lanes represented by a single mask argument - the patch divides
simdlen by the number of mask arguments to calculate that,
assuming ABIs distribute the lanes uniformly across the mask arguments.

As with the VOIDmode handling, there is a restriction: splitting
or merging incoming vector masks into more or fewer SIMD call
arguments is not supported.

Bootstrapped and tested on x86_64-unknown-linux-gnu, re-testing
after a minor change.  Will push later.

Richard.

        PR tree-optimization/111795
        * tree-vect-stmts.cc (vectorizable_simd_clone_call): Handle
        integer mode mask arguments.

        * gcc.target/i386/vect-simd-clone-avx512-1.c: New testcase.
        * gcc.target/i386/vect-simd-clone-avx512-2.c: Likewise.
        * gcc.target/i386/vect-simd-clone-avx512-3.c: Likewise.
---
 .../i386/vect-simd-clone-avx512-1.c           |  43 +++++
 .../i386/vect-simd-clone-avx512-2.c           |   6 +
 .../i386/vect-simd-clone-avx512-3.c           |   6 +
 gcc/tree-vect-stmts.cc                        | 150 ++++++++++++++----
 4 files changed, 175 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-3.c

diff --git a/gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-1.c b/gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-1.c
new file mode 100644
index 00000000000..e350996439e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-1.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512vl" } */
+
+#include "avx512vl-check.h"
+
+#ifndef SIMDLEN
+#define SIMDLEN 4
+#endif
+
+int x[1024];
+
+#pragma omp declare simd simdlen(SIMDLEN)
+__attribute__((noinline)) int
+foo (int a, int b)
+{
+  return a + b;
+}
+
+void __attribute__((noipa))
+bar (void)
+{
+#pragma omp simd
+  for (int i = 0; i < 1024; i++)
+    if (x[i] < 20)
+      x[i] = foo (x[i], x[i]);
+}
+
+void avx512vl_test ()
+{
+  int i;
+#pragma GCC novector
+  for (i = 0; i < 1024; i++)
+    x[i] = i;
+
+  bar ();
+
+#pragma GCC novector
+  for (i = 0; i < 1024; i++)
+    if ((i < 20 && x[i] != i + i)
+       || (i >= 20 && x[i] != i))
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-2.c b/gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-2.c
new file mode 100644
index 00000000000..d9968ae30f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-2.c
@@ -0,0 +1,6 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512vl" } */
+
+#define SIMDLEN 8
+#include "vect-simd-clone-avx512-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-3.c b/gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-3.c
new file mode 100644
index 00000000000..c05f6c8ce91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-3.c
@@ -0,0 +1,6 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512vl" } */
+
+#define SIMDLEN 16
+#include "vect-simd-clone-avx512-1.c"
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 0fb6fc3394a..abc8603f67c 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4492,6 +4492,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
+               if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
+                   != SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
+                 i = -1;
                break;
              }
            if (i == (size_t) -1)
@@ -4517,6 +4520,12 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
   if (bestn == NULL)
     return false;
 
+  unsigned int num_mask_args = 0;
+  if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
+    for (i = 0; i < nargs; i++)
+      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
+       num_mask_args++;
+
   for (i = 0; i < nargs; i++)
     {
       if ((arginfo[i].dt == vect_constant_def
@@ -4541,30 +4550,50 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
          return false;
        }
 
-      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
-         && bestn->simdclone->mask_mode == VOIDmode
-         && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
-             != simd_clone_subparts (arginfo[i].vectype)))
+      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
        {
-         /* FORNOW we only have partial support for vector-type masks that
-            can't hold all of simdlen. */
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
-                            vect_location,
-                            "in-branch vector clones are not yet"
-                            " supported for mismatched vector sizes.\n");
-         return false;
-       }
-      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
-         && bestn->simdclone->mask_mode != VOIDmode)
-       {
-         /* FORNOW don't support integer-type masks.  */
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
-                            vect_location,
-                            "in-branch vector clones are not yet"
-                            " supported for integer mask modes.\n");
-         return false;
+         if (bestn->simdclone->mask_mode == VOIDmode)
+           {
+             if (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
+                 != simd_clone_subparts (arginfo[i].vectype))
+               {
+                 /* FORNOW we only have partial support for vector-type masks
+                    that can't hold all of simdlen. */
+                 if (dump_enabled_p ())
+                   dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+                                    vect_location,
+                                    "in-branch vector clones are not yet"
+                                    " supported for mismatched vector sizes.\n");
+                 return false;
+               }
+           }
+         else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
+           {
+             if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
+                 || maybe_ne (exact_div (bestn->simdclone->simdlen,
+                                         num_mask_args),
+                              simd_clone_subparts (arginfo[i].vectype)))
+               {
+                 /* FORNOW we only have partial support for integer-type masks
+                    that represent the same number of lanes as the
+                    vectorized mask inputs. */
+                 if (dump_enabled_p ())
+                   dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+                                    vect_location,
+                                    "in-branch vector clones are not yet "
+                                    "supported for mismatched vector sizes.\n");
+                 return false;
+               }
+           }
+         else
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+                                vect_location,
+                                "in-branch vector clones not supported"
+                                " on this target.\n");
+             return false;
+           }
        }
     }
 
@@ -4781,14 +4810,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                }
              break;
            case SIMD_CLONE_ARG_TYPE_MASK:
-             atype = bestn->simdclone->args[i].vector_type;
-             if (bestn->simdclone->mask_mode != VOIDmode)
-               {
-                 /* FORNOW: this is disabled above.  */
-                 gcc_unreachable ();
-               }
-             else
+             if (bestn->simdclone->mask_mode == VOIDmode)
                {
+                 atype = bestn->simdclone->args[i].vector_type;
                  tree elt_type = TREE_TYPE (atype);
                  tree one = fold_convert (elt_type, integer_one_node);
                  tree zero = fold_convert (elt_type, integer_zero_node);
@@ -4839,6 +4863,72 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                        }
                    }
                }
+             else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
+               {
+                 atype = bestn->simdclone->args[i].vector_type;
+                 /* Guess the number of lanes represented by atype.  */
+                 unsigned HOST_WIDE_INT atype_subparts
+                   = exact_div (bestn->simdclone->simdlen,
+                                num_mask_args).to_constant ();
+                 o = vector_unroll_factor (nunits, atype_subparts);
+                 for (m = j * o; m < (j + 1) * o; m++)
+                   {
+                     if (m == 0)
+                       {
+                         if (!slp_node)
+                           vect_get_vec_defs_for_operand (vinfo, stmt_info,
+                                                          o * ncopies,
+                                                          op,
+                                                          &vec_oprnds[i]);
+                         vec_oprnds_i[i] = 0;
+                       }
+                     if (atype_subparts
+                         < simd_clone_subparts (arginfo[i].vectype))
+                       {
+                         /* The mask argument has fewer elements than the
+                            input vector.  */
+                         /* FORNOW */
+                         gcc_unreachable ();
+                       }
+                     else if (atype_subparts
+                              == simd_clone_subparts (arginfo[i].vectype))
+                       {
+                         /* The vector mask argument matches the input
+                            in the number of lanes, but not necessarily
+                            in the mode.  */
+                         vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
+                         tree st = lang_hooks.types.type_for_mode
+                                     (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
+                         vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
+                                              vec_oprnd0);
+                         gassign *new_stmt
+                           = gimple_build_assign (make_ssa_name (st),
+                                                  vec_oprnd0);
+                         vect_finish_stmt_generation (vinfo, stmt_info,
+                                                      new_stmt, gsi);
+                         if (!types_compatible_p (atype, st))
+                           {
+                             new_stmt
+                               = gimple_build_assign (make_ssa_name (atype),
+                                                      NOP_EXPR,
+                                                      gimple_assign_lhs
+                                                        (new_stmt));
+                             vect_finish_stmt_generation (vinfo, stmt_info,
+                                                          new_stmt, gsi);
+                           }
+                         vargs.safe_push (gimple_assign_lhs (new_stmt));
+                       }
+                     else
+                       {
+                         /* The mask argument has more elements than the
+                            input vector.  */
+                         /* FORNOW */
+                         gcc_unreachable ();
+                       }
+                   }
+               }
+             else
+               gcc_unreachable ();
              break;
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
-- 
2.35.3

Reply via email to