Hi!

This patch fixes vectorization of masked gather loads when a narrowing (or
widening) gather has to be used, i.e. one where either the index is 32-bit and
the loaded type is 64-bit, or the index is 64-bit and the loaded type is 32-bit.

Bootstrapped/regtested on x86_64-linux and i686-linux; Kyrill has also
kindly tested the testcases on a Haswell CPU.  Ok for trunk?

2013-12-30  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/59591
        * tree-vect-stmts.c (vectorizable_mask_load_store): Fix up handling
        of modifier = NARROW masked gathers.
        (permute_vec_elements): Use gimple_get_lhs instead of
        gimple_assign_lhs.

        * gcc.dg/vect/pr59591-1.c: New test.
        * gcc.dg/vect/pr59591-2.c: New test.
        * gcc.target/i386/pr59591-1.c: New test.
        * gcc.target/i386/pr59591-2.c: New test.

--- gcc/tree-vect-stmts.c.jj    2013-12-27 19:24:33.000000000 +0100
+++ gcc/tree-vect-stmts.c       2013-12-30 13:10:24.366030631 +0100
@@ -1855,14 +1855,24 @@ vectorizable_mask_load_store (gimple stm
       tree vec_oprnd0 = NULL_TREE, op;
       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
-      tree ptr, vec_mask = NULL_TREE, mask_op, var, scale;
+      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
       tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
+      tree mask_perm_mask = NULL_TREE;
       edge pe = loop_preheader_edge (loop);
       gimple_seq seq;
       basic_block new_bb;
       enum { NARROW, NONE, WIDEN } modifier;
       int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
 
+      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
+      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      scaletype = TREE_VALUE (arglist);
+      gcc_checking_assert (types_compatible_p (srctype, rettype)
+                          && types_compatible_p (srctype, masktype));
+
       if (nunits == gather_off_nunits)
        modifier = NONE;
       else if (nunits == gather_off_nunits / 2)
@@ -1888,19 +1898,14 @@ vectorizable_mask_load_store (gimple stm
          perm_mask = vect_gen_perm_mask (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
          ncopies *= 2;
+         for (i = 0; i < nunits; ++i)
+           sel[i] = i | gather_off_nunits;
+         mask_perm_mask = vect_gen_perm_mask (masktype, sel);
+         gcc_assert (mask_perm_mask != NULL_TREE);
        }
       else
        gcc_unreachable ();
 
-      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
-      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
-      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
-      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
-      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
-      scaletype = TREE_VALUE (arglist);
-      gcc_checking_assert (types_compatible_p (srctype, rettype)
-                          && types_compatible_p (srctype, masktype));
-
       vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
 
       ptr = fold_convert (ptrtype, gather_base);
@@ -1940,28 +1945,35 @@ vectorizable_mask_load_store (gimple stm
              op = var;
            }
 
-         if (j == 0)
-           vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
+         if (mask_perm_mask && (j & 1))
+           mask_op = permute_vec_elements (mask_op, mask_op,
+                                           mask_perm_mask, stmt, gsi);
          else
            {
-             vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
-                                 &def, &dt);
-             vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
-           }
+             if (j == 0)
+               vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
+             else
+               {
+                 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
+                                     &def_stmt, &def, &dt);
+                 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
+               }
 
-         mask_op = vec_mask;
-         if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
-           {
-             gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
-                         == TYPE_VECTOR_SUBPARTS (masktype));
-             var = vect_get_new_vect_var (masktype, vect_simple_var, NULL);
-             var = make_ssa_name (var, NULL);
-             mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
-             new_stmt
-               = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
-                                               mask_op, NULL_TREE);
-             vect_finish_stmt_generation (stmt, new_stmt, gsi);
-             mask_op = var;
+             mask_op = vec_mask;
+             if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
+               {
+                 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
+                             == TYPE_VECTOR_SUBPARTS (masktype));
+                 var = vect_get_new_vect_var (masktype, vect_simple_var,
+                                              NULL);
+                 var = make_ssa_name (var, NULL);
+                 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
+                 new_stmt
+                   = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
+                                                   mask_op, NULL_TREE);
+                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                 mask_op = var;
+               }
            }
 
          new_stmt
@@ -5446,7 +5458,7 @@ permute_vec_elements (tree x, tree y, tr
   tree perm_dest, data_ref;
   gimple perm_stmt;
 
-  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
   data_ref = make_ssa_name (perm_dest, NULL);
 
   /* Generate the permute statement.  */
--- gcc/testsuite/gcc.dg/vect/pr59591-1.c.jj    2013-12-30 12:59:27.012435290 +0100
+++ gcc/testsuite/gcc.dg/vect/pr59591-1.c       2013-12-30 13:08:40.386562796 +0100
@@ -0,0 +1,55 @@
+/* PR tree-optimization/59591 */
+/* { dg-do run } */
+/* { dg-additional-options "-fopenmp-simd" } */
+
+#ifndef CHECK_H
+#include "tree-vect.h"
+#endif
+
+extern void abort (void);
+
+int p[256], q[256], r[256], t[256];
+
+__attribute__((noinline, noclone)) void
+foo (void)
+{
+  int i;
+  #pragma omp simd safelen(64)
+  for (i = 0; i < 256; i++)
+    if (r[i] > 32)
+      t[i] = p[q[i] * 3L + 2L];
+}
+
+__attribute__((noinline, noclone)) void
+bar (void)
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    {
+      r[i] = ((i >> 2) & (1 << (i & 3))) ? 32 + i : 32 - i;
+      q[i] = r[i] > 32 ? ((i * 7) % 84) : 99 + i;
+      p[i] = i * 11;
+      t[i] = i * 13;
+    }
+  foo ();
+  for (i = 0; i < 256; i++)
+    if ((i >> 2) & (1 << (i & 3)))
+      {
+       if (t[i] != (((i * 7) % 84) * 3 + 2) * 11)
+         abort ();
+      }
+    else if (t[i] != i * 13)
+      abort ();
+}
+
+#ifndef CHECK_H
+int
+main ()
+{
+  check_vect ();
+  bar ();
+  return 0;
+}
+#endif
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/pr59591-2.c.jj    2013-12-30 12:59:35.249391492 +0100
+++ gcc/testsuite/gcc.dg/vect/pr59591-2.c       2013-12-30 13:08:58.791467078 +0100
@@ -0,0 +1,56 @@
+/* PR tree-optimization/59591 */
+/* { dg-do run } */
+/* { dg-additional-options "-fopenmp-simd" } */
+
+#ifndef CHECK_H
+#include "tree-vect.h"
+#endif
+
+extern void abort (void);
+
+long long int p[256], r[256], t[256];
+int q[256];
+
+__attribute__((noinline, noclone)) void
+foo (void)
+{
+  int i;
+  #pragma omp simd safelen(64)
+  for (i = 0; i < 256; i++)
+    if (r[i] > 32LL)
+      t[i] = p[q[i]];
+}
+
+__attribute__((noinline, noclone)) void
+bar (void)
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    {
+      r[i] = ((i >> 2) & (1 << (i & 3))) ? 32 + i : 32 - i;
+      q[i] = r[i] > 32 ? ((i * 7) % 256) : 258 + i;
+      p[i] = i * 11;
+      t[i] = i * 13;
+    }
+  foo ();
+  for (i = 0; i < 256; i++)
+    if ((i >> 2) & (1 << (i & 3)))
+      {
+       if (t[i] != ((i * 7) % 256) * 11)
+         abort ();
+      }
+    else if (t[i] != i * 13)
+      abort ();
+}
+
+#ifndef CHECK_H
+int
+main ()
+{
+  check_vect ();
+  bar ();
+  return 0;
+}
+#endif
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.target/i386/pr59591-1.c.jj        2013-12-30 13:05:51.243440518 +0100
+++ gcc/testsuite/gcc.target/i386/pr59591-1.c   2013-12-30 13:09:16.100377227 +0100
@@ -0,0 +1,17 @@
+/* PR tree-optimization/59591 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx2 } */
+
+#define CHECK_H "avx2-check.h"
+#define TEST avx2_test
+
+#include "../../gcc.dg/vect/pr59591-1.c"
+
+#include CHECK_H
+
+static void
+TEST (void)
+{
+  bar ();
+}
--- gcc/testsuite/gcc.target/i386/pr59591-2.c.jj        2013-12-30 13:06:13.000328094 +0100
+++ gcc/testsuite/gcc.target/i386/pr59591-2.c   2013-12-30 13:09:24.093319805 +0100
@@ -0,0 +1,17 @@
+/* PR tree-optimization/59591 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx2 } */
+
+#define CHECK_H "avx2-check.h"
+#define TEST avx2_test
+
+#include "../../gcc.dg/vect/pr59591-2.c"
+
+#include CHECK_H
+
+static void
+TEST (void)
+{
+  bar ();
+}

        Jakub

Reply via email to