Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >This patch fixes masked gather load vectorization if the narrowing (or >widening) gather needs to be used (one where either the index is 32-bit and the >loaded type is 64-bit, or the index is 64-bit and the loaded type is 32-bit). > >Bootstrapped/regtested on x86_64-linux and i686-linux; Kyrill has also >kindly tested the testcases on a Haswell CPU. Ok for trunk?
Ok. Thanks, Richard. >2013-12-30 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/59591 > * tree-vect-stmts.c (vectorizable_mask_load_store): Fix up handling > of modifier = NARROW masked gathers. > (permute_vec_elements): Use gimple_get_lhs instead of > gimple_assign_lhs. > > * gcc.dg/vect/pr59591-1.c: New test. > * gcc.dg/vect/pr59591-2.c: New test. > * gcc.target/i386/pr59591-1.c: New test. > * gcc.target/i386/pr59591-2.c: New test. > >--- gcc/tree-vect-stmts.c.jj 2013-12-27 19:24:33.000000000 +0100 >+++ gcc/tree-vect-stmts.c 2013-12-30 13:10:24.366030631 +0100 >@@ -1855,14 +1855,24 @@ vectorizable_mask_load_store (gimple stm > tree vec_oprnd0 = NULL_TREE, op; > tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl)); > tree rettype, srctype, ptrtype, idxtype, masktype, scaletype; >- tree ptr, vec_mask = NULL_TREE, mask_op, var, scale; >+ tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale; > tree perm_mask = NULL_TREE, prev_res = NULL_TREE; >+ tree mask_perm_mask = NULL_TREE; > edge pe = loop_preheader_edge (loop); > gimple_seq seq; > basic_block new_bb; > enum { NARROW, NONE, WIDEN } modifier; > int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype); > >+ rettype = TREE_TYPE (TREE_TYPE (gather_decl)); >+ srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); >+ ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); >+ idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); >+ masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); >+ scaletype = TREE_VALUE (arglist); >+ gcc_checking_assert (types_compatible_p (srctype, rettype) >+ && types_compatible_p (srctype, masktype)); >+ > if (nunits == gather_off_nunits) > modifier = NONE; > else if (nunits == gather_off_nunits / 2) >@@ -1888,19 +1898,14 @@ vectorizable_mask_load_store (gimple stm > perm_mask = vect_gen_perm_mask (vectype, sel); > gcc_assert (perm_mask != NULL_TREE); > ncopies *= 2; >+ for (i = 0; i < nunits; ++i) >+ sel[i] = i | 
gather_off_nunits; >+ mask_perm_mask = vect_gen_perm_mask (masktype, sel); >+ gcc_assert (mask_perm_mask != NULL_TREE); > } > else > gcc_unreachable (); > >- rettype = TREE_TYPE (TREE_TYPE (gather_decl)); >- srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); >- ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); >- idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); >- masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); >- scaletype = TREE_VALUE (arglist); >- gcc_checking_assert (types_compatible_p (srctype, rettype) >- && types_compatible_p (srctype, masktype)); >- >vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), >vectype); > > ptr = fold_convert (ptrtype, gather_base); >@@ -1940,28 +1945,35 @@ vectorizable_mask_load_store (gimple stm > op = var; > } > >- if (j == 0) >- vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL); >+ if (mask_perm_mask && (j & 1)) >+ mask_op = permute_vec_elements (mask_op, mask_op, >+ mask_perm_mask, stmt, gsi); > else > { >- vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, >&def_stmt, >- &def, &dt); >- vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask); >- } >+ if (j == 0) >+ vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL); >+ else >+ { >+ vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, >+ &def_stmt, &def, &dt); >+ vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask); >+ } > >- mask_op = vec_mask; >- if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask))) >- { >- gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)) >- == TYPE_VECTOR_SUBPARTS (masktype)); >- var = vect_get_new_vect_var (masktype, vect_simple_var, NULL); >- var = make_ssa_name (var, NULL); >- mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op); >- new_stmt >- = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, >- mask_op, NULL_TREE); >- vect_finish_stmt_generation (stmt, new_stmt, gsi); >- mask_op = var; >+ mask_op = vec_mask; >+ if 
(!useless_type_conversion_p (masktype, TREE_TYPE >(vec_mask))) >+ { >+ gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)) >+ == TYPE_VECTOR_SUBPARTS (masktype)); >+ var = vect_get_new_vect_var (masktype, vect_simple_var, >+ NULL); >+ var = make_ssa_name (var, NULL); >+ mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op); >+ new_stmt >+ = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, >+ mask_op, NULL_TREE); >+ vect_finish_stmt_generation (stmt, new_stmt, gsi); >+ mask_op = var; >+ } > } > > new_stmt >@@ -5446,7 +5458,7 @@ permute_vec_elements (tree x, tree y, tr > tree perm_dest, data_ref; > gimple perm_stmt; > >- perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), >vectype); >+ perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), >vectype); > data_ref = make_ssa_name (perm_dest, NULL); > > /* Generate the permute statement. */ >--- gcc/testsuite/gcc.dg/vect/pr59591-1.c.jj 2013-12-30 >12:59:27.012435290 +0100 >+++ gcc/testsuite/gcc.dg/vect/pr59591-1.c 2013-12-30 13:08:40.386562796 >+0100 >@@ -0,0 +1,55 @@ >+/* PR tree-optimization/59591 */ >+/* { dg-do run } */ >+/* { dg-additional-options "-fopenmp-simd" } */ >+ >+#ifndef CHECK_H >+#include "tree-vect.h" >+#endif >+ >+extern void abort (void); >+ >+int p[256], q[256], r[256], t[256]; >+ >+__attribute__((noinline, noclone)) void >+foo (void) >+{ >+ int i; >+ #pragma omp simd safelen(64) >+ for (i = 0; i < 256; i++) >+ if (r[i] > 32) >+ t[i] = p[q[i] * 3L + 2L]; >+} >+ >+__attribute__((noinline, noclone)) void >+bar (void) >+{ >+ int i; >+ for (i = 0; i < 256; i++) >+ { >+ r[i] = ((i >> 2) & (1 << (i & 3))) ? 32 + i : 32 - i; >+ q[i] = r[i] > 32 ? 
((i * 7) % 84) : 99 + i; >+ p[i] = i * 11; >+ t[i] = i * 13; >+ } >+ foo (); >+ for (i = 0; i < 256; i++) >+ if ((i >> 2) & (1 << (i & 3))) >+ { >+ if (t[i] != (((i * 7) % 84) * 3 + 2) * 11) >+ abort (); >+ } >+ else if (t[i] != i * 13) >+ abort (); >+} >+ >+#ifndef CHECK_H >+int >+main () >+{ >+ check_vect (); >+ bar (); >+ return 0; >+} >+#endif >+ >+/* { dg-final { cleanup-tree-dump "vect" } } */ >--- gcc/testsuite/gcc.dg/vect/pr59591-2.c.jj 2013-12-30 >12:59:35.249391492 +0100 >+++ gcc/testsuite/gcc.dg/vect/pr59591-2.c 2013-12-30 13:08:58.791467078 >+0100 >@@ -0,0 +1,56 @@ >+/* PR tree-optimization/59591 */ >+/* { dg-do run } */ >+/* { dg-additional-options "-fopenmp-simd" } */ >+ >+#ifndef CHECK_H >+#include "tree-vect.h" >+#endif >+ >+extern void abort (void); >+ >+long long int p[256], r[256], t[256]; >+int q[256]; >+ >+__attribute__((noinline, noclone)) void >+foo (void) >+{ >+ int i; >+ #pragma omp simd safelen(64) >+ for (i = 0; i < 256; i++) >+ if (r[i] > 32LL) >+ t[i] = p[q[i]]; >+} >+ >+__attribute__((noinline, noclone)) void >+bar (void) >+{ >+ int i; >+ for (i = 0; i < 256; i++) >+ { >+ r[i] = ((i >> 2) & (1 << (i & 3))) ? 32 + i : 32 - i; >+ q[i] = r[i] > 32 ? 
((i * 7) % 256) : 258 + i; >+ p[i] = i * 11; >+ t[i] = i * 13; >+ } >+ foo (); >+ for (i = 0; i < 256; i++) >+ if ((i >> 2) & (1 << (i & 3))) >+ { >+ if (t[i] != ((i * 7) % 256) * 11) >+ abort (); >+ } >+ else if (t[i] != i * 13) >+ abort (); >+} >+ >+#ifndef CHECK_H >+int >+main () >+{ >+ check_vect (); >+ bar (); >+ return 0; >+} >+#endif >+ >+/* { dg-final { cleanup-tree-dump "vect" } } */ >--- gcc/testsuite/gcc.target/i386/pr59591-1.c.jj 2013-12-30 >13:05:51.243440518 +0100 >+++ gcc/testsuite/gcc.target/i386/pr59591-1.c 2013-12-30 >13:09:16.100377227 +0100 >@@ -0,0 +1,17 @@ >+/* PR tree-optimization/59591 */ >+/* { dg-do run } */ >+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fno-vect-cost-model" } */ >+/* { dg-require-effective-target avx2 } */ >+ >+#define CHECK_H "avx2-check.h" >+#define TEST avx2_test >+ >+#include "../../gcc.dg/vect/pr59591-1.c" >+ >+#include CHECK_H >+ >+static void >+TEST (void) >+{ >+ bar (); >+} >--- gcc/testsuite/gcc.target/i386/pr59591-2.c.jj 2013-12-30 >13:06:13.000328094 +0100 >+++ gcc/testsuite/gcc.target/i386/pr59591-2.c 2013-12-30 >13:09:24.093319805 +0100 >@@ -0,0 +1,17 @@ >+/* PR tree-optimization/59591 */ >+/* { dg-do run } */ >+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fno-vect-cost-model" } */ >+/* { dg-require-effective-target avx2 } */ >+ >+#define CHECK_H "avx2-check.h" >+#define TEST avx2_test >+ >+#include "../../gcc.dg/vect/pr59591-2.c" >+ >+#include CHECK_H >+ >+static void >+TEST (void) >+{ >+ bar (); >+} > > Jakub