On Wed, Oct 28, 2015 at 4:23 PM, Ilya Enkovich <enkovich....@gmail.com> wrote:
> On 23 Oct 13:36, Ilya Enkovich wrote:
>> 2015-10-23 13:32 GMT+03:00 Richard Biener <richard.guent...@gmail.com>:
>> >
>> > No, we'd get
>> >
>> >   mask_1 = bool != 1;
>> >
>> > and the 'mask' variable should have been simplified to 'bool'
>> > (yes, we'd insert a dead stmt).  gimple_build simplifies
>> > stmts via the match-and-simplify machinery and match.pd
>> > knows how to invert conditions.
>> >
>>
>> Thanks! I'll try it.
>>
>> Ilya
>
> Hi,
>
> Here is a new version.  The changes you suggested result in BIT_NOT_EXPR
> being used for the generated mask (instead of the != 1 comparison used
> before).  That required a small fix to get it vectorized and avoid
> regressions.  Is this version OK?
Ok.

Thanks,
Richard.

> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-28  Ilya Enkovich  <enkovich....@gmail.com>
>
>         * internal-fn.c (expand_MASK_LOAD): Adjust to maskload optab changes.
>         (expand_MASK_STORE): Adjust to maskstore optab changes.
>         * optabs-query.c (can_vec_mask_load_store_p): Add MASK_MODE arg.
>         Adjust to maskload, maskstore optab changes.
>         * optabs-query.h (can_vec_mask_load_store_p): Add MASK_MODE arg.
>         * optabs.def (maskload_optab): Transform into convert optab.
>         (maskstore_optab): Likewise.
>         * tree-if-conv.c (ifcvt_can_use_mask_load_store): Adjust to
>         can_vec_mask_load_store_p signature change.
>         (predicate_mem_writes): Use boolean mask.
>         * tree-vect-stmts.c (vectorizable_mask_load_store): Adjust to
>         can_vec_mask_load_store_p signature change.  Allow invariant masks.
>         (vectorizable_operation): Ignore type precision for boolean vectors.
>
> gcc/testsuite/
>
> 2015-10-28  Ilya Enkovich  <enkovich....@gmail.com>
>
>         * gcc.target/i386/avx2-vec-mask-bit-not.c: New test.
>
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index f12d3af..2317e20 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -1901,7 +1901,9 @@ expand_MASK_LOAD (gcall *stmt)
>    create_output_operand (&ops[0], target, TYPE_MODE (type));
>    create_fixed_operand (&ops[1], mem);
>    create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> -  expand_insn (optab_handler (maskload_optab, TYPE_MODE (type)), 3, ops);
> +  expand_insn (convert_optab_handler (maskload_optab, TYPE_MODE (type),
> +                                      TYPE_MODE (TREE_TYPE (maskt))),
> +               3, ops);
>  }
>
>  static void
> @@ -1924,7 +1926,9 @@ expand_MASK_STORE (gcall *stmt)
>    create_fixed_operand (&ops[0], mem);
>    create_input_operand (&ops[1], reg, TYPE_MODE (type));
>    create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> -  expand_insn (optab_handler (maskstore_optab, TYPE_MODE (type)), 3, ops);
> +  expand_insn (convert_optab_handler (maskstore_optab, TYPE_MODE (type),
> +                                      TYPE_MODE (TREE_TYPE (maskt))),
> +               3, ops);
>  }
>
>  static void
> diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
> index 254089f..c20597c 100644
> --- a/gcc/optabs-query.c
> +++ b/gcc/optabs-query.c
> @@ -466,7 +466,9 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
>  /* Return true if target supports vector masked load/store for mode.  */
>
>  bool
> -can_vec_mask_load_store_p (machine_mode mode, bool is_load)
> +can_vec_mask_load_store_p (machine_mode mode,
> +                           machine_mode mask_mode,
> +                           bool is_load)
>  {
>    optab op = is_load ? maskload_optab : maskstore_optab;
>    machine_mode vmode;
> @@ -474,7 +476,7 @@ can_vec_mask_load_store_p (machine_mode mode, bool is_load)
>
>    /* If mode is vector mode, check it directly.  */
>    if (VECTOR_MODE_P (mode))
> -    return optab_handler (op, mode) != CODE_FOR_nothing;
> +    return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
>
>    /* Otherwise, return true if there is some vector mode with
>       the mask load/store supported.  */
> @@ -485,7 +487,12 @@ can_vec_mask_load_store_p (machine_mode mode, bool is_load)
>    if (!VECTOR_MODE_P (vmode))
>      return false;
>
> -  if (optab_handler (op, vmode) != CODE_FOR_nothing)
> +  mask_mode = targetm.vectorize.get_mask_mode (GET_MODE_NUNITS (vmode),
> +                                               GET_MODE_SIZE (vmode));
> +  if (mask_mode == VOIDmode)
> +    return false;
> +
> +  if (convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
>      return true;
>
>    vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
> @@ -496,8 +503,10 @@ can_vec_mask_load_store_p (machine_mode mode, bool is_load)
>        if (cur <= GET_MODE_SIZE (mode))
>          continue;
>        vmode = mode_for_vector (mode, cur / GET_MODE_SIZE (mode));
> +      mask_mode = targetm.vectorize.get_mask_mode (GET_MODE_NUNITS (vmode),
> +                                                   cur);
>        if (VECTOR_MODE_P (vmode)
> -          && optab_handler (op, vmode) != CODE_FOR_nothing)
> +          && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
>          return true;
>      }
>    return false;
> diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
> index 81ac362..162d2e9 100644
> --- a/gcc/optabs-query.h
> +++ b/gcc/optabs-query.h
> @@ -140,7 +140,7 @@ enum insn_code find_widening_optab_handler_and_mode (optab, machine_mode,
>                                                       machine_mode, int,
>                                                       machine_mode *);
>  int can_mult_highpart_p (machine_mode, bool);
> -bool can_vec_mask_load_store_p (machine_mode, bool);
> +bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
>  bool can_compare_and_swap_p (machine_mode, bool);
>  bool can_atomic_exchange_p (machine_mode, bool);
>  bool lshift_cheap_p (bool);
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 1f9c1cf..9804378 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -63,6 +63,8 @@ OPTAB_CD(vcond_optab, "vcond$a$b")
>  OPTAB_CD(vcondu_optab, "vcondu$a$b")
>  OPTAB_CD(vec_cmp_optab, "vec_cmp$a$b")
>  OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b")
> +OPTAB_CD(maskload_optab, "maskload$a$b")
> +OPTAB_CD(maskstore_optab, "maskstore$a$b")
>
>  OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc)
>  OPTAB_NX(add_optab, "add$F$a3")
> @@ -266,8 +268,6 @@ OPTAB_D (udot_prod_optab, "udot_prod$I$a")
>  OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
>  OPTAB_D (usad_optab, "usad$I$a")
>  OPTAB_D (ssad_optab, "ssad$I$a")
> -OPTAB_D (maskload_optab, "maskload$a")
> -OPTAB_D (maskstore_optab, "maskstore$a")
>  OPTAB_D (vec_extract_optab, "vec_extract$a")
>  OPTAB_D (vec_init_optab, "vec_init$a")
>  OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a")
> diff --git a/gcc/testsuite/gcc.target/i386/avx2-vec-mask-bit-not.c b/gcc/testsuite/gcc.target/i386/avx2-vec-mask-bit-not.c
> new file mode 100644
> index 0000000..0c946ca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx2-vec-mask-bit-not.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target avx2 } */
> +/* { dg-options "-mavx2 -O3 -fopenmp-simd -fdump-tree-vect-details" } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +
> +#define N 1024
> +
> +int a[N], b[N], c[N], d[N], e[N];
> +
> +void
> +test (void)
> +{
> +  int i;
> +  #pragma omp simd
> +  for (i = 0; i < N; i++)
> +    if (!(a[i] > b[i] && c[i] < d[i]))
> +      e[i] = 0;
> +}
> diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
> index f201ab5..50e959f 100644
> --- a/gcc/tree-if-conv.c
> +++ b/gcc/tree-if-conv.c
> @@ -811,7 +811,7 @@ ifcvt_can_use_mask_load_store (gimple *stmt)
>        || VECTOR_MODE_P (mode))
>      return false;
>
> -  if (can_vec_mask_load_store_p (mode, is_load))
> +  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
>      return true;
>
>    return false;
> @@ -2068,8 +2068,9 @@ predicate_mem_writes (loop_p loop)
>            {
>              tree lhs = gimple_assign_lhs (stmt);
>              tree rhs = gimple_assign_rhs1 (stmt);
> -            tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
> +            tree ref, addr, ptr, mask;
>              gimple *new_stmt;
> +            gimple_seq stmts = NULL;
>              int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
>              ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
>              mark_addressable (ref);
> @@ -2082,16 +2083,27 @@ predicate_mem_writes (loop_p loop)
>                mask = vect_masks[index];
>              else
>                {
> -                masktype = build_nonstandard_integer_type (bitsize, 1);
> -                mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
> -                mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
> -                cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
> -                                                   is_gimple_condexpr,
> -                                                   NULL_TREE,
> -                                                   true, GSI_SAME_STMT);
> -                mask = fold_build_cond_expr (masktype, unshare_expr (cond),
> -                                             mask_op0, mask_op1);
> -                mask = ifc_temp_var (masktype, mask, &gsi);
> +                if (COMPARISON_CLASS_P (cond))
> +                  mask = gimple_build (&stmts, TREE_CODE (cond),
> +                                       boolean_type_node,
> +                                       TREE_OPERAND (cond, 0),
> +                                       TREE_OPERAND (cond, 1));
> +                else
> +                  {
> +                    gcc_assert (TREE_CODE (cond) == SSA_NAME);
> +                    mask = cond;
> +                  }
> +
> +                if (swap)
> +                  {
> +                    tree true_val
> +                      = constant_boolean_node (true, TREE_TYPE (mask));
> +                    mask = gimple_build (&stmts, BIT_XOR_EXPR,
> +                                         TREE_TYPE (mask), mask, true_val);
> +                  }
> +                gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
> +
> +                mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);
>                  /* Save mask and its size for further use.  */
>                  vect_sizes.safe_push (bitsize);
>                  vect_masks.safe_push (mask);
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 9413197..195ecf8 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1708,6 +1708,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>    bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
>    struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> +  tree mask_vectype;
>    tree elem_type;
>    gimple *new_stmt;
>    tree dummy;
> @@ -1734,8 +1735,8 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>
>    is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
>    mask = gimple_call_arg (stmt, 2);
> -  if (TYPE_PRECISION (TREE_TYPE (mask))
> -      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
> +
> +  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
>      return false;
>
>    /* FORNOW. This restriction should be relaxed.  */
> @@ -1764,6 +1765,18 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>    if (STMT_VINFO_STRIDED_P (stmt_info))
>      return false;
>
> +  if (TREE_CODE (mask) != SSA_NAME)
> +    return false;
> +
> +  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
> +    return false;
> +
> +  if (!mask_vectype)
> +    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
> +
> +  if (!mask_vectype)
> +    return false;
> +
>    if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>      {
>        gimple *def_stmt;
> @@ -1795,13 +1808,9 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>                                : DR_STEP (dr), size_zero_node) <= 0)
>      return false;
>    else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
> -           || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
> -    return false;
> -
> -  if (TREE_CODE (mask) != SSA_NAME)
> -    return false;
> -
> -  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt))
> +           || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
> +                                          TYPE_MODE (mask_vectype),
> +                                          !is_store))
>      return false;
>
>    if (is_store)
> @@ -4702,8 +4711,9 @@ vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
>
>    /* Most operations cannot handle bit-precision types without extra
>       truncations.  */
> -  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
> -       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
> +  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
> +      && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
> +          != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
>        /* Exception are bitwise binary operations.  */
>        && code != BIT_IOR_EXPR
>        && code != BIT_XOR_EXPR
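
As a purely illustrative aside (not part of the patch): with predicate_mem_writes
now building a boolean mask and inverting it via BIT_XOR with true, which the
match-and-simplify folding discussed above should turn into a BIT_NOT_EXPR, the
if-converted body of the new test's loop would look roughly like the GIMPLE sketch
below.  SSA names, the alignment argument, and the exact folded form are assumptions
for illustration only:

  _1 = a[i_1] > b[i_1];
  _2 = c[i_1] < d[i_1];
  _3 = _1 & _2;                        /* combined boolean condition */
  _4 = ~_3;                            /* inverted boolean mask (BIT_NOT_EXPR) */
  MASK_STORE (&e[i_1], align, _4, 0);  /* store predicated on the mask */

The previous scheme instead built an integer mask of -1/0 through a COND_EXPR on
the condition, which is what the removed masktype/mask_op0/mask_op1 code did.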