Hi! On the following testcase we end up with a comparison (EQ_EXPR in this case) with unsupported vector operands but a supported result (vector boolean type with scalar mode, i.e. the AVX512F-ish integer bitmask), and later a VEC_COND_EXPR which is also not supported by the optab and has the vector boolean type with scalar mode as the first operand.
The last hunk makes sure that we don't just ignore lowering of the comparison when it has an integer bitmask result but unsupported vector operands. The expand_vector_comparison changes make sure we lower the comparison properly into the integer bitmask, and finally the expand_vector_condition changes make sure we properly lower the VEC_COND_EXPR. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-07-17 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/91157 * tree-vect-generic.c (expand_vector_comparison): Handle lhs being a vector boolean with scalar mode. (expand_vector_condition): Handle first operand being a vector boolean with scalar mode. (expand_vector_operations_1): For comparisons, don't bail out early if the return type is vector boolean with scalar mode, but comparison operand type is not. * gcc.target/i386/avx512f-pr91157.c: New test. * gcc.target/i386/avx512bw-pr91157.c: New test. --- gcc/tree-vect-generic.c.jj 2019-07-04 00:18:37.063010439 +0200 +++ gcc/tree-vect-generic.c 2019-07-16 12:40:41.343059690 +0200 @@ -382,8 +382,48 @@ expand_vector_comparison (gimple_stmt_it tree t; if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code) && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code)) - t = expand_vector_piecewise (gsi, do_compare, type, - TREE_TYPE (TREE_TYPE (op0)), op0, op1, code); + { + if (VECTOR_BOOLEAN_TYPE_P (type) + && VECTOR_BOOLEAN_TYPE_P (type) + && SCALAR_INT_MODE_P (TYPE_MODE (type)) + && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)), + TYPE_VECTOR_SUBPARTS (type) + * GET_MODE_BITSIZE (SCALAR_TYPE_MODE + (TREE_TYPE (type))))) + { + tree inner_type = TREE_TYPE (TREE_TYPE (op0)); + tree part_width = TYPE_SIZE (inner_type); + tree index = bitsize_int (0); + int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0)); + int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type)); + tree ret_type = build_nonstandard_integer_type (prec, 1); + tree ret_inner_type = boolean_type_node; + int i; + location_t loc = 
gimple_location (gsi_stmt (*gsi)); + t = build_zero_cst (ret_type); + + if (TYPE_PRECISION (ret_inner_type) != 1) + ret_inner_type = build_nonstandard_integer_type (1, 1); + warning_at (loc, OPT_Wvector_operation_performance, + "vector operation will be expanded piecewise"); + for (i = 0; i < nunits; + i++, index = int_const_binop (PLUS_EXPR, index, part_width)) + { + tree a = tree_vec_extract (gsi, inner_type, op0, part_width, + index); + tree b = tree_vec_extract (gsi, inner_type, op1, part_width, + index); + tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b); + t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result, + bitsize_int (i)); + } + t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); + } + else + t = expand_vector_piecewise (gsi, do_compare, type, + TREE_TYPE (TREE_TYPE (op0)), op0, op1, + code); + } else t = NULL_TREE; @@ -879,6 +919,7 @@ expand_vector_condition (gimple_stmt_ite tree a1 = a; tree a2 = NULL_TREE; bool a_is_comparison = false; + bool a_is_scalar_bitmask = false; tree b = gimple_assign_rhs2 (stmt); tree c = gimple_assign_rhs3 (stmt); vec<constructor_elt, va_gc> *v; @@ -942,6 +983,20 @@ expand_vector_condition (gimple_stmt_ite warning_at (loc, OPT_Wvector_operation_performance, "vector condition will be expanded piecewise"); + if (!a_is_comparison + && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a)) + && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a))) + && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))), + TYPE_VECTOR_SUBPARTS (TREE_TYPE (a)) + * GET_MODE_BITSIZE (SCALAR_TYPE_MODE + (TREE_TYPE (TREE_TYPE (a)))))) + { + a_is_scalar_bitmask = true; + int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a))); + tree atype = build_nonstandard_integer_type (prec, 1); + a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a); + } + int nunits = nunits_for_known_piecewise_op (type); vec_alloc (v, nunits); for (i = 0; i < nunits; i++) @@ -957,6 +1012,14 @@ expand_vector_condition (gimple_stmt_ite comp_width, comp_index); 
aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2); } + else if (a_is_scalar_bitmask) + { + wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a))); + result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a), + a, wide_int_to_tree (TREE_TYPE (a), w)); + aa = fold_build2 (NE_EXPR, boolean_type_node, result, + build_zero_cst (TREE_TYPE (a))); + } else aa = tree_vec_extract (gsi, cond_type, a, width, index); result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc); @@ -1941,7 +2004,11 @@ expand_vector_operations_1 (gimple_stmt_ /* A scalar operation pretending to be a vector one. */ if (VECTOR_BOOLEAN_TYPE_P (type) && !VECTOR_MODE_P (TYPE_MODE (type)) - && TYPE_MODE (type) != BLKmode) + && TYPE_MODE (type) != BLKmode + && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison + || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)) + && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1))) + && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode))) return; /* If the vector operation is operating on all same vector elements --- gcc/testsuite/gcc.target/i386/avx512f-pr91157.c.jj 2019-07-16 12:54:55.928900526 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-pr91157.c 2019-07-16 13:01:39.217714434 +0200 @@ -0,0 +1,29 @@ +/* PR tree-optimization/91157 */ +/* { dg-do run { target { avx512f && lp64 } } } */ +/* { dg-options "-O2 -mavx512f -fexceptions -fnon-call-exceptions -fsignaling-nans" } */ + +#include "avx512f-helper.h" + +typedef long double V __attribute__ ((vector_size (4 * sizeof (long double)))); +typedef __int128 W __attribute__ ((vector_size (4 * sizeof (__int128)))); + +__attribute__((noipa)) W +foo (V x) +{ + return x == 0; +} + +static void +test_512 (void) +{ + V a = { 5.0L, 0.0L, -0.0L, -17.0L }; + V b = { -0.0L, 16.0L, 0.0L, 18.0L }; + V c = { 6.0L, 7.0L, 8.0L, 0.0L }; + W ar = foo (a); + W br = foo (b); + W cr = foo (c); + if (ar[0] != 0 || ar[1] != -1 || ar[2] != -1 || ar[3] != 0 + || br[0] != -1 || br[1] != 0 || br[2] != -1 || br[3] != 0 + || 
cr[0] != 0 || cr[1] != 0 || cr[2] != 0 || cr[3] != -1) + __builtin_abort (); +} --- gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c.jj 2019-07-16 12:55:11.609659992 +0200 +++ gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c 2019-07-16 13:01:10.438155882 +0200 @@ -0,0 +1,6 @@ +/* PR tree-optimization/91157 */ +/* { dg-do run { target { avx512bw && lp64 } } } */ +/* { dg-options "-O2 -mavx512bw -fexceptions -fnon-call-exceptions -fsignaling-nans" } */ + +#define AVX512BW +#include "avx512f-pr91157.c" Jakub