On Wed, 17 Jul 2019, Jakub Jelinek wrote: > Hi! > > On the following testcase we end up with a comparison (EQ_EXPR in this case) > with unsupported vector operands, but a supported result (vector boolean > type with scalar mode, i.e. the AVX512F-ish integer bitmask) and later > a VEC_COND_EXPR which is also not supported by the optab and has the vector > boolean type with scalar mode as the first operand. > > The last hunk makes sure that we don't just ignore lowering of the comparison > when it has an integer bitmask result but unsupported vector operands. > The expand_vector_comparison changes make sure we lower the comparison > properly into the integer bitmask and finally the expand_vector_condition > changes make sure we properly lower the VEC_COND_EXPR. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Thanks, Richard. > 2019-07-17 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/91157 > * tree-vect-generic.c (expand_vector_comparison): Handle lhs being > a vector boolean with scalar mode. > (expand_vector_condition): Handle first operand being a vector boolean > with scalar mode. > (expand_vector_operations_1): For comparisons, don't bail out early > if the return type is vector boolean with scalar mode, but comparison > operand type is not. > > * gcc.target/i386/avx512f-pr91157.c: New test. > * gcc.target/i386/avx512bw-pr91157.c: New test. > > --- gcc/tree-vect-generic.c.jj 2019-07-04 00:18:37.063010439 +0200 > +++ gcc/tree-vect-generic.c 2019-07-16 12:40:41.343059690 +0200 > @@ -382,8 +382,48 @@ expand_vector_comparison (gimple_stmt_it > tree t; > if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code) > && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code)) > - t = expand_vector_piecewise (gsi, do_compare, type, > - TREE_TYPE (TREE_TYPE (op0)), op0, op1, code); > + { > + if (VECTOR_BOOLEAN_TYPE_P (type) > + && VECTOR_BOOLEAN_TYPE_P (type) > + && SCALAR_INT_MODE_P (TYPE_MODE (type)) > + && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)), > + TYPE_VECTOR_SUBPARTS (type) > + * GET_MODE_BITSIZE (SCALAR_TYPE_MODE > + (TREE_TYPE (type))))) > + { > + tree inner_type = TREE_TYPE (TREE_TYPE (op0)); > + tree part_width = TYPE_SIZE (inner_type); > + tree index = bitsize_int (0); > + int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0)); > + int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type)); > + tree ret_type = build_nonstandard_integer_type (prec, 1); > + tree ret_inner_type = boolean_type_node; > + int i; > + location_t loc = gimple_location (gsi_stmt (*gsi)); > + t = build_zero_cst (ret_type); > + > + if (TYPE_PRECISION (ret_inner_type) != 1) > + ret_inner_type = build_nonstandard_integer_type (1, 1); > + warning_at (loc, OPT_Wvector_operation_performance, > + "vector operation will be expanded piecewise"); > + for (i = 0; i < nunits; > + 
i++, index = int_const_binop (PLUS_EXPR, index, part_width)) > + { > + tree a = tree_vec_extract (gsi, inner_type, op0, part_width, > + index); > + tree b = tree_vec_extract (gsi, inner_type, op1, part_width, > + index); > + tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b); > + t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result, > + bitsize_int (i)); > + } > + t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); > + } > + else > + t = expand_vector_piecewise (gsi, do_compare, type, > + TREE_TYPE (TREE_TYPE (op0)), op0, op1, > + code); > + } > else > t = NULL_TREE; > > @@ -879,6 +919,7 @@ expand_vector_condition (gimple_stmt_ite > tree a1 = a; > tree a2 = NULL_TREE; > bool a_is_comparison = false; > + bool a_is_scalar_bitmask = false; > tree b = gimple_assign_rhs2 (stmt); > tree c = gimple_assign_rhs3 (stmt); > vec<constructor_elt, va_gc> *v; > @@ -942,6 +983,20 @@ expand_vector_condition (gimple_stmt_ite > warning_at (loc, OPT_Wvector_operation_performance, > "vector condition will be expanded piecewise"); > > + if (!a_is_comparison > + && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a)) > + && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a))) > + && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))), > + TYPE_VECTOR_SUBPARTS (TREE_TYPE (a)) > + * GET_MODE_BITSIZE (SCALAR_TYPE_MODE > + (TREE_TYPE (TREE_TYPE (a)))))) > + { > + a_is_scalar_bitmask = true; > + int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a))); > + tree atype = build_nonstandard_integer_type (prec, 1); > + a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a); > + } > + > int nunits = nunits_for_known_piecewise_op (type); > vec_alloc (v, nunits); > for (i = 0; i < nunits; i++) > @@ -957,6 +1012,14 @@ expand_vector_condition (gimple_stmt_ite > comp_width, comp_index); > aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2); > } > + else if (a_is_scalar_bitmask) > + { > + wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a))); > + result = gimplify_build2 
(gsi, BIT_AND_EXPR, TREE_TYPE (a), > + a, wide_int_to_tree (TREE_TYPE (a), w)); > + aa = fold_build2 (NE_EXPR, boolean_type_node, result, > + build_zero_cst (TREE_TYPE (a))); > + } > else > aa = tree_vec_extract (gsi, cond_type, a, width, index); > result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc); > @@ -1941,7 +2004,11 @@ expand_vector_operations_1 (gimple_stmt_ > /* A scalar operation pretending to be a vector one. */ > if (VECTOR_BOOLEAN_TYPE_P (type) > && !VECTOR_MODE_P (TYPE_MODE (type)) > - && TYPE_MODE (type) != BLKmode) > + && TYPE_MODE (type) != BLKmode > + && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison > + || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)) > + && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1))) > + && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode))) > return; > > /* If the vector operation is operating on all same vector elements > --- gcc/testsuite/gcc.target/i386/avx512f-pr91157.c.jj 2019-07-16 > 12:54:55.928900526 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-pr91157.c 2019-07-16 > 13:01:39.217714434 +0200 > @@ -0,0 +1,29 @@ > +/* PR tree-optimization/91157 */ > +/* { dg-do run { target { avx512f && lp64 } } } */ > +/* { dg-options "-O2 -mavx512f -fexceptions -fnon-call-exceptions > -fsignaling-nans" } */ > + > +#include "avx512f-helper.h" > + > +typedef long double V __attribute__ ((vector_size (4 * sizeof (long > double)))); > +typedef __int128 W __attribute__ ((vector_size (4 * sizeof (__int128)))); > + > +__attribute__((noipa)) W > +foo (V x) > +{ > + return x == 0; > +} > + > +static void > +test_512 (void) > +{ > + V a = { 5.0L, 0.0L, -0.0L, -17.0L }; > + V b = { -0.0L, 16.0L, 0.0L, 18.0L }; > + V c = { 6.0L, 7.0L, 8.0L, 0.0L }; > + W ar = foo (a); > + W br = foo (b); > + W cr = foo (c); > + if (ar[0] != 0 || ar[1] != -1 || ar[2] != -1 || ar[3] != 0 > + || br[0] != -1 || br[1] != 0 || br[2] != -1 || br[3] != 0 > + || cr[0] != 0 || cr[1] != 0 || cr[2] != 0 || cr[3] != -1) > + __builtin_abort (); > +} > 
--- gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c.jj 2019-07-16 > 12:55:11.609659992 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c 2019-07-16 > 13:01:10.438155882 +0200 > @@ -0,0 +1,6 @@ > +/* PR tree-optimization/91157 */ > +/* { dg-do run { target { avx512bw && lp64 } } } */ > +/* { dg-options "-O2 -mavx512bw -fexceptions -fnon-call-exceptions > -fsignaling-nans" } */ > + > +#define AVX512BW > +#include "avx512f-pr91157.c" > > Jakub > -- Richard Biener <rguent...@suse.de> SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)