On Thu, 10 May 2018, Jakub Jelinek wrote: > On Wed, May 09, 2018 at 04:53:19PM +0200, Allan Sandfeld Jensen wrote: > > > > @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator > > > > *gsi)> > > > > elem_type = TREE_TYPE (type); > > > > elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); > > > > > > > > - vec_perm_builder sel (nelts, nelts, 1); > > > > - orig = NULL; > > > > + vec_perm_builder sel (nelts, 2, nelts); > > > > > > Why this change? I admit the vec_parm_builder arguments are confusing, > > > but > > > I think the second times third is the number of how many indices are being > > > pushed into the vector, so I think (nelts, nelts, 1) is right. > > > > > I had the impression it was what was selected from. In any case, I changed > > it > > because without I get crash when vec_perm_indices is created later with a > > possible nparms of 2. > > The documentation is apparently in vector-builder.h: > This class is a wrapper around auto_vec<T> for building vectors of T. > It aims to encode each vector as npatterns interleaved patterns, > where each pattern represents a sequence: > > { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... } > > The first three elements in each pattern provide enough information > to derive the other elements. If all patterns have a STEP of zero, > we only need to encode the first two elements in each pattern. > If BASE1 is also equal to BASE0 for all patterns, we only need to > encode the first element in each pattern. The number of encoded > elements per pattern is given by nelts_per_pattern. > > The class can be used in two ways: > > 1. It can be used to build a full image of the vector, which is then > canonicalized by finalize (). In this case npatterns is initially > the number of elements in the vector and nelts_per_pattern is > initially 1. > > 2. It can be used to build a vector that already has a known encoding. > This is preferred since it is more efficient and copes with > variable-length vectors. finalize () then canonicalizes the encoding > to a simpler form if possible. > > As the vector is constant width and we are building the full image of the > vector, the right arguments are (nelts, nelts, 1) as per 1. above, and the > finalization can perhaps change it to something more compact. > > > > (and sorry for missing your patch first, the PR wasn't ASSIGNED and there > > > was no link to gcc-patches for it). > > > > > It is okay. You are welcome to take it over. I am not a regular gcc > > contributor and thus not well-versed in the details, only the basic logic > > of > > how things work. > > Ok, here is my version of the patch. Bootstrapped/regtested on x86_64-linux > and i686-linux, ok for trunk?
OK. Thanks, Richard. > 2018-05-10 Allan Sandfeld Jensen <allan.jen...@qt.io> > Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/85692 > * tree-ssa-forwprop.c (simplify_vector_constructor): Try two > source permute as well. > > * gcc.target/i386/pr85692.c: New test. > > --- gcc/tree-ssa-forwprop.c.jj 2018-05-08 18:16:36.866614130 +0200 > +++ gcc/tree-ssa-forwprop.c 2018-05-09 20:44:32.621900540 +0200 > @@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt > { > gimple *stmt = gsi_stmt (*gsi); > gimple *def_stmt; > - tree op, op2, orig, type, elem_type; > + tree op, op2, orig[2], type, elem_type; > unsigned elem_size, i; > unsigned HOST_WIDE_INT nelts; > enum tree_code code, conv_code; > @@ -2023,7 +2023,8 @@ simplify_vector_constructor (gimple_stmt > elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); > > vec_perm_builder sel (nelts, nelts, 1); > - orig = NULL; > + orig[0] = NULL; > + orig[1] = NULL; > conv_code = ERROR_MARK; > maybe_ident = true; > FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) > @@ -2063,25 +2064,35 @@ simplify_vector_constructor (gimple_stmt > return false; > op1 = gimple_assign_rhs1 (def_stmt); > ref = TREE_OPERAND (op1, 0); > - if (orig) > + unsigned int j; > + for (j = 0; j < 2; ++j) > { > - if (ref != orig) > - return false; > - } > - else > - { > - if (TREE_CODE (ref) != SSA_NAME) > - return false; > - if (! VECTOR_TYPE_P (TREE_TYPE (ref)) > - || ! useless_type_conversion_p (TREE_TYPE (op1), > - TREE_TYPE (TREE_TYPE (ref)))) > - return false; > - orig = ref; > + if (!orig[j]) > + { > + if (TREE_CODE (ref) != SSA_NAME) > + return false; > + if (! VECTOR_TYPE_P (TREE_TYPE (ref)) > + || ! useless_type_conversion_p (TREE_TYPE (op1), > + TREE_TYPE (TREE_TYPE (ref)))) > + return false; > + if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]), > + TREE_TYPE (ref))) > + return false; > + orig[j] = ref; > + break; > + } > + else if (ref == orig[j]) > + break; > } > + if (j == 2) > + return false; > + > unsigned int elt; > if (maybe_ne (bit_field_size (op1), elem_size) > || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) > return false; > + if (j) > + elt += nelts; > if (elt != i) > maybe_ident = false; > sel.quick_push (elt); > @@ -2089,14 +2100,15 @@ simplify_vector_constructor (gimple_stmt > if (i < nelts) > return false; > > - if (! VECTOR_TYPE_P (TREE_TYPE (orig)) > + if (! VECTOR_TYPE_P (TREE_TYPE (orig[0])) > || maybe_ne (TYPE_VECTOR_SUBPARTS (type), > - TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig)))) > + TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0])))) > return false; > > tree tem; > if (conv_code != ERROR_MARK > - && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig), > + && (! supportable_convert_operation (conv_code, type, > + TREE_TYPE (orig[0]), > &tem, &conv_code) > || conv_code == CALL_EXPR)) > return false; > @@ -2104,16 +2116,16 @@ simplify_vector_constructor (gimple_stmt > if (maybe_ident) > { > if (conv_code == ERROR_MARK) > - gimple_assign_set_rhs_from_tree (gsi, orig); > + gimple_assign_set_rhs_from_tree (gsi, orig[0]); > else > - gimple_assign_set_rhs_with_ops (gsi, conv_code, orig, > + gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0], > NULL_TREE, NULL_TREE); > } > else > { > tree mask_type; > > - vec_perm_indices indices (sel, 1, nelts); > + vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts); > if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) > return false; > mask_type > @@ -2124,16 +2136,19 @@ simplify_vector_constructor (gimple_stmt > GET_MODE_SIZE (TYPE_MODE (type)))) > return false; > op2 = vec_perm_indices_to_tree (mask_type, indices); > + if (!orig[1]) > + orig[1] = orig[0]; > if (conv_code == ERROR_MARK) > - gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig, orig, op2); > + gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0], > + orig[1], op2); > else > { > gimple *perm > - = gimple_build_assign (make_ssa_name (TREE_TYPE (orig)), > - VEC_PERM_EXPR, orig, orig, op2); > - orig = gimple_assign_lhs (perm); > + = gimple_build_assign (make_ssa_name (TREE_TYPE (orig[0])), > + VEC_PERM_EXPR, orig[0], orig[1], op2); > + orig[0] = gimple_assign_lhs (perm); > gsi_insert_before (gsi, perm, GSI_SAME_STMT); > - gimple_assign_set_rhs_with_ops (gsi, conv_code, orig, > + gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0], > NULL_TREE, NULL_TREE); > } > } > --- gcc/testsuite/gcc.target/i386/pr85692.c.jj 2018-05-09 > 20:44:37.153904405 +0200 > +++ gcc/testsuite/gcc.target/i386/pr85692.c 2018-05-09 20:44:37.153904405 > +0200 > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse4.1" } */ > +/* { dg-final { scan-assembler "unpcklps" } } */ > +/* { dg-final { scan-assembler "blendps" } } */ > +/* { dg-final { scan-assembler-not "shufps" } } */ > +/* { dg-final { scan-assembler-not "unpckhps" } } */ > + > +typedef float v4sf __attribute__ ((vector_size (16))); > + > +v4sf unpcklps(v4sf a, v4sf b) > +{ > + return (v4sf){a[0],b[0],a[1],b[1]}; > +} > + > +v4sf blendps(v4sf a, v4sf b) > +{ > + return (v4sf){a[0],b[1],a[2],b[3]}; > +} > > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)