On Mon, 20 May 2019, Richard Sandiford wrote: > Richard Biener <rguent...@suse.de> writes: > > On Fri, 17 May 2019, Richard Biener wrote: > > > >> > >> This moves things from fold-const.c to match.pd. > > > > So there's an issue that was appearantly side-stepped in the GENERIC > > folding by not re-folding the result. > > > > /* Generate a canonical form of the selector. */ > > if (sel.encoding () != builder) > > > > "spuriously" returns true for some cases where we end up with > > the same tree representation of the selector (I see > > m_npatterns == 4 vs. 1 and m_nelts_per_pattern 1 vs. 3). > > > > So I either have to double-check whether the re-created > > op2 is really different from the original one or remove > > those "premature" checks from vector_builder::operator== > > where we then no longer can compare encoded elts... > > But the point of the comparison is to test whether the canonical > form now in "sel" is different from the original form. Which it > is if you see m_npatterns == 4 vs. 1 and m_nelts_per_pattern 1 vs. 3. > So I don't think there's anything premature in vector_bolder::operator==. > > Isn't the problem coming from: > > /* Some targets are deficient and fail to expand a single > argument permutation while still allowing an equivalent > 2-argument version. */ > > I.e. those targets force us to create a non-canonical VEC_PERM_EXPR > (possibly the same as the original VEC_PERM_EXPR) because otherwise the > targets won't recognise the result. Then we'll recanonicalise the > selector internally next time through the code. And so on and so on.
Ah, of course. > The folds always want to operate on the canonical form, so I think > the code up to and including the above comparison is correct. > It's just that, although we tried to replace a non-canonical > VEC_PERM_EXPR with a canonical VEC_PERM_EXPR, the target wouldn't > let us. > > > Testing the variant with changed = operand_equal_p (...). > > Yeah, agree that's the right fix FWIW. So the following is what I have applied. Bootstrapped / tested on x86_64-unknown-linux-gnu. Richard. 2019-05-20 Richard Biener <rguent...@suse.de> * gimple-match-head.c: Include vec-perm-indices.h. * generic-match-head.c: Likewise. * fold-const.h (fold_vec_perm): Declare when vec-perm-indices.h is included. * fold-const.c (fold_vec_perm): Export. (fold_ternary_loc): Move non-constant folding of VEC_PERM_EXPR... (match.pd): ...here. Index: gcc/gimple-match-head.c =================================================================== --- gcc/gimple-match-head.c (revision 271320) +++ gcc/gimple-match-head.c (working copy) @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. #include "gimple.h" #include "ssa.h" #include "cgraph.h" +#include "vec-perm-indices.h" #include "fold-const.h" #include "fold-const-call.h" #include "stor-layout.h" Index: gcc/generic-match-head.c =================================================================== --- gcc/generic-match-head.c (revision 271320) +++ gcc/generic-match-head.c (working copy) @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. #include "gimple.h" #include "ssa.h" #include "cgraph.h" +#include "vec-perm-indices.h" #include "fold-const.h" #include "stor-layout.h" #include "tree-dfa.h" Index: gcc/fold-const.c =================================================================== --- gcc/fold-const.c (revision 271320) +++ gcc/fold-const.c (working copy) @@ -9015,7 +9015,7 @@ vec_cst_ctor_to_array (tree arg, unsigne selector. Return the folded VECTOR_CST or CONSTRUCTOR if successful, NULL_TREE otherwise. */ -static tree +tree fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel) { unsigned int i; @@ -11763,7 +11763,10 @@ fold_ternary_loc (location_t loc, enum t return NULL_TREE; case VEC_PERM_EXPR: - if (TREE_CODE (arg2) == VECTOR_CST) + /* Perform constant folding of BIT_INSERT_EXPR. */ + if (TREE_CODE (arg2) == VECTOR_CST + && TREE_CODE (op0) == VECTOR_CST + && TREE_CODE (op1) == VECTOR_CST) { /* Build a vector of integers from the tree mask. */ vec_perm_builder builder; @@ -11774,61 +11777,7 @@ fold_ternary_loc (location_t loc, enum t poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); bool single_arg = (op0 == op1); vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); - - /* Check for cases that fold to OP0 or OP1 in their original - element order. */ - if (sel.series_p (0, 1, 0, 1)) - return op0; - if (sel.series_p (0, 1, nelts, 1)) - return op1; - - if (!single_arg) - { - if (sel.all_from_input_p (0)) - op1 = op0; - else if (sel.all_from_input_p (1)) - { - op0 = op1; - sel.rotate_inputs (1); - } - } - - if ((TREE_CODE (op0) == VECTOR_CST - || TREE_CODE (op0) == CONSTRUCTOR) - && (TREE_CODE (op1) == VECTOR_CST - || TREE_CODE (op1) == CONSTRUCTOR)) - { - tree t = fold_vec_perm (type, op0, op1, sel); - if (t != NULL_TREE) - return t; - } - - bool changed = (op0 == op1 && !single_arg); - - /* Generate a canonical form of the selector. */ - if (arg2 == op2 && sel.encoding () != builder) - { - /* Some targets are deficient and fail to expand a single - argument permutation while still allowing an equivalent - 2-argument version. */ - if (sel.ninputs () == 2 - || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) - op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); - else - { - vec_perm_indices sel2 (builder, 2, nelts); - if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) - op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2); - else - /* Not directly supported with either encoding, - so use the preferred form. */ - op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); - } - changed = true; - } - - if (changed) - return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2); + return fold_vec_perm (type, op0, op1, sel); } return NULL_TREE; Index: gcc/fold-const.h =================================================================== --- gcc/fold-const.h (revision 271320) +++ gcc/fold-const.h (working copy) @@ -100,6 +100,9 @@ extern tree fold_bit_and_mask (tree, tre tree, enum tree_code, tree, tree, tree, enum tree_code, tree, tree, tree *); extern tree fold_read_from_constant_string (tree); +#if GCC_VEC_PERN_INDICES_H +extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &); +#endif extern bool wide_int_binop (wide_int &res, enum tree_code, const wide_int &arg1, const wide_int &arg2, signop, wi::overflow_type *); Index: gcc/match.pd =================================================================== --- gcc/match.pd (revision 271395) +++ gcc/match.pd (working copy) @@ -5374,3 +5374,86 @@ (define_operator_list COND_TERNARY (bit_and:elt_type (BIT_FIELD_REF:elt_type @0 { size; } { pos; }) { elt; }))))))) + +(simplify + (vec_perm @0 @1 VECTOR_CST@2) + (with + { + tree op0 = @0, op1 = @1, op2 = @2; + + /* Build a vector of integers from the tree mask. */ + vec_perm_builder builder; + if (!tree_to_vec_perm_builder (&builder, op2)) + return NULL_TREE; + + /* Create a vec_perm_indices for the integer vector. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); + bool single_arg = (op0 == op1); + vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); + } + (if (sel.series_p (0, 1, 0, 1)) + { op0; } + (if (sel.series_p (0, 1, nelts, 1)) + { op1; } + (with + { + if (!single_arg) + { + if (sel.all_from_input_p (0)) + op1 = op0; + else if (sel.all_from_input_p (1)) + { + op0 = op1; + sel.rotate_inputs (1); + } + } + gassign *def; + tree cop0 = op0, cop1 = op1; + if (TREE_CODE (op0) == SSA_NAME + && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op0))) + && gimple_assign_rhs_code (def) == CONSTRUCTOR) + cop0 = gimple_assign_rhs1 (def); + if (TREE_CODE (op1) == SSA_NAME + && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op1))) + && gimple_assign_rhs_code (def) == CONSTRUCTOR) + cop1 = gimple_assign_rhs1 (def); + + tree t; + } + (if ((TREE_CODE (cop0) == VECTOR_CST + || TREE_CODE (cop0) == CONSTRUCTOR) + && (TREE_CODE (cop1) == VECTOR_CST + || TREE_CODE (cop1) == CONSTRUCTOR) + && (t = fold_vec_perm (type, cop0, cop1, sel))) + { t; } + (with + { + bool changed = (op0 == op1 && !single_arg); + + /* Generate a canonical form of the selector. */ + if (sel.encoding () != builder) + { + /* Some targets are deficient and fail to expand a single + argument permutation while still allowing an equivalent + 2-argument version. */ + tree oldop2 = op2; + if (sel.ninputs () == 2 + || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) + op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); + else + { + vec_perm_indices sel2 (builder, 2, nelts); + if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) + op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2); + else + /* Not directly supported with either encoding, + so use the preferred form. */ + op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); + } + /* Differences in the encoder do not necessarily mean + differences in the resulting vector. */ + changed = !operand_equal_p (op2, oldop2, 0); + } + } + (if (changed) + (vec_perm { op0; } { op1; } { op2; }))))))))) kk