On Fri, 17 May 2019, Richard Biener wrote:

> 
> This moves things from fold-const.c to match.pd.

So there's an issue that was appearantly side-stepped in the GENERIC
folding by not re-folding the result.

        /* Generate a canonical form of the selector.  */
        if (sel.encoding () != builder)

"spuriously" returns true for some cases where we end up with
the same tree representation of the selector (I see
m_npatterns == 4 vs. 1 and m_nelts_per_pattern 1 vs. 3).

So I either have to double-check whether the re-created
op2 is really different from the original one or remove
those "premature" checks from vector_builder::operator==
where we then no longer can compare encoded elts...

Testing the variant with changed = operand_equal_p (...).

Richard.

> Bootstrap / regtest running on x86_64-unknown-linux-gnu.
> 
> Richard.
> 
> 2019-05-17  Richard Biener  <rguent...@suse.de>
> 
>       * gimple-match-head.c: Include vec-perm-indices.h.
>       * generic-match-head.c: Likewise.
>       * fold-const.h (fold_vec_perm): Declare when vec-perm-indices.h
>       is included.
>       * fold-const.c (fold_vec_perm): Export.
>       (fold_ternary_loc): Move non-constant folding of VEC_PERM_EXPR...
>       (match.pd): ...here.
> 
> Index: gcc/gimple-match-head.c
> ===================================================================
> --- gcc/gimple-match-head.c   (revision 271320)
> +++ gcc/gimple-match-head.c   (working copy)
> @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.
>  #include "gimple.h"
>  #include "ssa.h"
>  #include "cgraph.h"
> +#include "vec-perm-indices.h"
>  #include "fold-const.h"
>  #include "fold-const-call.h"
>  #include "stor-layout.h"
> Index: gcc/generic-match-head.c
> ===================================================================
> --- gcc/generic-match-head.c  (revision 271320)
> +++ gcc/generic-match-head.c  (working copy)
> @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.
>  #include "gimple.h"
>  #include "ssa.h"
>  #include "cgraph.h"
> +#include "vec-perm-indices.h"
>  #include "fold-const.h"
>  #include "stor-layout.h"
>  #include "tree-dfa.h"
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c  (revision 271320)
> +++ gcc/fold-const.c  (working copy)
> @@ -9015,7 +9015,7 @@ vec_cst_ctor_to_array (tree arg, unsigne
>     selector.  Return the folded VECTOR_CST or CONSTRUCTOR if successful,
>     NULL_TREE otherwise.  */
>  
> -static tree
> +tree
>  fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel)
>  {
>    unsigned int i;
> @@ -11763,7 +11763,10 @@ fold_ternary_loc (location_t loc, enum t
>        return NULL_TREE;
>  
>      case VEC_PERM_EXPR:
> -      if (TREE_CODE (arg2) == VECTOR_CST)
> +      /* Perform constant folding of BIT_INSERT_EXPR.  */
> +      if (TREE_CODE (arg2) == VECTOR_CST
> +       && TREE_CODE (op0) == VECTOR_CST
> +       && TREE_CODE (op1) == VECTOR_CST)
>       {
>         /* Build a vector of integers from the tree mask.  */
>         vec_perm_builder builder;
> @@ -11774,61 +11777,7 @@ fold_ternary_loc (location_t loc, enum t
>         poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
>         bool single_arg = (op0 == op1);
>         vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
> -
> -       /* Check for cases that fold to OP0 or OP1 in their original
> -          element order.  */
> -       if (sel.series_p (0, 1, 0, 1))
> -         return op0;
> -       if (sel.series_p (0, 1, nelts, 1))
> -         return op1;
> -
> -       if (!single_arg)
> -         {
> -           if (sel.all_from_input_p (0))
> -             op1 = op0;
> -           else if (sel.all_from_input_p (1))
> -             {
> -               op0 = op1;
> -               sel.rotate_inputs (1);
> -             }
> -         }
> -
> -       if ((TREE_CODE (op0) == VECTOR_CST
> -            || TREE_CODE (op0) == CONSTRUCTOR)
> -           && (TREE_CODE (op1) == VECTOR_CST
> -               || TREE_CODE (op1) == CONSTRUCTOR))
> -         {
> -           tree t = fold_vec_perm (type, op0, op1, sel);
> -           if (t != NULL_TREE)
> -             return t;
> -         }
> -
> -       bool changed = (op0 == op1 && !single_arg);
> -
> -       /* Generate a canonical form of the selector.  */
> -       if (arg2 == op2 && sel.encoding () != builder)
> -         {
> -           /* Some targets are deficient and fail to expand a single
> -              argument permutation while still allowing an equivalent
> -              2-argument version.  */
> -           if (sel.ninputs () == 2
> -               || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
> -             op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
> -           else
> -             {
> -               vec_perm_indices sel2 (builder, 2, nelts);
> -               if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
> -                 op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2);
> -               else
> -                 /* Not directly supported with either encoding,
> -                    so use the preferred form.  */
> -                 op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
> -             }
> -           changed = true;
> -         }
> -
> -       if (changed)
> -         return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
> +       return fold_vec_perm (type, op0, op1, sel);
>       }
>        return NULL_TREE;
>  
> Index: gcc/fold-const.h
> ===================================================================
> --- gcc/fold-const.h  (revision 271320)
> +++ gcc/fold-const.h  (working copy)
> @@ -100,6 +100,9 @@ extern tree fold_bit_and_mask (tree, tre
>                              tree, enum tree_code, tree, tree,
>                              tree, enum tree_code, tree, tree, tree *);
>  extern tree fold_read_from_constant_string (tree);
> +#if GCC_VEC_PERN_INDICES_H
> +extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
> +#endif
>  extern bool wide_int_binop (wide_int &res, enum tree_code,
>                           const wide_int &arg1, const wide_int &arg2,
>                           signop, wi::overflow_type *);
> Index: gcc/match.pd
> ===================================================================
> --- gcc/match.pd      (revision 271320)
> +++ gcc/match.pd      (working copy)
> @@ -5374,3 +5374,83 @@ (define_operator_list COND_TERNARY
>        (bit_and:elt_type
>         (BIT_FIELD_REF:elt_type @0 { size; } { pos; })
>         { elt; })))))))
> +
> +(simplify
> + (vec_perm @0 @1 VECTOR_CST@2)
> + (with
> +  {
> +    tree op0 = @0, op1 = @1, op2 = @2;
> +
> +    /* Build a vector of integers from the tree mask.  */
> +    vec_perm_builder builder;
> +    if (!tree_to_vec_perm_builder (&builder, op2))
> +      return NULL_TREE;
> +
> +    /* Create a vec_perm_indices for the integer vector.  */
> +    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
> +    bool single_arg = (op0 == op1);
> +    vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
> +  }
> +  (if (sel.series_p (0, 1, 0, 1))
> +   { op0; }
> +   (if (sel.series_p (0, 1, nelts, 1))
> +    { op1; }
> +    (with
> +     {
> +       if (!single_arg)
> +         {
> +        if (sel.all_from_input_p (0))
> +          op1 = op0;
> +        else if (sel.all_from_input_p (1))
> +          {
> +            op0 = op1;
> +            sel.rotate_inputs (1);
> +          }
> +         }
> +       gassign *def;
> +       tree cop0 = op0, cop1 = op1;
> +       if (TREE_CODE (op0) == SSA_NAME
> +           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op0)))
> +        && gimple_assign_rhs_code (def) == CONSTRUCTOR)
> +      cop0 = gimple_assign_rhs1 (def);
> +       if (TREE_CODE (op1) == SSA_NAME
> +           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op1)))
> +        && gimple_assign_rhs_code (def) == CONSTRUCTOR)
> +      cop1 = gimple_assign_rhs1 (def);
> +
> +       tree t;
> +    }
> +    (if ((TREE_CODE (cop0) == VECTOR_CST
> +       || TREE_CODE (cop0) == CONSTRUCTOR)
> +      && (TREE_CODE (cop1) == VECTOR_CST
> +          || TREE_CODE (cop1) == CONSTRUCTOR)
> +      && (t = fold_vec_perm (type, cop0, cop1, sel)))
> +     { t; }
> +     (with
> +      {
> +     bool changed = (op0 == op1 && !single_arg);
> +
> +     /* Generate a canonical form of the selector.  */
> +     if (sel.encoding () != builder)
> +       {
> +         /* Some targets are deficient and fail to expand a single
> +            argument permutation while still allowing an equivalent
> +            2-argument version.  */
> +         if (sel.ninputs () == 2
> +            || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
> +           op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
> +         else
> +           {
> +             vec_perm_indices sel2 (builder, 2, nelts);
> +             if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
> +               op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2);
> +             else
> +               /* Not directly supported with either encoding,
> +                  so use the preferred form.  */
> +               op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
> +           }
> +         changed = true;
> +       }
> +      }
> +      (if (changed)
> +       (vec_perm { op0; } { op1; } { op2; })))))))))
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)

Reply via email to