On Fri, Dec 15, 2017 at 1:29 AM, Richard Sandiford
<richard.sandif...@linaro.org> wrote:
> This patch just adds VEC_DUPLICATE_EXPR, since the VEC_DUPLICATE_CST
> isn't needed with the new VECTOR_CST layout.  It's really just the
> original patch with bits removed, but just in case:
>
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu.
> OK to install?

OK, to keep things simple at this point.  Note that I'd eventually
like to see this as VEC_PERM_EXPR <scalar_type_1, scalar_type_1, { 0, ... }>.
For reductions, when we need { x, 0, ... }, we now have to use a
VEC_DUPLICATE_EXPR to make x a vector and then a VEC_PERM_EXPR
to merge it with { 0, ... }, right?  That is, rather than a single
VEC_PERM_EXPR <x_1, 0, { 0, 1, 1, 1, ... }>.

Thanks,
Richard.

> Richard
>
>
> 2017-12-15  Richard Sandiford  <richard.sandif...@linaro.org>
>             Alan Hayward  <alan.hawy...@arm.com>
>             David Sherwood  <david.sherw...@arm.com>
>
> gcc/
>         * doc/generic.texi (VEC_DUPLICATE_EXPR): Document.
>         (VEC_COND_EXPR): Add missing @tindex.
>         * doc/md.texi (vec_duplicate@var{m}): Document.
>         * tree.def (VEC_DUPLICATE_EXPR): New tree code.
>         * tree.c (build_vector_from_val): Add stubbed-out handling of
>         variable-length vectors, using VEC_DUPLICATE_EXPR.
>         (uniform_vector_p): Handle VEC_DUPLICATE_EXPR.
>         * cfgexpand.c (expand_debug_expr): Likewise.
>         * tree-cfg.c (verify_gimple_assign_unary): Likewise.
>         * tree-inline.c (estimate_operator_cost): Likewise.
>         * tree-pretty-print.c (dump_generic_node): Likewise.
>         * tree-vect-generic.c (ssa_uniform_vector_p): Likewise.
>         * fold-const.c (const_unop): Fold VEC_DUPLICATE_EXPRs of a constant.
>         (test_vec_duplicate_folding): New function.
>         (fold_const_c_tests): Call it.
>         * optabs.def (vec_duplicate_optab): New optab.
>         * optabs-tree.c (optab_for_tree_code): Handle VEC_DUPLICATE_EXPR.
>         * optabs.h (expand_vector_broadcast): Declare.
>         * optabs.c (expand_vector_broadcast): Make non-static.  Try using
>         vec_duplicate_optab.
>         * expr.c (store_constructor): Try using vec_duplicate_optab for
>         uniform vectors.
>         (expand_expr_real_2): Handle VEC_DUPLICATE_EXPR.
>
> Index: gcc/doc/generic.texi
> ===================================================================
> --- gcc/doc/generic.texi        2017-12-15 00:24:47.213516622 +0000
> +++ gcc/doc/generic.texi        2017-12-15 00:24:47.498459276 +0000
> @@ -1768,6 +1768,7 @@ a value from @code{enum annot_expr_kind}
>
>  @node Vectors
>  @subsection Vectors
> +@tindex VEC_DUPLICATE_EXPR
>  @tindex VEC_LSHIFT_EXPR
>  @tindex VEC_RSHIFT_EXPR
>  @tindex VEC_WIDEN_MULT_HI_EXPR
> @@ -1779,9 +1780,14 @@ a value from @code{enum annot_expr_kind}
>  @tindex VEC_PACK_TRUNC_EXPR
>  @tindex VEC_PACK_SAT_EXPR
>  @tindex VEC_PACK_FIX_TRUNC_EXPR
> +@tindex VEC_COND_EXPR
>  @tindex SAD_EXPR
>
>  @table @code
> +@item VEC_DUPLICATE_EXPR
> +This node has a single operand and represents a vector in which every
> +element is equal to that operand.
> +
>  @item VEC_LSHIFT_EXPR
>  @itemx VEC_RSHIFT_EXPR
>  These nodes represent whole vector left and right shifts, respectively.
> Index: gcc/doc/md.texi
> ===================================================================
> --- gcc/doc/md.texi     2017-12-15 00:24:47.213516622 +0000
> +++ gcc/doc/md.texi     2017-12-15 00:24:47.499459075 +0000
> @@ -4888,6 +4888,17 @@ and operand 1 is parallel containing val
>  the vector mode @var{m}, or a vector mode with the same element mode and
>  smaller number of elements.
>
> +@cindex @code{vec_duplicate@var{m}} instruction pattern
> +@item @samp{vec_duplicate@var{m}}
> +Initialize vector output operand 0 so that each element has the value given
> +by scalar input operand 1.  The vector has mode @var{m} and the scalar has
> +the mode appropriate for one element of @var{m}.
> +
> +This pattern only handles duplicates of non-constant inputs.  Constant
> +vectors go through the @code{mov@var{m}} pattern instead.
> +
> +This pattern is not allowed to @code{FAIL}.
> +
>  @cindex @code{vec_cmp@var{m}@var{n}} instruction pattern
>  @item @samp{vec_cmp@var{m}@var{n}}
>  Output a vector comparison.  Operand 0 of mode @var{n} is the destination for
> Index: gcc/tree.def
> ===================================================================
> --- gcc/tree.def        2017-12-15 00:24:47.213516622 +0000
> +++ gcc/tree.def        2017-12-15 00:24:47.505457868 +0000
> @@ -537,6 +537,9 @@ DEFTREECODE (TARGET_EXPR, "target_expr",
>     1 and 2 are NULL.  The operands are then taken from the cfg edges. */
>  DEFTREECODE (COND_EXPR, "cond_expr", tcc_expression, 3)
>
> +/* Represents a vector in which every element is equal to operand 0.  */
> +DEFTREECODE (VEC_DUPLICATE_EXPR, "vec_duplicate_expr", tcc_unary, 1)
> +
>  /* Vector conditional expression. It is like COND_EXPR, but with
>     vector operands.
>
> Index: gcc/tree.c
> ===================================================================
> --- gcc/tree.c  2017-12-15 00:24:47.213516622 +0000
> +++ gcc/tree.c  2017-12-15 00:24:47.505457868 +0000
> @@ -1785,6 +1785,8 @@ build_vector_from_val (tree vectype, tre
>        v.quick_push (sc);
>        return v.build ();
>      }
> +  else if (0)
> +    return fold_build1 (VEC_DUPLICATE_EXPR, vectype, sc);
>    else
>      {
>        vec<constructor_elt, va_gc> *v;
> @@ -10468,7 +10470,10 @@ uniform_vector_p (const_tree vec)
>
>    gcc_assert (VECTOR_TYPE_P (TREE_TYPE (vec)));
>
> -  if (TREE_CODE (vec) == VECTOR_CST)
> +  if (TREE_CODE (vec) == VEC_DUPLICATE_EXPR)
> +    return TREE_OPERAND (vec, 0);
> +
> +  else if (TREE_CODE (vec) == VECTOR_CST)
>      {
>        if (VECTOR_CST_NPATTERNS (vec) == 1 && VECTOR_CST_DUPLICATE_P (vec))
>         return VECTOR_CST_ENCODED_ELT (vec, 0);
> Index: gcc/cfgexpand.c
> ===================================================================
> --- gcc/cfgexpand.c     2017-12-15 00:24:47.213516622 +0000
> +++ gcc/cfgexpand.c     2017-12-15 00:24:47.498459276 +0000
> @@ -5069,6 +5069,7 @@ expand_debug_expr (tree exp)
>      case VEC_WIDEN_LSHIFT_HI_EXPR:
>      case VEC_WIDEN_LSHIFT_LO_EXPR:
>      case VEC_PERM_EXPR:
> +    case VEC_DUPLICATE_EXPR:
>        return NULL;
>
>      /* Misc codes.  */
> Index: gcc/tree-cfg.c
> ===================================================================
> --- gcc/tree-cfg.c      2017-12-15 00:24:47.213516622 +0000
> +++ gcc/tree-cfg.c      2017-12-15 00:24:47.503458270 +0000
> @@ -3857,6 +3857,17 @@ verify_gimple_assign_unary (gassign *stm
>      case CONJ_EXPR:
>        break;
>
> +    case VEC_DUPLICATE_EXPR:
> +      if (TREE_CODE (lhs_type) != VECTOR_TYPE
> +         || !useless_type_conversion_p (TREE_TYPE (lhs_type), rhs1_type))
> +       {
> +         error ("vec_duplicate should be from a scalar to a like vector");
> +         debug_generic_expr (lhs_type);
> +         debug_generic_expr (rhs1_type);
> +         return true;
> +       }
> +      return false;
> +
>      default:
>        gcc_unreachable ();
>      }
> Index: gcc/tree-inline.c
> ===================================================================
> --- gcc/tree-inline.c   2017-12-15 00:24:47.213516622 +0000
> +++ gcc/tree-inline.c   2017-12-15 00:24:47.504458069 +0000
> @@ -3928,6 +3928,7 @@ estimate_operator_cost (enum tree_code c
>      case VEC_PACK_FIX_TRUNC_EXPR:
>      case VEC_WIDEN_LSHIFT_HI_EXPR:
>      case VEC_WIDEN_LSHIFT_LO_EXPR:
> +    case VEC_DUPLICATE_EXPR:
>
>        return 1;
>
> Index: gcc/tree-pretty-print.c
> ===================================================================
> --- gcc/tree-pretty-print.c     2017-12-15 00:24:47.213516622 +0000
> +++ gcc/tree-pretty-print.c     2017-12-15 00:24:47.504458069 +0000
> @@ -3178,6 +3178,15 @@ dump_generic_node (pretty_printer *pp, t
>        pp_string (pp, " > ");
>        break;
>
> +    case VEC_DUPLICATE_EXPR:
> +      pp_space (pp);
> +      for (str = get_tree_code_name (code); *str; str++)
> +       pp_character (pp, TOUPPER (*str));
> +      pp_string (pp, " < ");
> +      dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
> +      pp_string (pp, " > ");
> +      break;
> +
>      case VEC_UNPACK_HI_EXPR:
>        pp_string (pp, " VEC_UNPACK_HI_EXPR < ");
>        dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
> Index: gcc/tree-vect-generic.c
> ===================================================================
> --- gcc/tree-vect-generic.c     2017-12-15 00:24:47.213516622 +0000
> +++ gcc/tree-vect-generic.c     2017-12-15 00:24:47.504458069 +0000
> @@ -1418,6 +1418,7 @@ lower_vec_perm (gimple_stmt_iterator *gs
>  ssa_uniform_vector_p (tree op)
>  {
>    if (TREE_CODE (op) == VECTOR_CST
> +      || TREE_CODE (op) == VEC_DUPLICATE_EXPR
>        || TREE_CODE (op) == CONSTRUCTOR)
>      return uniform_vector_p (op);
>    if (TREE_CODE (op) == SSA_NAME)
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c    2017-12-15 00:24:47.213516622 +0000
> +++ gcc/fold-const.c    2017-12-15 00:24:47.501458673 +0000
> @@ -1771,6 +1771,11 @@ const_unop (enum tree_code code, tree ty
>         return elts.build ();
>        }
>
> +    case VEC_DUPLICATE_EXPR:
> +      if (CONSTANT_CLASS_P (arg0))
> +       return build_vector_from_val (type, arg0);
> +      return NULL_TREE;
> +
>      default:
>        break;
>      }
> @@ -14442,6 +14447,22 @@ test_vector_folding ()
>    ASSERT_FALSE (integer_nonzerop (fold_build2 (NE_EXPR, res_type, one, one)));
>  }
>
> +/* Verify folding of VEC_DUPLICATE_EXPRs.  */
> +
> +static void
> +test_vec_duplicate_folding ()
> +{
> +  scalar_int_mode int_mode = SCALAR_INT_TYPE_MODE (ssizetype);
> +  machine_mode vec_mode = targetm.vectorize.preferred_simd_mode (int_mode);
> +  /* This will be 1 if VEC_MODE isn't a vector mode.  */
> +  unsigned int nunits = GET_MODE_NUNITS (vec_mode);
> +
> +  tree type = build_vector_type (ssizetype, nunits);
> +  tree dup5_expr = fold_unary (VEC_DUPLICATE_EXPR, type, ssize_int (5));
> +  tree dup5_cst = build_vector_from_val (type, ssize_int (5));
> +  ASSERT_TRUE (operand_equal_p (dup5_expr, dup5_cst, 0));
> +}
> +
>  /* Run all of the selftests within this file.  */
>
>  void
> @@ -14449,6 +14470,7 @@ fold_const_c_tests ()
>  {
>    test_arithmetic_folding ();
>    test_vector_folding ();
> +  test_vec_duplicate_folding ();
>  }
>
>  } // namespace selftest
> Index: gcc/optabs.def
> ===================================================================
> --- gcc/optabs.def      2017-12-15 00:24:47.213516622 +0000
> +++ gcc/optabs.def      2017-12-15 00:24:47.502458472 +0000
> @@ -363,3 +363,5 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I
>
>  OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
>  OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
> +
> +OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE)
> Index: gcc/optabs-tree.c
> ===================================================================
> --- gcc/optabs-tree.c   2017-12-15 00:24:47.213516622 +0000
> +++ gcc/optabs-tree.c   2017-12-15 00:24:47.501458673 +0000
> @@ -199,6 +199,9 @@ optab_for_tree_code (enum tree_code code
>        return TYPE_UNSIGNED (type) ?
>         vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab;
>
> +    case VEC_DUPLICATE_EXPR:
> +      return vec_duplicate_optab;
> +
>      default:
>        break;
>      }
> Index: gcc/optabs.h
> ===================================================================
> --- gcc/optabs.h        2017-12-15 00:24:47.213516622 +0000
> +++ gcc/optabs.h        2017-12-15 00:24:47.502458472 +0000
> @@ -182,6 +182,7 @@ extern rtx simplify_expand_binop (machin
>                                   enum optab_methods methods);
>  extern bool force_expand_binop (machine_mode, optab, rtx, rtx, rtx, int,
>                                 enum optab_methods);
> +extern rtx expand_vector_broadcast (machine_mode, rtx);
>
>  /* Generate code for a simple binary or unary operation.  "Simple" in
>     this case means "can be unambiguously described by a (mode, code)
> Index: gcc/optabs.c
> ===================================================================
> --- gcc/optabs.c        2017-12-15 00:24:47.213516622 +0000
> +++ gcc/optabs.c        2017-12-15 00:24:47.502458472 +0000
> @@ -367,7 +367,7 @@ force_expand_binop (machine_mode mode, o
>     mode of OP must be the element mode of VMODE.  If OP is a constant,
>     then the return value will be a constant.  */
>
> -static rtx
> +rtx
>  expand_vector_broadcast (machine_mode vmode, rtx op)
>  {
>    enum insn_code icode;
> @@ -380,6 +380,16 @@ expand_vector_broadcast (machine_mode vm
>    if (valid_for_const_vec_duplicate_p (vmode, op))
>      return gen_const_vec_duplicate (vmode, op);
>
> +  icode = optab_handler (vec_duplicate_optab, vmode);
> +  if (icode != CODE_FOR_nothing)
> +    {
> +      struct expand_operand ops[2];
> +      create_output_operand (&ops[0], NULL_RTX, vmode);
> +      create_input_operand (&ops[1], op, GET_MODE (op));
> +      expand_insn (icode, 2, ops);
> +      return ops[0].value;
> +    }
> +
>    /* ??? If the target doesn't have a vec_init, then we have no easy way
>       of performing this operation.  Most of this sort of generic support
>       is hidden away in the vector lowering support in gimple.  */
> Index: gcc/expr.c
> ===================================================================
> --- gcc/expr.c  2017-12-15 00:24:47.213516622 +0000
> +++ gcc/expr.c  2017-12-15 00:24:47.500458874 +0000
> @@ -6598,7 +6598,8 @@ store_constructor (tree exp, rtx target,
>         constructor_elt *ce;
>         int i;
>         int need_to_clear;
> -       int icode = CODE_FOR_nothing;
> +       insn_code icode = CODE_FOR_nothing;
> +       tree elt;
>         tree elttype = TREE_TYPE (type);
>         int elt_size = tree_to_uhwi (TYPE_SIZE (elttype));
>         machine_mode eltmode = TYPE_MODE (elttype);
> @@ -6608,13 +6609,30 @@ store_constructor (tree exp, rtx target,
>         unsigned n_elts;
>         alias_set_type alias;
>         bool vec_vec_init_p = false;
> +       machine_mode mode = GET_MODE (target);
>
>         gcc_assert (eltmode != BLKmode);
>
> +       /* Try using vec_duplicate_optab for uniform vectors.  */
> +       if (!TREE_SIDE_EFFECTS (exp)
> +           && VECTOR_MODE_P (mode)
> +           && eltmode == GET_MODE_INNER (mode)
> +           && ((icode = optab_handler (vec_duplicate_optab, mode))
> +               != CODE_FOR_nothing)
> +           && (elt = uniform_vector_p (exp)))
> +         {
> +           struct expand_operand ops[2];
> +           create_output_operand (&ops[0], target, mode);
> +           create_input_operand (&ops[1], expand_normal (elt), eltmode);
> +           expand_insn (icode, 2, ops);
> +           if (!rtx_equal_p (target, ops[0].value))
> +             emit_move_insn (target, ops[0].value);
> +           break;
> +         }
> +
>         n_elts = TYPE_VECTOR_SUBPARTS (type);
> -       if (REG_P (target) && VECTOR_MODE_P (GET_MODE (target)))
> +       if (REG_P (target) && VECTOR_MODE_P (mode))
>           {
> -           machine_mode mode = GET_MODE (target);
>             machine_mode emode = eltmode;
>
>             if (CONSTRUCTOR_NELTS (exp)
> @@ -6626,7 +6644,7 @@ store_constructor (tree exp, rtx target,
>                             == n_elts);
>                 emode = TYPE_MODE (etype);
>               }
> -           icode = (int) convert_optab_handler (vec_init_optab, mode, emode);
> +           icode = convert_optab_handler (vec_init_optab, mode, emode);
>             if (icode != CODE_FOR_nothing)
>               {
>                 unsigned int i, n = n_elts;
> @@ -6674,7 +6692,7 @@ store_constructor (tree exp, rtx target,
>         if (need_to_clear && size > 0 && !vector)
>           {
>             if (REG_P (target))
> -             emit_move_insn (target, CONST0_RTX (GET_MODE (target)));
> +             emit_move_insn (target, CONST0_RTX (mode));
>             else
>               clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL);
>             cleared = 1;
> @@ -6682,7 +6700,7 @@ store_constructor (tree exp, rtx target,
>
>         /* Inform later passes that the old value is dead.  */
>         if (!cleared && !vector && REG_P (target))
> -         emit_move_insn (target, CONST0_RTX (GET_MODE (target)));
> +         emit_move_insn (target, CONST0_RTX (mode));
>
>          if (MEM_P (target))
>           alias = MEM_ALIAS_SET (target);
> @@ -6733,8 +6751,7 @@ store_constructor (tree exp, rtx target,
>
>         if (vector)
>           emit_insn (GEN_FCN (icode) (target,
> -                                     gen_rtx_PARALLEL (GET_MODE (target),
> -                                                       vector)));
> +                                     gen_rtx_PARALLEL (mode, vector)));
>         break;
>        }
>
> @@ -9563,6 +9580,12 @@ #define REDUCE_BIT_FIELD(expr)   (reduce_b
>        target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
>        return target;
>
> +    case VEC_DUPLICATE_EXPR:
> +      op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
> +      target = expand_vector_broadcast (mode, op0);
> +      gcc_assert (target);
> +      return target;
> +
>      case BIT_INSERT_EXPR:
>        {
>         unsigned bitpos = tree_to_uhwi (treeop2);

Reply via email to