PING

> -----Original Message-----
> From: Claudiu Zissulescu
> Sent: Friday, April 08, 2016 10:31 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Claudiu Zissulescu; g...@amylaar.uk; francois.bed...@synopsys.com;
> jeremy.benn...@embecosm.com
> Subject: [PATCH] [ARC] Add SIMD extensions for ARC HS
> 
> This patch adds support for the new SIMD operations added to the ARC HS
> CPU class. The proposed patch does not aim for peak performance; it offers
> support for the newly added operations and for autovectorization.
> 
> The patch is tested using dg.exp, compile.exp, and execute.exp for
> both arc700 and archs with and without SIMD support enabled.
> 
> OK to apply?
> Claudiu
> 
> gcc/
> 2016-03-14  Claudiu Zissulescu  <claz...@synopsys.com>
> 
>       * config/arc/arc.c (arc_vector_mode_supported_p): Add support for
>       the new ARC HS SIMD instructions.
>       (arc_preferred_simd_mode): New function.
>       (arc_autovectorize_vector_sizes): Likewise.
>       (TARGET_VECTORIZE_PREFERRED_SIMD_MODE)
>       (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
>       (arc_init_reg_tables): Accept new ARC HS SIMD modes.
>       (arc_init_builtins): Add new SIMD builtin types.
>       (arc_split_move): Handle 64 bit vector moves.
>       * config/arc/arc.h (TARGET_PLUS_DMPY, TARGET_PLUS_MACD)
>       (TARGET_PLUS_QMACW): Define.
>       * config/arc/builtins.def (QMACH, QMACHU, QMPYH, QMPYHU, DMACH)
>       (DMACHU, DMPYH, DMPYHU, DMACWH, DMACWHU, VMAC2H, VMAC2HU, VMPY2H)
>       (VMPY2HU, VADDSUB2H, VSUBADD2H, VADDSUB, VSUBADD, VADDSUB4H)
>       (VSUBADD4H): New builtins.
>       * config/arc/simdext.md: Add new ARC HS SIMD instructions.
>       * testsuite/gcc.target/arc/builtin_simdarc.c: New file.
> ---
>  gcc/config/arc/arc.c                           | 112 ++++-
>  gcc/config/arc/arc.h                           |   6 +
>  gcc/config/arc/builtins.def                    |  27 ++
>  gcc/config/arc/simdext.md                      | 571 +++++++++++++++++++++++++
>  gcc/testsuite/gcc.target/arc/builtin_simdarc.c |  38 ++
>  5 files changed, 747 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arc/builtin_simdarc.c
> 
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index d60db50..d120946 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -247,16 +247,47 @@ static bool arc_use_by_pieces_infrastructure_p
> (unsigned HOST_WIDE_INT,
>  static bool
>  arc_vector_mode_supported_p (machine_mode mode)
>  {
> -  if (!TARGET_SIMD_SET)
> -    return false;
> +  switch (mode)
> +    {
> +    case V2HImode:
> +      return TARGET_PLUS_DMPY;
> +    case V4HImode:
> +    case V2SImode:
> +      return TARGET_PLUS_QMACW;
> +    case V4SImode:
> +    case V8HImode:
> +      return TARGET_SIMD_SET;
> 
> -  if ((mode == V4SImode)
> -      || (mode == V8HImode))
> -    return true;
> +    default:
> +      return false;
> +    }
> +}
> 
> -  return false;
> +/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
> */
> +
> +static enum machine_mode
> +arc_preferred_simd_mode (enum machine_mode mode)
> +{
> +  switch (mode)
> +    {
> +    case HImode:
> +      return TARGET_PLUS_QMACW ? V4HImode : V2HImode;
> +    case SImode:
> +      return V2SImode;
> +
> +    default:
> +      return word_mode;
> +    }
>  }
> 
> +/* Implements target hook
> +   TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES.  */
> +
> +static unsigned int
> +arc_autovectorize_vector_sizes (void)
> +{
> +  return TARGET_PLUS_QMACW ? (8 | 4) : 0;
> +}
> 
>  /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation /
> review.  */
>  static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
> @@ -345,6 +376,12 @@ static void arc_finalize_pic (void);
>  #undef TARGET_VECTOR_MODE_SUPPORTED_P
>  #define TARGET_VECTOR_MODE_SUPPORTED_P
> arc_vector_mode_supported_p
> 
> +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
> +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE
> arc_preferred_simd_mode
> +
> +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
> +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
> arc_autovectorize_vector_sizes
> +
>  #undef TARGET_CAN_USE_DOLOOP_P
>  #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
> 
> @@ -1214,7 +1251,12 @@ arc_init_reg_tables (void)
>           arc_mode_class[i] = 0;
>         break;
>       case MODE_VECTOR_INT:
> -       arc_mode_class [i] = (1<< (int) V_MODE);
> +       if (GET_MODE_SIZE (m) == 4)
> +         arc_mode_class[i] = (1 << (int) S_MODE);
> +       else if (GET_MODE_SIZE (m) == 8)
> +         arc_mode_class[i] = (1 << (int) D_MODE);
> +       else
> +         arc_mode_class[i] = (1 << (int) V_MODE);
>         break;
>       case MODE_CC:
>       default:
> @@ -5277,6 +5319,15 @@ arc_builtin_decl (unsigned id, bool initialize_p
> ATTRIBUTE_UNUSED)
>  static void
>  arc_init_builtins (void)
>  {
> +  tree V4HI_type_node;
> +  tree V2SI_type_node;
> +  tree V2HI_type_node;
> +
> +  /* Vector types based on HS SIMD elements.  */
> +  V4HI_type_node = build_vector_type_for_mode (intHI_type_node,
> V4HImode);
> +  V2SI_type_node = build_vector_type_for_mode (intSI_type_node,
> V2SImode);
> +  V2HI_type_node = build_vector_type_for_mode (intHI_type_node,
> V2HImode);
> +
>    tree pcvoid_type_node
>      = build_pointer_type (build_qualified_type (void_type_node,
>                                               TYPE_QUAL_CONST));
> @@ -5341,6 +5392,28 @@ arc_init_builtins (void)
>    tree v8hi_ftype_v8hi
>      = build_function_type_list (V8HI_type_node, V8HI_type_node,
>                               NULL_TREE);
> +  /* ARCv2 SIMD types.  */
> +  tree long_ftype_v4hi_v4hi
> +    = build_function_type_list (long_long_integer_type_node,
> +                             V4HI_type_node, V4HI_type_node,
> NULL_TREE);
> +  tree int_ftype_v2hi_v2hi
> +    = build_function_type_list (integer_type_node,
> +                             V2HI_type_node, V2HI_type_node,
> NULL_TREE);
> +  tree v2si_ftype_v2hi_v2hi
> +    = build_function_type_list (V2SI_type_node,
> +                             V2HI_type_node, V2HI_type_node,
> NULL_TREE);
> +  tree v2hi_ftype_v2hi_v2hi
> +    = build_function_type_list (V2HI_type_node,
> +                             V2HI_type_node, V2HI_type_node,
> NULL_TREE);
> +  tree v2si_ftype_v2si_v2si
> +    = build_function_type_list (V2SI_type_node,
> +                             V2SI_type_node, V2SI_type_node,
> NULL_TREE);
> +  tree v4hi_ftype_v4hi_v4hi
> +    = build_function_type_list (V4HI_type_node,
> +                             V4HI_type_node, V4HI_type_node,
> NULL_TREE);
> +  tree long_ftype_v2si_v2hi
> +    = build_function_type_list (long_long_integer_type_node,
> +                             V2SI_type_node, V2HI_type_node,
> NULL_TREE);
> 
>    /* Add the builtins.  */
>  #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)
>       \
> @@ -8706,6 +8779,31 @@ arc_split_move (rtx *operands)
>        return;
>      }
> 
> +  if (TARGET_PLUS_QMACW
> +      && GET_CODE (operands[1]) == CONST_VECTOR)
> +    {
> +      HOST_WIDE_INT intval0, intval1;
> +      if (GET_MODE (operands[1]) == V2SImode)
> +     {
> +       intval0 = INTVAL (XVECEXP (operands[1], 0, 0));
> +       intval1 = INTVAL (XVECEXP (operands[1], 0, 1));
> +     }
> +      else
> +     {
> +       intval1  = INTVAL (XVECEXP (operands[1], 0, 3)) << 16;
> +       intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF;
> +       intval0  = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
> +       intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
> +     }
> +      xop[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
> +      xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
> +      xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode));
> +      xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode));
> +      emit_move_insn (xop[0], xop[2]);
> +      emit_move_insn (xop[3], xop[1]);
> +      return;
> +    }
> +
>    for (i = 0; i < 2; i++)
>      {
>        if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
> diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
> index 21c049f..7fc465b 100644
> --- a/gcc/config/arc/arc.h
> +++ b/gcc/config/arc/arc.h
> @@ -1723,6 +1723,12 @@ enum
>  /* Any multiplication feature macro.  */
>  #define TARGET_ANY_MPY                                               \
>    (TARGET_MPY || TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET)
> +/* PLUS_DMPY feature macro.  */
> +#define TARGET_PLUS_DMPY  ((arc_mpy_option > 6) && TARGET_HS)
> +/* PLUS_MACD feature macro.  */
> +#define TARGET_PLUS_MACD  ((arc_mpy_option > 7) && TARGET_HS)
> +/* PLUS_QMACW feature macro.  */
> +#define TARGET_PLUS_QMACW ((arc_mpy_option > 8) && TARGET_HS)
> 
>  /* ARC600 and ARC601 feature macro.  */
>  #define TARGET_ARC600_FAMILY (TARGET_ARC600 || TARGET_ARC601)
> diff --git a/gcc/config/arc/builtins.def b/gcc/config/arc/builtins.def
> index 19be1d2..8c71d30 100644
> --- a/gcc/config/arc/builtins.def
> +++ b/gcc/config/arc/builtins.def
> @@ -193,3 +193,30 @@ DEF_BUILTIN (VINTI, 1, void_ftype_int, vinti_insn,
> TARGET_SIMD_SET)
> 
>  /* END SIMD marker.  */
>  DEF_BUILTIN (SIMD_END, 0, void_ftype_void, nothing, 0)
> +
> +/* ARCv2 SIMD instructions that use/clobber the accumulator reg.  */
> +DEF_BUILTIN (QMACH,      2, long_ftype_v4hi_v4hi,   qmach,
> TARGET_PLUS_QMACW)
> +DEF_BUILTIN (QMACHU,     2, long_ftype_v4hi_v4hi,   qmachu,
> TARGET_PLUS_QMACW)
> +DEF_BUILTIN (QMPYH,      2, long_ftype_v4hi_v4hi,   qmpyh,
> TARGET_PLUS_QMACW)
> +DEF_BUILTIN (QMPYHU,     2, long_ftype_v4hi_v4hi,   qmpyhu,
> TARGET_PLUS_QMACW)
> +
> +DEF_BUILTIN (DMACH,      2, int_ftype_v2hi_v2hi,    dmach,
> TARGET_PLUS_DMPY)
> +DEF_BUILTIN (DMACHU,     2, int_ftype_v2hi_v2hi,    dmachu,
> TARGET_PLUS_DMPY)
> +DEF_BUILTIN (DMPYH,      2, int_ftype_v2hi_v2hi,    dmpyh,
> TARGET_PLUS_DMPY)
> +DEF_BUILTIN (DMPYHU,     2, int_ftype_v2hi_v2hi,    dmpyhu,
> TARGET_PLUS_DMPY)
> +
> +DEF_BUILTIN (DMACWH,     2, long_ftype_v2si_v2hi,   dmacwh,
> TARGET_PLUS_QMACW)
> +DEF_BUILTIN (DMACWHU,    2, long_ftype_v2si_v2hi,   dmacwhu,
> TARGET_PLUS_QMACW)
> +
> +DEF_BUILTIN (VMAC2H,     2, v2si_ftype_v2hi_v2hi,   vmac2h,
> TARGET_PLUS_MACD)
> +DEF_BUILTIN (VMAC2HU,    2, v2si_ftype_v2hi_v2hi,   vmac2hu,
> TARGET_PLUS_MACD)
> +DEF_BUILTIN (VMPY2H,     2, v2si_ftype_v2hi_v2hi,   vmpy2h,
> TARGET_PLUS_MACD)
> +DEF_BUILTIN (VMPY2HU,    2, v2si_ftype_v2hi_v2hi,   vmpy2hu,
> TARGET_PLUS_MACD)
> +
> +/* Combined add/sub HS SIMD instructions.  */
> +DEF_BUILTIN (VADDSUB2H,  2, v2hi_ftype_v2hi_v2hi,   addsubv2hi3,
> TARGET_PLUS_DMPY)
> +DEF_BUILTIN (VSUBADD2H,  2, v2hi_ftype_v2hi_v2hi,   subaddv2hi3,
> TARGET_PLUS_DMPY)
> +DEF_BUILTIN (VADDSUB,    2, v2si_ftype_v2si_v2si,   addsubv2si3,
> TARGET_PLUS_QMACW)
> +DEF_BUILTIN (VSUBADD,    2, v2si_ftype_v2si_v2si,   subaddv2si3,
> TARGET_PLUS_QMACW)
> +DEF_BUILTIN (VADDSUB4H,  2, v4hi_ftype_v4hi_v4hi,   addsubv4hi3,
> TARGET_PLUS_QMACW)
> +DEF_BUILTIN (VSUBADD4H,  2, v4hi_ftype_v4hi_v4hi,   subaddv4hi3,
> TARGET_PLUS_QMACW)
> diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md
> index 9fd9d62..51869e3 100644
> --- a/gcc/config/arc/simdext.md
> +++ b/gcc/config/arc/simdext.md
> @@ -1288,3 +1288,574 @@
>    [(set_attr "type" "simd_vcontrol")
>     (set_attr "length" "4")
>     (set_attr "cond" "nocond")])
> +
> +;; New ARCv2 SIMD extensions
> +
> +;;64-bit vectors of halfwords and words
> +(define_mode_iterator VWH [V4HI V2SI])
> +
> +;;double element vectors
> +(define_mode_iterator VDV [V2HI V2SI])
> +(define_mode_attr V_addsub [(V2HI "HI") (V2SI "SI")])
> +(define_mode_attr V_addsub_suffix [(V2HI "2h") (V2SI "")])
> +
> +;;all vectors
> +(define_mode_iterator VCT [V2HI V4HI V2SI])
> +(define_mode_attr V_suffix [(V2HI "2h") (V4HI "4h") (V2SI "2")])
> +
> +;; Widening operations.
> +(define_code_iterator SE [sign_extend zero_extend])
> +(define_code_attr V_US [(sign_extend "s") (zero_extend "u")])
> +(define_code_attr V_US_suffix [(sign_extend "") (zero_extend "u")])
> +
> +
> +;; Move patterns
> +(define_expand "movv2hi"
> +  [(set (match_operand:V2HI 0 "move_dest_operand" "")
> +     (match_operand:V2HI 1 "general_operand" ""))]
> +  ""
> +  "{
> +    if (prepare_move_operands (operands, V2HImode))
> +         DONE;
> +   }")
> +
> +(define_insn_and_split "*movv2hi_insn"
> +  [(set (match_operand:V2HI 0 "nonimmediate_operand" "=r,r,r,m")
> +     (match_operand:V2HI 1 "general_operand"       "i,r,m,r"))]
> +  "(register_operand (operands[0], V2HImode)
> +    || register_operand (operands[1], V2HImode))"
> +  "@
> +   #
> +   mov%? %0, %1
> +   ld%U1%V1 %0,%1
> +   st%U0%V0 %1,%0"
> +  "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR"
> +  [(set (match_dup 0) (match_dup 2))]
> +  {
> +   HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
> +   intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
> +
> +   operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
> +   operands[2] = GEN_INT (trunc_int_for_mode (intval, SImode));
> +  }
> +  [(set_attr "type" "move,move,load,store")
> +   (set_attr "predicable" "yes,yes,no,no")
> +   (set_attr "iscompact"  "false,false,false,false")
> +   ])
> +
> +(define_expand "movmisalignv2hi"
> + [(set (match_operand:V2HI 0 "general_operand" "")
> +       (match_operand:V2HI 1 "general_operand" ""))]
> + ""
> +{
> + if (!register_operand (operands[0], V2HImode)
> +      && !register_operand (operands[1], V2HImode))
> +    operands[1] = force_reg (V2HImode, operands[1]);
> +})
> +
> +(define_expand "mov<mode>"
> +  [(set (match_operand:VWH 0 "move_dest_operand" "")
> +     (match_operand:VWH 1 "general_operand" ""))]
> +  ""
> +  "{
> +    if (GET_CODE (operands[0]) == MEM)
> +     operands[1] = force_reg (<MODE>mode, operands[1]);
> +   }")
> +
> +(define_insn_and_split "*mov<mode>_insn"
> +  [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m")
> +     (match_operand:VWH 1 "general_operand"    "i,r,m,r"))]
> +  "TARGET_PLUS_QMACW
> +   && (register_operand (operands[0], <MODE>mode)
> +       || register_operand (operands[1], <MODE>mode))"
> +  "*
> +{
> +  switch (which_alternative)
> +    {
> +     default:
> +       return \"#\";
> +
> +     case 1:
> +       return \"vadd2 %0, %1, 0\";
> +
> +     case 2:
> +       if (TARGET_LL64)
> +         return \"ldd%U1%V1 %0,%1\";
> +       return \"#\";
> +
> +     case 3:
> +       if (TARGET_LL64)
> +        return \"std%U0%V0 %1,%0\";
> +      return \"#\";
> +    }
> +}"
> +  "reload_completed"
> +  [(const_int 0)]
> +  {
> +   arc_split_move (operands);
> +   DONE;
> +  }
> +  [(set_attr "type" "move,move,load,store")
> +   (set_attr "predicable" "yes,no,no,no")
> +   (set_attr "iscompact"  "false,false,false,false")
> +   ])
> +
> +(define_expand "movmisalign<mode>"
> + [(set (match_operand:VWH 0 "general_operand" "")
> +       (match_operand:VWH 1 "general_operand" ""))]
> + ""
> +{
> + if (!register_operand (operands[0], <MODE>mode)
> +      && !register_operand (operands[1], <MODE>mode))
> +    operands[1] = force_reg (<MODE>mode, operands[1]);
> +})
> +
> +(define_insn "bswapv2hi2"
> +  [(set (match_operand:V2HI 0 "register_operand" "=r,r")
> +        (bswap:V2HI (match_operand:V2HI 1 "nonmemory_operand" "r,i")))]
> +  "TARGET_V2 && TARGET_SWAP"
> +  "swape %0, %1"
> +  [(set_attr "length" "4,8")
> +   (set_attr "type" "two_cycle_core")])
> +
> +;; Simple arithmetic insns
> +(define_insn "add<mode>3"
> +  [(set (match_operand:VCT 0 "register_operand"          "=r,r")
> +     (plus:VCT (match_operand:VCT 1 "register_operand" "0,r")
> +               (match_operand:VCT 2 "register_operand" "r,r")))]
> +  "TARGET_PLUS_DMPY"
> +  "vadd<V_suffix>%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "sub<mode>3"
> +  [(set (match_operand:VCT 0 "register_operand"           "=r,r")
> +     (minus:VCT (match_operand:VCT 1 "register_operand" "0,r")
> +                (match_operand:VCT 2 "register_operand" "r,r")))]
> +  "TARGET_PLUS_DMPY"
> +  "vsub<V_suffix>%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +;; Combined arithmetic ops
> +(define_insn "addsub<mode>3"
> +  [(set (match_operand:VDV 0 "register_operand" "=r,r")
> +     (vec_concat:VDV
> +      (plus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1
> "register_operand" "0,r")
> +                                              (parallel [(const_int 0)]))
> +                       (vec_select:<V_addsub> (match_operand:VDV 2
> "register_operand" "r,r")
> +                                              (parallel [(const_int 0)])))
> +      (minus:<V_addsub> (vec_select:<V_addsub> (match_dup 1)
> (parallel [(const_int 1)]))
> +                        (vec_select:<V_addsub> (match_dup 2) (parallel
> [(const_int 1)])))))]
> +  "TARGET_PLUS_DMPY"
> +  "vaddsub<V_addsub_suffix>%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "subadd<mode>3"
> +  [(set (match_operand:VDV 0 "register_operand" "=r,r")
> +     (vec_concat:VDV
> +      (minus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV
> 1 "register_operand" "0,r")
> +                                               (parallel [(const_int 0)]))
> +                        (vec_select:<V_addsub> (match_operand:VDV 2
> "register_operand" "r,r")
> +                                               (parallel [(const_int 0)])))
> +      (plus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel
> [(const_int 1)]))
> +                       (vec_select:<V_addsub> (match_dup 2) (parallel
> [(const_int 1)])))))]
> +  "TARGET_PLUS_DMPY"
> +  "vsubadd<V_addsub_suffix>%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "addsubv4hi3"
> +  [(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
> +     (vec_concat:V4HI
> +      (vec_concat:V2HI
> +       (plus:HI (vec_select:HI (match_operand:V4HI 1
> "even_register_operand" "0,r")
> +                               (parallel [(const_int 0)]))
> +                (vec_select:HI (match_operand:V4HI 2
> "even_register_operand" "r,r")
> +                               (parallel [(const_int 0)])))
> +       (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
> +                 (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
> +      (vec_concat:V2HI
> +       (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
> +                (vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
> +       (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
> +                 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
> +      ))]
> +  "TARGET_PLUS_QMACW"
> +  "vaddsub4h%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "subaddv4hi3"
> +  [(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
> +     (vec_concat:V4HI
> +      (vec_concat:V2HI
> +       (minus:HI (vec_select:HI (match_operand:V4HI 1
> "even_register_operand" "0,r")
> +                                (parallel [(const_int 0)]))
> +                 (vec_select:HI (match_operand:V4HI 2
> "even_register_operand" "r,r")
> +                               (parallel [(const_int 0)])))
> +       (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
> +                (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
> +      (vec_concat:V2HI
> +       (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
> +                 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
> +       (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
> +                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
> +      ))]
> +  "TARGET_PLUS_QMACW"
> +  "vsubadd4h%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +;; Multiplication
> +(define_insn "dmpyh<V_US_suffix>"
> +  [(set (match_operand:SI 0 "register_operand" "=r,r")
> +     (plus:SI
> +      (mult:SI
> +       (SE:SI
> +        (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,r")
> +                       (parallel [(const_int 0)])))
> +       (SE:SI
> +        (vec_select:HI (match_operand:V2HI 2 "register_operand" "r,r")
> +                       (parallel [(const_int 0)]))))
> +      (mult:SI
> +       (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
> +       (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))))
> +   (set (reg:DI ARCV2_ACC)
> +     (zero_extend:DI
> +      (plus:SI
> +       (mult:SI
> +        (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
> +        (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 0)]))))
> +       (mult:SI
> +        (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
> +        (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))))]
> +  "TARGET_PLUS_DMPY"
> +  "dmpy<V_US_suffix>%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +;; We could use dmac here as well.  It remains to be investigated which
> +;; version is more beneficial.
> +(define_expand "sdot_prodv2hi"
> +  [(match_operand:SI 0 "register_operand" "")
> +   (match_operand:V2HI 1 "register_operand" "")
> +   (match_operand:V2HI 2 "register_operand" "")
> +   (match_operand:SI 3 "register_operand" "")]
> +  "TARGET_PLUS_DMPY"
> +{
> + rtx t = gen_reg_rtx (SImode);
> + emit_insn (gen_dmpyh (t, operands[1], operands[2]));
> + emit_insn (gen_addsi3 (operands[0], operands[3], t));
> + DONE;
> +})
> +
> +(define_expand "udot_prodv2hi"
> +  [(match_operand:SI 0 "register_operand" "")
> +   (match_operand:V2HI 1 "register_operand" "")
> +   (match_operand:V2HI 2 "register_operand" "")
> +   (match_operand:SI 3 "register_operand" "")]
> +  "TARGET_PLUS_DMPY"
> +{
> + rtx t = gen_reg_rtx (SImode);
> + emit_insn (gen_dmpyhu (t, operands[1], operands[2]));
> + emit_insn (gen_addsi3 (operands[0], operands[3], t));
> + DONE;
> +})
> +
> +(define_insn "arc_vec_<V_US>mult_lo_v4hi"
> + [(set (match_operand:V2SI 0 "even_register_operand"                     
> "=r,r")
> +       (mult:V2SI (SE:V2SI (vec_select:V2HI
> +                         (match_operand:V4HI 1 "even_register_operand"
> "0,r")
> +                         (parallel [(const_int 0) (const_int 1)])))
> +               (SE:V2SI (vec_select:V2HI
> +                         (match_operand:V4HI 2 "even_register_operand"
> "r,r")
> +                         (parallel [(const_int 0) (const_int 1)])))))
> +  (set (reg:V2SI ARCV2_ACC)
> +       (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
> +                                         (parallel [(const_int 0) (const_int
> 1)])))
> +               (SE:V2SI (vec_select:V2HI (match_dup 2)
> +                                         (parallel [(const_int 0) (const_int
> 1)])))))
> +  ]
> +  "TARGET_PLUS_MACD"
> +  "vmpy2h<V_US_suffix>%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "arc_vec_<V_US>multacc_lo_v4hi"
> +  [(set (reg:V2SI ARCV2_ACC)
> +     (mult:V2SI (SE:V2SI (vec_select:V2HI
> +                          (match_operand:V4HI 0 "even_register_operand"
> "r")
> +                          (parallel [(const_int 0) (const_int 1)])))
> +                (SE:V2SI (vec_select:V2HI
> +                          (match_operand:V4HI 1 "even_register_operand"
> "r")
> +                          (parallel [(const_int 0) (const_int 1)])))))
> +  ]
> +  "TARGET_PLUS_MACD"
> +  "vmpy2h<V_US_suffix>%? 0, %0, %1"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "no")
> +   (set_attr "cond" "nocond")])
> +
> +(define_expand "vec_widen_<V_US>mult_lo_v4hi"
> + [(set (match_operand:V2SI 0 "even_register_operand"                 "")
> +       (mult:V2SI (SE:V2SI (vec_select:V2HI
> +                         (match_operand:V4HI 1 "even_register_operand"
> "")
> +                         (parallel [(const_int 0) (const_int 1)])))
> +               (SE:V2SI (vec_select:V2HI
> +                         (match_operand:V4HI 2 "even_register_operand"
> "")
> +                         (parallel [(const_int 0) (const_int 1)])))))]
> +  "TARGET_PLUS_QMACW"
> +  {
> +     emit_insn (gen_arc_vec_<V_US>mult_lo_v4hi (operands[0],
> +                                             operands[1],
> +                                             operands[2]));
> +     DONE;
> +  }
> +)
> +
> +(define_insn "arc_vec_<V_US>mult_hi_v4hi"
> + [(set (match_operand:V2SI 0 "even_register_operand"                     
> "=r,r")
> +       (mult:V2SI (SE:V2SI (vec_select:V2HI
> +                         (match_operand:V4HI 1 "even_register_operand"
> "0,r")
> +                         (parallel [(const_int 2) (const_int 3)])))
> +               (SE:V2SI (vec_select:V2HI
> +                         (match_operand:V4HI 2 "even_register_operand"
> "r,r")
> +                         (parallel [(const_int 2) (const_int 3)])))))
> +  (set (reg:V2SI ARCV2_ACC)
> +       (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
> +                                         (parallel [(const_int 2) (const_int
> 3)])))
> +               (SE:V2SI (vec_select:V2HI (match_dup 2)
> +                                         (parallel [(const_int 2) (const_int
> 3)])))))
> +  ]
> +  "TARGET_PLUS_QMACW"
> +  "vmpy2h<V_US_suffix>%? %0, %R1, %R2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_expand "vec_widen_<V_US>mult_hi_v4hi"
> + [(set (match_operand:V2SI 0 "even_register_operand"                         
>       "")
> +       (mult:V2SI (SE:V2SI (vec_select:V2HI
> +                                  (match_operand:V4HI 1
> "even_register_operand" "")
> +                                  (parallel [(const_int 2) (const_int 3)])))
> +               (SE:V2SI (vec_select:V2HI
> +                                  (match_operand:V4HI 2
> "even_register_operand" "")
> +                                  (parallel [(const_int 2) (const_int 
> 3)])))))]
> +  "TARGET_PLUS_MACD"
> +  {
> +     emit_insn (gen_arc_vec_<V_US>mult_hi_v4hi (operands[0],
> +                                             operands[1],
> +                                             operands[2]));
> +     DONE;
> +  }
> +)
> +
> +(define_insn "arc_vec_<V_US>mac_hi_v4hi"
> + [(set (match_operand:V2SI 0 "even_register_operand"                     
> "=r,r")
> +       (plus:V2SI
> +     (reg:V2SI ARCV2_ACC)
> +     (mult:V2SI (SE:V2SI (vec_select:V2HI
> +                          (match_operand:V4HI 1 "even_register_operand"
> "0,r")
> +                          (parallel [(const_int 2) (const_int 3)])))
> +                (SE:V2SI (vec_select:V2HI
> +                          (match_operand:V4HI 2 "even_register_operand"
> "r,r")
> +                          (parallel [(const_int 2) (const_int 3)]))))))
> +  (set (reg:V2SI ARCV2_ACC)
> +       (plus:V2SI
> +     (reg:V2SI ARCV2_ACC)
> +     (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
> +                                          (parallel [(const_int 2) (const_int
> 3)])))
> +                (SE:V2SI (vec_select:V2HI (match_dup 2)
> +                                          (parallel [(const_int 2) (const_int
> 3)]))))))
> +  ]
> +  "TARGET_PLUS_MACD"
> +  "vmac2h<V_US_suffix>%? %0, %R1, %R2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +;; Builtins
> +(define_insn "dmach"
> +  [(set (match_operand:SI 0 "register_operand" "=r,r")
> +     (unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
> +                 (match_operand:V2HI 2 "register_operand" "r,r")
> +                 (reg:DI ARCV2_ACC)]
> +                UNSPEC_ARC_DMACH))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_DMPY"
> +  "dmach%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "dmachu"
> +  [(set (match_operand:SI 0 "register_operand" "=r,r")
> +     (unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
> +                 (match_operand:V2HI 2 "register_operand" "r,r")
> +                 (reg:DI ARCV2_ACC)]
> +                UNSPEC_ARC_DMACHU))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_DMPY"
> +  "dmachu%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "dmacwh"
> +  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
> +     (unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
> +                 (match_operand:V2HI 2 "register_operand"      "r,r")
> +                 (reg:DI ARCV2_ACC)]
> +                UNSPEC_ARC_DMACWH))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_QMACW"
> +  "dmacwh%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "dmacwhu"
> +  [(set (match_operand:DI 0 "register_operand" "=r,r")
> +     (unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
> +                 (match_operand:V2HI 2 "register_operand"      "r,r")
> +                 (reg:DI ARCV2_ACC)]
> +                UNSPEC_ARC_DMACWHU))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_QMACW"
> +  "dmacwhu%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "vmac2h"
> +  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
> +     (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
> +                   (match_operand:V2HI 2 "register_operand" "r,r")
> +                   (reg:DI ARCV2_ACC)]
> +                  UNSPEC_ARC_VMAC2H))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_MACD"
> +  "vmac2h%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "vmac2hu"
> +  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
> +     (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
> +                   (match_operand:V2HI 2 "register_operand" "r,r")
> +                   (reg:DI ARCV2_ACC)]
> +                UNSPEC_ARC_VMAC2HU))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_MACD"
> +  "vmac2hu%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "vmpy2h"
> +  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
> +     (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
> +                   (match_operand:V2HI 2 "register_operand" "r,r")]
> +                  UNSPEC_ARC_VMPY2H))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_MACD"
> +  "vmpy2h%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "vmpy2hu"
> +  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
> +     (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
> +                   (match_operand:V2HI 2 "register_operand" "r,r")]
> +                  UNSPEC_ARC_VMPY2HU))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_MACD"
> +  "vmpy2hu%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "qmach"
> +  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
> +     (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
> +                 (match_operand:V4HI 2 "even_register_operand" "r,r")
> +                 (reg:DI ARCV2_ACC)]
> +                  UNSPEC_ARC_QMACH))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_QMACW"
> +  "qmach%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "qmachu"
> +  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
> +     (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
> +                 (match_operand:V4HI 2 "even_register_operand" "r,r")
> +                 (reg:DI ARCV2_ACC)]
> +                UNSPEC_ARC_QMACHU))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_QMACW"
> +  "qmachu%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "qmpyh"
> +  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
> +     (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
> +                 (match_operand:V4HI 2 "even_register_operand" "r,r")]
> +                  UNSPEC_ARC_QMPYH))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_QMACW"
> +  "qmpyh%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> +
> +(define_insn "qmpyhu"
> +  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
> +     (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
> +                 (match_operand:V4HI 2 "even_register_operand" "r,r")]
> +                UNSPEC_ARC_QMPYHU))
> +   (clobber (reg:DI ARCV2_ACC))]
> +  "TARGET_PLUS_QMACW"
> +  "qmpyhu%? %0, %1, %2"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")
> +   (set_attr "predicable" "yes,no")
> +   (set_attr "cond" "canuse,nocond")])
> diff --git a/gcc/testsuite/gcc.target/arc/builtin_simdarc.c b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c
> new file mode 100644
> index 0000000..68aae40
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mcpu=archs -O2 -Werror-implicit-function-declaration -mmpy-option=9" } */
> +
> +#define STEST(name, rettype, op1type, op2type)       \
> +  rettype test_ ## name                              \
> +  (op1type a, op2type b)                     \
> +  {                                          \
> +    return __builtin_arc_ ## name (a, b);    \
> +  }
> +
> +typedef short v2hi __attribute__ ((vector_size (4)));
> +typedef short v4hi __attribute__ ((vector_size (8)));
> +typedef int   v2si __attribute__ ((vector_size (8)));
> +
> +STEST (qmach,  long long, v4hi, v4hi)
> +STEST (qmachu, long long, v4hi, v4hi)
> +STEST (qmpyh,  long long, v4hi, v4hi)
> +STEST (qmpyhu, long long, v4hi, v4hi)
> +
> +STEST (dmach,  int, v2hi, v2hi)
> +STEST (dmachu, int, v2hi, v2hi)
> +STEST (dmpyh,  int, v2hi, v2hi)
> +STEST (dmpyhu, int, v2hi, v2hi)
> +
> +STEST (dmacwh,  long, v2si, v2hi)
> +STEST (dmacwhu, long, v2si, v2hi)
> +
> +STEST (vmac2h,  v2si, v2hi, v2hi)
> +STEST (vmac2hu, v2si, v2hi, v2hi)
> +STEST (vmpy2h,  v2si, v2hi, v2hi)
> +STEST (vmpy2hu, v2si, v2hi, v2hi)
> +
> +STEST (vaddsub2h, v2hi, v2hi, v2hi)
> +STEST (vsubadd2h, v2hi, v2hi, v2hi)
> +STEST (vaddsub,   v2si, v2si, v2si)
> +STEST (vsubadd,   v2si, v2si, v2si)
> +STEST (vaddsub4h, v4hi, v4hi, v4hi)
> +STEST (vsubadd4h, v4hi, v4hi, v4hi)
> --
> 1.9.1

Reply via email to