PING > -----Original Message----- > From: Claudiu Zissulescu > Sent: Friday, April 08, 2016 10:31 AM > To: gcc-patches@gcc.gnu.org > Cc: Claudiu Zissulescu; g...@amylaar.uk; francois.bed...@synopsys.com; > jeremy.benn...@embecosm.com > Subject: [PATCH] [ARC] Add SIMD extensions for ARC HS > > This patch adds support for the new SIMD operations added to the ARC HS > CPU class. The proposed patch does not aim for peak performance, but it offers > support for those newly added operations and for autovectorization. > > The patch was tested using dg.exp, compile.exp, and execute.exp for > both arc700 and archs, with and without SIMD support enabled. > > OK to apply? > Claudiu > > gcc/ > 2016-03-14 Claudiu Zissulescu <claz...@synopsys.com> > > * config/arc/arc.c (arc_vector_mode_supported_p): Add support for > the new ARC HS SIMD instructions. > (arc_preferred_simd_mode): New function. > (arc_autovectorize_vector_sizes): Likewise. > (TARGET_VECTORIZE_PREFERRED_SIMD_MODE) > (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define. > (arc_init_reg_tables): Accept new ARC HS SIMD modes. > (arc_init_builtins): Add new SIMD builtin types. > (arc_split_move): Handle 64 bit vector moves. > * config/arc/arc.h (TARGET_PLUS_DMPY, TARGET_PLUS_MACD) > (TARGET_PLUS_QMACW): Define. > * config/arc/builtins.def (QMACH, QMACHU, QMPYH, QMPYHU, > DMACH) > (DMACHU, DMPYH, DMPYHU, DMACWH, DMACWHU, VMAC2H, > VMAC2HU, VMPY2H) > (VMPY2HU, VADDSUB2H, VSUBADD2H, VADDSUB, VSUBADD, > VADDSUB4H) > (VSUBADD4H): New builtins. > * config/arc/simdext.md: Add new ARC HS SIMD instructions. > * testsuite/gcc.target/arc/builtin_simdarc.c: New file. 
> --- > gcc/config/arc/arc.c | 112 ++++- > gcc/config/arc/arc.h | 6 + > gcc/config/arc/builtins.def | 27 ++ > gcc/config/arc/simdext.md | 571 > +++++++++++++++++++++++++ > gcc/testsuite/gcc.target/arc/builtin_simdarc.c | 38 ++ > 5 files changed, 747 insertions(+), 7 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/arc/builtin_simdarc.c > > diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c > index d60db50..d120946 100644 > --- a/gcc/config/arc/arc.c > +++ b/gcc/config/arc/arc.c > @@ -247,16 +247,47 @@ static bool arc_use_by_pieces_infrastructure_p > (unsigned HOST_WIDE_INT, > static bool > arc_vector_mode_supported_p (machine_mode mode) > { > - if (!TARGET_SIMD_SET) > - return false; > + switch (mode) > + { > + case V2HImode: > + return TARGET_PLUS_DMPY; > + case V4HImode: > + case V2SImode: > + return TARGET_PLUS_QMACW; > + case V4SImode: > + case V8HImode: > + return TARGET_SIMD_SET; > > - if ((mode == V4SImode) > - || (mode == V8HImode)) > - return true; > + default: > + return false; > + } > +} > > - return false; > +/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE. > */ > + > +static enum machine_mode > +arc_preferred_simd_mode (enum machine_mode mode) > +{ > + switch (mode) > + { > + case HImode: > + return TARGET_PLUS_QMACW ? V4HImode : V2HImode; > + case SImode: > + return V2SImode; > + > + default: > + return word_mode; > + } > } > > +/* Implements target hook > + TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */ > + > +static unsigned int > +arc_autovectorize_vector_sizes (void) > +{ > + return TARGET_PLUS_QMACW ? (8 | 4) : 0; > +} > > /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / > review. 
*/ > static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED; > @@ -345,6 +376,12 @@ static void arc_finalize_pic (void); > #undef TARGET_VECTOR_MODE_SUPPORTED_P > #define TARGET_VECTOR_MODE_SUPPORTED_P > arc_vector_mode_supported_p > > +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE > +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE > arc_preferred_simd_mode > + > +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES > +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES > arc_autovectorize_vector_sizes > + > #undef TARGET_CAN_USE_DOLOOP_P > #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p > > @@ -1214,7 +1251,12 @@ arc_init_reg_tables (void) > arc_mode_class[i] = 0; > break; > case MODE_VECTOR_INT: > - arc_mode_class [i] = (1<< (int) V_MODE); > + if (GET_MODE_SIZE (m) == 4) > + arc_mode_class[i] = (1 << (int) S_MODE); > + else if (GET_MODE_SIZE (m) == 8) > + arc_mode_class[i] = (1 << (int) D_MODE); > + else > + arc_mode_class[i] = (1 << (int) V_MODE); > break; > case MODE_CC: > default: > @@ -5277,6 +5319,15 @@ arc_builtin_decl (unsigned id, bool initialize_p > ATTRIBUTE_UNUSED) > static void > arc_init_builtins (void) > { > + tree V4HI_type_node; > + tree V2SI_type_node; > + tree V2HI_type_node; > + > + /* Vector types based on HS SIMD elements. */ > + V4HI_type_node = build_vector_type_for_mode (intHI_type_node, > V4HImode); > + V2SI_type_node = build_vector_type_for_mode (intSI_type_node, > V2SImode); > + V2HI_type_node = build_vector_type_for_mode (intHI_type_node, > V2HImode); > + > tree pcvoid_type_node > = build_pointer_type (build_qualified_type (void_type_node, > TYPE_QUAL_CONST)); > @@ -5341,6 +5392,28 @@ arc_init_builtins (void) > tree v8hi_ftype_v8hi > = build_function_type_list (V8HI_type_node, V8HI_type_node, > NULL_TREE); > + /* ARCv2 SIMD types. 
*/ > + tree long_ftype_v4hi_v4hi > + = build_function_type_list (long_long_integer_type_node, > + V4HI_type_node, V4HI_type_node, > NULL_TREE); > + tree int_ftype_v2hi_v2hi > + = build_function_type_list (integer_type_node, > + V2HI_type_node, V2HI_type_node, > NULL_TREE); > + tree v2si_ftype_v2hi_v2hi > + = build_function_type_list (V2SI_type_node, > + V2HI_type_node, V2HI_type_node, > NULL_TREE); > + tree v2hi_ftype_v2hi_v2hi > + = build_function_type_list (V2HI_type_node, > + V2HI_type_node, V2HI_type_node, > NULL_TREE); > + tree v2si_ftype_v2si_v2si > + = build_function_type_list (V2SI_type_node, > + V2SI_type_node, V2SI_type_node, > NULL_TREE); > + tree v4hi_ftype_v4hi_v4hi > + = build_function_type_list (V4HI_type_node, > + V4HI_type_node, V4HI_type_node, > NULL_TREE); > + tree long_ftype_v2si_v2hi > + = build_function_type_list (long_long_integer_type_node, > + V2SI_type_node, V2HI_type_node, > NULL_TREE); > > /* Add the builtins. */ > #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) > \ > @@ -8706,6 +8779,31 @@ arc_split_move (rtx *operands) > return; > } > > + if (TARGET_PLUS_QMACW > + && GET_CODE (operands[1]) == CONST_VECTOR) > + { > + HOST_WIDE_INT intval0, intval1; > + if (GET_MODE (operands[1]) == V2SImode) > + { > + intval0 = INTVAL (XVECEXP (operands[1], 0, 0)); > + intval1 = INTVAL (XVECEXP (operands[1], 0, 1)); > + } > + else > + { > + intval1 = INTVAL (XVECEXP (operands[1], 0, 3)) << 16; > + intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF; > + intval0 = INTVAL (XVECEXP (operands[1], 0, 1)) << 16; > + intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF; > + } > + xop[0] = gen_rtx_REG (SImode, REGNO (operands[0])); > + xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); > + xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode)); > + xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode)); > + emit_move_insn (xop[0], xop[2]); > + emit_move_insn (xop[3], xop[1]); > + return; > + } > + > for (i = 0; i < 2; i++) > { > if (MEM_P 
(operands[i]) && auto_inc_p (XEXP (operands[i], 0))) > diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h > index 21c049f..7fc465b 100644 > --- a/gcc/config/arc/arc.h > +++ b/gcc/config/arc/arc.h > @@ -1723,6 +1723,12 @@ enum > /* Any multiplication feature macro. */ > #define TARGET_ANY_MPY \ > (TARGET_MPY || TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET) > +/* PLUS_DMPY feature macro. */ > +#define TARGET_PLUS_DMPY ((arc_mpy_option > 6) && TARGET_HS) > +/* PLUS_MACD feature macro. */ > +#define TARGET_PLUS_MACD ((arc_mpy_option > 7) && TARGET_HS) > +/* PLUS_QMACW feature macro. */ > +#define TARGET_PLUS_QMACW ((arc_mpy_option > 8) && TARGET_HS) > > /* ARC600 and ARC601 feature macro. */ > #define TARGET_ARC600_FAMILY (TARGET_ARC600 || TARGET_ARC601) > diff --git a/gcc/config/arc/builtins.def b/gcc/config/arc/builtins.def > index 19be1d2..8c71d30 100644 > --- a/gcc/config/arc/builtins.def > +++ b/gcc/config/arc/builtins.def > @@ -193,3 +193,30 @@ DEF_BUILTIN (VINTI, 1, void_ftype_int, vinti_insn, > TARGET_SIMD_SET) > > /* END SIMD marker. */ > DEF_BUILTIN (SIMD_END, 0, void_ftype_void, nothing, 0) > + > +/* ARCv2 SIMD instructions that use/clobber the accumulator reg. 
*/ > +DEF_BUILTIN (QMACH, 2, long_ftype_v4hi_v4hi, qmach, > TARGET_PLUS_QMACW) > +DEF_BUILTIN (QMACHU, 2, long_ftype_v4hi_v4hi, qmachu, > TARGET_PLUS_QMACW) > +DEF_BUILTIN (QMPYH, 2, long_ftype_v4hi_v4hi, qmpyh, > TARGET_PLUS_QMACW) > +DEF_BUILTIN (QMPYHU, 2, long_ftype_v4hi_v4hi, qmpyhu, > TARGET_PLUS_QMACW) > + > +DEF_BUILTIN (DMACH, 2, int_ftype_v2hi_v2hi, dmach, > TARGET_PLUS_DMPY) > +DEF_BUILTIN (DMACHU, 2, int_ftype_v2hi_v2hi, dmachu, > TARGET_PLUS_DMPY) > +DEF_BUILTIN (DMPYH, 2, int_ftype_v2hi_v2hi, dmpyh, > TARGET_PLUS_DMPY) > +DEF_BUILTIN (DMPYHU, 2, int_ftype_v2hi_v2hi, dmpyhu, > TARGET_PLUS_DMPY) > + > +DEF_BUILTIN (DMACWH, 2, long_ftype_v2si_v2hi, dmacwh, > TARGET_PLUS_QMACW) > +DEF_BUILTIN (DMACWHU, 2, long_ftype_v2si_v2hi, dmacwhu, > TARGET_PLUS_QMACW) > + > +DEF_BUILTIN (VMAC2H, 2, v2si_ftype_v2hi_v2hi, vmac2h, > TARGET_PLUS_MACD) > +DEF_BUILTIN (VMAC2HU, 2, v2si_ftype_v2hi_v2hi, vmac2hu, > TARGET_PLUS_MACD) > +DEF_BUILTIN (VMPY2H, 2, v2si_ftype_v2hi_v2hi, vmpy2h, > TARGET_PLUS_MACD) > +DEF_BUILTIN (VMPY2HU, 2, v2si_ftype_v2hi_v2hi, vmpy2hu, > TARGET_PLUS_MACD) > + > +/* Combined add/sub HS SIMD instructions. 
*/ > +DEF_BUILTIN (VADDSUB2H, 2, v2hi_ftype_v2hi_v2hi, addsubv2hi3, > TARGET_PLUS_DMPY) > +DEF_BUILTIN (VSUBADD2H, 2, v2hi_ftype_v2hi_v2hi, subaddv2hi3, > TARGET_PLUS_DMPY) > +DEF_BUILTIN (VADDSUB, 2, v2si_ftype_v2si_v2si, addsubv2si3, > TARGET_PLUS_QMACW) > +DEF_BUILTIN (VSUBADD, 2, v2si_ftype_v2si_v2si, subaddv2si3, > TARGET_PLUS_QMACW) > +DEF_BUILTIN (VADDSUB4H, 2, v4hi_ftype_v4hi_v4hi, addsubv4hi3, > TARGET_PLUS_QMACW) > +DEF_BUILTIN (VSUBADD4H, 2, v4hi_ftype_v4hi_v4hi, subaddv4hi3, > TARGET_PLUS_QMACW) > diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md > index 9fd9d62..51869e3 100644 > --- a/gcc/config/arc/simdext.md > +++ b/gcc/config/arc/simdext.md > @@ -1288,3 +1288,574 @@ > [(set_attr "type" "simd_vcontrol") > (set_attr "length" "4") > (set_attr "cond" "nocond")]) > + > +;; New ARCv2 SIMD extensions > + > +;;64-bit vectors of halfwords and words > +(define_mode_iterator VWH [V4HI V2SI]) > + > +;;double element vectors > +(define_mode_iterator VDV [V2HI V2SI]) > +(define_mode_attr V_addsub [(V2HI "HI") (V2SI "SI")]) > +(define_mode_attr V_addsub_suffix [(V2HI "2h") (V2SI "")]) > + > +;;all vectors > +(define_mode_iterator VCT [V2HI V4HI V2SI]) > +(define_mode_attr V_suffix [(V2HI "2h") (V4HI "4h") (V2SI "2")]) > + > +;; Widening operations. 
> +(define_code_iterator SE [sign_extend zero_extend]) > +(define_code_attr V_US [(sign_extend "s") (zero_extend "u")]) > +(define_code_attr V_US_suffix [(sign_extend "") (zero_extend "u")]) > + > + > +;; Move patterns > +(define_expand "movv2hi" > + [(set (match_operand:V2HI 0 "move_dest_operand" "") > + (match_operand:V2HI 1 "general_operand" ""))] > + "" > + "{ > + if (prepare_move_operands (operands, V2HImode)) > + DONE; > + }") > + > +(define_insn_and_split "*movv2hi_insn" > + [(set (match_operand:V2HI 0 "nonimmediate_operand" "=r,r,r,m") > + (match_operand:V2HI 1 "general_operand" "i,r,m,r"))] > + "(register_operand (operands[0], V2HImode) > + || register_operand (operands[1], V2HImode))" > + "@ > + # > + mov%? %0, %1 > + ld%U1%V1 %0,%1 > + st%U0%V0 %1,%0" > + "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR" > + [(set (match_dup 0) (match_dup 2))] > + { > + HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16; > + intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF; > + > + operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); > + operands[2] = GEN_INT (trunc_int_for_mode (intval, SImode)); > + } > + [(set_attr "type" "move,move,load,store") > + (set_attr "predicable" "yes,yes,no,no") > + (set_attr "iscompact" "false,false,false,false") > + ]) > + > +(define_expand "movmisalignv2hi" > + [(set (match_operand:V2HI 0 "general_operand" "") > + (match_operand:V2HI 1 "general_operand" ""))] > + "" > +{ > + if (!register_operand (operands[0], V2HImode) > + && !register_operand (operands[1], V2HImode)) > + operands[1] = force_reg (V2HImode, operands[1]); > +}) > + > +(define_expand "mov<mode>" > + [(set (match_operand:VWH 0 "move_dest_operand" "") > + (match_operand:VWH 1 "general_operand" ""))] > + "" > + "{ > + if (GET_CODE (operands[0]) == MEM) > + operands[1] = force_reg (<MODE>mode, operands[1]); > + }") > + > +(define_insn_and_split "*mov<mode>_insn" > + [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m") > + 
(match_operand:VWH 1 "general_operand" "i,r,m,r"))] > + "TARGET_PLUS_QMACW > + && (register_operand (operands[0], <MODE>mode) > + || register_operand (operands[1], <MODE>mode))" > + "* > +{ > + switch (which_alternative) > + { > + default: > + return \"#\"; > + > + case 1: > + return \"vadd2 %0, %1, 0\"; > + > + case 2: > + if (TARGET_LL64) > + return \"ldd%U1%V1 %0,%1\"; > + return \"#\"; > + > + case 3: > + if (TARGET_LL64) > + return \"std%U0%V0 %1,%0\"; > + return \"#\"; > + } > +}" > + "reload_completed" > + [(const_int 0)] > + { > + arc_split_move (operands); > + DONE; > + } > + [(set_attr "type" "move,move,load,store") > + (set_attr "predicable" "yes,no,no,no") > + (set_attr "iscompact" "false,false,false,false") > + ]) > + > +(define_expand "movmisalign<mode>" > + [(set (match_operand:VWH 0 "general_operand" "") > + (match_operand:VWH 1 "general_operand" ""))] > + "" > +{ > + if (!register_operand (operands[0], <MODE>mode) > + && !register_operand (operands[1], <MODE>mode)) > + operands[1] = force_reg (<MODE>mode, operands[1]); > +}) > + > +(define_insn "bswapv2hi2" > + [(set (match_operand:V2HI 0 "register_operand" "=r,r") > + (bswap:V2HI (match_operand:V2HI 1 "nonmemory_operand" "r,i")))] > + "TARGET_V2 && TARGET_SWAP" > + "swape %0, %1" > + [(set_attr "length" "4,8") > + (set_attr "type" "two_cycle_core")]) > + > +;; Simple arithmetic insns > +(define_insn "add<mode>3" > + [(set (match_operand:VCT 0 "register_operand" "=r,r") > + (plus:VCT (match_operand:VCT 1 "register_operand" "0,r") > + (match_operand:VCT 2 "register_operand" "r,r")))] > + "TARGET_PLUS_DMPY" > + "vadd<V_suffix>%? 
%0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "sub<mode>3" > + [(set (match_operand:VCT 0 "register_operand" "=r,r") > + (minus:VCT (match_operand:VCT 1 "register_operand" "0,r") > + (match_operand:VCT 2 "register_operand" "r,r")))] > + "TARGET_PLUS_DMPY" > + "vsub<V_suffix>%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +;; Combined arithmetic ops > +(define_insn "addsub<mode>3" > + [(set (match_operand:VDV 0 "register_operand" "=r,r") > + (vec_concat:VDV > + (plus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 > "register_operand" "0,r") > + (parallel [(const_int 0)])) > + (vec_select:<V_addsub> (match_operand:VDV 2 > "register_operand" "r,r") > + (parallel [(const_int 0)]))) > + (minus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) > (parallel [(const_int 1)])) > + (vec_select:<V_addsub> (match_dup 2) (parallel > [(const_int 1)])))))] > + "TARGET_PLUS_DMPY" > + "vaddsub<V_addsub_suffix>%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "subadd<mode>3" > + [(set (match_operand:VDV 0 "register_operand" "=r,r") > + (vec_concat:VDV > + (minus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV > 1 "register_operand" "0,r") > + (parallel [(const_int 0)])) > + (vec_select:<V_addsub> (match_operand:VDV 2 > "register_operand" "r,r") > + (parallel [(const_int 0)]))) > + (plus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel > [(const_int 1)])) > + (vec_select:<V_addsub> (match_dup 2) (parallel > [(const_int 1)])))))] > + "TARGET_PLUS_DMPY" > + "vsubadd<V_addsub_suffix>%? 
%0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "addsubv4hi3" > + [(set (match_operand:V4HI 0 "even_register_operand" "=r,r") > + (vec_concat:V4HI > + (vec_concat:V2HI > + (plus:HI (vec_select:HI (match_operand:V4HI 1 > "even_register_operand" "0,r") > + (parallel [(const_int 0)])) > + (vec_select:HI (match_operand:V4HI 2 > "even_register_operand" "r,r") > + (parallel [(const_int 0)]))) > + (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) > + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))) > + (vec_concat:V2HI > + (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) > + (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))) > + (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)])) > + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) > + ))] > + "TARGET_PLUS_QMACW" > + "vaddsub4h%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "subaddv4hi3" > + [(set (match_operand:V4HI 0 "even_register_operand" "=r,r") > + (vec_concat:V4HI > + (vec_concat:V2HI > + (minus:HI (vec_select:HI (match_operand:V4HI 1 > "even_register_operand" "0,r") > + (parallel [(const_int 0)])) > + (vec_select:HI (match_operand:V4HI 2 > "even_register_operand" "r,r") > + (parallel [(const_int 0)]))) > + (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) > + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))) > + (vec_concat:V2HI > + (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) > + (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))) > + (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)])) > + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) > + ))] > + "TARGET_PLUS_QMACW" > + "vsubadd4h%? 
%0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +;; Multiplication > +(define_insn "dmpyh<V_US_suffix>" > + [(set (match_operand:SI 0 "register_operand" "=r,r") > + (plus:SI > + (mult:SI > + (SE:SI > + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,r") > + (parallel [(const_int 0)]))) > + (SE:SI > + (vec_select:HI (match_operand:V2HI 2 "register_operand" "r,r") > + (parallel [(const_int 0)])))) > + (mult:SI > + (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) > + (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))) > + (set (reg:DI ARCV2_ACC) > + (zero_extend:DI > + (plus:SI > + (mult:SI > + (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) > + (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))) > + (mult:SI > + (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) > + (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))))] > + "TARGET_PLUS_DMPY" > + "dmpy<V_US_suffix>%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +;; We can use dmac as well here. To be investigated which version > +;; brings more. 
> +(define_expand "sdot_prodv2hi" > + [(match_operand:SI 0 "register_operand" "") > + (match_operand:V2HI 1 "register_operand" "") > + (match_operand:V2HI 2 "register_operand" "") > + (match_operand:SI 3 "register_operand" "")] > + "TARGET_PLUS_DMPY" > +{ > + rtx t = gen_reg_rtx (SImode); > + emit_insn (gen_dmpyh (t, operands[1], operands[2])); > + emit_insn (gen_addsi3 (operands[0], operands[3], t)); > + DONE; > +}) > + > +(define_expand "udot_prodv2hi" > + [(match_operand:SI 0 "register_operand" "") > + (match_operand:V2HI 1 "register_operand" "") > + (match_operand:V2HI 2 "register_operand" "") > + (match_operand:SI 3 "register_operand" "")] > + "TARGET_PLUS_DMPY" > +{ > + rtx t = gen_reg_rtx (SImode); > + emit_insn (gen_dmpyhu (t, operands[1], operands[2])); > + emit_insn (gen_addsi3 (operands[0], operands[3], t)); > + DONE; > +}) > + > +(define_insn "arc_vec_<V_US>mult_lo_v4hi" > + [(set (match_operand:V2SI 0 "even_register_operand" > "=r,r") > + (mult:V2SI (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 1 "even_register_operand" > "0,r") > + (parallel [(const_int 0) (const_int 1)]))) > + (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 2 "even_register_operand" > "r,r") > + (parallel [(const_int 0) (const_int 1)]))))) > + (set (reg:V2SI ARCV2_ACC) > + (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1) > + (parallel [(const_int 0) (const_int > 1)]))) > + (SE:V2SI (vec_select:V2HI (match_dup 2) > + (parallel [(const_int 0) (const_int > 1)]))))) > + ] > + "TARGET_PLUS_MACD" > + "vmpy2h<V_US_suffix>%? 
%0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "arc_vec_<V_US>multacc_lo_v4hi" > + [(set (reg:V2SI ARCV2_ACC) > + (mult:V2SI (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 0 "even_register_operand" > "r") > + (parallel [(const_int 0) (const_int 1)]))) > + (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 1 "even_register_operand" > "r") > + (parallel [(const_int 0) (const_int 1)]))))) > + ] > + "TARGET_PLUS_MACD" > + "vmpy2h<V_US_suffix>%? 0, %0, %1" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "no") > + (set_attr "cond" "nocond")]) > + > +(define_expand "vec_widen_<V_US>mult_lo_v4hi" > + [(set (match_operand:V2SI 0 "even_register_operand" "") > + (mult:V2SI (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 1 "even_register_operand" > "") > + (parallel [(const_int 0) (const_int 1)]))) > + (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 2 "even_register_operand" > "") > + (parallel [(const_int 0) (const_int 1)])))))] > + "TARGET_PLUS_QMACW" > + { > + emit_insn (gen_arc_vec_<V_US>mult_lo_v4hi (operands[0], > + operands[1], > + operands[2])); > + DONE; > + } > +) > + > +(define_insn "arc_vec_<V_US>mult_hi_v4hi" > + [(set (match_operand:V2SI 0 "even_register_operand" > "=r,r") > + (mult:V2SI (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 1 "even_register_operand" > "0,r") > + (parallel [(const_int 2) (const_int 3)]))) > + (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 2 "even_register_operand" > "r,r") > + (parallel [(const_int 2) (const_int 3)]))))) > + (set (reg:V2SI ARCV2_ACC) > + (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1) > + (parallel [(const_int 2) (const_int > 3)]))) > + (SE:V2SI (vec_select:V2HI (match_dup 2) > + (parallel [(const_int 2) (const_int > 3)]))))) > + ] > + "TARGET_PLUS_QMACW" > + "vmpy2h<V_US_suffix>%? 
%0, %R1, %R2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_expand "vec_widen_<V_US>mult_hi_v4hi" > + [(set (match_operand:V2SI 0 "even_register_operand" > "") > + (mult:V2SI (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 1 > "even_register_operand" "") > + (parallel [(const_int 2) (const_int 3)]))) > + (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 2 > "even_register_operand" "") > + (parallel [(const_int 2) (const_int > 3)])))))] > + "TARGET_PLUS_MACD" > + { > + emit_insn (gen_arc_vec_<V_US>mult_hi_v4hi (operands[0], > + operands[1], > + operands[2])); > + DONE; > + } > +) > + > +(define_insn "arc_vec_<V_US>mac_hi_v4hi" > + [(set (match_operand:V2SI 0 "even_register_operand" > "=r,r") > + (plus:V2SI > + (reg:V2SI ARCV2_ACC) > + (mult:V2SI (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 1 "even_register_operand" > "0,r") > + (parallel [(const_int 2) (const_int 3)]))) > + (SE:V2SI (vec_select:V2HI > + (match_operand:V4HI 2 "even_register_operand" > "r,r") > + (parallel [(const_int 2) (const_int 3)])))))) > + (set (reg:V2SI ARCV2_ACC) > + (plus:V2SI > + (reg:V2SI ARCV2_ACC) > + (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1) > + (parallel [(const_int 2) (const_int > 3)]))) > + (SE:V2SI (vec_select:V2HI (match_dup 2) > + (parallel [(const_int 2) (const_int > 3)])))))) > + ] > + "TARGET_PLUS_MACD" > + "vmac2h<V_US_suffix>%? %0, %R1, %R2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +;; Builtins > +(define_insn "dmach" > + [(set (match_operand:SI 0 "register_operand" "=r,r") > + (unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r") > + (match_operand:V2HI 2 "register_operand" "r,r") > + (reg:DI ARCV2_ACC)] > + UNSPEC_ARC_DMACH)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_DMPY" > + "dmach%? 
%0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "dmachu" > + [(set (match_operand:SI 0 "register_operand" "=r,r") > + (unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r") > + (match_operand:V2HI 2 "register_operand" "r,r") > + (reg:DI ARCV2_ACC)] > + UNSPEC_ARC_DMACHU)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_DMPY" > + "dmachu%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "dmacwh" > + [(set (match_operand:DI 0 "even_register_operand" "=r,r") > + (unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r") > + (match_operand:V2HI 2 "register_operand" "r,r") > + (reg:DI ARCV2_ACC)] > + UNSPEC_ARC_DMACWH)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_QMACW" > + "dmacwh%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "dmacwhu" > + [(set (match_operand:DI 0 "register_operand" "=r,r") > + (unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r") > + (match_operand:V2HI 2 "register_operand" "r,r") > + (reg:DI ARCV2_ACC)] > + UNSPEC_ARC_DMACWHU)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_QMACW" > + "dmacwhu%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "vmac2h" > + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") > + (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r") > + (match_operand:V2HI 2 "register_operand" "r,r") > + (reg:DI ARCV2_ACC)] > + UNSPEC_ARC_VMAC2H)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_MACD" > + "vmac2h%? 
%0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "vmac2hu" > + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") > + (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r") > + (match_operand:V2HI 2 "register_operand" "r,r") > + (reg:DI ARCV2_ACC)] > + UNSPEC_ARC_VMAC2HU)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_MACD" > + "vmac2hu%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "vmpy2h" > + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") > + (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r") > + (match_operand:V2HI 2 "register_operand" "r,r")] > + UNSPEC_ARC_VMPY2H)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_MACD" > + "vmpy2h%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "vmpy2hu" > + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") > + (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r") > + (match_operand:V2HI 2 "register_operand" "r,r")] > + UNSPEC_ARC_VMPY2HU)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_MACD" > + "vmpy2hu%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "qmach" > + [(set (match_operand:DI 0 "even_register_operand" "=r,r") > + (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r") > + (match_operand:V4HI 2 "even_register_operand" "r,r") > + (reg:DI ARCV2_ACC)] > + UNSPEC_ARC_QMACH)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_QMACW" > + "qmach%? 
%0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "qmachu" > + [(set (match_operand:DI 0 "even_register_operand" "=r,r") > + (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r") > + (match_operand:V4HI 2 "even_register_operand" "r,r") > + (reg:DI ARCV2_ACC)] > + UNSPEC_ARC_QMACHU)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_QMACW" > + "qmachu%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "qmpyh" > + [(set (match_operand:DI 0 "even_register_operand" "=r,r") > + (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r") > + (match_operand:V4HI 2 "even_register_operand" "r,r")] > + UNSPEC_ARC_QMPYH)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_QMACW" > + "qmpyh%? %0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > + > +(define_insn "qmpyhu" > + [(set (match_operand:DI 0 "even_register_operand" "=r,r") > + (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r") > + (match_operand:V4HI 2 "even_register_operand" "r,r")] > + UNSPEC_ARC_QMPYHU)) > + (clobber (reg:DI ARCV2_ACC))] > + "TARGET_PLUS_QMACW" > + "qmpyhu%? 
%0, %1, %2" > + [(set_attr "length" "4") > + (set_attr "type" "multi") > + (set_attr "predicable" "yes,no") > + (set_attr "cond" "canuse,nocond")]) > diff --git a/gcc/testsuite/gcc.target/arc/builtin_simdarc.c b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c > new file mode 100644 > index 0000000..68aae40 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c > @@ -0,0 +1,38 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mcpu=archs -O2 -Werror-implicit-function-declaration -mmpy-option=9" } */ > + > +#define STEST(name, rettype, op1type, op2type) \ > + rettype test_ ## name \ > + (op1type a, op2type b) \ > + { \ > + return __builtin_arc_ ## name (a, b); \ > + } > + > +typedef short v2hi __attribute__ ((vector_size (4))); > +typedef short v4hi __attribute__ ((vector_size (8))); > +typedef int v2si __attribute__ ((vector_size (8))); > + > +STEST (qmach, long long, v4hi, v4hi) > +STEST (qmachu, long long, v4hi, v4hi) > +STEST (qmpyh, long long, v4hi, v4hi) > +STEST (qmpyhu, long long, v4hi, v4hi) > + > +STEST (dmach, int, v2hi, v2hi) > +STEST (dmachu, int, v2hi, v2hi) > +STEST (dmpyh, int, v2hi, v2hi) > +STEST (dmpyhu, int, v2hi, v2hi) > + > +STEST (dmacwh, long, v2si, v2hi) > +STEST (dmacwhu, long, v2si, v2hi) > + > +STEST (vmac2h, v2si, v2hi, v2hi) > +STEST (vmac2hu, v2si, v2hi, v2hi) > +STEST (vmpy2h, v2si, v2hi, v2hi) > +STEST (vmpy2hu, v2si, v2hi, v2hi) > + > +STEST (vaddsub2h, v2hi, v2hi, v2hi) > +STEST (vsubadd2h, v2hi, v2hi, v2hi) > +STEST (vaddsub, v2si, v2si, v2si) > +STEST (vsubadd, v2si, v2si, v2si) > +STEST (vaddsub4h, v4hi, v4hi, v4hi) > +STEST (vsubadd4h, v4hi, v4hi, v4hi) > -- > 1.9.1