Add vector negate, reduc_plus_scal, vec_duplicate, and vector min/max/mult/div patterns. Apart from the vector negate and reduction patterns, all of them are emulated using scalar instructions. This lets the compiler take advantage of the double load/store instructions and enables the autovectorizer to further analyze a loop.
gcc/
2021-05-10  Claudiu Zissulescu  <claz...@synopsys.com>

	* config/arc/arc.md (UNSPEC_ARC_DMPYWH): Define.
	* config/arc/simdext.md (VCT): Add predicates for iterator
	elements.
	(EMUVEC): Define.
	(voptab): Likewise.
	(vec_widen_<V_US>mult_hi_v4hi): Change pattern predicate.
	(<voptab>v2si3): New patterns.
	(neg): Likewise.
	(reduc_plus_scal_v4hi): Likewise.
	(reduc_plus_scal_v2si): Likewise.
	(vec_duplicatev2si): Likewise.
	(vec_duplicatev4hi): Likewise.

Signed-off-by: Claudiu Zissulescu <claz...@synopsys.com>
---
Review notes (below the "---" marker, so not part of the commit message):
 - *neg<mode>2: the output template read "vsub<V_suffix)>"; the stray
   ")" is removed so the mode attribute reads <V_suffix>.
 - vec_duplicatev4hi: operand 0 is now earlyclobbered ("=&r").  The
   split writes the high half of operand 0 before its last read of
   operand 1, so the two must not share a register.
 - Comments were added to the new patterns; the last hunk header and
   the diffstat are updated for the ten extra lines.
 - Not changed, please confirm: vec_duplicatev4hi ORs operand 1, viewed
   in SImode, into the low word.  That looks correct only if bits 16-31
   of that register are zero.

 gcc/config/arc/arc.md     |   1 +
 gcc/config/arc/simdext.md | 122 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index f3efe65ca2f..b6f2d8e28be 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -128,6 +128,7 @@ (define_c_enum "unspec" [
   UNSPEC_ARC_DMACHU
   UNSPEC_ARC_DMACWH
   UNSPEC_ARC_DMACWHU
+  UNSPEC_ARC_DMPYWH
   UNSPEC_ARC_QMACH
   UNSPEC_ARC_QMACHU
   UNSPEC_ARC_QMPYH
diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md
index 41c42690633..c7ca306a081 100644
--- a/gcc/config/arc/simdext.md
+++ b/gcc/config/arc/simdext.md
@@ -1395,9 +1395,20 @@ (define_mode_attr V_addsub [(V2HI "HI") (V2SI "SI")])
 (define_mode_attr V_addsub_suffix [(V2HI "2h") (V2SI "")])
 
 ;;all vectors
-(define_mode_iterator VCT [V2HI V4HI V2SI])
+(define_mode_iterator VCT [(V2HI "TARGET_PLUS_DMPY")
+			   (V4HI "TARGET_PLUS_QMACW")
+			   (V2SI "TARGET_PLUS_QMACW")])
 (define_mode_attr V_suffix [(V2HI "2h") (V4HI "4h") (V2SI "2")])
 
+(define_code_iterator EMUVEC [(mult "TARGET_MPYW")
+			      (div "TARGET_DIVREM")
+			      smax smin])
+
+(define_code_attr voptab [(mult "mul")
+			  (div "div")
+			  (smin "smin")
+			  (smax "smax")])
+
 ;; Widening operations.
 (define_code_iterator SE [sign_extend zero_extend])
 (define_code_attr V_US [(sign_extend "s") (zero_extend "u")])
@@ -1805,7 +1816,7 @@ (define_expand "vec_widen_<V_US>mult_hi_v4hi"
 		  (SE:V2SI (vec_select:V2HI
 			    (match_operand:V4HI 2 "even_register_operand")
 			    (parallel [(const_int 2) (const_int 3)])))))]
-  "TARGET_PLUS_MACD"
+  "TARGET_PLUS_QMACW"
   {
      emit_insn (gen_arc_vec_<V_US>mult_hi_v4hi (operands[0],
 						operands[1],
@@ -2011,3 +2022,110 @@ (define_insn "qmpyhu"
    (set_attr "type" "multi")
    (set_attr "predicable" "yes,no")
    (set_attr "cond" "canuse,nocond")])
+
+;; Emulated vector instructions.
+;; These V2SI operations are emulated with scalar instructions: after
+;; reload the pattern is split into two SImode operations, one on the
+;; low half and one on the high half of each operand.
+(define_insn_and_split "<voptab>v2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=r")
+	(EMUVEC:V2SI (match_operand:V2SI 1 "register_operand" "r")
+		     (match_operand:V2SI 2 "nonmemory_operand" "ri")))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+  {
+    rtx high_dest = gen_highpart (SImode, operands[0]);
+    rtx low_dest = gen_lowpart (SImode, operands[0]);
+    rtx high_op1 = gen_highpart (SImode, operands[1]);
+    rtx low_op1 = gen_lowpart (SImode, operands[1]);
+    rtx high_op2 = gen_highpart (SImode, operands[2]);
+    rtx low_op2 = gen_lowpart (SImode, operands[2]);
+    emit_insn (gen_<voptab>si3 (low_dest, low_op1, low_op2));
+    emit_insn (gen_<voptab>si3 (high_dest, high_op1, high_op2));
+    DONE;
+  }
+  [(set_attr "length" "12")
+   (set_attr "type" "multi")])
+
+;; Vector negate, emitted as a vector subtract from zero.
+(define_expand "neg<mode>2"
+  [(set (match_operand:VCT 0 "register_operand")
+	(neg:VCT (match_operand:VCT 1 "register_operand")))]
+  "TARGET_PLUS_DMPY"
+  "")
+
+(define_insn "*neg<mode>2"
+  [(set (match_operand:VCT 0 "register_operand" "=r")
+	(neg:VCT (match_operand:VCT 1 "register_operand" "r")))]
+  "TARGET_PLUS_DMPY"
+  "vsub<V_suffix>\\t%0,0,%1"
+  [(set_attr "length" "8")
+   (set_attr "type" "multi")])
+
+;; Plus-reductions.  The multiply instruction is given the constant 1
+;; as its second source operand; the accumulator is clobbered.
+(define_insn "reduc_plus_scal_v4hi"
+  [(set (match_operand:HI 0 "even_register_operand" "=r")
+	(unspec:HI [(match_operand:V4HI 1 "even_register_operand" "r")]
+		   UNSPEC_ARC_QMPYH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmpyh\\t%0,%1,1"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")])
+
+(define_insn "reduc_plus_scal_v2si"
+  [(set (match_operand:SI 0 "even_register_operand" "=r")
+	(unspec:SI [(match_operand:V2SI 1 "even_register_operand" "r")]
+		   UNSPEC_ARC_DMPYWH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_DMPY"
+  "dmpywh\\t%0,%1,1"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")])
+
+;; Broadcast a scalar into both SImode halves of the destination.
+(define_insn_and_split "vec_duplicatev2si"
+  [(set (match_operand:V2SI 0 "register_operand" "=r")
+	(vec_duplicate:V2SI
+	 (match_operand:SI 1 "nonmemory_operand" "ri")))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+  {
+    rtx high_dest = gen_highpart (SImode, operands[0]);
+    rtx low_dest = gen_lowpart (SImode, operands[0]);
+    emit_move_insn (high_dest, operands[1]);
+    emit_move_insn (low_dest, operands[1]);
+    DONE;
+  }
+  [(set_attr "length" "8")
+   (set_attr "type" "multi")])
+
+;; Broadcast a 16-bit scalar into all four lanes.  The destination is
+;; earlyclobbered because its high half is written before the last
+;; read of operand 1.
+(define_insn_and_split "vec_duplicatev4hi"
+  [(set (match_operand:V4HI 0 "register_operand" "=&r")
+	(vec_duplicate:V4HI
+	 (match_operand:HI 1 "nonmemory_operand" "ri")))]
+  "TARGET_BARREL_SHIFTER"
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+  {
+    rtx high_dest = gen_highpart (SImode, operands[0]);
+    rtx low_dest = gen_lowpart (SImode, operands[0]);
+    rtx tmp = gen_lowpart (SImode, operands[1]);
+    emit_insn (gen_rtx_SET (high_dest,
+			    gen_rtx_ASHIFT (SImode, tmp, GEN_INT (16))));
+    emit_insn (gen_rtx_SET (low_dest,
+			    gen_rtx_IOR (SImode, high_dest, tmp)));
+    emit_move_insn (high_dest, low_dest);
+    DONE;
+  }
+  [(set_attr "length" "12")
+   (set_attr "type" "multi")])
-- 
2.30.2