Both plus and min/max. --- gcc/config/mips/loongson.md | 230 +++++++++++++++++++++++++++++++++++++---- gcc/config/mips/mips-protos.h | 3 + gcc/config/mips/mips.c | 77 ++++++++++++++ 3 files changed, 290 insertions(+), 20 deletions(-)
diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md
index e9fa616..4f9cc73 100644
--- a/gcc/config/mips/loongson.md
+++ b/gcc/config/mips/loongson.md
@@ -39,6 +39,8 @@
   UNSPEC_LOONGSON_PUNPCKL
   UNSPEC_LOONGSON_PADDD
   UNSPEC_LOONGSON_PSUBD
+  UNSPEC_LOONGSON_DSLL   ; used by vec_shl_<mode>, which emits dsll
+  UNSPEC_LOONGSON_DSRL   ; used by vec_shr_<mode>, which emits dsrl
 ])
 
 ;; Mode iterators and attributes.
@@ -58,6 +60,9 @@
 ;; 64-bit vectors of words and halfwords.
 (define_mode_iterator VWH [V2SI V4HI])
 
+;; 64-bit vectors of words and bytes (the modes of the smax/smin expanders).
+(define_mode_iterator VWB [V2SI V8QI])
+
 ;; 64-bit vectors of words, halfwords and bytes.
 (define_mode_iterator VWHB [V2SI V4HI V8QI])
 
@@ -404,39 +409,61 @@
 })
 
 ;; Maximum of signed halfwords.
-(define_insn "smax<mode>3"
-  [(set (match_operand:VH 0 "register_operand" "=f")
-        (smax:VH (match_operand:VH 1 "register_operand" "f")
-                 (match_operand:VH 2 "register_operand" "f")))]
+(define_insn "smaxv4hi3"   ; V4HI only: a single pmaxsh
+  [(set (match_operand:V4HI 0 "register_operand" "=f")
+        (smax:V4HI (match_operand:V4HI 1 "register_operand" "f")
+                   (match_operand:V4HI 2 "register_operand" "f")))]
   "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
-  "pmaxs<V_suffix>\t%0,%1,%2"
+  "pmaxsh\t%0,%1,%2"
   [(set_attr "type" "fadd")])
 
+(define_expand "smax<mode>3"   ; V2SI/V8QI: sequence built by mips_expand_vec_minmax
+  [(match_operand:VWB 0 "register_operand" "")
+   (match_operand:VWB 1 "register_operand" "")
+   (match_operand:VWB 2 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+  mips_expand_vec_minmax (operands[0], operands[1], operands[2],
+                          gen_loongson_pcmpgt<V_suffix>, false);  /* min_p false: maximum.  */
+  DONE;
+})
+
 ;; Maximum of unsigned bytes.
-(define_insn "umax<mode>3"
-  [(set (match_operand:VB 0 "register_operand" "=f")
-        (umax:VB (match_operand:VB 1 "register_operand" "f")
-                 (match_operand:VB 2 "register_operand" "f")))]
+(define_insn "umaxv8qi3"   ; V8QI only: a single pmaxub
+  [(set (match_operand:V8QI 0 "register_operand" "=f")
+        (umax:V8QI (match_operand:V8QI 1 "register_operand" "f")
+                   (match_operand:V8QI 2 "register_operand" "f")))]
   "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
-  "pmaxu<V_suffix>\t%0,%1,%2"
+  "pmaxub\t%0,%1,%2"
   [(set_attr "type" "fadd")])
 
 ;; Minimum of signed halfwords.
-(define_insn "smin<mode>3"
-  [(set (match_operand:VH 0 "register_operand" "=f")
-        (smin:VH (match_operand:VH 1 "register_operand" "f")
-                 (match_operand:VH 2 "register_operand" "f")))]
+(define_insn "sminv4hi3"   ; V4HI only: a single pminsh
+  [(set (match_operand:V4HI 0 "register_operand" "=f")
+        (smin:V4HI (match_operand:V4HI 1 "register_operand" "f")
+                   (match_operand:V4HI 2 "register_operand" "f")))]
   "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
-  "pmins<V_suffix>\t%0,%1,%2"
+  "pminsh\t%0,%1,%2"
   [(set_attr "type" "fadd")])
 
+(define_expand "smin<mode>3"   ; V2SI/V8QI: sequence built by mips_expand_vec_minmax
+  [(match_operand:VWB 0 "register_operand" "")
+   (match_operand:VWB 1 "register_operand" "")
+   (match_operand:VWB 2 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+  mips_expand_vec_minmax (operands[0], operands[1], operands[2],
+                          gen_loongson_pcmpgt<V_suffix>, true);  /* min_p true: minimum.  */
+  DONE;
+})
+
 ;; Minimum of unsigned bytes.
-(define_insn "umin<mode>3"
-  [(set (match_operand:VB 0 "register_operand" "=f")
-        (umin:VB (match_operand:VB 1 "register_operand" "f")
-                 (match_operand:VB 2 "register_operand" "f")))]
+(define_insn "uminv8qi3"   ; V8QI only: a single pminub
+  [(set (match_operand:V8QI 0 "register_operand" "=f")
+        (umin:V8QI (match_operand:V8QI 1 "register_operand" "f")
+                   (match_operand:V8QI 2 "register_operand" "f")))]
   "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
-  "pminu<V_suffix>\t%0,%1,%2"
+  "pminub\t%0,%1,%2"
   [(set_attr "type" "fadd")])
 
 ;; Move byte mask.
@@ -506,6 +533,14 @@
   "biadd\t%0,%1"
   [(set_attr "type" "fabs")])
 
+(define_insn "reduc_uplus_v8qi"   ; V8QI plus-reduction: one biadd, V8QI result mode
+  [(set (match_operand:V8QI 0 "register_operand" "=f")
+        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "f")]
+                     UNSPEC_LOONGSON_BIADD))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "biadd\t%0,%1"
+  [(set_attr "type" "fabs")])
+
 ;; Sum of absolute differences.
 (define_insn "loongson_psadbh"
   [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f")
@@ -620,6 +655,20 @@
   "punpckhhw\t%0,%1,%2"
   [(set_attr "type" "fdiv")])
 
+(define_insn "loongson_punpckhhw_qi"   ; punpckhhw with the selection written in byte indices
+  [(set (match_operand:V8QI 0 "register_operand" "=f")
+        (vec_select:V8QI
+          (vec_concat:V16QI
+            (match_operand:V8QI 1 "register_operand" "f")
+            (match_operand:V8QI 2 "register_operand" "f"))
+          (parallel [(const_int 4) (const_int 5)
+                     (const_int 12) (const_int 13)
+                     (const_int 6) (const_int 7)
+                     (const_int 14) (const_int 15)])))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "punpckhhw\t%0,%1,%2"
+  [(set_attr "type" "fdiv")])
+
 (define_insn "loongson_punpckhwd"
   [(set (match_operand:V2SI 0 "register_operand" "=f")
         (vec_select:V2SI
@@ -631,6 +680,32 @@
   "punpckhwd\t%0,%1,%2"
   [(set_attr "type" "fcvt")])
 
+(define_insn "loongson_punpckhwd_qi"   ; punpckhwd on bytes; called from mips_expand_vec_reduc
+  [(set (match_operand:V8QI 0 "register_operand" "=f")
+        (vec_select:V8QI
+          (vec_concat:V16QI
+            (match_operand:V8QI 1 "register_operand" "f")
+            (match_operand:V8QI 2 "register_operand" "f"))
+          (parallel [(const_int 4) (const_int 5)
+                     (const_int 6) (const_int 7)
+                     (const_int 12) (const_int 13)
+                     (const_int 14) (const_int 15)])))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "punpckhwd\t%0,%1,%2"
+  [(set_attr "type" "fcvt")])
+
+(define_insn "loongson_punpckhwd_hi"   ; punpckhwd on halfwords; called from mips_expand_vec_reduc
+  [(set (match_operand:V4HI 0 "register_operand" "=f")
+        (vec_select:V4HI
+          (vec_concat:V8HI
+            (match_operand:V4HI 1 "register_operand" "f")
+            (match_operand:V4HI 2 "register_operand" "f"))
+          (parallel [(const_int 2) (const_int 3)
+                     (const_int 6) (const_int 7)])))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "punpckhwd\t%0,%1,%2"
+  [(set_attr "type" "fcvt")])
+
 ;; Unpack low data.
 (define_insn "loongson_punpcklbh"
   [(set (match_operand:V8QI 0 "register_operand" "=f")
@@ -658,6 +733,20 @@
   "punpcklhw\t%0,%1,%2"
   [(set_attr "type" "fdiv")])
 
+(define_insn "*loongson_punpcklhw_qi"   ; '*' name: matched by RTL only, no gen_ function
+  [(set (match_operand:V8QI 0 "register_operand" "=f")
+        (vec_select:V8QI
+          (vec_concat:V16QI
+            (match_operand:V8QI 1 "register_operand" "f")
+            (match_operand:V8QI 2 "register_operand" "f"))
+          (parallel [(const_int 0) (const_int 1)
+                     (const_int 8) (const_int 9)
+                     (const_int 2) (const_int 3)
+                     (const_int 10) (const_int 11)])))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "punpcklhw\t%0,%1,%2"
+  [(set_attr "type" "fdiv")])
+
 (define_insn "loongson_punpcklwd"
   [(set (match_operand:V2SI 0 "register_operand" "=f")
         (vec_select:V2SI
@@ -669,6 +758,32 @@
   "punpcklwd\t%0,%1,%2"
   [(set_attr "type" "fcvt")])
 
+(define_insn "*loongson_punpcklwd_qi"   ; '*' name: matched by RTL only, no gen_ function
+  [(set (match_operand:V8QI 0 "register_operand" "=f")
+        (vec_select:V8QI
+          (vec_concat:V16QI
+            (match_operand:V8QI 1 "register_operand" "f")
+            (match_operand:V8QI 2 "register_operand" "f"))
+          (parallel [(const_int 0) (const_int 1)
+                     (const_int 2) (const_int 3)
+                     (const_int 8) (const_int 9)
+                     (const_int 10) (const_int 11)])))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "punpcklwd\t%0,%1,%2"
+  [(set_attr "type" "fcvt")])
+
+(define_insn "*loongson_punpcklwd_hi"   ; '*' name: matched by RTL only, no gen_ function
+  [(set (match_operand:V4HI 0 "register_operand" "=f")
+        (vec_select:V4HI
+          (vec_concat:V8HI
+            (match_operand:V4HI 1 "register_operand" "f")
+            (match_operand:V4HI 2 "register_operand" "f"))
+          (parallel [(const_int 0) (const_int 1)
+                     (const_int 4) (const_int 5)])))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "punpcklwd\t%0,%1,%2"
+  [(set_attr "type" "fcvt")])
+
 (define_expand "vec_perm_const<mode>"
   [(match_operand:VWHB 0 "register_operand" "")
    (match_operand:VWHB 1 "register_operand" "")
@@ -718,6 +833,81 @@
   DONE;
 })
 
+;; Whole vector shifts, used for reduction epilogues.
+(define_insn "vec_shl_<mode>"   ; operand 2 is an SImode shift count under constraint "f"
+  [(set (match_operand:VWHBDI 0 "register_operand" "=f")
+        (unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f")
+                        (match_operand:SI 2 "register_operand" "f")]
+                       UNSPEC_LOONGSON_DSLL))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "dsll\t%0,%1,%2"
+  [(set_attr "type" "fcvt")])
+
+(define_insn "vec_shr_<mode>"   ; as vec_shl_<mode>, emitting dsrl; called from mips_expand_vec_reduc
+  [(set (match_operand:VWHBDI 0 "register_operand" "=f")
+        (unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f")
+                        (match_operand:SI 2 "register_operand" "f")]
+                       UNSPEC_LOONGSON_DSRL))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "dsrl\t%0,%1,%2"
+  [(set_attr "type" "fcvt")])
+
+(define_expand "reduc_uplus_<mode>"   ; V2SI/V4HI: mips_expand_vec_reduc with add<mode>3
+  [(match_operand:VWH 0 "register_operand" "")
+   (match_operand:VWH 1 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+  mips_expand_vec_reduc (operands[0], operands[1], gen_add<mode>3);
+  DONE;
+})
+
+; ??? Given that we're not describing a widening reduction, we should
+; not have separate optabs for signed and unsigned.
+(define_expand "reduc_splus_<mode>"   ; forwards to reduc_uplus_<mode>
+  [(match_operand:VWHB 0 "register_operand" "")
+   (match_operand:VWHB 1 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+  emit_insn (gen_reduc_uplus_<mode> (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "reduc_smax_<mode>"   ; mips_expand_vec_reduc with smax<mode>3
+  [(match_operand:VWHB 0 "register_operand" "")
+   (match_operand:VWHB 1 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+  mips_expand_vec_reduc (operands[0], operands[1], gen_smax<mode>3);
+  DONE;
+})
+
+(define_expand "reduc_smin_<mode>"   ; mips_expand_vec_reduc with smin<mode>3
+  [(match_operand:VWHB 0 "register_operand" "")
+   (match_operand:VWHB 1 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+  mips_expand_vec_reduc (operands[0], operands[1], gen_smin<mode>3);
+  DONE;
+})
+
+(define_expand "reduc_umax_<mode>"   ; bytes only: mips_expand_vec_reduc with umax<mode>3
+  [(match_operand:VB 0 "register_operand" "")
+   (match_operand:VB 1 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+  mips_expand_vec_reduc (operands[0], operands[1], gen_umax<mode>3);
+  DONE;
+})
+
+(define_expand "reduc_umin_<mode>"   ; bytes only: mips_expand_vec_reduc with umin<mode>3
+  [(match_operand:VB 0 "register_operand" "")
+   (match_operand:VB 1 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+  mips_expand_vec_reduc (operands[0], operands[1], gen_umin<mode>3);
+  DONE;
+})
+
 
 ;; Integer division and modulus. For integer multiplication, see mips.md.
 (define_insn "<u>div<mode>3"
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 82c8c33..1791ce7 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -330,6 +330,9 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs,
 extern void mips_expand_vector_init (rtx, rtx);
 extern bool mips_expand_vec_perm_const (rtx op[4]);
 extern void mips_expand_vec_unpack (rtx op[2], bool, bool);
+extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx));
+extern void mips_expand_vec_minmax (rtx, rtx, rtx,
+                                    rtx (*) (rtx, rtx, rtx), bool);
 
 extern bool mips_eh_uses (unsigned int);
 extern bool mips_epilogue_uses (unsigned int);
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 45b8454..a8f3b26 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -16834,6 +16834,116 @@ mips_expand_vector_init (rtx target, rtx vals)
 
   mips_expand_vi_general (vmode, imode, nelt, nvar, target, vals);
 }
+
+/* Expand a vector reduction.  Fold the elements of IN together with the
+   binary operation emitted by GEN (called as GEN (dest, src1, src2)), and
+   finish with an insn that writes the combined vector to TARGET.  IN is
+   only read: every partial result goes into a fresh pseudo, so any other
+   user of IN still sees its original value.  */
+
+void
+mips_expand_vec_reduc (rtx target, rtx in, rtx (*gen)(rtx, rtx, rtx))
+{
+  enum machine_mode vmode = GET_MODE (in);
+  unsigned char perm2[2];
+  rtx last, next, fold, x;
+  bool ok;
+
+  /* LAST is the current partial result.  FOLD is a rearranged copy of
+     LAST, so that GEN (_, LAST, FOLD) combines pairs of elements.  */
+  last = in;
+  fold = gen_reg_rtx (vmode);
+  switch (vmode)
+    {
+    case V2SFmode:
+      /* Use PUL/PLU to produce { L, H } op { H, L }.
+         By reversing the pair order, rather than a pure interleave high,
+         we don't produce erroneous exceptional conditions.  */
+      perm2[0] = 1;
+      perm2[1] = 2;
+      ok = expand_vselect_vconcat (fold, last, last, perm2, 2);
+      gcc_assert (ok);
+      break;
+
+    case V2SImode:
+      /* Use interleave to produce { H, L } op { H, H }.  */
+      emit_insn (gen_loongson_punpckhwd (fold, last, last));
+      break;
+
+    case V4HImode:
+      /* Perform the first reduction with interleave,
+         and subsequent reductions with shifts.  */
+      emit_insn (gen_loongson_punpckhwd_hi (fold, last, last));
+
+      next = gen_reg_rtx (vmode);
+      emit_insn (gen (next, last, fold));
+      last = next;
+
+      fold = gen_reg_rtx (vmode);
+      x = force_reg (SImode, GEN_INT (16));
+      emit_insn (gen_vec_shr_v4hi (fold, last, x));
+      break;
+
+    case V8QImode:
+      /* As for V4HImode, with one more shift step for the bytes.  */
+      emit_insn (gen_loongson_punpckhwd_qi (fold, last, last));
+
+      next = gen_reg_rtx (vmode);
+      emit_insn (gen (next, last, fold));
+      last = next;
+
+      fold = gen_reg_rtx (vmode);
+      x = force_reg (SImode, GEN_INT (16));
+      emit_insn (gen_vec_shr_v8qi (fold, last, x));
+
+      next = gen_reg_rtx (vmode);
+      emit_insn (gen (next, last, fold));
+      last = next;
+
+      fold = gen_reg_rtx (vmode);
+      x = force_reg (SImode, GEN_INT (8));
+      emit_insn (gen_vec_shr_v8qi (fold, last, x));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (gen (target, last, fold));
+}
+
+/* Expand a vector minimum (MIN_P true) or maximum (MIN_P false) of OP0
+   and OP1 into TARGET.  CMP emits CMP (dest, a, b), which is expected to
+   set each element of DEST to all-ones where A > B and to zero elsewhere;
+   that mask then selects between OP0 and OP1 element by element.  */
+
+void
+mips_expand_vec_minmax (rtx target, rtx op0, rtx op1,
+                        rtx (*cmp) (rtx, rtx, rtx), bool min_p)
+{
+  enum machine_mode vmode = GET_MODE (target);
+  rtx tc, t0, t1, x;
+
+  tc = gen_reg_rtx (vmode);
+  t0 = gen_reg_rtx (vmode);
+  t1 = gen_reg_rtx (vmode);
+
+  /* tc = op0 > op1, as a per-element mask.  */
+  emit_insn (cmp (tc, op0, op1));
+
+  /* t0 = the elements chosen where the mask is set.  */
+  x = gen_rtx_AND (vmode, tc, (min_p ? op1 : op0));
+  emit_insn (gen_rtx_SET (VOIDmode, t0, x));
+
+  /* t1 = the elements chosen where the mask is clear.  */
+  x = gen_rtx_NOT (vmode, tc);
+  x = gen_rtx_AND (vmode, x, (min_p ? op0 : op1));
+  emit_insn (gen_rtx_SET (VOIDmode, t1, x));
+
+  /* Merge the two disjoint selections.  */
+  x = gen_rtx_IOR (vmode, t0, t1);
+  emit_insn (gen_rtx_SET (VOIDmode, target, x));
+}
 
 /* Initialize the GCC target structure. */
 #undef TARGET_ASM_ALIGNED_HI_OP
-- 
1.7.7.4