For target VXE3 just emit a 128-bit comparison followed by a conditional load. For targets prior to VXE3, emulate the 128-bit comparison and make use of a conditional load, too.
gcc/ChangeLog: * config/s390/s390-protos.h (s390_expand_cstoreti4): New function. * config/s390/s390.cc (s390_expand_cstoreti4): New function. * config/s390/s390.md (CC_SUZ): New mode iterator. (l): New mode attribute. (cc_tolower): New mode attribute. * config/s390/vector.md (cstoreti4): New expander. (*vec_cmpv2di_lane0_<cc_tolower>): New insn. (*vec_cmpti_<cc_tolower>): New insn. gcc/testsuite/ChangeLog: * gcc.target/s390/vector/cstoreti-1.c: New test. * gcc.target/s390/vector/cstoreti-2.c: New test. --- gcc/config/s390/s390-protos.h | 1 + gcc/config/s390/s390.cc | 82 ++++++++++- gcc/config/s390/s390.md | 4 + gcc/config/s390/vector.md | 30 +++++ .../gcc.target/s390/vector/cstoreti-1.c | 127 ++++++++++++++++++ .../gcc.target/s390/vector/cstoreti-2.c | 25 ++++ 6 files changed, 266 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index e8c7f830849..d760a7e20ff 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -114,6 +114,7 @@ extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx); extern void s390_expand_vec_strlen (rtx, rtx, rtx); extern void s390_expand_vec_movstr (rtx, rtx, rtx); extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx); +extern void s390_expand_cstoreti4 (rtx, rtx, rtx, rtx); extern bool s390_expand_insv (rtx, rtx, rtx, rtx); extern void s390_expand_cs (machine_mode, rtx, rtx, rtx, rtx, rtx, bool); extern void s390_expand_atomic_exchange_tdsi (rtx, rtx, rtx); diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index e3edf859513..2d44cecfeed 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -7210,6 +7210,82 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count) NULL_RTX, 1, OPTAB_DIRECT); } +/* Expand optab cstoreti4. 
*/ + +void +s390_expand_cstoreti4 (rtx dst, rtx cmp, rtx op1, rtx op2) +{ + rtx_code code = GET_CODE (cmp); + + if (TARGET_VXE3) + { + rtx cond = s390_emit_compare (GET_MODE (cmp), code, op1, op2); + emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx)); + return; + } + + /* Prior to VXE3 emulate the comparison. For an (in)equality test exploit + VECTOR COMPARE EQUAL. For a relational test, first compare the high part + via VECTOR ELEMENT COMPARE (LOGICAL). If the high part does not equal, + then consume the CC immediately by a subsequent LOAD ON CONDITION. + Otherwise, if the high part equals, then perform a subsequent VECTOR + COMPARE HIGH LOGICAL followed by a LOAD ON CONDITION. */ + + op1 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0)); + op2 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0)); + + if (code == EQ || code == NE) + { + s390_expand_vec_compare_cc (dst, code, op1, op2, code == EQ); + return; + } + + /* Normalize code into either GE(U) or GT(U). */ + if (code == LT || code == LE || code == LTU || code == LEU) + { + std::swap (op1, op2); + code = swap_condition (code); + } + + /* For (un)signed comparisons + - high(op1) >= high(op2) instruction VECG op1, op2 sets CC1 + if the relation does _not_ hold. + - high(op1) > high(op2) instruction VECG op2, op1 sets CC1 + if the relation holds. */ + if (code == GT || code == GTU) + std::swap (op1, op2); + machine_mode cc_mode = (code == GEU || code == GTU) ? CCUmode : CCSmode; + rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); + emit_insn ( + gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM), + gen_rtx_COMPARE (cc_mode, + gen_rtx_VEC_SELECT (DImode, op1, lane0), + gen_rtx_VEC_SELECT (DImode, op2, lane0)))); + rtx ccs_reg = gen_rtx_REG (CCSmode, CC_REGNUM); + rtx lab = gen_label_rtx (); + s390_emit_jump (lab, gen_rtx_NE (VOIDmode, ccs_reg, const0_rtx)); + /* At this point we have that high(op1) == high(op2). Thus, test the low + part, now. 
For unsigned comparisons + - low(op1) >= low(op2) instruction VCHLGS op2, op1 sets CC1 + if the relation does _not_ hold. + - low(op1) > low(op2) instruction VCHLGS op1, op2 sets CC1 + if the relation holds. */ + std::swap (op1, op2); + emit_insn (gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM), + gen_rtx_COMPARE (CCVIHUmode, op1, op2)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode))))); + emit_label (lab); + /* For (un)signed comparison >= any CC except CC1 means that the relation + holds. For (un)signed comparison > only CC1 means that the relation + holds. */ + rtx_code cmp_code = (code == GE || code == GEU) ? UNGE : LT; + rtx cond = gen_rtx_fmt_ee (cmp_code, CCSmode, ccs_reg, const0_rtx); + emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx)); +} + /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store the result in TARGET. */ @@ -7310,9 +7386,9 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond, /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into TARGET if either all (ALL_P is true) or any (ALL_P is false) of the elements in CMP1 and CMP2 fulfill the comparison. - This function is only used to emit patterns for the vx builtins and - therefore only handles comparison codes required by the - builtins. */ + This function is only used in s390_expand_cstoreti4 and to emit patterns for + the vx builtins and therefore only handles comparison codes required by + those. 
*/ void s390_expand_vec_compare_cc (rtx target, enum rtx_code code, rtx cmp1, rtx cmp2, bool all_p) diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 05b9da6976a..97a4bdf96b2 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -993,6 +993,10 @@ (define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")]) (define_mode_attr insn_cmp [(CCVEQ "eq") (CCVIH "h") (CCVIHU "hl") (CCVFH "h") (CCVFHE "he")]) +(define_mode_iterator CC_SUZ [CCS CCU CCZ]) +(define_mode_attr l [(CCS "") (CCU "l") (CCZ "")]) +(define_mode_attr cc_tolower [(CCS "ccs") (CCU "ccu") (CCZ "ccz")]) + ; Analogue to TOINTVEC / tointvec (define_mode_attr TOINT [(TF "TI") (DF "DI") (SF "SI")]) (define_mode_attr toint [(TF "ti") (DF "di") (SF "si")]) diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index e29255fe111..160e42a3005 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -538,6 +538,14 @@ "vlvg<bhfgq>\t%v0,%1,%Y4(%2)" [(set_attr "op_type" "VRS")]) +(define_expand "cstoreti4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(match_operand:TI 2 "register_operand") + (match_operand:TI 3 "register_operand")]))] + "TARGET_VX" + "s390_expand_cstoreti4 (operands[0], operands[1], operands[2], operands[3]); DONE;") + ;; FIXME: Support also vector mode operands for 0 ;; This is used via RTL standard name as well as for expanding the builtin @@ -2209,6 +2217,28 @@ operands[5] = gen_reg_rtx (V2DImode); }) +(define_insn "*vec_cmpv2di_lane0_<cc_tolower>" + [(set (reg:CC_SUZ CC_REGNUM) + (compare:CC_SUZ + (vec_select:DI + (match_operand:V2DI 0 "register_operand" "v") + (parallel [(const_int 0)])) + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "v") + (parallel [(const_int 0)]))))] + "TARGET_VX" + "vec<l>g\t%v0,%v1" + [(set_attr "op_type" "VRR")]) + +(define_insn "*vec_cmpti_<cc_tolower>" + [(set (reg:CC_SUZ CC_REGNUM) + (compare:CC_SUZ + (match_operand:TI 0 
"register_operand" "v") + (match_operand:TI 1 "register_operand" "v")))] + "TARGET_VXE3" + "vec<l>q\t%v0,%v1" + [(set_attr "op_type" "VRR")]) + ;; ;; Floating point compares diff --git a/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c b/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c new file mode 100644 index 00000000000..f2a131be4c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c @@ -0,0 +1,127 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -march=z13" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** test_le: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_le (__int128 x, __int128 y) { return x <= y; } + +/* +** test_leu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_leu (unsigned __int128 x, unsigned __int128 y) { return x <= y; } + +/* +** test_lt: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_lt (__int128 x, __int128 y) { return x < y; } + +/* +** test_ltu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_ltu (unsigned __int128 x, unsigned __int128 y) { return x < y; } + +/* +** test_ge: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_ge (__int128 x, __int128 y) { return x >= y; } + +/* +** test_geu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_geu (unsigned __int128 x, unsigned __int128 y) { return x >= y; } + +/* +** test_gt: 
+** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_gt (__int128 x, __int128 y) { return x > y; } + +/* +** test_gtu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_gtu (unsigned __int128 x, unsigned __int128 y) { return x > y; } + +/* { dg-final { scan-assembler-times {vceqgs\t} 4 } } */ +/* { dg-final { scan-assembler-times {locghie\t} 2 } } */ +/* { dg-final { scan-assembler-times {locghine\t} 2 } } */ + +int test_eq (__int128 x, __int128 y) { return x == y; } + +int test_equ (unsigned __int128 x, unsigned __int128 y) { return x == y; } + +int test_ne (__int128 x, __int128 y) { return x != y; } + +int test_neu (unsigned __int128 x, unsigned __int128 y) { return x != y; } diff --git a/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c b/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c new file mode 100644 index 00000000000..d7b03828083 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -march=z17" } */ +/* { dg-final { scan-assembler-times {vecq\t} 8 } } */ +/* { dg-final { scan-assembler-times {veclq\t} 4 } } */ +/* { dg-final { scan-assembler-times {locghile\t} 1 } } LE */ +/* { dg-final { scan-assembler-times {slbgr\t} 1 } } LEU */ +/* { dg-final { scan-assembler-times {locghil\t} 2 } } LT LTU */ +/* { dg-final { scan-assembler-times {locghihe\t} 2 } } GE GEU */ +/* { dg-final { scan-assembler-times {locghih\t} 1 } } GT */ +/* { dg-final { scan-assembler-times {alcgr\t} 1 } } GTU */ +/* { dg-final { scan-assembler-times {locghie\t} 2 } } EQ EQU */ +/* { dg-final { scan-assembler-times {locghine\t} 2 } } NE NEU */ + +int test_le (__int128 x, __int128 y) { return x <= y; } +int test_leu (unsigned __int128 x, unsigned __int128 y) { return x <= y; } 
+int test_lt (__int128 x, __int128 y) { return x < y; } +int test_ltu (unsigned __int128 x, unsigned __int128 y) { return x < y; } +int test_ge (__int128 x, __int128 y) { return x >= y; } +int test_geu (unsigned __int128 x, unsigned __int128 y) { return x >= y; } +int test_gt (__int128 x, __int128 y) { return x > y; } +int test_gtu (unsigned __int128 x, unsigned __int128 y) { return x > y; } +int test_eq (__int128 x, __int128 y) { return x == y; } +int test_equ (unsigned __int128 x, unsigned __int128 y) { return x == y; } +int test_ne (__int128 x, __int128 y) { return x != y; } +int test_neu (unsigned __int128 x, unsigned __int128 y) { return x != y; } -- 2.49.0