For target VXE3, just emit a 128-bit comparison followed by a conditional
load.  For targets prior to VXE3, emulate the 128-bit comparison and make
use of a conditional load, too.

gcc/ChangeLog:

        * config/s390/s390-protos.h (s390_expand_cstoreti4): New
        function.
        * config/s390/s390.cc (s390_expand_cstoreti4): New function.
        * config/s390/s390.md (CC_SUZ): New mode iterator.
        (l): New mode attribute.
        (cc_tolower): New mode attribute.
        * config/s390/vector.md (cstoreti4): New expander.
        (*vec_cmpv2di_lane0_<cc_tolower>): New insn.
        (*vec_cmpti_<cc_tolower>): New insn.

gcc/testsuite/ChangeLog:

        * gcc.target/s390/vector/cstoreti-1.c: New test.
        * gcc.target/s390/vector/cstoreti-2.c: New test.
---
 gcc/config/s390/s390-protos.h                 |   1 +
 gcc/config/s390/s390.cc                       |  82 ++++++++++-
 gcc/config/s390/s390.md                       |   4 +
 gcc/config/s390/vector.md                     |  30 +++++
 .../gcc.target/s390/vector/cstoreti-1.c       | 127 ++++++++++++++++++
 .../gcc.target/s390/vector/cstoreti-2.c       |  25 ++++
 6 files changed, 266 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index e8c7f830849..d760a7e20ff 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -114,6 +114,7 @@ extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
 extern void s390_expand_vec_strlen (rtx, rtx, rtx);
 extern void s390_expand_vec_movstr (rtx, rtx, rtx);
 extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
+extern void s390_expand_cstoreti4 (rtx, rtx, rtx, rtx);
 extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
 extern void s390_expand_cs (machine_mode, rtx, rtx, rtx, rtx, rtx, bool);
 extern void s390_expand_atomic_exchange_tdsi (rtx, rtx, rtx);
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index e3edf859513..2d44cecfeed 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -7210,6 +7210,82 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
                              NULL_RTX, 1, OPTAB_DIRECT);
 }
 
+/* Expand optab cstoreti4: store 1 in DST if CMP (OP1, OP2) holds, else 0.  */
+
+void
+s390_expand_cstoreti4 (rtx dst, rtx cmp, rtx op1, rtx op2)
+{
+  rtx_code code = GET_CODE (cmp);
+
+  if (TARGET_VXE3)
+    {
+      rtx cond = s390_emit_compare (GET_MODE (cmp), code, op1, op2);
+      emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+      return;
+    }
+
+  /* Prior to VXE3, emulate the comparison.  For an (in)equality test exploit
+     VECTOR COMPARE EQUAL.  For a relational test, first compare the high part
+     via VECTOR ELEMENT COMPARE (LOGICAL).  If the high part does not equal,
+     then consume the CC immediately by a subsequent LOAD ON CONDITION.
+     Otherwise, if the high part equals, then perform a subsequent VECTOR
+     COMPARE HIGH LOGICAL followed by a LOAD ON CONDITION.  */
+
+  op1 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0));
+  op2 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0));
+
+  if (code == EQ || code == NE)
+    {
+      s390_expand_vec_compare_cc (dst, code, op1, op2, code == EQ);
+      return;
+    }
+
+  /* Normalize code into either GE(U) or GT(U).  */
+  if (code == LT || code == LE || code == LTU || code == LEU)
+    {
+      std::swap (op1, op2);
+      code = swap_condition (code);
+    }
+
+  /* For (un)signed comparisons
+     - high(op1) >= high(op2) instruction VECG op1, op2 sets CC1
+       if the relation does _not_ hold.
+     - high(op1) >  high(op2) instruction VECG op2, op1 sets CC1
+       if the relation holds.  */
+  if (code == GT || code == GTU)
+    std::swap (op1, op2);
+  machine_mode cc_mode = (code == GEU || code == GTU) ? CCUmode : CCSmode;
+  rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+  emit_insn (
+    gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
+                gen_rtx_COMPARE (cc_mode,
+                                 gen_rtx_VEC_SELECT (DImode, op1, lane0),
+                                 gen_rtx_VEC_SELECT (DImode, op2, lane0))));
+  rtx ccs_reg = gen_rtx_REG (CCSmode, CC_REGNUM);
+  rtx lab = gen_label_rtx ();
+  s390_emit_jump (lab, gen_rtx_NE (VOIDmode, ccs_reg, const0_rtx));
+  /* At this point we have that high(op1) == high(op2).  Thus, test the low
+     part, now.  For unsigned comparisons
+     - low(op1) >= low(op2) instruction VCHLGS op2, op1 sets CC1
+       if the relation does _not_ hold.
+     - low(op1) >  low(op2) instruction VCHLGS op1, op2 sets CC1
+       if the relation holds.  */
+  std::swap (op1, op2);
+  emit_insn (gen_rtx_PARALLEL (
+    VOIDmode,
+    gen_rtvec (2,
+              gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM),
+                           gen_rtx_COMPARE (CCVIHUmode, op1, op2)),
+              gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)))));
+  emit_label (lab);
+  /* For (un)signed comparison >= any CC except CC1 means that the relation
+     holds.  For (un)signed comparison > only CC1 means that the relation
+     holds.  */
+  rtx_code cmp_code = (code == GE || code == GEU) ? UNGE : LT;
+  rtx cond = gen_rtx_fmt_ee (cmp_code, CCSmode, ccs_reg, const0_rtx);
+  emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+}
+
 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
    the result in TARGET.  */
 
@@ -7310,9 +7386,9 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
    TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
    elements in CMP1 and CMP2 fulfill the comparison.
-   This function is only used to emit patterns for the vx builtins and
-   therefore only handles comparison codes required by the
-   builtins.  */
+   This function is only used in s390_expand_cstoreti4 and to emit patterns for
+   the vx builtins and therefore only handles comparison codes required by
+   those.  */
 void
 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
                            rtx cmp1, rtx cmp2, bool all_p)
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 05b9da6976a..97a4bdf96b2 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -993,6 +993,10 @@
 (define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")])
 (define_mode_attr insn_cmp [(CCVEQ "eq") (CCVIH "h") (CCVIHU "hl") (CCVFH "h") (CCVFHE "he")])
 
+(define_mode_iterator CC_SUZ [CCS CCU CCZ])
+(define_mode_attr l [(CCS "") (CCU "l") (CCZ "")])
+(define_mode_attr cc_tolower [(CCS "ccs") (CCU "ccu") (CCZ "ccz")])
+
 ; Analogue to TOINTVEC / tointvec
 (define_mode_attr TOINT [(TF "TI") (DF "DI") (SF "SI")])
 (define_mode_attr toint [(TF "ti") (DF "di") (SF "si")])
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index e29255fe111..160e42a3005 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -538,6 +538,14 @@
   "vlvg<bhfgq>\t%v0,%1,%Y4(%2)"
   [(set_attr "op_type" "VRS")])
 
+(define_expand "cstoreti4"
+  [(set (match_operand:SI 0 "register_operand")
+       (match_operator:SI 1 "ordered_comparison_operator"
+        [(match_operand:TI 2 "register_operand")
+         (match_operand:TI 3 "register_operand")]))]
+  "TARGET_VX"
+  "s390_expand_cstoreti4 (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
 
 ;; FIXME: Support also vector mode operands for 0
 ;; This is used via RTL standard name as well as for expanding the builtin
@@ -2209,6 +2217,28 @@
   operands[5] = gen_reg_rtx (V2DImode);
 })
 
+(define_insn "*vec_cmpv2di_lane0_<cc_tolower>"
+  [(set (reg:CC_SUZ CC_REGNUM)
+       (compare:CC_SUZ
+         (vec_select:DI
+           (match_operand:V2DI 0 "register_operand" "v")
+           (parallel [(const_int 0)]))
+         (vec_select:DI
+           (match_operand:V2DI 1 "register_operand" "v")
+           (parallel [(const_int 0)]))))]
+  "TARGET_VX"
+  "vec<l>g\t%v0,%v1"
+  [(set_attr "op_type" "VRR")])
+
+(define_insn "*vec_cmpti_<cc_tolower>"
+  [(set (reg:CC_SUZ CC_REGNUM)
+       (compare:CC_SUZ
+         (match_operand:TI 0 "register_operand" "v")
+         (match_operand:TI 1 "register_operand" "v")))]
+  "TARGET_VXE3"
+  "vec<l>q\t%v0,%v1"
+  [(set_attr "op_type" "VRR")])
+
 
 ;;
 ;; Floating point compares
diff --git a/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c b/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c
new file mode 100644
index 00000000000..f2a131be4c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c
@@ -0,0 +1,127 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -march=z13" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** test_le:
+**     vl      (%v.),0\(%r2\),3
+**     vl      (%v.),0\(%r3\),3
+**     vecg    \2,\1
+**     jne     \.L.+
+**     vchlgs  %v.,\1,\2
+**     lghi    %r2,0
+**     locghinl        %r2,1
+**     br      %r14
+*/
+
+int test_le (__int128 x, __int128 y) { return x <= y; }
+
+/*
+** test_leu:
+**     vl      (%v.),0\(%r2\),3
+**     vl      (%v.),0\(%r3\),3
+**     veclg   \2,\1
+**     jne     \.L.+
+**     vchlgs  %v.,\1,\2
+**     lghi    %r2,0
+**     locghinl        %r2,1
+**     br      %r14
+*/
+
+int test_leu (unsigned __int128 x, unsigned __int128 y) { return x <= y; }
+
+/*
+** test_lt:
+**     vl      (%v.),0\(%r2\),3
+**     vl      (%v.),0\(%r3\),3
+**     vecg    \1,\2
+**     jne     \.L.+
+**     vchlgs  %v.,\2,\1
+**     lghi    %r2,0
+**     locghil %r2,1
+**     br      %r14
+*/
+
+int test_lt (__int128 x, __int128 y) { return x < y; }
+
+/*
+** test_ltu:
+**     vl      (%v.),0\(%r2\),3
+**     vl      (%v.),0\(%r3\),3
+**     veclg   \1,\2
+**     jne     \.L.+
+**     vchlgs  %v.,\2,\1
+**     lghi    %r2,0
+**     locghil %r2,1
+**     br      %r14
+*/
+
+int test_ltu (unsigned __int128 x, unsigned __int128 y) { return x < y; }
+
+/*
+** test_ge:
+**     vl      (%v.),0\(%r2\),3
+**     vl      (%v.),0\(%r3\),3
+**     vecg    \1,\2
+**     jne     \.L.+
+**     vchlgs  %v.,\2,\1
+**     lghi    %r2,0
+**     locghinl        %r2,1
+**     br      %r14
+*/
+
+int test_ge (__int128 x, __int128 y) { return x >= y; }
+
+/*
+** test_geu:
+**     vl      (%v.),0\(%r2\),3
+**     vl      (%v.),0\(%r3\),3
+**     veclg   \1,\2
+**     jne     \.L.+
+**     vchlgs  %v.,\2,\1
+**     lghi    %r2,0
+**     locghinl        %r2,1
+**     br      %r14
+*/
+
+int test_geu (unsigned __int128 x, unsigned __int128 y) { return x >= y; }
+
+/*
+** test_gt:
+**     vl      (%v.),0\(%r2\),3
+**     vl      (%v.),0\(%r3\),3
+**     vecg    \2,\1
+**     jne     \.L.+
+**     vchlgs  %v.,\1,\2
+**     lghi    %r2,0
+**     locghil %r2,1
+**     br      %r14
+*/
+
+int test_gt (__int128 x, __int128 y) { return x > y; }
+
+/*
+** test_gtu:
+**     vl      (%v.),0\(%r2\),3
+**     vl      (%v.),0\(%r3\),3
+**     veclg   \2,\1
+**     jne     \.L.+
+**     vchlgs  %v.,\1,\2
+**     lghi    %r2,0
+**     locghil %r2,1
+**     br      %r14
+*/
+
+int test_gtu (unsigned __int128 x, unsigned __int128 y) { return x > y; }
+
+/* { dg-final { scan-assembler-times {vceqgs\t} 4 } } */
+/* { dg-final { scan-assembler-times {locghie\t} 2 } } */
+/* { dg-final { scan-assembler-times {locghine\t} 2 } } */
+
+int test_eq (__int128 x, __int128 y) { return x == y; }
+
+int test_equ (unsigned __int128 x, unsigned __int128 y) { return x == y; }
+
+int test_ne (__int128 x, __int128 y) { return x != y; }
+
+int test_neu (unsigned __int128 x, unsigned __int128 y) { return x != y; }
diff --git a/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c b/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c
new file mode 100644
index 00000000000..d7b03828083
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -march=z17" } */
+/* { dg-final { scan-assembler-times {vecq\t} 8 } } */
+/* { dg-final { scan-assembler-times {veclq\t} 4 } } */
+/* { dg-final { scan-assembler-times {locghile\t} 1 } } LE */
+/* { dg-final { scan-assembler-times {slbgr\t} 1 } } LEU */
+/* { dg-final { scan-assembler-times {locghil\t} 2 } } LT LTU */
+/* { dg-final { scan-assembler-times {locghihe\t} 2 } } GE GEU */
+/* { dg-final { scan-assembler-times {locghih\t} 1 } } GT */
+/* { dg-final { scan-assembler-times {alcgr\t} 1 } } GTU */
+/* { dg-final { scan-assembler-times {locghie\t} 2 } } EQ EQU */
+/* { dg-final { scan-assembler-times {locghine\t} 2 } } NE NEU */
+
+int test_le (__int128 x, __int128 y) { return x <= y; }
+int test_leu (unsigned __int128 x, unsigned __int128 y) { return x <= y; }
+int test_lt (__int128 x, __int128 y) { return x < y; }
+int test_ltu (unsigned __int128 x, unsigned __int128 y) { return x < y; }
+int test_ge (__int128 x, __int128 y) { return x >= y; }
+int test_geu (unsigned __int128 x, unsigned __int128 y) { return x >= y; }
+int test_gt (__int128 x, __int128 y) { return x > y; }
+int test_gtu (unsigned __int128 x, unsigned __int128 y) { return x > y; }
+int test_eq (__int128 x, __int128 y) { return x == y; }
+int test_equ (unsigned __int128 x, unsigned __int128 y) { return x == y; }
+int test_ne (__int128 x, __int128 y) { return x != y; }
+int test_neu (unsigned __int128 x, unsigned __int128 y) { return x != y; }
-- 
2.49.0

Reply via email to