Implement the standard reduction optabs for s390 and add tests.  Bootstrapped and regtested on s390.  Ok for trunk?
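To illustrate what the new expanders enable (this snippet only mirrors the added tests and is not part of the patch), a plain sum reduction such as the one below is now recognized by the vectorizer and reduced via .REDUC_PLUS in the optimized tree dump when compiled with -O3 -mzarch -march=z14 -ftree-vectorize:

int
reduce_add_int (int *p)
{
  /* The loop body is vectorized to V4SI adds; the new
     reduc_plus_scal_v4si expander then folds the final vector of
     partial sums into the scalar result.  */
  int sum = 0;
  for (int i = 0; i != 16; i++)
    sum += p[i];
  return sum;
}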
Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>

gcc/ChangeLog:

	* config/s390/vector.md (reduc_plus_scal_<mode>): Implement.
	(reduc_plus_scal_v2df): Implement.
	(reduc_plus_scal_v4sf): Implement.
	(REDUC_FMINMAX): New int iterator.
	(reduc_fminmax_name): New int attribute.
	(reduc_minmax): New code iterator.
	(reduc_minmax_name): New code attribute.
	(reduc_<reduc_fminmax_name>_scal_v2df): Implement.
	(reduc_<reduc_fminmax_name>_scal_v4sf): Implement.
	(reduc_<reduc_minmax_name>_scal_v2df): Implement.
	(reduc_<reduc_minmax_name>_scal_v4sf): Implement.
	(REDUCBIN): New code iterator.
	(reduc_bin_insn): New code attribute.
	(reduc_<reduc_bin_insn>_scal_v2di): Implement.
	(reduc_<reduc_bin_insn>_scal_v4si): Implement.
	(reduc_<reduc_bin_insn>_scal_v8hi): Implement.
	(reduc_<reduc_bin_insn>_scal_v16qi): Implement.

gcc/testsuite/ChangeLog:

	* lib/target-supports.exp: Add s390 to vect_logical_reduc targets.
	* gcc.target/s390/vector/reduc-binops-1.c: New test.
	* gcc.target/s390/vector/reduc-minmax-1.c: New test.
	* gcc.target/s390/vector/reduc-plus-1.c: New test.
---
 gcc/config/s390/vector.md                     | 293 +++++++++++++++++-
 .../gcc.target/s390/vector/reduc-binops-1.c   |  40 +++
 .../gcc.target/s390/vector/reduc-minmax-1.c   | 234 ++++++++++++++
 .../gcc.target/s390/vector/reduc-plus-1.c     | 152 +++++++++
 gcc/testsuite/lib/target-supports.exp         |   4 +-
 5 files changed, 717 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 26753c099cda..98427b37e884 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -3572,11 +3572,6 @@
   "veval\t%v0,%v1,%v2,%v3,%b4"
   [(set_attr "op_type" "VRI")])
 
-; reduc_smin
-; reduc_smax
-; reduc_umin
-; reduc_umax
-
 ; vec_pack_sfix_trunc: convert + pack ?
 ; vec_pack_ufix_trunc
 ; vec_unpacks_float_hi
@@ -3627,3 +3622,291 @@
                       (const_int 4)]
                      UNSPEC_FMIN))]
   "TARGET_VXE")
+
+; reduc_plus
+(define_expand "reduc_plus_scal_<mode>"
+  [(set (match_dup 4)
+        (unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand")
+                      (match_dup 2)]
+                     UNSPEC_VEC_VSUM))
+   (set (match_dup 5)
+        (unspec:V2DI [(match_dup 4) (match_dup 3)] UNSPEC_VEC_VSUMQ))
+   (set (match_operand:<non_vec> 0 "register_operand")
+        (vec_select:<non_vec> (match_dup 6)
+                              (parallel [(match_dup 7)])))]
+  "TARGET_VX"
+{
+  operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+  operands[3] = simplify_gen_subreg (V4SImode, operands[2], <MODE>mode, 0);
+  operands[4] = gen_reg_rtx (V4SImode);
+  operands[5] = gen_reg_rtx (V2DImode);
+  operands[6] = simplify_gen_subreg (<MODE>mode, operands[5], V2DImode, 0);
+  operands[7] = GEN_INT (16 / GET_MODE_SIZE (<non_vec>mode) - 1);
+})
+
+(define_expand "reduc_plus_scal_<mode>"
+  [(set (match_dup 3)
+        (unspec:V2DI [(match_operand:VI_HW_SD 1 "register_operand")
+                      (match_dup 2)]
+                     UNSPEC_VEC_VSUMQ))
+   (set (match_operand:<non_vec> 0 "register_operand")
+        (vec_select:<non_vec> (match_dup 4)
+                              (parallel [(match_dup 5)])))]
+  "TARGET_VX"
+{
+  operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+  operands[3] = gen_reg_rtx (V2DImode);
+  operands[4] = simplify_gen_subreg (<MODE>mode, operands[3], V2DImode, 0);
+  operands[5] = GEN_INT (16 / GET_MODE_SIZE (<non_vec>mode) - 1);
+})
+
+(define_expand "reduc_plus_scal_v2df"
+  [(set (match_dup 2)
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3) (plus:V2DF (match_dup 1) (match_dup 2)))
+   (set (match_operand:DF 0 "register_operand")
+        (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_plus_scal_v4sf"
+  [(set (match_dup 2)
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3) (plus:V4SF (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+        (unspec:V4SF [(match_dup 3) (match_dup 3) (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5) (plus:V4SF (match_dup 3) (match_dup 4)))
+   (set (match_operand:SF 0 "register_operand")
+        (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+  "TARGET_VXE"
+{
+  operands[2] = gen_reg_rtx (V4SFmode);
+  operands[3] = gen_reg_rtx (V4SFmode);
+  operands[4] = gen_reg_rtx (V4SFmode);
+  operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; reduc_fmin, reduc_fmax, reduc_smin, reduc_smax
+
+(define_int_iterator REDUC_FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+(define_int_attr reduc_fminmax_name [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
+(define_code_iterator reduc_minmax [smin smax])
+(define_code_attr reduc_minmax_name [(smin "smin") (smax "smax")])
+
+(define_expand "reduc_<reduc_fminmax_name>_scal_v2df"
+  [(set (match_dup 2)
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (unspec:V2DF [(match_dup 1) (match_dup 2) (const_int 4)] REDUC_FMINMAX))
+   (set (match_operand:DF 0 "register_operand" "")
+        (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_<reduc_fminmax_name>_scal_v4sf"
+  [(set (match_dup 2)
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (unspec:V4SF [(match_dup 1) (match_dup 2) (const_int 4)] REDUC_FMINMAX))
+   (set (match_dup 4)
+        (unspec:V4SF [(match_dup 3)
+                      (match_dup 3)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+        (unspec:V4SF [(match_dup 3) (match_dup 4) (const_int 4)] REDUC_FMINMAX))
+   (set (match_operand:SF 0 "register_operand")
+        (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+  "TARGET_VXE"
+{
+  operands[2] = gen_reg_rtx (V4SFmode);
+  operands[3] = gen_reg_rtx (V4SFmode);
+  operands[4] = gen_reg_rtx (V4SFmode);
+  operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+(define_expand "reduc_<reduc_minmax_name>_scal_v2df"
+  [(set (match_dup 2)
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (reduc_minmax:V2DF (match_dup 1) (match_dup 2)))
+   (set (match_operand:DF 0 "register_operand" "")
+        (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_<reduc_minmax_name>_scal_v4sf"
+  [(set (match_dup 2)
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (reduc_minmax:V4SF (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+        (unspec:V4SF [(match_dup 3)
+                      (match_dup 3)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+        (reduc_minmax:V4SF (match_dup 3) (match_dup 4)))
+   (set (match_operand:SF 0 "register_operand" "")
+        (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+  "TARGET_VXE"
+{
+  operands[2] = gen_reg_rtx (V4SFmode);
+  operands[3] = gen_reg_rtx (V4SFmode);
+  operands[4] = gen_reg_rtx (V4SFmode);
+  operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; reduc_and, reduc_ior, reduc_xor
+; reduc_smin, reduc_smax, reduc_umin, reduc_umax
+
+(define_code_iterator REDUCBIN [and xor ior smin smax umin umax])
+(define_code_attr reduc_bin_insn [(and "and") (xor "xor") (ior "ior")
+                                  (smin "smin") (smax "smax")
+                                  (umin "umin") (umax "umax")])
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v2di"
+  [(set (match_dup 2)
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (REDUCBIN:V2DI (match_dup 1) (match_dup 2)))
+   (set (match_operand:DI 0 "register_operand" "")
+        (vec_select:DI (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DImode);
+  operands[3] = gen_reg_rtx (V2DImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v4si"
+  [(set (match_dup 2)
+        (unspec:V4SI [(match_operand:V4SI 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (REDUCBIN:V4SI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+        (unspec:V4SI [(match_dup 3)
+                      (match_dup 3)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+        (REDUCBIN:V4SI (match_dup 3) (match_dup 4)))
+   (set (match_operand:SI 0 "register_operand" "")
+        (vec_select:SI (match_dup 5) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V4SImode);
+  operands[3] = gen_reg_rtx (V4SImode);
+  operands[4] = gen_reg_rtx (V4SImode);
+  operands[5] = gen_reg_rtx (V4SImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v8hi"
+  [(set (match_dup 2)
+        (unspec:V8HI [(match_operand:V8HI 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 2)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (REDUCBIN:V8HI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+        (unspec:V8HI [(match_dup 3)
+                      (match_dup 3)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+        (REDUCBIN:V8HI (match_dup 3) (match_dup 4)))
+   (set (match_dup 6)
+        (unspec:V8HI [(match_dup 5)
+                      (match_dup 5)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 7)
+        (REDUCBIN:V8HI (match_dup 5) (match_dup 6)))
+   (set (match_operand:HI 0 "register_operand" "")
+        (vec_select:HI (match_dup 7) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V8HImode);
+  operands[3] = gen_reg_rtx (V8HImode);
+  operands[4] = gen_reg_rtx (V8HImode);
+  operands[5] = gen_reg_rtx (V8HImode);
+  operands[6] = gen_reg_rtx (V8HImode);
+  operands[7] = gen_reg_rtx (V8HImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v16qi"
+  [(set (match_dup 2)
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
+                       (match_dup 1)
+                       (const_int 1)]
+                      UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (REDUCBIN:V16QI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+        (unspec:V16QI [(match_dup 3)
+                       (match_dup 3)
+                       (const_int 2)]
+                      UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+        (REDUCBIN:V16QI (match_dup 3) (match_dup 4)))
+   (set (match_dup 6)
+        (unspec:V16QI [(match_dup 5)
+                       (match_dup 5)
+                       (const_int 4)]
+                      UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 7)
+        (REDUCBIN:V16QI (match_dup 5) (match_dup 6)))
+   (set (match_dup 8)
+        (unspec:V16QI [(match_dup 7)
+                       (match_dup 7)
+                       (const_int 8)]
+                      UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 9)
+        (REDUCBIN:V16QI (match_dup 7) (match_dup 8)))
+   (set (match_operand:QI 0 "register_operand" "")
+        (vec_select:QI (match_dup 9) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V16QImode);
+  operands[3] = gen_reg_rtx (V16QImode);
+  operands[4] = gen_reg_rtx (V16QImode);
+  operands[5] = gen_reg_rtx (V16QImode);
+  operands[6] = gen_reg_rtx (V16QImode);
+  operands[7] = gen_reg_rtx (V16QImode);
+  operands[8] = gen_reg_rtx (V16QImode);
+  operands[9] = gen_reg_rtx (V16QImode);
+})
diff --git a/gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c b/gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c
new file mode 100644
index 000000000000..efd3294a7350
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define T(X,N)					\
+  unsigned X					\
+  reduce_and_##X (unsigned X *in)		\
+  {						\
+    unsigned X acc = (unsigned X)-1;		\
+    for (int i = 0; i < N; i++)			\
+      acc &= in[i];				\
+    return acc;					\
+  }						\
+  unsigned X					\
+  reduce_ior_##X (unsigned X *in)		\
+  {						\
+    unsigned X acc = 0;				\
+    for (int i = 0; i < N; i++)			\
+      acc |= in[i];				\
+    return acc;					\
+  }						\
+  unsigned X					\
+  reduce_xor_##X (unsigned X *in)		\
+  {						\
+    unsigned X acc = 0;				\
+    for (int i = 0; i < N; i++)			\
+      acc ^= in[i];				\
+    return acc;					\
+  }
+
+T(char,16)
+
+T(short, 8)
+
+T(int,4)
+
+T(long,4)
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_AND" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_IOR" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_XOR" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c b/gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c
new file mode 100644
index 000000000000..f23e96f79fe0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c
@@ -0,0 +1,234 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+
+/* unsigned integers */
+
+unsigned char
+reduce_umax_char (unsigned char *p)
+{
+  unsigned char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned char
+reduce_umin_char (unsigned char *p)
+{
+  unsigned char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+unsigned short
+reduce_umax_short (unsigned short *p)
+{
+  unsigned short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned short
+reduce_umin_short (unsigned short *p)
+{
+  unsigned short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+unsigned int
+reduce_umax_int (unsigned int* p)
+{
+  unsigned int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned int
+reduce_umin_int (unsigned int* p)
+{
+  unsigned int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+unsigned long
+reduce_umax_long (unsigned long* p)
+{
+  unsigned long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned long
+reduce_umin_long (unsigned long* p)
+{
+  unsigned long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+/* signed integers */
+
+signed char
+reduce_smax_char (signed char *p)
+{
+  signed char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed char
+reduce_smin_char (signed char *p)
+{
+  signed char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+signed short
+reduce_smax_short (signed short *p)
+{
+  signed short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed short
+reduce_smin_short (signed short *p)
+{
+  signed short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+signed int
+reduce_smax_int (signed int* p)
+{
+  signed int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed int
+reduce_smin_int (signed int* p)
+{
+  signed int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+signed long
+reduce_smax_long (signed long* p)
+{
+  signed long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed long
+reduce_smin_long (signed long* p)
+{
+  signed long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+float
+__attribute__((optimize("Ofast")))
+reduce_smax_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+float
+__attribute__((optimize("Ofast")))
+reduce_smin_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+double
+__attribute__((optimize("Ofast")))
+reduce_smax_double (double* p)
+{
+  double res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+double
+__attribute__((optimize("Ofast")))
+reduce_smin_double (double* p)
+{
+  double res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+float
+reduce_fmax_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = __builtin_fmaxf (res, p[i]);
+  return res;
+}
+
+float
+reduce_fmin_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = __builtin_fminf (res, p[i]);
return res; +} + +double +reduce_fmax_double (double* p) +{ + double res = p[0]; + for (int i = 0; i != 4; i++) + res = __builtin_fmax (res, p[i]); + return res; +} + +double +reduce_fmin_double (double* p) +{ + double res = p[0]; + for (int i = 0; i != 4; i++) + res = __builtin_fmin (res, p[i]); + return res; +} + +/* { dg-final { scan-tree-dump-times "\.REDUC_MAX" 10 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.REDUC_MIN" 10 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.REDUC_FMAX" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.REDUC_FMIN" 2 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c b/gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c new file mode 100644 index 000000000000..ddbab23fecc6 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c @@ -0,0 +1,152 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch -march=z14 -ftree-vectorize -fdump-tree-optimized" } */ +/* { dg-do run { target { s390_z14_hw } } } */ + +/* signed integers */ + +signed char +__attribute__((noipa, optimize("Ofast"))) +reduce_add_char (signed char* p) +{ + signed char sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +short +__attribute__((noipa, optimize("Ofast"))) +reduce_add_short (short* p) +{ + short sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +int +__attribute__((noipa, optimize("Ofast"))) +reduce_add_int (int* p) +{ + int sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +long +__attribute__((noipa, optimize("Ofast"))) +reduce_add_long (long* p) +{ + long sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +/* unsigned integers */ + +unsigned char +__attribute__((noipa, optimize("Ofast"))) +reduce_add_uchar (unsigned char* p) +{ + unsigned char sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +unsigned short +__attribute__((noipa, optimize("Ofast"))) +reduce_add_ushort (unsigned short* p) +{ + unsigned short sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +unsigned int +__attribute__((noipa, optimize("Ofast"))) +reduce_add_uint (unsigned int* p) +{ + unsigned int sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +unsigned long +__attribute__((noipa, optimize("Ofast"))) +reduce_add_ulong (unsigned long* p) +{ + unsigned long sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +/* floating point */ + +float +__attribute__((noipa, optimize("Ofast"))) +reduce_add_float (float* p) +{ + float sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +double +__attribute__((noipa, optimize("Ofast"))) +reduce_add_double (double* p) +{ + double sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +int +main() +{ + signed char chararr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}; + signed short shortarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}; + signed int intarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}; + signed long longarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}; + + unsigned char uchararr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; + unsigned short ushortarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; + unsigned int uintarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; + unsigned long ulongarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; + + float floatarr[] = 
+  float floatarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+  double doublearr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+
+  if (reduce_add_char (chararr) != (-136 & 0xff))
+    __builtin_abort();
+  if (reduce_add_short (shortarr) != -136)
+    __builtin_abort();
+  if (reduce_add_int (intarr) != -136)
+    __builtin_abort();
+  if (reduce_add_long (longarr) != -136)
+    __builtin_abort();
+
+  if (reduce_add_uchar (uchararr) != 136)
+    __builtin_abort();
+  if (reduce_add_ushort (ushortarr) != 136)
+    __builtin_abort();
+  if (reduce_add_uint (uintarr) != 136)
+    __builtin_abort();
+  if (reduce_add_ulong (ulongarr) != 136)
+    __builtin_abort();
+
+  if (reduce_add_float (floatarr) != 136)
+    __builtin_abort();
+  if (reduce_add_double (doublearr) != -136)
+    __builtin_abort();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_PLUS" 10 "optimized" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 956bc0bc7ca4..48c1be73e92a 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9944,7 +9944,9 @@ proc check_effective_target_vect_logical_reduc { } {
 	     || [istarget amdgcn-*-*]
 	     || [check_effective_target_riscv_v]
 	     || [check_effective_target_loongarch_sx]
-	     || [check_effective_target_x86]}]
+	     || [check_effective_target_x86]
+	     || ([istarget s390*-*-*]
+		 && [check_effective_target_s390_vx])}]
 }
 
 # Return 1 if the target supports the fold_extract_last optab.
-- 
2.43.5