Implement the standard reduction optabs for s390 and add tests.  Bootstrapped and regtested on s390.  Ok for trunk?
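To illustrate what the new expanders enable (this snippet only mirrors the added tests and is not part of the patch), a plain sum reduction such as the one below is now recognized by the vectorizer and reduced via .REDUC_PLUS in the optimized tree dump when compiled with -O3 -mzarch -march=z14 -ftree-vectorize:

int
reduce_add_int (int *p)
{
  /* The loop body is vectorized to V4SI adds; the new
     reduc_plus_scal_v4si expander then folds the final vector of
     partial sums into the scalar result.  */
  int sum = 0;
  for (int i = 0; i != 16; i++)
    sum += p[i];
  return sum;
}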
Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>

gcc/ChangeLog:

	* config/s390/vector.md (reduc_plus_scal_<mode>): Implement.
	(reduc_plus_scal_v2df): Implement.
	(reduc_plus_scal_v4sf): Implement.
	(REDUC_FMINMAX): New int iterator.
	(reduc_fminmax_name): New int attribute.
	(reduc_minmax): New code iterator.
	(reduc_minmax_name): New code attribute.
	(reduc_<reduc_fminmax_name>_scal_v2df): Implement.
	(reduc_<reduc_fminmax_name>_scal_v4sf): Implement.
	(reduc_<reduc_minmax_name>_scal_v2df): Implement.
	(reduc_<reduc_minmax_name>_scal_v4sf): Implement.
	(REDUCBIN): New code iterator.
	(reduc_bin_insn): New code attribute.
	(reduc_<reduc_bin_insn>_scal_v2di): Implement.
	(reduc_<reduc_bin_insn>_scal_v4si): Implement.
	(reduc_<reduc_bin_insn>_scal_v8hi): Implement.
	(reduc_<reduc_bin_insn>_scal_v16qi): Implement.

gcc/testsuite/ChangeLog:

	* lib/target-supports.exp: Add s390 to vect_logical_reduc targets.
	* gcc.target/s390/vector/reduc-binops-1.c: New test.
	* gcc.target/s390/vector/reduc-minmax-1.c: New test.
	* gcc.target/s390/vector/reduc-plus-1.c: New test.
---
 gcc/config/s390/vector.md                     | 293 +++++++++++++++++-
 .../gcc.target/s390/vector/reduc-binops-1.c   |  40 +++
 .../gcc.target/s390/vector/reduc-minmax-1.c   | 234 ++++++++++++++
 .../gcc.target/s390/vector/reduc-plus-1.c     | 152 +++++++++
 gcc/testsuite/lib/target-supports.exp         |   4 +-
 5 files changed, 717 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 26753c099cda..98427b37e884 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -3572,11 +3572,6 @@
   "veval\t%v0,%v1,%v2,%v3,%b4"
   [(set_attr "op_type" "VRI")])
 
-; reduc_smin
-; reduc_smax
-; reduc_umin
-; reduc_umax
-
 ; vec_pack_sfix_trunc: convert + pack ?
 ; vec_pack_ufix_trunc
 ; vec_unpacks_float_hi
@@ -3627,3 +3622,291 @@
                       (const_int 4)]
                      UNSPEC_FMIN))]
   "TARGET_VXE")
+
+; reduc_plus
+(define_expand "reduc_plus_scal_<mode>"
+  [(set (match_dup 4)
+        (unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand")
+                      (match_dup 2)]
+                     UNSPEC_VEC_VSUM))
+   (set (match_dup 5)
+        (unspec:V2DI [(match_dup 4) (match_dup 3)] UNSPEC_VEC_VSUMQ))
+   (set (match_operand:<non_vec> 0 "register_operand")
+        (vec_select:<non_vec> (match_dup 6)
+                              (parallel [(match_dup 7)])))]
+  "TARGET_VX"
+{
+  operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+  operands[3] = simplify_gen_subreg (V4SImode, operands[2], <MODE>mode, 0);
+  operands[4] = gen_reg_rtx (V4SImode);
+  operands[5] = gen_reg_rtx (V2DImode);
+  operands[6] = simplify_gen_subreg (<MODE>mode, operands[5], V2DImode, 0);
+  operands[7] = GEN_INT (16 / GET_MODE_SIZE (<non_vec>mode) - 1);
+})
+
+(define_expand "reduc_plus_scal_<mode>"
+  [(set (match_dup 3)
+        (unspec:V2DI [(match_operand:VI_HW_SD 1 "register_operand")
+                      (match_dup 2)]
+                     UNSPEC_VEC_VSUMQ))
+   (set (match_operand:<non_vec> 0 "register_operand")
+        (vec_select:<non_vec> (match_dup 4)
+                              (parallel [(match_dup 5)])))]
+  "TARGET_VX"
+{
+  operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+  operands[3] = gen_reg_rtx (V2DImode);
+  operands[4] = simplify_gen_subreg (<MODE>mode, operands[3], V2DImode, 0);
+  operands[5] = GEN_INT (16 / GET_MODE_SIZE (<non_vec>mode) - 1);
+})
+
+(define_expand "reduc_plus_scal_v2df"
+  [(set (match_dup 2)
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3) (plus:V2DF (match_dup 1) (match_dup 2)))
+   (set (match_operand:DF 0 "register_operand")
+        (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_plus_scal_v4sf"
+  [(set (match_dup 2)
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3) (plus:V4SF (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+        (unspec:V4SF [(match_dup 3) (match_dup 3) (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5) (plus:V4SF (match_dup 3) (match_dup 4)))
+   (set (match_operand:SF 0 "register_operand")
+        (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+  "TARGET_VXE"
+{
+  operands[2] = gen_reg_rtx (V4SFmode);
+  operands[3] = gen_reg_rtx (V4SFmode);
+  operands[4] = gen_reg_rtx (V4SFmode);
+  operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; reduc_fmin, reduc_fmax, reduc_smin, reduc_smax
+
+(define_int_iterator REDUC_FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+(define_int_attr reduc_fminmax_name [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
+(define_code_iterator reduc_minmax [smin smax])
+(define_code_attr reduc_minmax_name [(smin "smin") (smax "smax")])
+
+(define_expand "reduc_<reduc_fminmax_name>_scal_v2df"
+  [(set (match_dup 2)
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (unspec:V2DF [(match_dup 1) (match_dup 2) (const_int 4)] REDUC_FMINMAX))
+   (set (match_operand:DF 0 "register_operand" "")
+        (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_<reduc_fminmax_name>_scal_v4sf"
+  [(set (match_dup 2)
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (unspec:V4SF [(match_dup 1) (match_dup 2) (const_int 4)] REDUC_FMINMAX))
+   (set (match_dup 4)
+        (unspec:V4SF [(match_dup 3)
+                      (match_dup 3)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+        (unspec:V4SF [(match_dup 3) (match_dup 4) (const_int 4)] REDUC_FMINMAX))
+   (set (match_operand:SF 0 "register_operand")
+        (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+  "TARGET_VXE"
+{
+  operands[2] = gen_reg_rtx (V4SFmode);
+  operands[3] = gen_reg_rtx (V4SFmode);
+  operands[4] = gen_reg_rtx (V4SFmode);
+  operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+(define_expand "reduc_<reduc_minmax_name>_scal_v2df"
+  [(set (match_dup 2)
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (reduc_minmax:V2DF (match_dup 1) (match_dup 2)))
+   (set (match_operand:DF 0 "register_operand" "")
+        (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_<reduc_minmax_name>_scal_v4sf"
+  [(set (match_dup 2)
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (reduc_minmax:V4SF (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+        (unspec:V4SF [(match_dup 3)
+                      (match_dup 3)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+        (reduc_minmax:V4SF (match_dup 3) (match_dup 4)))
+   (set (match_operand:SF 0 "register_operand" "")
+        (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+  "TARGET_VXE"
+{
+  operands[2] = gen_reg_rtx (V4SFmode);
+  operands[3] = gen_reg_rtx (V4SFmode);
+  operands[4] = gen_reg_rtx (V4SFmode);
+  operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; reduc_and, reduc_ior, reduc_xor
+; reduc_smin, reduc_smax, reduc_umin, reduc_umax
+
+(define_code_iterator REDUCBIN [and xor ior smin smax umin umax])
+(define_code_attr reduc_bin_insn [(and "and") (xor "xor") (ior "ior")
+                                  (smin "smin") (smax "smax")
+                                  (umin "umin") (umax "umax")])
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v2di"
+  [(set (match_dup 2)
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (REDUCBIN:V2DI (match_dup 1) (match_dup 2)))
+   (set (match_operand:DI 0 "register_operand" "")
+        (vec_select:DI (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DImode);
+  operands[3] = gen_reg_rtx (V2DImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v4si"
+  [(set (match_dup 2)
+        (unspec:V4SI [(match_operand:V4SI 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (REDUCBIN:V4SI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+        (unspec:V4SI [(match_dup 3)
+                      (match_dup 3)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+        (REDUCBIN:V4SI (match_dup 3) (match_dup 4)))
+   (set (match_operand:SI 0 "register_operand" "")
+        (vec_select:SI (match_dup 5) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V4SImode);
+  operands[3] = gen_reg_rtx (V4SImode);
+  operands[4] = gen_reg_rtx (V4SImode);
+  operands[5] = gen_reg_rtx (V4SImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v8hi"
+  [(set (match_dup 2)
+        (unspec:V8HI [(match_operand:V8HI 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 2)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (REDUCBIN:V8HI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+        (unspec:V8HI [(match_dup 3)
+                      (match_dup 3)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+        (REDUCBIN:V8HI (match_dup 3) (match_dup 4)))
+   (set (match_dup 6)
+        (unspec:V8HI [(match_dup 5)
+                      (match_dup 5)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 7)
+        (REDUCBIN:V8HI (match_dup 5) (match_dup 6)))
+   (set (match_operand:HI 0 "register_operand" "")
+        (vec_select:HI (match_dup 7) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V8HImode);
+  operands[3] = gen_reg_rtx (V8HImode);
+  operands[4] = gen_reg_rtx (V8HImode);
+  operands[5] = gen_reg_rtx (V8HImode);
+  operands[6] = gen_reg_rtx (V8HImode);
+  operands[7] = gen_reg_rtx (V8HImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v16qi"
+  [(set (match_dup 2)
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
+                       (match_dup 1)
+                       (const_int 1)]
+                      UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+        (REDUCBIN:V16QI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+        (unspec:V16QI [(match_dup 3)
+                       (match_dup 3)
+                       (const_int 2)]
+                      UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+        (REDUCBIN:V16QI (match_dup 3) (match_dup 4)))
+   (set (match_dup 6)
+        (unspec:V16QI [(match_dup 5)
+                       (match_dup 5)
+                       (const_int 4)]
+                      UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 7)
+        (REDUCBIN:V16QI (match_dup 5) (match_dup 6)))
+   (set (match_dup 8)
+        (unspec:V16QI [(match_dup 7)
+                       (match_dup 7)
+                       (const_int 8)]
+                      UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 9)
+        (REDUCBIN:V16QI (match_dup 7) (match_dup 8)))
+   (set (match_operand:QI 0 "register_operand" "")
+        (vec_select:QI (match_dup 9) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V16QImode);
+  operands[3] = gen_reg_rtx (V16QImode);
+  operands[4] = gen_reg_rtx (V16QImode);
+  operands[5] = gen_reg_rtx (V16QImode);
+  operands[6] = gen_reg_rtx (V16QImode);
+  operands[7] = gen_reg_rtx (V16QImode);
+  operands[8] = gen_reg_rtx (V16QImode);
+  operands[9] = gen_reg_rtx (V16QImode);
+})
diff --git a/gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c b/gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c
new file mode 100644
index 000000000000..efd3294a7350
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define T(X,N)					\
+  unsigned X					\
+  reduce_and_##X (unsigned X *in)		\
+  {						\
+    unsigned X acc = (unsigned X)-1;		\
+    for (int i = 0; i < N; i++)			\
+      acc &= in[i];				\
+    return acc;					\
+  }						\
+  unsigned X					\
+  reduce_ior_##X (unsigned X *in)		\
+  {						\
+    unsigned X acc = 0;				\
+    for (int i = 0; i < N; i++)			\
+      acc |= in[i];				\
+    return acc;					\
+  }						\
+  unsigned X					\
+  reduce_xor_##X (unsigned X *in)		\
+  {						\
+    unsigned X acc = 0;				\
+    for (int i = 0; i < N; i++)			\
+      acc ^= in[i];				\
+    return acc;					\
+  }
+
+T(char,16)
+
+T(short, 8)
+
+T(int,4)
+
+T(long,4)
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_AND" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_IOR" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_XOR" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c b/gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c
new file mode 100644
index 000000000000..f23e96f79fe0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c
@@ -0,0 +1,234 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+
+/* unsigned integers */
+
+unsigned char
+reduce_umax_char (unsigned char *p)
+{
+  unsigned char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned char
+reduce_umin_char (unsigned char *p)
+{
+  unsigned char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+unsigned short
+reduce_umax_short (unsigned short *p)
+{
+  unsigned short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned short
+reduce_umin_short (unsigned short *p)
+{
+  unsigned short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+unsigned int
+reduce_umax_int (unsigned int* p)
+{
+  unsigned int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned int
+reduce_umin_int (unsigned int* p)
+{
+  unsigned int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+unsigned long
+reduce_umax_long (unsigned long* p)
+{
+  unsigned long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned long
+reduce_umin_long (unsigned long* p)
+{
+  unsigned long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+/* signed integers */
+
+signed char
+reduce_smax_char (signed char *p)
+{
+  signed char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed char
+reduce_smin_char (signed char *p)
+{
+  signed char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+signed short
+reduce_smax_short (signed short *p)
+{
+  signed short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed short
+reduce_smin_short (signed short *p)
+{
+  signed short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+signed int
+reduce_smax_int (signed int* p)
+{
+  signed int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed int
+reduce_smin_int (signed int* p)
+{
+  signed int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+signed long
+reduce_smax_long (signed long* p)
+{
+  signed long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed long
+reduce_smin_long (signed long* p)
+{
+  signed long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+float
+__attribute__((optimize("Ofast")))
+reduce_smax_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+float
+__attribute__((optimize("Ofast")))
+reduce_smin_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+double
+__attribute__((optimize("Ofast")))
+reduce_smax_double (double* p)
+{
+  double res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+double
+__attribute__((optimize("Ofast")))
+reduce_smin_double (double* p)
+{
+  double res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+float
+reduce_fmax_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = __builtin_fmaxf (res, p[i]);
+  return res;
+}
+
+float
+reduce_fmin_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = __builtin_fminf (res, p[i]);
return res; +} + +double +reduce_fmax_double (double* p) +{ + double res = p[0]; + for (int i = 0; i != 4; i++) + res = __builtin_fmax (res, p[i]); + return res; +} + +double +reduce_fmin_double (double* p) +{ + double res = p[0]; + for (int i = 0; i != 4; i++) + res = __builtin_fmin (res, p[i]); + return res; +} + +/* { dg-final { scan-tree-dump-times "\.REDUC_MAX" 10 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.REDUC_MIN" 10 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.REDUC_FMAX" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.REDUC_FMIN" 2 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c b/gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c new file mode 100644 index 000000000000..ddbab23fecc6 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c @@ -0,0 +1,152 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch -march=z14 -ftree-vectorize -fdump-tree-optimized" } */ +/* { dg-do run { target { s390_z14_hw } } } */ + +/* signed integers */ + +signed char +__attribute__((noipa, optimize("Ofast"))) +reduce_add_char (signed char* p) +{ + signed char sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +short +__attribute__((noipa, optimize("Ofast"))) +reduce_add_short (short* p) +{ + short sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +int +__attribute__((noipa, optimize("Ofast"))) +reduce_add_int (int* p) +{ + int sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +long +__attribute__((noipa, optimize("Ofast"))) +reduce_add_long (long* p) +{ + long sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +/* unsigned integers */ + +unsigned char +__attribute__((noipa, optimize("Ofast"))) +reduce_add_uchar (unsigned char* p) +{ + unsigned char sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +unsigned short +__attribute__((noipa, optimize("Ofast"))) +reduce_add_ushort (unsigned short* p) +{ + unsigned short sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +unsigned int +__attribute__((noipa, optimize("Ofast"))) +reduce_add_uint (unsigned int* p) +{ + unsigned int sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +unsigned long +__attribute__((noipa, optimize("Ofast"))) +reduce_add_ulong (unsigned long* p) +{ + unsigned long sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +/* floating point */ + +float +__attribute__((noipa, optimize("Ofast"))) +reduce_add_float (float* p) +{ + float sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +double +__attribute__((noipa, optimize("Ofast"))) +reduce_add_double (double* p) +{ + double sum = 0; + for (int i = 0; i != 16; i++) + sum += p[i]; + return sum; +} + +int +main() +{ + signed char chararr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}; + signed short shortarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}; + signed int intarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}; + signed long longarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}; + + unsigned char uchararr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; + unsigned short ushortarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; + unsigned int uintarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; + unsigned long ulongarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; + + float floatarr[] = 
+  float floatarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+  double doublearr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+
+  if (reduce_add_char (chararr) != (-136 & 0xff))
+    __builtin_abort();
+  if (reduce_add_short (shortarr) != -136)
+    __builtin_abort();
+  if (reduce_add_int (intarr) != -136)
+    __builtin_abort();
+  if (reduce_add_long (longarr) != -136)
+    __builtin_abort();
+
+  if (reduce_add_uchar (uchararr) != 136)
+    __builtin_abort();
+  if (reduce_add_ushort (ushortarr) != 136)
+    __builtin_abort();
+  if (reduce_add_uint (uintarr) != 136)
+    __builtin_abort();
+  if (reduce_add_ulong (ulongarr) != 136)
+    __builtin_abort();
+
+  if (reduce_add_float (floatarr) != 136)
+    __builtin_abort();
+  if (reduce_add_double (doublearr) != -136)
+    __builtin_abort();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_PLUS" 10 "optimized" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 956bc0bc7ca4..48c1be73e92a 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9944,7 +9944,9 @@ proc check_effective_target_vect_logical_reduc { } {
 	     || [istarget amdgcn-*-*]
 	     || [check_effective_target_riscv_v]
 	     || [check_effective_target_loongarch_sx]
-	     || [check_effective_target_x86]}]
+	     || [check_effective_target_x86]
+	     || ([istarget s390*-*-*]
+		 && [check_effective_target_s390_vx])}]
 }
 
 # Return 1 if the target supports the fold_extract_last optab.
-- 
2.43.5