https://gcc.gnu.org/g:188acc9e8bacdbba56ed2b32d09f191da759500a
commit r16-2055-g188acc9e8bacdbba56ed2b32d09f191da759500a
Author: Juergen Christ <jchr...@linux.ibm.com>
Date:   Mon Jun 23 12:04:20 2025 +0200

    s390: Add some missing vector patterns.

    Some patterns that are detected by the autovectorizer can be supported by
    s390. Add expanders such that autovectorization of these patterns works.

    RTL for the builtins used unspec to represent highpart multiplication.
    Replace this by the correct RTL to allow further simplification.

    gcc/ChangeLog:

            * config/s390/s390.md: Removed unused unspecs.
            * config/s390/vector.md (avg<mode>3_ceil): New expander.
            (uavg<mode>3_ceil): New expander.
            (smul<mode>3_highpart): New expander.
            (umul<mode>3_highpart): New expander.
            * config/s390/vx-builtins.md (vec_umulh<mode>): Remove unspec.
            (vec_smulh<mode>): Remove unspec.

    gcc/testsuite/ChangeLog:

            * gcc.target/s390/vector/pattern-avg-1.c: New test.
            * gcc.target/s390/vector/pattern-mulh-1.c: New test.

    Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>

Diff:
---
 gcc/config/s390/s390.md                            |  3 ---
 gcc/config/s390/vector.md                          | 26 +++++++++++++++++++
 gcc/config/s390/vx-builtins.md                     | 14 +++++------
 .../gcc.target/s390/vector/pattern-avg-1.c         | 26 +++++++++++++++++++
 .../gcc.target/s390/vector/pattern-mulh-1.c        | 29 ++++++++++++++++++++++
 5 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 97a4bdf96b2d..440ce93574f4 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -139,9 +139,6 @@
    UNSPEC_LCBB
 
    ; Vector
-   UNSPEC_VEC_SMULT_HI
-   UNSPEC_VEC_UMULT_HI
-   UNSPEC_VEC_SMULT_LO
    UNSPEC_VEC_SMULT_EVEN
    UNSPEC_VEC_UMULT_EVEN
    UNSPEC_VEC_SMULT_ODD
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 7251a76c3aea..7c706ecd89c7 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -3576,3 +3576,29 @@
 ; vec_unpacks_float_lo
 ; vec_unpacku_float_hi
 ; vec_unpacku_float_lo
+
+(define_expand "avg<mode>3_ceil"
+  [(set (match_operand:VIT_HW_VXE3_T 0 "register_operand")
+        (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand")
+                               (match_operand:VIT_HW_VXE3_T 2 "register_operand")]
+                              UNSPEC_VEC_AVG))]
+  "TARGET_VX")
+
+(define_expand "uavg<mode>3_ceil"
+  [(set (match_operand:VIT_HW_VXE3_T 0 "register_operand")
+        (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand")
+                               (match_operand:VIT_HW_VXE3_T 2 "register_operand")]
+                              UNSPEC_VEC_AVGU))]
+  "TARGET_VX")
+
+(define_expand "smul<mode>3_highpart"
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand")
+        (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand")
+                                      (match_operand:VIT_HW_VXE3_DT 2 "register_operand")))]
+  "TARGET_VX")
+
+(define_expand "umul<mode>3_highpart"
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand")
+        (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand")
+                                      (match_operand:VIT_HW_VXE3_DT 2 "register_operand")))]
+  "TARGET_VX")
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index a7bb7ff92f5e..9e5d18bcb8f4 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -982,20 +982,18 @@
 
 ; vmhb, vmhh, vmhf, vmhg, vmhq
 (define_insn "vec_smulh<mode>"
-  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v")
-        (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
-                                (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")]
-                               UNSPEC_VEC_SMULT_HI))]
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v")
+        (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
+                                      (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))]
   "TARGET_VX"
   "vmh<bhfgq>\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
 
 ; vmlhb, vmlhh, vmlhf, vmlhg, vmlhq
 (define_insn "vec_umulh<mode>"
-  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v")
-        (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
-                                (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")]
-                               UNSPEC_VEC_UMULT_HI))]
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v")
+        (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
+                                      (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))]
   "TARGET_VX"
   "vmlh<bhfgq>\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
new file mode 100644
index 000000000000..a15301aabe54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N) \
+  void \
+  avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a, \
+           signed T1 *__restrict b) \
+  { \
+    for (int i = 0; i < N; ++i) \
+      res[i] = ((signed T2)a[i] + b[i] + 1) >> 1; \
+  } \
+  \
+  void \
+  uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a, \
+            unsigned T1 *__restrict b) \
+  { \
+    for (int i = 0; i < N; ++i) \
+      res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1; \
+  }
+
+TEST(char,short,16)
+TEST(short,int,8)
+TEST(int,long,4)
+TEST(long,__int128,2)
+
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
new file mode 100644
index 000000000000..cd8e4e7d7a09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N,S) \
+  void \
+  mulh##T1 (signed T1 *__restrict res, \
+            signed T1 *__restrict l, \
+            signed T1 *__restrict r) \
+  { \
+    for (int i = 0; i < N; ++i) \
+      res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S); \
+  } \
+  \
+  void \
+  umulh##T1 (unsigned T1 *__restrict res, \
+             unsigned T1 *__restrict l, \
+             unsigned T1 *__restrict r) \
+  { \
+    for (int i = 0; i < N; ++i) \
+      res[i] = (unsigned T1) \
+        (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S); \
+  }
+
+TEST(char,short,16,8)
+TEST(short,int,8,16)
+TEST(int,long,4,32)
+TEST(long,__int128,2,64)
+
+/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */