On Wed, Jun 25, 2025 at 10:04:49AM +0200, Juergen Christ wrote: > Some patterns that are detected by the autovectorizer can be supported by > s390. Add expanders such that autovectorization of these patterns works. > > RTL for the builtins used unspec to represent highpart multiplication. > Replace this by the correct RTL to allow further simplification. > > Bootstrapped and regtested on s390. Ok for trunk? > > gcc/ChangeLog: > > * config/s390/s390.md: Removed unused unspecs. > * config/s390/vector.md (avg<mode>3_ceil): New expander. > (uavg<mode>3_ceil): New expander. > (smul<mode>3_highpart): New expander. > (umul<mode>3_highpart): New expander. > * config/s390/vx-builtins.md (vec_umulh<mode>): Remove unspec. > (vec_smulh<mode>): Remove unspec. > > gcc/testsuite/ChangeLog: > > * gcc.target/s390/vector/pattern-avg-1.c: New test. > * gcc.target/s390/vector/pattern-mulh-1.c: New test. > > Signed-off-by: Juergen Christ <jchr...@linux.ibm.com> > --- > gcc/config/s390/s390.md | 3 -- > gcc/config/s390/vector.md | 26 +++++++++++++++++ > gcc/config/s390/vx-builtins.md | 10 +++---- > .../gcc.target/s390/vector/pattern-avg-1.c | 26 +++++++++++++++++ > .../gcc.target/s390/vector/pattern-mulh-1.c | 29 +++++++++++++++++++ > 5 files changed, 85 insertions(+), 9 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c > create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c > > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md > index 97a4bdf96b2d..440ce93574f4 100644 > --- a/gcc/config/s390/s390.md > +++ b/gcc/config/s390/s390.md > @@ -139,9 +139,6 @@ > UNSPEC_LCBB > > ; Vector > - UNSPEC_VEC_SMULT_HI > - UNSPEC_VEC_UMULT_HI > - UNSPEC_VEC_SMULT_LO > UNSPEC_VEC_SMULT_EVEN > UNSPEC_VEC_UMULT_EVEN > UNSPEC_VEC_SMULT_ODD > diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md > index 6f4e1929eb80..8d7ca1a520f3 100644 > --- a/gcc/config/s390/vector.md > +++ b/gcc/config/s390/vector.md > @@ -3576,3 +3576,29 @@ 
> ; vec_unpacks_float_lo > ; vec_unpacku_float_hi > ; vec_unpacku_float_lo > + > +(define_expand "avg<mode>3_ceil" > + [(set (match_operand:VIT_HW_VXE3_T 0 > "register_operand" "=v") > + (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 > "register_operand" "v") > + (match_operand:VIT_HW_VXE3_T 2 > "register_operand" "v")] > + UNSPEC_VEC_AVG))] > + "TARGET_VX")
Expanders don't have constraints, so please drop the "=v"/"v" constraint strings from the match_operands here. > + > +(define_expand "uavg<mode>3_ceil" > + [(set (match_operand:VIT_HW_VXE3_T 0 > "register_operand" "=v") > + (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 > "register_operand" "v") > + (match_operand:VIT_HW_VXE3_T 2 > "register_operand" "v")] > + UNSPEC_VEC_AVGU))] > + "TARGET_VX") Ditto. > + > +(define_expand "smul<mode>3_highpart" > + [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" > "=v") > + (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 > "register_operand" "v") > + (match_operand:VIT_HW_VXE3_DT 2 > "register_operand" "v")))] > + "TARGET_VX") Ditto. > + > +(define_expand "umul<mode>3_highpart" > + [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" > "=v") > + (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 > "register_operand" "v") > + (match_operand:VIT_HW_VXE3_DT 2 > "register_operand" "v")))] > + "TARGET_VX") Ditto. > diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md > index a7bb7ff92f5e..2478f74e161a 100644 > --- a/gcc/config/s390/vx-builtins.md > +++ b/gcc/config/s390/vx-builtins.md > @@ -983,9 +983,8 @@ > ; vmhb, vmhh, vmhf, vmhg, vmhq > (define_insn "vec_smulh<mode>" > [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" > "=v") ^ ~~~~ Wrong indentation. > - (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 > "register_operand" "v") > - (match_operand:VIT_HW_VXE3_DT 2 > "register_operand" "v")] > - UNSPEC_VEC_SMULT_HI))] > + (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 > "register_operand" "v") > + (match_operand:VIT_HW_VXE3_DT 2 > "register_operand" "v")))] > "TARGET_VX" > "vmh<bhfgq>\t%v0,%v1,%v2" > [(set_attr "op_type" "VRR")]) > @@ -993,9 +992,8 @@ > ; vmlhb, vmlhh, vmlhf, vmlhg, vmlhq > (define_insn "vec_umulh<mode>" > [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" > "=v") ^ ~~~~ Wrong indentation. With those changes, OK for trunk. 
Thanks, Stefan > - (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 > "register_operand" "v") > - (match_operand:VIT_HW_VXE3_DT 2 > "register_operand" "v")] > - UNSPEC_VEC_UMULT_HI))] > + (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 > "register_operand" "v") > + (match_operand:VIT_HW_VXE3_DT 2 > "register_operand" "v")))] > "TARGET_VX" > "vmlh<bhfgq>\t%v0,%v1,%v2" > [(set_attr "op_type" "VRR")]) > diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c > b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c > new file mode 100644 > index 000000000000..a15301aabe54 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c > @@ -0,0 +1,26 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize > -fdump-tree-optimized" } */ > + > +#define TEST(T1,T2,N) \ > + void \ > + avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a, \ > + signed T1 *__restrict b) \ > + { \ > + for (int i = 0; i < N; ++i) \ > + res[i] = ((signed T2)a[i] + b[i] + 1) >> 1; \ > + } \ > + \ > + void \ > + uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a, \ > + unsigned T1 *__restrict b) \ > + { \ > + for (int i = 0; i < N; ++i) \ > + res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1; \ > + } > + > +TEST(char,short,16) > +TEST(short,int,8) > +TEST(int,long,4) > +TEST(long,__int128,2) > + > +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c > b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c > new file mode 100644 > index 000000000000..cd8e4e7d7a09 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize > -fdump-tree-optimized" } */ > + > +#define TEST(T1,T2,N,S) \ > + void \ > + mulh##T1 (signed T1 *__restrict res, \ > + signed T1 *__restrict l, \ > + signed T1 *__restrict 
r) \ > + { \ > + for (int i = 0; i < N; ++i) \ > + res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S); \ > + } \ > + \ > + void \ > + umulh##T1 (unsigned T1 *__restrict res, \ > + unsigned T1 *__restrict l, \ > + unsigned T1 *__restrict r) \ > + { \ > + for (int i = 0; i < N; ++i) \ > + res[i] = (unsigned T1) \ > + (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S); \ > + } > + > +TEST(char,short,16,8) > +TEST(short,int,8,16) > +TEST(int,long,4,32) > +TEST(long,__int128,2,64) > + > +/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */ > -- > 2.43.5 >