On Wed, Jun 25, 2025 at 10:04:49AM +0200, Juergen Christ wrote:
> Some patterns that are detected by the autovectorizer can be supported by
> s390.  Add expanders such that autovectorization of these patterns works.
> 
> RTL for the builtins used unspec to represent highpart multiplication.
> Replace this by the correct RTL to allow further simplification.
> 
> Bootstrapped and regtested on s390.  Ok for trunk?
> 
> gcc/ChangeLog:
> 
>       * config/s390/s390.md: Removed unused unspecs.
>       * config/s390/vector.md (avg<mode>3_ceil): New expander.
>       (uavg<mode>3_ceil): New expander.
>       (smul<mode>3_highpart): New expander.
>       (umul<mode>3_highpart): New expander.
>       * config/s390/vx-builtins.md (vec_umulh<mode>): Remove unspec.
>       (vec_smulh<mode>): Remove unspec.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/s390/vector/pattern-avg-1.c: New test.
>       * gcc.target/s390/vector/pattern-mulh-1.c: New test.
> 
> Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>
> ---
>  gcc/config/s390/s390.md                       |  3 --
>  gcc/config/s390/vector.md                     | 26 +++++++++++++++++
>  gcc/config/s390/vx-builtins.md                | 10 +++----
>  .../gcc.target/s390/vector/pattern-avg-1.c    | 26 +++++++++++++++++
>  .../gcc.target/s390/vector/pattern-mulh-1.c   | 29 +++++++++++++++++++
>  5 files changed, 85 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
> 
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 97a4bdf96b2d..440ce93574f4 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -139,9 +139,6 @@
>     UNSPEC_LCBB
>  
>     ; Vector
> -   UNSPEC_VEC_SMULT_HI
> -   UNSPEC_VEC_UMULT_HI
> -   UNSPEC_VEC_SMULT_LO
>     UNSPEC_VEC_SMULT_EVEN
>     UNSPEC_VEC_UMULT_EVEN
>     UNSPEC_VEC_SMULT_ODD
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 6f4e1929eb80..8d7ca1a520f3 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -3576,3 +3576,29 @@
>  ; vec_unpacks_float_lo
>  ; vec_unpacku_float_hi
>  ; vec_unpacku_float_lo
> +
> +(define_expand "avg<mode>3_ceil"
> +  [(set (match_operand:VIT_HW_VXE3_T                        0 
> "register_operand" "=v")
> +     (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 
> "register_operand"  "v")
> +                            (match_operand:VIT_HW_VXE3_T 2 
> "register_operand"  "v")]
> +                           UNSPEC_VEC_AVG))]
> +  "TARGET_VX")

Expanders don't have constraints; please drop the "=v"/"v" constraint strings from the operands.

> +
> +(define_expand "uavg<mode>3_ceil"
> +  [(set (match_operand:VIT_HW_VXE3_T                        0 
> "register_operand" "=v")
> +     (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 
> "register_operand"  "v")
> +                            (match_operand:VIT_HW_VXE3_T 2 
> "register_operand"  "v")]
> +                           UNSPEC_VEC_AVGU))]
> +  "TARGET_VX")

Ditto: expanders don't have constraints.

> +
> +(define_expand "smul<mode>3_highpart"
> +  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                   
>     "=v")
> +     (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> +                                   (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")))]
> +  "TARGET_VX")

Ditto: expanders don't have constraints.

> +
> +(define_expand "umul<mode>3_highpart"
> +  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                   
>     "=v")
> +     (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> +                                   (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")))]
> +  "TARGET_VX")

Ditto: expanders don't have constraints.


> diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
> index a7bb7ff92f5e..2478f74e161a 100644
> --- a/gcc/config/s390/vx-builtins.md
> +++ b/gcc/config/s390/vx-builtins.md
> @@ -983,9 +983,8 @@
>  ; vmhb, vmhh, vmhf, vmhg, vmhq
>  (define_insn "vec_smulh<mode>"
>    [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                   
>     "=v")
                                                                                
Wrong indentation of the "=v" constraint on the quoted line above.

> -     (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> -                             (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")]
> -                            UNSPEC_VEC_SMULT_HI))]
> +     (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> +                                   (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")))]
>    "TARGET_VX"
>    "vmh<bhfgq>\t%v0,%v1,%v2"
>    [(set_attr "op_type" "VRR")])
> @@ -993,9 +992,8 @@
>  ; vmlhb, vmlhh, vmlhf, vmlhg, vmlhq
>  (define_insn "vec_umulh<mode>"
>    [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                   
>     "=v")
                                                                                
Wrong indentation of the "=v" constraint on the quoted line above.

With those changes, OK for trunk.

Thanks,
Stefan

> -     (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> -                             (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")]
> -                            UNSPEC_VEC_UMULT_HI))]
> +     (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> +                                   (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")))]
>    "TARGET_VX"
>    "vmlh<bhfgq>\t%v0,%v1,%v2"
>    [(set_attr "op_type" "VRR")])
> diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c 
> b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
> new file mode 100644
> index 000000000000..a15301aabe54
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize 
> -fdump-tree-optimized" } */
> +
> +#define TEST(T1,T2,N)                                                   \
> +  void                                                                  \
> +  avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a,          \
> +           signed T1 *__restrict b)                                     \
> +  {                                                                     \
> +    for (int i = 0; i < N; ++i)                                         \
> +      res[i] = ((signed T2)a[i] + b[i] + 1) >> 1;                       \
> +  }                                                                     \
> +                                                                        \
> +  void                                                                  \
> +  uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a,     \
> +            unsigned T1 *__restrict b)                                  \
> +  {                                                                     \
> +    for (int i = 0; i < N; ++i)                                         \
> +      res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1;                     \
> +  }
> +
> +TEST(char,short,16)
> +TEST(short,int,8)
> +TEST(int,long,4)
> +TEST(long,__int128,2)
> +
> +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c 
> b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
> new file mode 100644
> index 000000000000..cd8e4e7d7a09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize 
> -fdump-tree-optimized" } */
> +
> +#define TEST(T1,T2,N,S)                                                 \
> +  void                                                                  \
> +  mulh##T1 (signed T1 *__restrict res,                                  \
> +            signed T1 *__restrict l,                                    \
> +            signed T1 *__restrict r)                                    \
> +  {                                                                     \
> +    for (int i = 0; i < N; ++i)                                         \
> +      res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S);  \
> +  }                                                                     \
> +                                                                        \
> +  void                                                                  \
> +  umulh##T1 (unsigned T1 *__restrict res,                               \
> +             unsigned T1 *__restrict l,                                 \
> +             unsigned T1 *__restrict r)                                 \
> +  {                                                                     \
> +    for (int i = 0; i < N; ++i)                                         \
> +      res[i] = (unsigned T1)                                            \
> +        (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S);                 \
> +  }
> +
> +TEST(char,short,16,8)
> +TEST(short,int,8,16)
> +TEST(int,long,4,32)
> +TEST(long,__int128,2,64)
> +
> +/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */
> -- 
> 2.43.5
> 

Reply via email to