Re: [PATCH v2] s390: Optimize fmin/fmax.

Stefan Schulze Frielinghaus Mon, 07 Jul 2025 01:38:36 -0700

On Wed, Jun 25, 2025 at 10:04:41AM +0200, Juergen Christ wrote:
> On VXE targets, we can directly use the fp min/max instruction instead of
> calling into libm for fmin/fmax etc.
> 
> Provide fmin/fmax versions also for vectors even though it cannot be
> called directly.  This will be exploited with a follow-up patch when
> reductions are introduced.
> 
> Bootstrapped and regtested on s390.  Ok for trunk?
> 
> gcc/ChangeLog:
> 
>       * config/s390/s390.md: Update UNSPECs
>       * config/s390/vector.md (fmax<mode>3): New expander.
>       (fmin<mode>3): New expander.
>       * config/s390/vx-builtins.md (*fmin<mode>): New insn.
>       (vfmin<mode>): Redefined to use new insn.
>       (*fmax<mode>): New insn.
>       (vfmax<mode>): Redefined to use new insn.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/s390/fminmax-1.c: New test.
>       * gcc.target/s390/fminmax-2.c: New test.
> 
> Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>
> ---
>  gcc/config/s390/s390.md                   |  6 +-
>  gcc/config/s390/vector.md                 | 25 ++++++++
>  gcc/config/s390/vx-builtins.md            | 29 ++++++---
>  gcc/testsuite/gcc.target/s390/fminmax-1.c | 77 +++++++++++++++++++++++
>  gcc/testsuite/gcc.target/s390/fminmax-2.c | 29 +++++++++
>  5 files changed, 156 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/fminmax-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/fminmax-2.c
> 
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 97a4bdf96b2d..1c88c9624b60 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -241,9 +241,6 @@
>  
>     UNSPEC_VEC_MSUM
>  
> -   UNSPEC_VEC_VFMIN
> -   UNSPEC_VEC_VFMAX
> -
>     UNSPEC_VEC_VBLEND
>     UNSPEC_VEC_VEVAL
>     UNSPEC_VEC_VGEM
> @@ -256,6 +253,9 @@
>  
>     UNSPEC_NNPA_VCFN_V8HI
>     UNSPEC_NNPA_VCNF_V8HI
> +
> +   UNSPEC_FMAX
> +   UNSPEC_FMIN
>  ])
>  
>  ;;
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 6f4e1929eb80..8bda30624c22 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -89,6 +89,13 @@
>  (define_mode_iterator VF_HW [(V4SF "TARGET_VXE") V2DF (V1TF "TARGET_VXE")
>                            (TF "TARGET_VXE")])
>  
> +; FP scalar and vector modes
> +(define_mode_iterator VFT_BFP [SF DF
> +                           (V1SF "TARGET_VXE") (V2SF "TARGET_VXE") (V4SF 
> "TARGET_VXE")
> +                           V1DF V2DF
> +                           (V1TF "TARGET_VXE") (TF "TARGET_VXE")])
> +
> +
>  (define_mode_iterator V_8   [V1QI])
>  (define_mode_iterator V_16  [V2QI  V1HI])
>  (define_mode_iterator V_32  [V4QI  V2HI V1SI V1SF])
> @@ -3576,3 +3583,21 @@
>  ; vec_unpacks_float_lo
>  ; vec_unpacku_float_hi
>  ; vec_unpacku_float_lo
> +
> +; fmax
> +(define_expand "fmax<mode>3"
> +  [(set (match_operand:VFT_BFP 0 "register_operand" "=v")
> +     (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand" "v")
> +            (match_operand:VFT_BFP 2 "register_operand" "v")
> +            (const_int 4)]
> +           UNSPEC_FMAX))]
> +  "TARGET_VXE")


Expanders don't have constraints and should be removed.
Wrong indentation.

> +
> +; fmin
> +(define_expand "fmin<mode>3"
> +  [(set (match_operand:VFT_BFP 0 "register_operand" "=v")
> +     (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand" "v")
> +            (match_operand:VFT_BFP 2 "register_operand" "v")
> +            (const_int 4)]
> +           UNSPEC_FMIN))]
> +  "TARGET_VXE")

Ditto.

> diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
> index a7bb7ff92f5e..0508df43b866 100644
> --- a/gcc/config/s390/vx-builtins.md
> +++ b/gcc/config/s390/vx-builtins.md
> @@ -2136,15 +2136,32 @@
>    "<vw>fche<sdx>bs\t%v2,%v0,%v1"
>    [(set_attr "op_type" "VRR")])
>  
> +(define_insn "*fmin<mode>"
> +  [(set (match_operand:VFT_BFP                0 "register_operand"  "=v")
                                                 ^
                                                 ~~~~~~
Wrong indentation.

> +     (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand"   "v")
> +                      (match_operand:VFT_BFP 2 "register_operand"   "v")
> +                      (match_operand:QI      3 "const_mask_operand" "C")]
> +                     UNSPEC_FMIN))]
> +  "TARGET_VXE"
> +  "<vw>fmin<sdx>b\t%v0,%v1,%v2,%b3"
> +  [(set_attr "op_type" "VRR")])
>  
> -(define_insn "vfmin<mode>"
> +(define_expand "vfmin<mode>"
>    [(set (match_operand:VF_HW                0 "register_operand"  "=v")
>       (unspec:VF_HW [(match_operand:VF_HW 1 "register_operand"   "v")
>                      (match_operand:VF_HW 2 "register_operand"   "v")
>                      (match_operand:QI    3 "const_mask_operand" "C")]
> -                   UNSPEC_VEC_VFMIN))]
> +                   UNSPEC_FMIN))]
> +  "TARGET_VXE")

Expanders don't have constraints.

Anyhow, you could also merge

define_insn "*fmin<mode>"

and define_expand "vfmin<mode>"

into

define_insn "vfmin<mode>"

using iterator VFT_BFP since VFT_BFP subsumes VF_HW.

> +
> +(define_insn "*fmax<mode>"
> +  [(set (match_operand:VFT_BFP                0 "register_operand"  "=v")
> +     (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand"   "v")
> +                      (match_operand:VFT_BFP 2 "register_operand"   "v")
> +                      (match_operand:QI      3 "const_mask_operand" "C")]
> +                     UNSPEC_FMAX))]
>    "TARGET_VXE"
> -  "<vw>fmin<sdx>b\t%v0,%v1,%v2,%b3"
> +  "<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3"
>    [(set_attr "op_type" "VRR")])
>  
>  (define_insn "vfmax<mode>"
> @@ -2152,10 +2169,8 @@
>       (unspec:VF_HW [(match_operand:VF_HW 1 "register_operand"   "v")
>                      (match_operand:VF_HW 2 "register_operand"   "v")
>                      (match_operand:QI    3 "const_mask_operand" "C")]
> -                   UNSPEC_VEC_VFMAX))]
> -  "TARGET_VXE"
> -  "<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3"
> -  [(set_attr "op_type" "VRR")])
> +                   UNSPEC_FMAX))]
> +  "TARGET_VXE")

An insn always needs an output template.  This was probably meant to be
an expander?  If so, then the constraints should be removed.

Likewise as for min, both could be merged into a single define_insn.

Cheers,
Stefan

>  
>  ; vec_insert (__builtin_bswap32 (*a), b, 1)        set-element-bswap-2.c
>  ; b[1] = __builtin_bswap32 (*a)                    set-element-bswap-3.c
> diff --git a/gcc/testsuite/gcc.target/s390/fminmax-1.c 
> b/gcc/testsuite/gcc.target/s390/fminmax-1.c
> new file mode 100644
> index 000000000000..df10905f037a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/fminmax-1.c
> @@ -0,0 +1,77 @@
> +/* Check fmin/fmax expanders for scalars on VXE targets.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=z14 -mzarch" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +/*
> +** dofmaxl:
> +**   vl      (%v.),0\(%r3\),3
> +**   vl      (%v.),0\(%r4\),3
> +**   wfmaxxb (%v.),\1,\2,4
> +**   vst     \3,0\(%r2\),3
> +**   br      %r14
> +*/
> +long double
> +dofmaxl (long double d1, long double d2)
> +{
> +  return __builtin_fmaxl (d1, d2);
> +}
> +
> +/*
> +** dofminl:
> +**   vl      (%v.),0\(%r3\),3
> +**   vl      (%v.),0\(%r4\),3
> +**   wfminxb (%v.),\1,\2,4
> +**   vst     \3,0\(%r2\),3
> +**   br      %r14
> +*/
> +long double
> +dofminl (long double d1, long double d2)
> +{
> +  return __builtin_fminl (d1, d2);
> +}
> +
> +/*
> +** dofmax:
> +**   wfmaxdb %v0,%v0,%v2,4
> +**   br      %r14
> +*/
> +double
> +dofmax (double d1, double d2)
> +{
> +  return __builtin_fmax (d1, d2);
> +}
> +
> +/*
> +** dofmin:
> +**   wfmindb %v0,%v0,%v2,4
> +**   br      %r14
> +*/
> +double
> +dofmin (double d1, double d2)
> +{
> +  return __builtin_fmin (d1, d2);
> +}
> +
> +/*
> +** dofmaxf:
> +**   wfmaxsb %v0,%v0,%v2,4
> +**   br      %r14
> +*/
> +float
> +dofmaxf (float f1, float f2)
> +{
> +  return __builtin_fmaxf (f1, f2);
> +}
> +
> +/*
> +** dofminf:
> +**   wfminsb %v0,%v0,%v2,4
> +**   br      %r14
> +*/
> +float
> +dofminf (float f1, float f2)
> +{
> +  return __builtin_fminf (f1, f2);
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/fminmax-2.c 
> b/gcc/testsuite/gcc.target/s390/fminmax-2.c
> new file mode 100644
> index 000000000000..ea37a0a821de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/fminmax-2.c
> @@ -0,0 +1,29 @@
> +/* Check fmin/fmax expanders for scalars on non-VXE targets.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=z13 -mzarch" } */
> +/* { dg-final { scan-assembler-times "jg" 4 } } */
> +
> +double
> +dofmax (double d1, double d2)
> +{
> +  return __builtin_fmax (d1, d2);
> +}
> +
> +double
> +dofmin (double d1, double d2)
> +{
> +  return __builtin_fmin (d1, d2);
> +}
> +
> +float
> +dofmaxf (float f1, float f2)
> +{
> +  return __builtin_fmaxf (f1, f2);
> +}
> +
> +float
> +dofminf (float f1, float f2)
> +{
> +  return __builtin_fminf (f1, f2);
> +}
> -- 
> 2.43.5
>

Re: [PATCH v2] s390: Optimize fmin/fmax.

Reply via email to