On Tue, Sep 9, 2025 at 4:26 AM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> Enable SSE4.1 ceil/floor/trunc for -Os to replace a function call with
> roundss or roundsd.
>
> gcc/
>
>         PR target/121861
>         * config/i386/i386.cc (ix86_optab_supported_p): Return true for
>         SSE4.1 with SSE math for floor_optab, ceil_optab and
>         btrunc_optab.
>
> gcc/testsuite/
>
>         PR target/121861
>         * gcc.target/i386/pr121861-1a.c: New file.
>         * gcc.target/i386/pr121861-1b.c: Likewise.
>
> Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
> ---
>  gcc/config/i386/i386.cc                     |  9 ++---
>  gcc/testsuite/gcc.target/i386/pr121861-1a.c | 43 +++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr121861-1b.c |  7 ++++
>  3 files changed, 54 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr121861-1a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr121861-1b.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 471be3e8615..c8626f51973 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -27189,11 +27189,10 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
>      case floor_optab:
>      case ceil_optab:
>      case btrunc_optab:
> -      if (((SSE_FLOAT_MODE_P (mode1)
> -           && TARGET_SSE_MATH
> -           && TARGET_SSE4_1)
> -          || mode1 == HFmode)
> -         && !flag_trapping_math)
> +      if ((SSE_FLOAT_MODE_P (mode1)
> +          && TARGET_SSE_MATH
> +          && TARGET_SSE4_1)
> +         || (mode1 == HFmode && !flag_trapping_math))

Should this be (mode1 == HFmode && TARGET_AVX512FP16) instead?
The <rounding_insn>hf2 pattern is defined under TARGET_AVX512FP16, without
a !flag_trapping_math condition.

>         return true;
>        return opt_type == OPTIMIZE_FOR_SPEED;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr121861-1a.c b/gcc/testsuite/gcc.target/i386/pr121861-1a.c
> new file mode 100644
> index 00000000000..ebd0a57253c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr121861-1a.c
> @@ -0,0 +1,43 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Os -mfpmath=sse -mno-avx -msse4.1" } */
> +
> +#include <math.h>
> +
> +float
> +cf (float x)
> +{
> +  return ceilf (x);
> +}
> +
> +float
> +ff (float x)
> +{
> +  return floorf (x);
> +}
> +
> +float
> +tf (float x)
> +{
> +  return truncf (x);
> +}
> +
> +double
> +c (double x)
> +{
> +  return ceil (x);
> +}
> +
> +double
> +f (double x)
> +{
> +  return floor (x);
> +}
> +
> +double
> +t (double x)
> +{
> +  return trunc (x);
> +}
> +
> +/* { dg-final { scan-assembler-times "roundss" 3 } } */
> +/* { dg-final { scan-assembler-times "roundsd" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr121861-1b.c b/gcc/testsuite/gcc.target/i386/pr121861-1b.c
> new file mode 100644
> index 00000000000..b52faae66d0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr121861-1b.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Os -mfpmath=sse -mno-avx512f -mavx" } */
> +
> +#include "pr121861-1a.c"
> +
> +/* { dg-final { scan-assembler-times "vroundss" 3 } } */
> +/* { dg-final { scan-assembler-times "vroundsd" 3 } } */
> --
> 2.51.0
>


-- 
BR,
Hongtao

Reply via email to