On Tue, Sep 9, 2025 at 4:26 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > Enable SSE4.1 ceil/floor/trunc for -Os to replace a function call with > roundss or roundsd. > > gcc/ > > PR target/121861 > * config/i386/i386.cc (ix86_optab_supported_p): Return true for > SSE4.1 with SSE math for floor_optab, ceil_optab and > btrunc_optab. > > gcc/testsuite/ > > PR target/121861 > * gcc.target/i386/pr121861-1a.c: New file. > * gcc.target/i386/pr121861-1b.c: Likewise. > > Signed-off-by: H.J. Lu <hjl.to...@gmail.com> > --- > gcc/config/i386/i386.cc | 9 ++--- > gcc/testsuite/gcc.target/i386/pr121861-1a.c | 43 +++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr121861-1b.c | 7 ++++ > 3 files changed, 54 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr121861-1a.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr121861-1b.c > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index 471be3e8615..c8626f51973 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -27189,11 +27189,10 @@ ix86_optab_supported_p (int op, machine_mode mode1, > machine_mode, > case floor_optab: > case ceil_optab: > case btrunc_optab: > - if (((SSE_FLOAT_MODE_P (mode1) > - && TARGET_SSE_MATH > - && TARGET_SSE4_1) > - || mode1 == HFmode) > - && !flag_trapping_math) > + if ((SSE_FLOAT_MODE_P (mode1) > + && TARGET_SSE_MATH > + && TARGET_SSE4_1) > + || (mode1 == HFmode && !flag_trapping_math))
Should the HFmode case be (mode1 == HFmode && TARGET_AVX512FP16)? The <rounding_insn>hf2 pattern is defined under TARGET_AVX512FP16, without a !flag_trapping_math condition. > return true; > return opt_type == OPTIMIZE_FOR_SPEED; > > diff --git a/gcc/testsuite/gcc.target/i386/pr121861-1a.c > b/gcc/testsuite/gcc.target/i386/pr121861-1a.c > new file mode 100644 > index 00000000000..ebd0a57253c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr121861-1a.c > @@ -0,0 +1,43 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -mfpmath=sse -mno-avx -msse4.1" } */ > + > +#include <math.h> > + > +float > +cf (float x) > +{ > + return ceilf (x); > +} > + > +float > +ff (float x) > +{ > + return floorf (x); > +} > + > +float > +tf (float x) > +{ > + return truncf (x); > +} > + > +double > +c (double x) > +{ > + return ceil (x); > +} > + > +double > +f (double x) > +{ > + return floor (x); > +} > + > +double > +t (double x) > +{ > + return trunc (x); > +} > + > +/* { dg-final { scan-assembler-times "roundss" 3 } } */ > +/* { dg-final { scan-assembler-times "roundsd" 3 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr121861-1b.c > b/gcc/testsuite/gcc.target/i386/pr121861-1b.c > new file mode 100644 > index 00000000000..b52faae66d0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr121861-1b.c > @@ -0,0 +1,7 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -mfpmath=sse -mno-avx512f -mavx" } */ > + > +#include "pr121861-1a.c" > + > +/* { dg-final { scan-assembler-times "vroundss" 3 } } */ > +/* { dg-final { scan-assembler-times "vroundsd" 3 } } */ > -- > 2.51.0 > -- BR, Hongtao