Enable the SSE4.1 expansions of ceil, floor and trunc when optimizing for size (-Os), so that the function call is replaced with a roundss or roundsd instruction.
gcc/ PR target/121861 * config/i386/i386.cc (ix86_optab_supported_p): Return true for SSE4.1 with SSE math for floor_optab, ceil_optab and btrunc_optab. gcc/testsuite/ PR target/121861 * gcc.target/i386/pr121861-1a.c: New file. * gcc.target/i386/pr121861-1b.c: Likewise. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/i386.cc | 9 ++--- gcc/testsuite/gcc.target/i386/pr121861-1a.c | 43 +++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr121861-1b.c | 7 ++++ 3 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr121861-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr121861-1b.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 471be3e8615..c8626f51973 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -27189,11 +27189,10 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, case floor_optab: case ceil_optab: case btrunc_optab: - if (((SSE_FLOAT_MODE_P (mode1) - && TARGET_SSE_MATH - && TARGET_SSE4_1) - || mode1 == HFmode) - && !flag_trapping_math) + if ((SSE_FLOAT_MODE_P (mode1) + && TARGET_SSE_MATH + && TARGET_SSE4_1) + || (mode1 == HFmode && !flag_trapping_math)) return true; return opt_type == OPTIMIZE_FOR_SPEED; diff --git a/gcc/testsuite/gcc.target/i386/pr121861-1a.c b/gcc/testsuite/gcc.target/i386/pr121861-1a.c new file mode 100644 index 00000000000..ebd0a57253c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121861-1a.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-Os -mfpmath=sse -mno-avx -msse4.1" } */ + +#include <math.h> + +float +cf (float x) +{ + return ceilf (x); +} + +float +ff (float x) +{ + return floorf (x); +} + +float +tf (float x) +{ + return truncf (x); +} + +double +c (double x) +{ + return ceil (x); +} + +double +f (double x) +{ + return floor (x); +} + +double +t (double x) +{ + return trunc (x); +} + +/* { dg-final { scan-assembler-times "roundss" 3 } } */ +/* { dg-final { scan-assembler-times 
"roundsd" 3 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr121861-1b.c b/gcc/testsuite/gcc.target/i386/pr121861-1b.c new file mode 100644 index 00000000000..b52faae66d0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121861-1b.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-Os -mfpmath=sse -mno-avx512f -mavx" } */ + +#include "pr121861-1a.c" + +/* { dg-final { scan-assembler-times "vroundss" 3 } } */ +/* { dg-final { scan-assembler-times "vroundsd" 3 } } */ -- 2.51.0