On Wed, Aug 10, 2005 at 08:13:17AM -0700, H. J. Lu wrote: > On Wed, Aug 10, 2005 at 10:18:41AM -0400, Jakub Jelinek wrote: > > On Wed, Aug 10, 2005 at 07:09:04AM -0700, H. J. Lu wrote: > > > On Tue, Aug 09, 2005 at 02:58:51PM -0700, Richard Henderson wrote: > > > > On Tue, Aug 09, 2005 at 02:30:46PM -0700, H. J. Lu wrote: > > > > > There is a minor problem. How can I add crtfastmath.o for SSE targets > > > > > only? > > > > > > > > You don't. You either add code to detect sse, or you make the > > > > spec depend on -mfpmath=sse. > > > > > > > > > > Here is the patch to enable FTZ/DAZ for SSE via fast math. There are > > > no regressions on Linux/x86_64 nor Linux/ia32. The performance of one > > > FP benchmark on EM64T is more than doubled with -ffast-math. > > > > Not all i?86 CPUs support cpuid instruction. > > Please look at > > gcc/testsuite/gcc.dg/i386-cpuid.h > > for the ugly details. > > > > Ok. Also all x86_64 supports SSE. There is no need to check that in > 64bit. > >
Here is the updated patch. H.J. --- 2005-08-10 H.J. Lu <[EMAIL PROTECTED]> * config.gcc (i[34567]86-*-linux*): Add i386/t-crtfm to tmake_file. (x86_64-*-linux*): Likewise. * config/i386/crtfastmath.c: New file. * config/i386/t-crtfm: Likewise. * config/i386/linux.h (ENDFILE_SPEC): New. * config/i386/linux64.h (ENDFILE_SPEC): Likewise. * config/i386/t-linux64 (EXTRA_MULTILIB_PARTS): Add crtfastmath.o. --- gcc/config.gcc.sse 2005-08-09 16:18:14.000000000 -0700 +++ gcc/config.gcc 2005-08-09 16:18:15.000000000 -0700 @@ -1006,7 +1006,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfree i[34567]86-*-knetbsd*-gnu) tm_file="${tm_file} knetbsd-gnu.h i386/knetbsd-gnu.h" ;; i[34567]86-*-kfreebsd*-gnu) tm_file="${tm_file} kfreebsd-gnu.h i386/kfreebsd-gnu.h" ;; esac - tmake_file="${tmake_file} i386/t-crtstuff" + tmake_file="${tmake_file} i386/t-crtstuff i386/t-crtfm" ;; x86_64-*-linux* | x86_64-*-kfreebsd*-gnu | x86_64-*-knetbsd*-gnu) tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h svr4.h linux.h \ @@ -1015,7 +1015,7 @@ x86_64-*-linux* | x86_64-*-kfreebsd*-gnu x86_64-*-kfreebsd*-gnu) tm_file="${tm_file} kfreebsd-gnu.h" ;; x86_64-*-knetbsd*-gnu) tm_file="${tm_file} knetbsd-gnu.h" ;; esac - tmake_file="${tmake_file} i386/t-linux64" + tmake_file="${tmake_file} i386/t-linux64 i386/t-crtfm" ;; i[34567]86-*-gnu*) ;; --- gcc/config/i386/crtfastmath.c.sse 2005-08-09 16:18:15.000000000 -0700 +++ gcc/config/i386/crtfastmath.c 2005-08-10 08:47:24.207266245 -0700 @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2005 Free Software Foundation, Inc. + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * In addition to the permissions in the GNU General Public License, the + * Free Software Foundation gives you unlimited permission to link the + * compiled version of this file with other programs, and to distribute + * those programs without any restriction coming from the use of this + * file. (The General Public License restrictions do apply in other + * respects; for example, they cover modification of the file, and + * distribution when not linked into another program.) + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * As a special exception, if you link this library with files + * compiled with GCC to produce an executable, this does not cause + * the resulting executable to be covered by the GNU General Public License. + * This exception does not however invalidate any other reasons why + * the executable file might be covered by the GNU General Public License. + */ + +#define MXCSR_DAZ (1 << 6) /* Enable denormals are zero mode */ +#define MXCSR_FTZ (1 << 15) /* Enable flush to zero mode */ + +static void __attribute__((constructor)) +set_fast_math (void) +{ +#ifndef __x86_64__ + /* SSE is the part of 64bit. Only need to check it for 32bit. */ + unsigned int eax, ebx, ecx, edx; + + /* See if we can use cpuid. */ + asm volatile ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;" + "pushl %0; popfl; pushfl; popl %0; popfl" + : "=&r" (eax), "=&r" (ebx) + : "i" (0x00200000)); + + if (((eax ^ ebx) & 0x00200000) == 0) + return; + + /* Check the highest input value for eax. 
*/ + asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" + : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) + : "0" (0)); + + if (eax == 0) + return; + + asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" + : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) + : "0" (1)); + + if (edx & (1 << 25)) +#endif + { + unsigned int mxcsr = __builtin_ia32_stmxcsr (); + mxcsr |= MXCSR_DAZ | MXCSR_FTZ; + __builtin_ia32_ldmxcsr (mxcsr); + } +} --- gcc/config/i386/linux.h.sse 2004-11-28 17:04:42.000000000 -0800 +++ gcc/config/i386/linux.h 2005-08-09 16:18:15.000000000 -0700 @@ -121,6 +121,12 @@ Boston, MA 02111-1307, USA. */ %{!dynamic-linker:-dynamic-linker %(dynamic_linker)}} \ %{static:-static}}}" +/* Similar to standard Linux, but adding -ffast-math support. */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" + /* A C statement (sans semicolon) to output to the stdio stream FILE the assembler definition of uninitialized global DECL named NAME whose size is SIZE bytes and alignment is ALIGN bytes. --- gcc/config/i386/linux64.h.sse 2004-11-28 17:04:42.000000000 -0800 +++ gcc/config/i386/linux64.h 2005-08-09 16:18:15.000000000 -0700 @@ -64,6 +64,12 @@ Boston, MA 02111-1307, USA. */ %{!m32:%{!dynamic-linker:-dynamic-linker /lib64/ld-linux-x86-64.so.2}}} \ %{static:-static}}" +/* Similar to standard Linux, but adding -ffast-math support. 
*/ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" + #define MULTILIB_DEFAULTS { "m64" } #undef NEED_INDICATE_EXEC_STACK --- gcc/config/i386/t-crtfm.sse 2005-08-09 16:18:15.000000000 -0700 +++ gcc/config/i386/t-crtfm 2005-08-09 16:18:15.000000000 -0700 @@ -0,0 +1,6 @@ +EXTRA_PARTS += crtfastmath.o + +$(T)crtfastmath.o: $(srcdir)/config/i386/crtfastmath.c $(GCC_PASSES) + $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -msse -c \ + $(srcdir)/config/i386/crtfastmath.c \ + -o $(T)crtfastmath$(objext) --- gcc/config/i386/t-linux64.sse 2003-03-03 12:03:59.000000000 -0800 +++ gcc/config/i386/t-linux64 2005-08-09 16:18:15.000000000 -0700 @@ -11,7 +11,8 @@ MULTILIB_OSDIRNAMES = ../lib64 ../lib LIBGCC = stmp-multilib INSTALL_LIBGCC = install-multilib -EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o +EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o \ + crtbeginT.o crtfastmath.o # The pushl in CTOR initialization interferes with frame pointer elimination. # crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables,