On Wed, Aug 10, 2005 at 08:13:17AM -0700, H. J. Lu wrote:
> On Wed, Aug 10, 2005 at 10:18:41AM -0400, Jakub Jelinek wrote:
> > On Wed, Aug 10, 2005 at 07:09:04AM -0700, H. J. Lu wrote:
> > > On Tue, Aug 09, 2005 at 02:58:51PM -0700, Richard Henderson wrote:
> > > > On Tue, Aug 09, 2005 at 02:30:46PM -0700, H. J. Lu wrote:
> > > > > There is a minor problem. How can I add crtfastmath.o for SSE targets
> > > > > only? 
> > > > 
> > > > You don't.  You either add code to detect sse, or you make the
> > > > spec depend on -mfpmath=sse.
> > > > 
> > > 
> > > Here is the patch to enable FTZ/DAZ for SSE via fast math. There are
> > > no regressions on either Linux/x86_64 or Linux/ia32. The performance of
> > > one FP benchmark on EM64T is more than doubled with -ffast-math.
> > 
> > Not all i?86 CPUs support cpuid instruction.
> > Please look at
> > gcc/testsuite/gcc.dg/i386-cpuid.h
> > for the ugly details.
> > 
> 
> Ok. Also, all x86_64 CPUs support SSE, so there is no need to check for
> it in 64-bit mode.
> 
> 

Here is the updated patch.


H.J.
---
2005-08-10  H.J. Lu  <[EMAIL PROTECTED]>

        * config.gcc (i[34567]86-*-linux*): Add i386/t-crtfm to tmake_file.
        (x86_64-*-linux*): Likewise.

        * config/i386/crtfastmath.c: New file.
        * config/i386/t-crtfm: Likewise.

        * config/i386/linux.h (ENDFILE_SPEC): New.
        * config/i386/linux64.h (ENDFILE_SPEC): Likewise.

        * config/i386/t-linux64 (EXTRA_MULTILIB_PARTS): Add
        crtfastmath.o.

--- gcc/config.gcc.sse  2005-08-09 16:18:14.000000000 -0700
+++ gcc/config.gcc      2005-08-09 16:18:15.000000000 -0700
@@ -1006,7 +1006,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfree
        i[34567]86-*-knetbsd*-gnu) tm_file="${tm_file} knetbsd-gnu.h i386/knetbsd-gnu.h" ;;
        i[34567]86-*-kfreebsd*-gnu) tm_file="${tm_file} kfreebsd-gnu.h i386/kfreebsd-gnu.h" ;;
        esac
-       tmake_file="${tmake_file} i386/t-crtstuff"
+       tmake_file="${tmake_file} i386/t-crtstuff i386/t-crtfm"
        ;;
 x86_64-*-linux* | x86_64-*-kfreebsd*-gnu | x86_64-*-knetbsd*-gnu)
        tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h svr4.h linux.h \
@@ -1015,7 +1015,7 @@ x86_64-*-linux* | x86_64-*-kfreebsd*-gnu
        x86_64-*-kfreebsd*-gnu) tm_file="${tm_file} kfreebsd-gnu.h" ;;
        x86_64-*-knetbsd*-gnu) tm_file="${tm_file} knetbsd-gnu.h" ;;
        esac
-       tmake_file="${tmake_file} i386/t-linux64"
+       tmake_file="${tmake_file} i386/t-linux64 i386/t-crtfm"
        ;;
 i[34567]86-*-gnu*)
        ;;
--- gcc/config/i386/crtfastmath.c.sse   2005-08-09 16:18:15.000000000 -0700
+++ gcc/config/i386/crtfastmath.c       2005-08-10 08:47:24.207266245 -0700
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2005 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ * 
+ * In addition to the permissions in the GNU General Public License, the
+ * Free Software Foundation gives you unlimited permission to link the
+ * compiled version of this file with other programs, and to distribute
+ * those programs without any restriction coming from the use of this
+ * file.  (The General Public License restrictions do apply in other
+ * respects; for example, they cover modification of the file, and
+ * distribution when not linked into another program.)
+ * 
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ * 
+ *    As a special exception, if you link this library with files
+ *    compiled with GCC to produce an executable, this does not cause
+ *    the resulting executable to be covered by the GNU General Public License.
+ *    This exception does not however invalidate any other reasons why
+ *    the executable file might be covered by the GNU General Public License.
+ */
+
+#define MXCSR_DAZ (1 << 6)     /* Enable denormals are zero mode */
+#define MXCSR_FTZ (1 << 15)    /* Enable flush to zero mode */
+
+static void __attribute__((constructor))
+set_fast_math (void)  /* Startup hook: set FTZ and DAZ in MXCSR.  */
+{
+#ifndef __x86_64__
+  /* x86_64 always has SSE; only 32-bit builds need the runtime check.  */
+  unsigned int eax, ebx, ecx, edx;
+
+  /* cpuid exists only if the EFLAGS ID bit (0x00200000) can be toggled.  */
+  asm volatile ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
+               "pushl %0; popfl; pushfl; popl %0; popfl"
+               : "=&r" (eax), "=&r" (ebx)
+               : "i" (0x00200000));
+
+  if (((eax ^ ebx) & 0x00200000) == 0)  /* Bit did not change: no cpuid.  */
+    return;
+
+  /* cpuid leaf 0 returns the highest supported leaf number in eax.  */
+  asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"  /* %ebx restored */
+               : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+               : "0" (0));
+
+  if (eax == 0)  /* Leaf 1 (feature flags) is not available.  */
+    return;
+
+  asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"  /* leaf 1 query */
+               : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+               : "0" (1));
+
+  if (edx & (1 << 25))  /* Leaf 1 EDX bit 25: SSE is supported.  */
+#endif
+    {  /* NOTE(review): DAZ may be unsupported on early SSE CPUs -- confirm.  */
+      unsigned int mxcsr = __builtin_ia32_stmxcsr ();
+      mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
+      __builtin_ia32_ldmxcsr (mxcsr);
+    }
+}
--- gcc/config/i386/linux.h.sse 2004-11-28 17:04:42.000000000 -0800
+++ gcc/config/i386/linux.h     2005-08-09 16:18:15.000000000 -0700
@@ -121,6 +121,12 @@ Boston, MA 02111-1307, USA.  */
        %{!dynamic-linker:-dynamic-linker %(dynamic_linker)}} \
        %{static:-static}}}"
 
+/* Similar to standard Linux, but adding -ffast-math support.  */
+#undef  ENDFILE_SPEC
+#define ENDFILE_SPEC \
+  "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+   %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
 /* A C statement (sans semicolon) to output to the stdio stream
    FILE the assembler definition of uninitialized global DECL named
    NAME whose size is SIZE bytes and alignment is ALIGN bytes.
--- gcc/config/i386/linux64.h.sse       2004-11-28 17:04:42.000000000 -0800
+++ gcc/config/i386/linux64.h   2005-08-09 16:18:15.000000000 -0700
@@ -64,6 +64,12 @@ Boston, MA 02111-1307, USA.  */
       %{!m32:%{!dynamic-linker:-dynamic-linker /lib64/ld-linux-x86-64.so.2}}} \
     %{static:-static}}"
 
+/* Similar to standard Linux, but adding -ffast-math support.  */
+#undef  ENDFILE_SPEC
+#define ENDFILE_SPEC \
+  "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+   %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
 #define MULTILIB_DEFAULTS { "m64" }
 
 #undef NEED_INDICATE_EXEC_STACK
--- gcc/config/i386/t-crtfm.sse 2005-08-09 16:18:15.000000000 -0700
+++ gcc/config/i386/t-crtfm     2005-08-09 16:18:15.000000000 -0700
@@ -0,0 +1,6 @@
+EXTRA_PARTS += crtfastmath.o
+
+$(T)crtfastmath.o: $(srcdir)/config/i386/crtfastmath.c $(GCC_PASSES)
+       $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -msse -c \
+               $(srcdir)/config/i386/crtfastmath.c \
+               -o $(T)crtfastmath$(objext)
--- gcc/config/i386/t-linux64.sse       2003-03-03 12:03:59.000000000 -0800
+++ gcc/config/i386/t-linux64   2005-08-09 16:18:15.000000000 -0700
@@ -11,7 +11,8 @@ MULTILIB_OSDIRNAMES = ../lib64 ../lib
 LIBGCC = stmp-multilib
 INSTALL_LIBGCC = install-multilib
 
-EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o \
+                    crtbeginT.o crtfastmath.o
 
 # The pushl in CTOR initialization interferes with frame pointer elimination.
 # crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables,

Reply via email to