When the code below is compiled using the "-O" flag, it segfaults. If the code is compiled without optimization, it works fine. The code also works fine when compiled with Intel's compiler. Note that there are no pointers in the code. Thus it seems very likely that there is an SSE2-intrinsics-related bug in gcc.
One explanation could be that the __m128i types in SSE2 need to be 16-byte aligned and that gcc misaligns something (maybe the stack) when optimizing. However, I am far from an expert, so I leave it up to you to figure that out. Regards, Isaac BELOW: 1) gcc output 2) /proc/cpuinfo 3) the file "bug_creator.c" (the file "bug_creator.i" is too large to be added) //*************************************************** > gcc -v -save-temps -march=pentium4 -msse2 -O -I/usr/include bug_creator.c Reading specs from /usr/lib/gcc/i386-redhat-linux/3.4.2/specs Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --enable-shared --enable-threads=posix --disable-checking --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-java-awt=gtk --host=i386-redhat-linux Thread model: posix gcc version 3.4.2 20041017 (Red Hat 3.4.2-6.fc3) /usr/libexec/gcc/i386-redhat-linux/3.4.2/cc1 -E -quiet -v -I/usr/include bug_creator.c -march=pentium4 -msse2 -O -o bug_creator.i ignoring nonexistent directory "/usr/lib/gcc/i386-redhat-linux/3.4.2/../../../../i386-redhat-linux/include" ignoring duplicate directory "/usr/include" as it is a non-system directory that duplicates a system directory #include "..." search starts here: #include <...> search starts here: /usr/local/include /usr/lib/gcc/i386-redhat-linux/3.4.2/include /usr/include End of search list. /usr/libexec/gcc/i386-redhat-linux/3.4.2/cc1 -fpreprocessed bug_creator.i -quiet -dumpbase bug_creator.c -march=pentium4 -msse2 -auxbase bug_creator -O -version -o bug_creator.s GNU C version 3.4.2 20041017 (Red Hat 3.4.2-6.fc3) (i386-redhat-linux) compiled by GNU C version 3.4.2 20041017 (Red Hat 3.4.2-6.fc3). 
GGC heuristics: --param ggc-min-expand=81 --param ggc-min-heapsize=96982 as -V -Qy -o bug_creator.o bug_creator.s GNU assembler version 2.15.92.0.2 (i386-redhat-linux) using BFD version 2.15.92.0.2 20040927 /usr/libexec/gcc/i386-redhat-linux/3.4.2/collect2 --eh-frame-hdr -m elf_i386 -dynamic-linker /lib/ld-linux.so.2 /usr/lib/gcc/i386-redhat-linux/3.4.2/../../../crt1.o /usr/lib/gcc/i386-redhat-linux/3.4.2/../../../crti.o /usr/lib/gcc/i386-redhat-linux/3.4.2/crtbegin.o -L/usr/lib/gcc/i386-redhat-linux/3.4.2 -L/usr/lib/gcc/i386-redhat-linux/3.4.2 -L/usr/lib/gcc/i386-redhat-linux/3.4.2/../../.. bug_creator.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /usr/lib/gcc/i386-redhat-linux/3.4.2/crtend.o /usr/lib/gcc/i386-redhat-linux/3.4.2/../../../crtn.o //************************************************* //******* /proc/cpuinfo processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 2 model name : Intel(R) Pentium(R) 4 CPU 2.00GHz stepping : 4 cpu MHz : 2020.823 cache size : 512 KB fdiv_bug : no hlt_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm bogomips : 3997.69 //************************************************** // ******** bug_creator.c ********************* #include <stdio.h> #include <emmintrin.h> // Compiling with the "-O" flag causes program to segfault. If it is // compiled without "-O" it runs perfectly. 
// // gcc -v -save-temps -march=pentium4 -msse2 -I/usr/include bug_creator.c //------------------------ //Variables __m128i A1; __m128i B1; __m128i BITMASK; __m128i A2; __m128i B2; //-------------------- void LEVEL_2(){ B1 = _mm_add_epi32(B1,A1); A1 = _mm_add_epi32(A1,_mm_add_epi32(_mm_setzero_si128(), _mm_add_epi32(_mm_setzero_si128(),BITMASK))); A2 = _mm_add_epi32(_mm_add_epi32(A1,A1),_mm_add_epi32(_mm_add_epi32(A1,A1),A1)); B2 = _mm_add_epi32(_mm_add_epi32(B1,A1),_mm_add_epi32(_mm_add_epi32(B1,A1),A1)); B1 = _mm_add_epi32(B1,A1); A1 = _mm_add_epi32(A1,_mm_add_epi32(_mm_setzero_si128(), _mm_add_epi32(_mm_setzero_si128(),BITMASK))); A2 = _mm_add_epi32(A2,_mm_add_epi32(_mm_add_epi32(A1,BITMASK),_mm_add_epi32(_mm_setzero_si128(),BITMASK))); } void LEVEL_3(){ LEVEL_2(); } int main(int argc, char **argv){ A1 = _mm_setzero_si128(); B1 = _mm_setzero_si128(); A2 = _mm_setzero_si128(); B2 = _mm_setzero_si128(); BITMASK = _mm_setzero_si128(); LEVEL_3(); //Everything is initialized to zero and "_mm_add_epi32" is the only //operation used so everything should be zero. printf("A1: %d\n",_mm_extract_epi16(A1,0)); printf("B1: %d\n",_mm_extract_epi16(B1,0)); printf("A2: %d\n",_mm_extract_epi16(A2,0)); printf("B2: %d\n",_mm_extract_epi16(A2,0)); printf("BITMASK: %d\n",_mm_extract_epi16(BITMASK,0)); return 1; } -- Summary: gcc -O causes segfault in code using SSE2 intrinsics Product: gcc Version: 3.4.2 Status: UNCONFIRMED Severity: normal Priority: P2 Component: c AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: isaac at nada dot kth dot se CC: gcc-bugs at gcc dot gnu dot org GCC build triplet: i386-redhat-linux GCC host triplet: i386-redhat-linux GCC target triplet: i386-redhat-linux http://gcc.gnu.org/bugzilla/show_bug.cgi?id=20494