When I compile FFTW 3.1.1 on a new Intel Mac, gcc 4.0.1 generates unassemblable code (the assembler rejects the compiler's output) for several of the main files. For example:
[stp:/Users/stp/Code/MacLibs/fftw-3.1.1/kernel] make trig.lo
gcc -DHAVE_CONFIG_H -I. -I. -I.. -I../simd -O3 -fomit-frame-pointer -malign-double -fstrict-aliasing -ffast-math -march=pentiumpro -D_THREAD_SAFE -MT trig.lo -MD -MP -MF .deps/trig.Tpo -c trig.c -o trig.o
/var/tmp//ccL7WCuH.s:79:operands given don't match any known 386 instruction
/var/tmp//ccL7WCuH.s:83:operands given don't match any known 386 instruction
/var/tmp//ccL7WCuH.s:112:operands given don't match any known 386 instruction
/var/tmp//ccL7WCuH.s:114:operands given don't match any known 386 instruction
/var/tmp//ccL7WCuH.s:138:operands given don't match any known 386 instruction
/var/tmp//ccL7WCuH.s:144:operands given don't match any known 386 instruction
make: *** [trig.lo] Error 1

I'm attaching the broken assembly file below.

If I compile with only -O2 instead of the configured flags shown above, it succeeds:

[stp:/Users/stp/Code/MacLibs/fftw-3.1.1/kernel] make CFLAGS="-O2" trig.lo
gcc -DHAVE_CONFIG_H -I. -I. -I.. -I../simd -O2 -MT trig.lo -MD -MP -MF .deps/trig.Tpo -c trig.c -o trig.o

This works fine.

Here's the gcc version:

Target: i686-apple-darwin8
Configured with: /private/var/tmp/gcc/gcc-5250.obj~12/src/configure --disable-checking -enable-werror --prefix=/usr --mandir=/share/man --enable-languages=c,objc,c++,obj-c++ --program-transform-name=/^[cg][^.-]*$/s/$/-4.0/ --with-gxx-include-dir=/include/c++/4.0.0 --build=powerpc-apple-darwin8 --with-arch=pentium-m --with-tune=prescott --program-prefix= --host=i686-apple-darwin8 --target=i686-apple-darwin8
Thread model: posix
gcc version 4.0.1 (Apple Computer, Inc.
build 5250) Here's the broken trig.s file .text _cexpl_sqrtn_table: pushl %edi pushl %esi subl $4, %esp movl 16(%esp), %ecx movl 20(%esp), %edx movl 24(%esp), %edi movl %edx, %eax shrl $31, %eax imull 32(%ecx), %eax addl %eax, %edx movl 24(%ecx), %esi movl %edx, %eax andl 20(%ecx), %eax sall $4, %eax fldl (%esi,%eax) fldl 8(%esi,%eax) movl 28(%ecx), %eax movl 12(%ecx), %ecx sarl %cl, %edx sall $4, %edx fldl (%eax,%edx) fldl 8(%eax,%edx) fld %st(3) fmul %st(2), %st fld %st(3) fmul %st(2), %st fsubrp %st, %st(1) fstpl (%edi) fmulp %st, %st(3) fmulp %st, %st(1) faddp %st, %st(1) fstpl 8(%edi) addl $4, %esp popl %esi popl %edi ret _rotate_sqrtn_table: pushl %edi pushl %esi subl $4, %esp movl 16(%esp), %ecx movl 20(%esp), %edx movl 32(%esp), %edi movl %edx, %eax shrl $31, %eax imull 32(%ecx), %eax addl %eax, %edx movl 24(%ecx), %esi movl %edx, %eax andl 20(%ecx), %eax sall $4, %eax fldl (%esi,%eax) fldl 8(%esi,%eax) movl 28(%ecx), %eax movl 12(%ecx), %ecx sarl %cl, %edx sall $4, %edx fldl (%eax,%edx) fldl 8(%eax,%edx) fld %st(3) fmul %st(2), %st fld %st(3) fmul %st(2), %st fsubrp %st, %st(1) fxch %st(4) fmulp %st, %st(1) fxch %st(2) fmulp %st, %st(1) faddp %st, %st(1) flds 24(%esp) flds 28(%esp) fld %st(3) fmul %st(2), %st fld %st(3) fmul %st(2), %st faddp %st, %st(1) fstp (%edi) fmulp %st, %st(3) fmulp %st, %st(1) fsubrp %st, %st(1) fstp 4(%edi) addl $4, %esp popl %esi popl %edi ret _cexp_zero: movl 12(%esp), %edx xorl %eax, %eax movl %eax, (%edx) movl %eax, 4(%edx) ret _cexpl_zero: movl 12(%esp), %eax fldz fstl (%eax) fstpl 8(%eax) ret _cexp_generic: pushl %esi subl $40, %esp movl 48(%esp), %edx movl 56(%esp), %esi leal 16(%esp), %eax movl %eax, 8(%esp) movl 52(%esp), %eax movl %eax, 4(%esp) movl %edx, (%esp) call *4(%edx) fldl 16(%esp) fstp (%esi) fldl 24(%esp) fstp 4(%esi) addl $40, %esp popl %esi ret _rotate_generic: pushl %esi subl $40, %esp movl 48(%esp), %edx movl 64(%esp), %esi leal 16(%esp), %eax movl %eax, 8(%esp) movl 52(%esp), %eax movl %eax, 4(%esp) movl 
%edx, (%esp) call *4(%edx) flds 56(%esp) fldl 16(%esp) flds 60(%esp) fldl 24(%esp) fld %st(3) fmul %st(3), %st fld %st(2) fmul %st(2), %st faddp %st, %st(1) fstp (%esi) fxch %st(2) fmulp %st, %st(1) fxch %st(1) fmulp %st, %st(2) fsubp %st, %st(1) fstp 4(%esi) addl $40, %esp popl %esi ret .globl _fftwf_triggen_destroy _fftwf_triggen_destroy: pushl %esi subl $24, %esp movl 32(%esp), %esi movl 24(%esi), %eax movl %eax, (%esp) call L_fftwf_ifree0$stub movl 28(%esi), %eax movl %eax, (%esp) call L_fftwf_ifree0$stub movl %esi, 32(%esp) addl $24, %esp popl %esi jmp L_fftwf_ifree$stub .literal8 .align 3 LC2: .long 1413754136 .long 1075388923 .text _cexpl_sincos: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $4, %esp call ___i686.get_pc_thunk.bx "L00000000001$pb": movl 24(%esp), %eax movl 32(%eax), %esi leal 0(,%esi,4), %edi movl 28(%esp), %edx sall $2, %edx leal (%edx,%edi), %eax cmpl $-1, %edx cmovle %eax, %edx movl %edi, %ecx subl %edx, %ecx cmpl %ecx, %edx jle L18 movl $4, %ebp movl %ecx, %eax subl %esi, %eax testl %eax, %eax jle L36 L21: orl $2, %ebp L23: movl %esi, %edx subl %eax, %edx cmpl %edx, %eax jg L24 movl %eax, %edx L26: fldl LC2-"L00000000001$pb"(%ebx) pushl %edx fimull (%esp) movl %edi, (%esp) fidivl (%esp) addl $4, %esp fsincos fld %st(0) testl $1, %ebp jne L37 fstp %st(0) fxch %st(1) L27: testl $2, %ebp je L31 fchs fxch %st(1) L31: andl $4, %ebp je L32 fchs L32: fxch %st(1) movl 32(%esp), %eax fstpl (%eax) fstpl 8(%eax) addl $4, %esp popl %ebx popl %esi popl %edi popl %ebp ret L37: fstp %st(1) jmp L27 L24: orl $1, %ebp jmp L26 L18: movl %edx, %ecx xorl %ebp, %ebp movl %ecx, %eax subl %esi, %eax testl %eax, %eax jg L21 L36: movl %ecx, %eax jmp L23 .literal8 .align 3 LC5: .long 1413754136 .long 1075388923 .text .globl _fftwf_mktriggen _fftwf_mktriggen: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $76, %esp call ___i686.get_pc_thunk.bx "L00000000002$pb": movl 96(%esp), %esi movl $36, (%esp) call L_fftwf_malloc_plain$stub movl %eax, %edi movl 100(%esp), 
%eax movl %eax, 32(%edi) movl $0, 28(%edi) movl $0, 24(%edi) movl $0, (%edi) movl $0, 8(%edi) cmpl $2, %esi je L41 cmpl $3, %esi je L42 subl $1, %esi je L100 L115: movl (%edi), %ecx testl %ecx, %ecx je L117 L89: movl 8(%edi), %edx testl %edx, %edx je L118 movl %edi, %eax addl $76, %esp popl %ebx popl %esi popl %edi popl %ebp ret L118: leal _rotate_generic-"L00000000002$pb"(%ebx), %eax movl %eax, 8(%edi) movl %edi, %eax addl $76, %esp popl %ebx popl %esi popl %edi popl %ebp ret L41: movl 100(%esp), %eax testl %eax, %eax jle L103 movl 100(%esp), %eax xorl %ecx, %ecx L44: addl $1, %ecx sarl $2, %eax testl %eax, %eax jg L44 movl $1, 48(%esp) sall %cl, 48(%esp) L46: movl %ecx, 12(%edi) movl 48(%esp), %edx movl %edx, 16(%edi) movl %edx, %eax subl $1, %eax movl %eax, 20(%edi) movl 100(%esp), %ecx leal -1(%ecx,%edx), %esi movl %esi, %eax cltd idivl 48(%esp) movl %eax, 32(%esp) movl 48(%esp), %eax sall $4, %eax movl %eax, (%esp) call L_fftwf_malloc_plain$stub movl %eax, 24(%edi) movl 32(%esp), %eax sall $4, %eax movl %eax, (%esp) call L_fftwf_malloc_plain$stub movl %eax, 40(%esp) movl %eax, 28(%edi) movl 48(%esp), %ebp testl %ebp, %ebp jle L47 movl 100(%esp), %edx sall $2, %edx movl %edx, 52(%esp) fildl 52(%esp) movl $0, 44(%esp) movl $0, 56(%esp) fldl LC5-"L00000000002$pb"(%ebx) movl 24(%edi), %ebp L49: movl 56(%esp), %edx testl %edx, %edx js L105 L50: movl 52(%esp), %eax subl %edx, %eax cmpl %eax, %edx jg L106 movl %edx, %eax xorl %esi, %esi movl %eax, %ecx subl 100(%esp), %ecx testl %ecx, %ecx jle L104 L55: orl $2, %esi movl 100(%esp), %edx subl %ecx, %edx cmpl %edx, %ecx jg L58 L107: movl %ecx, %edx fld %st(0) pushl %edx fimull (%esp) addl $4, %esp fdiv %st(2), %st fsincos fld %st(0) testl $1, %esi jne L111 L120: fstp %st(0) fxch %st(1) L61: testl $2, %esi je L65 fchs fxch %st(1) L65: andl $4, %esi je L66 fchs L66: fxch %st(1) fstpl (%ebp) fstpl 8(%ebp) addl $1, 44(%esp) addl $4, 56(%esp) addl $16, %ebp movl 44(%esp), %ecx cmpl %ecx, 48(%esp) jne L49 fstp %st(0) fstp 
%st(0) L47: movl 32(%esp), %esi testl %esi, %esi jle L68 movl 100(%esp), %ebp sall $2, %ebp pushl %ebp fildl (%esp) addl $4, %esp movl 16(%edi), %eax movl %eax, 36(%esp) movl 40(%esp), %esi movl $0, 60(%esp) fldl LC5-"L00000000002$pb"(%ebx) movl $0, 28(%esp) L70: movl 28(%esp), %edx sall $2, %edx leal (%edx,%ebp), %eax cmpl $-1, %edx cmovle %eax, %edx movl %ebp, %eax subl %edx, %eax cmpl %eax, %edx jg L109 movl %edx, %eax xorl %ecx, %ecx movl %eax, %edx subl 100(%esp), %edx testl %edx, %edx jle L108 L76: orl $2, %ecx movl 100(%esp), %eax subl %edx, %eax cmpl %eax, %edx jg L79 L110: movl %edx, %eax fld %st(0) pushl %eax fimull (%esp) addl $4, %esp fdiv %st(2), %st fsincos fld %st(0) testb $1, %cl jne L113 L119: fstp %st(0) fxch %st(1) L82: testb $2, %cl je L86 fchs fxch %st(1) L86: andb $4, %cl je L87 fchs L87: fxch %st(1) fstpl (%esi) fstpl 8(%esi) addl $1, 60(%esp) movl 36(%esp), %edx addl %edx, 28(%esp) addl $16, %esi movl 60(%esp), %ecx cmpl %ecx, 32(%esp) jne L70 fstp %st(0) fstp %st(0) L68: leal _cexpl_sqrtn_table-"L00000000002$pb"(%ebx), %eax movl %eax, 4(%edi) leal _rotate_sqrtn_table-"L00000000002$pb"(%ebx), %eax movl %eax, 8(%edi) movl (%edi), %ecx testl %ecx, %ecx jne L89 L117: leal _cexp_generic-"L00000000002$pb"(%ebx), %eax movl %eax, (%edi) jmp L89 L42: leal _cexpl_sincos-"L00000000002$pb"(%ebx), %eax movl %eax, 4(%edi) jmp L115 L100: leal _cexp_zero-"L00000000002$pb"(%ebx), %eax movl %eax, (%edi) leal _cexpl_zero-"L00000000002$pb"(%ebx), %eax movl %eax, 4(%edi) jmp L115 L109: movl $4, %ecx movl %eax, %edx subl 100(%esp), %edx testl %edx, %edx jg L76 L108: movl %eax, %edx movl 100(%esp), %eax subl %edx, %eax cmpl %eax, %edx jle L110 L79: orl $1, %ecx fld %st(0) pushl %eax fimull (%esp) addl $4, %esp fdiv %st(2), %st fsincos fld %st(0) testb $1, %cl je L119 L113: fstp %st(1) jmp L82 L106: movl $4, %esi movl %eax, %ecx subl 100(%esp), %ecx testl %ecx, %ecx jg L55 L104: movl %eax, %ecx movl 100(%esp), %edx subl %ecx, %edx cmpl %edx, %ecx jle L107 L58: orl 
$1, %esi fld %st(0) pushl %edx fimull (%esp) addl $4, %esp fdiv %st(2), %st fsincos fld %st(0) testl $1, %esi je L120 L111: fstp %st(1) jmp L61 L105: addl 52(%esp), %edx jmp L50 L103: xorl %ecx, %ecx movl $1, 48(%esp) jmp L46 .section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5 L_fftwf_ifree$stub: .indirect_symbol _fftwf_ifree hlt ; hlt ; hlt ; hlt ; hlt L_fftwf_ifree0$stub: .indirect_symbol _fftwf_ifree0 hlt ; hlt ; hlt ; hlt ; hlt L_fftwf_malloc_plain$stub: .indirect_symbol _fftwf_malloc_plain hlt ; hlt ; hlt ; hlt ; hlt .subsections_via_symbols .section __TEXT,__textcoal_nt,coalesced,pure_instructions .weak_definition ___i686.get_pc_thunk.bx .private_extern ___i686.get_pc_thunk.bx ___i686.get_pc_thunk.bx: movl (%esp), %ebx ret ----END---- -- Summary: gcc 4.0.1 creates unassembleable code for FFTW library Product: gcc Version: 4.0.1 Status: UNCONFIRMED Severity: major Priority: P3 Component: c AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: stp at create dot ucsb dot edu GCC build triplet: powerpc-apple-darwin8 GCC host triplet: i686-apple-darwin8 (Intel Mac) GCC target triplet: i686-apple-darwin8 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27264