------- Comment #9 from ubizjak at gmail dot com  2007-04-03 13:32 -------
(In reply to comment #8)
> what's the generated code for -ffast-math? in principle i don't see a reason
> why it should make any difference...

Trying to answer your question, I have played a bit with compile flags and things are getting really strange:

[EMAIL PROTECTED] test]$ gcc -O2 -mfpmath=387 pr19780.c
[EMAIL PROTECTED] test]$ time ./a.out
Start?
Stop!
Result = 0.000000, 0.000000, 1.000000

real    0m1.211s
user    0m1.212s
sys     0m0.004s

[EMAIL PROTECTED] test]$ gcc -O2 -mfpmath=387 -msse pr19780.c
[EMAIL PROTECTED] test]$ time ./a.out
Start?
Stop!
Result = 0.000000, 0.000000, 1.000000

real    0m0.555s
user    0m0.552s
sys     0m0.004s

Note that -msse should have no effect on calculations. The difference between asm dumps is:

--- pr19780.s   2007-04-03 14:28:14.000000000 +0200
+++ pr19780.s_  2007-04-03 14:28:01.000000000 +0200
@@ -17,69 +17,61 @@
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ecx
-       subl    $84, %esp
+       subl    $100, %esp
        movl    $.LC0, (%esp)
        call    puts
        xorl    %eax, %eax
-       fldz
        fld1
        fsts    -16(%ebp)
+       fldz
+       fsts    -12(%ebp)
+       fld     %st(0)
        fld     %st(1)
-       fld     %st(2)
-       fld     %st(3)
        jmp     .L2
        .p2align 4,,7
 .L7:
-       fstp    %st(5)
-       fstp    %st(0)
-       fxch    %st(1)
-       fxch    %st(2)
-       fxch    %st(3)
-       fxch    %st(4)
        fxch    %st(3)
+       fxch    %st(2)
 .L2:
-       fld     %st(1)
+       fld     %st(2)
        addl    $1, %eax
-       fmul    %st(3), %st
+       fmul    %st(1), %st
        cmpl    $100000000, %eax
-       fstps   -12(%ebp)
+       flds    -12(%ebp)
+       fmul    %st(5), %st
+       fsubrp  %st, %st(1)
+       flds    -12(%ebp)
+       fmul    %st(3), %st
        flds    -16(%ebp)
-       fmul    %st(1), %st
-       fsubrs  -12(%ebp)
-       fstps   -12(%ebp)
-       fmul    %st(4), %st
-       fld     %st(3)
        fmul    %st(3), %st
        fsubrp  %st, %st(1)
        flds    -16(%ebp)
-       fmulp   %st, %st(4)
-       fxch    %st(1)
+       fmul    %st(6), %st
+       fxch    %st(5)
        fmul    %st(4), %st
-       fsubrp  %st, %st(3)
-       flds    -16(%ebp)
-       fld     %st(3)
+       fsubrp  %st, %st(5)
        fxch    %st(2)
-       fsts    -16(%ebp)
-       flds    -12(%ebp)
+       fstps   -12(%ebp)
+       fxch    %st(2)
+       fstps   -16(%ebp)
        jne     .L7
-       fstp    %st(0)
-       fstp    %st(5)
-       fstp    %st(0)
-       fstp    %st(0)
-       fstp    %st(0)
+       fstp    %st(3)
+       fxch    %st(1)
        movl    $.LC3, (%esp)
        fstps   -40(%ebp)
+       fxch    %st(1)
        fstps   -56(%ebp)
+       fstps   -72(%ebp)
        call    puts
        flds    -40(%ebp)
        fstpl   20(%esp)
        flds    -56(%ebp)
        fstpl   12(%esp)
-       flds    -12(%ebp)
+       flds    -72(%ebp)
        fstpl   4(%esp)
        movl    $.LC4,
(%esp)
        call    printf
-       addl    $84, %esp
+       addl    $100, %esp
        xorl    %eax, %eax
        popl    %ecx
        popl    %ebp

where (+++) is the version compiled with -msse.


--

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19780