------- Comment #9 from ubizjak at gmail dot com 2007-12-13 14:10 -------
Reduced C++ testcase that demonstrates the cause of the runtime difference:
--cut here--
#include <iostream>

extern double *dpb;

void s000005m_test(void)
{
  double result = 0.0;

  for (int n = 0; n < 2000; ++n)
    result += dpb[n];

#ifdef FUBAR
  std::cerr << "Blah" << result << std::endl;
#else
  std::cerr << result << std::endl;
#endif
}
--cut here--

g++ -O2:

...
.LCFI8:
        movl    dpb, %edx       # dpb, dpb.68
        fldz
.L4:
        faddl   (%edx,%eax,8)   #* dpb.68
        addl    $1, %eax        #, n
        cmpl    $2000, %eax     #, n
        jne     .L4     #,
        fstpl   4(%esp) #
        movl    $_ZSt4cerr, (%esp)      #,
        call    _ZNSo9_M_insertIdEERSoT_        #
...

g++ -O2 -DFUBAR:

...
.LCFI8:
        movl    dpb, %edx       # dpb, dpb.68
        fldz
        fstpl   -288(%ebp)      # result
        .p2align 4,,7
        .p2align 3
.L4:
        fldl    -288(%ebp)      # result
        faddl   (%edx,%eax,8)   #* dpb.68
        addl    $1, %eax        #, n
        cmpl    $2000, %eax     #, n
        fstpl   -288(%ebp)      # result
        jne     .L4     #,
        movl    $4, 8(%esp)     #,
        movl    $.LC1, 4(%esp)  #,
        movl    $_ZSt4cerr, (%esp)      #,
        call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i   #
        movl    $_ZSt4cerr, (%esp)      #,
        fldl    -288(%ebp)      # result
        fstpl   4(%esp) #
        call    _ZNSo9_M_insertIdEERSoT_        #
...

Please see what happens to the "result" variable in the -DFUBAR case. A similar
effect happens for -mfpmath=sse, but postreload gcse eliminates the load (but
not the store) from the loop (stack regs are not gcse'd after reload by design).

IMO, this is not target dependent, but a pure register-allocation (RA) problem.

-- 
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23322