------- Comment #9 from ubizjak at gmail dot com  2007-12-13 14:10 -------
Reduced c++ testcase that is the cause of the runtime difference:

--cut here--
#include <iostream>

extern double *dpb;

// Reduced testcase: sums the first 2000 doubles reachable through the
// external pointer dpb and writes the total to std::cerr.
// Defining FUBAR only changes the output statement (a string literal is
// inserted ahead of the value); the summation loop is the same in both builds.
void s000005m_test(void)
{
  double result = 0.0;  // running sum; the variable whose code generation is compared

  // Accumulate dpb[0] .. dpb[1999] into result.
  for (int n = 0; n < 2000; ++n)
    result += dpb[n];

#ifdef FUBAR
    // -DFUBAR variant: a string insertion precedes the double insertion.
    std::cerr << "Blah" << result << std::endl;
#else
    // Default variant: only the sum is printed.
    std::cerr << result << std::endl;
#endif
}
--cut here--

g++ -O2:

        ...
.LCFI8:
        movl    dpb, %edx       # dpb, dpb.68
        fldz
.L4:
        faddl   (%edx,%eax,8)   #* dpb.68
        addl    $1, %eax        #, n
        cmpl    $2000, %eax     #, n
        jne     .L4     #,
        fstpl   4(%esp) #
        movl    $_ZSt4cerr, (%esp)      #,
        call    _ZNSo9_M_insertIdEERSoT_        #
        ...

g++ -O2 -DFUBAR:

        ...
.LCFI8:
        movl    dpb, %edx       # dpb, dpb.68
        fldz
        fstpl   -288(%ebp)      # result
        .p2align 4,,7
        .p2align 3
.L4:
        fldl    -288(%ebp)      # result
        faddl   (%edx,%eax,8)   #* dpb.68
        addl    $1, %eax        #, n
        cmpl    $2000, %eax     #, n
        fstpl   -288(%ebp)      # result
        jne     .L4     #,
        movl    $4, 8(%esp)     #,
        movl    $.LC1, 4(%esp)  #,
        movl    $_ZSt4cerr, (%esp)      #,
        call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i  #
        movl    $_ZSt4cerr, (%esp)      #,
        fldl    -288(%ebp)      # result
        fstpl   4(%esp) #
        call    _ZNSo9_M_insertIdEERSoT_        #
        ...

Please see what happens to the "result" variable in the -DFUBAR case.

A similar effect happens for -mfpmath=sse, but postreload gcse eliminates the
load (but not the store) from the loop (stack regs are not gcse'd after reload
by design). IMO, this is not target dependent, but a pure RA (register
allocation) problem.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23322

Reply via email to