------- Comment #6 from uros at kss-loka dot si 2006-08-16 12:15 -------

IMO, the problem here is in IVopts. Using gcc-3.x, the innermost loop compiles to:

.L15:
        movl    (%edi,%edx,4), %eax
        fldl    (%ebp,%edx,8)
        addl    $1, %edx
        fmull   (%esi,%eax,8)
        cmpl    %ecx, %edx
        faddp   %st, %st(1)
        jl      .L15

and with current SVN gcc-4.2 into:

.L12:
        movl    (%ecx), %eax
        fldl    (%ebp,%eax,8)
        fmull   (%edx)
        faddp   %st, %st(1)
        addl    $1, %ebx
        addl    $4, %ecx
        addl    $8, %edx
        cmpl    %esi, %ebx
        jne     .L12

Adding -fno-ivopts, this loop gets compiled into:

.L12:
        movl    (%edi,%edx,4), %eax
        fldl    (%esi,%eax,8)
        fmull   (%ebp,%edx,8)
        faddp   %st, %st(1)
        addl    $1, %edx
        cmpl    %edx, %ecx
        jg      .L12

Timings (-O3 -march=pentium4 -fomit-frame-pointer):

gcc-3.2:                0m2.301s
gcc-4.2:                0m2.713s
gcc-4.2 + -fno-ivopts:  0m2.473s

with:

gcc version 3.2 20020903 (Red Hat Linux 8.0 3.2-7)
gcc version 4.2.0 20060816 (experimental)

I think that the remaining time difference is due to the strange loop above the innermost one:

gcc-3.2:

        fld     %st(0)
.L16:
        movl    36(%esp), %eax
        fld     %st(0)
        movl    4(%eax,%ebx,4), %ecx
        movl    (%eax,%ebx,4), %edx
        cmpl    %ecx, %edx
        jge     .L23
.L15:
        movl    (%edi,%edx,4), %eax
        fldl    (%ebp,%edx,8)
        addl    $1, %edx
        fmull   (%esi,%eax,8)
        cmpl    %ecx, %edx
        faddp   %st, %st(1)
        jl      .L15
.L23:
        movl    28(%esp), %eax
        fstpl   (%eax,%ebx,8)
        addl    $1, %ebx
        cmpl    24(%esp), %ebx
        jl      .L16

========

gcc-4.2:

.L8:
        movl    36(%esp), %edx
        movl    (%edx,%edi,4), %eax
        movl    4(%edx,%edi,4), %esi
        fldz
        cmpl    %esi, %eax
        jge     .L11
        fstp    %st(0)
        movl    40(%esp), %ebx
        leal    (%ebx,%eax,4), %ecx
        movl    32(%esp), %ebx
        leal    (%ebx,%eax,8), %edx
        fldz
        xorl    %ebx, %ebx
        subl    %eax, %esi
.L12:
        movl    (%ecx), %eax
        fldl    (%ebp,%eax,8)
        fmull   (%edx)
        faddp   %st, %st(1)
        addl    $1, %ebx
        addl    $4, %ecx
        addl    $8, %edx
        cmpl    %esi, %ebx
        jne     .L12
.L11:
        movl    28(%esp), %eax
        fstpl   (%eax,%edi,8)
        addl    $1, %edi
        cmpl    24(%esp), %edi
        jne     .L8

========

and gcc-4.2 -fno-ivopts:

.L8:
        leal    (%ebx,%ebx), %eax
        movl    40(%esp), %edx
        movl    (%edx,%eax,2), %edx
        movl    %edx, (%esp)
        movl    40(%esp), %edx
        movl    4(%edx,%eax,2), %ecx
        fldz
        cmpl    %ecx, (%esp)
        jge     .L11
        fstp    %st(0)
        movl    (%esp), %edx
        fldz
.L12:
        movl    (%edi,%edx,4), %eax
        fldl    (%esi,%eax,8)
        fmull   (%ebp,%edx,8)
        faddp   %st, %st(1)
        addl    $1, %edx
        cmpl    %edx, %ecx
        jg      .L12
.L11:
        movl    32(%esp), %ecx
        fstpl   (%ecx,%ebx,8)
        addl    $1, %ebx
        cmpl    %ebx, 28(%esp)
        jg      .L8

--

uros at kss-loka dot si changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |uros at kss-loka dot si
             Status|UNCONFIRMED                 |NEW
     Ever Confirmed|0                           |1
   Last reconfirmed|0000-00-00 00:00:00         |2006-08-16 12:15:56
               date|                            |

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21676