------- Comment #6 from uros at kss-loka dot si  2006-08-16 12:15 -------
IMO the problem here is in IVopts. Using gcc-3.x, the innermost loop compiles
to:

.L15:
        movl    (%edi,%edx,4), %eax
        fldl    (%ebp,%edx,8)
        addl    $1, %edx
        fmull   (%esi,%eax,8)
        cmpl    %ecx, %edx
        faddp   %st, %st(1)
        jl      .L15

and with current SVN gcc-4.2 into:

.L12:
        movl    (%ecx), %eax
        fldl    (%ebp,%eax,8)
        fmull   (%edx)
        faddp   %st, %st(1)
        addl    $1, %ebx
        addl    $4, %ecx
        addl    $8, %edx
        cmpl    %esi, %ebx
        jne     .L12

With -fno-ivopts added, this loop gets compiled into:

.L12:
        movl    (%edi,%edx,4), %eax
        fldl    (%esi,%eax,8)
        fmull   (%ebp,%edx,8)
        faddp   %st, %st(1)
        addl    $1, %edx
        cmpl    %edx, %ecx
        jg      .L12

Timings (-O3 -march=pentium4 -fomit-frame-pointer):

gcc-3.2: 0m2.301s
gcc-4.2: 0m2.713s
gcc-4.2 + -fno-ivopts: 0m2.473s

with:

gcc version 3.2 20020903 (Red Hat Linux 8.0 3.2-7)
gcc version 4.2.0 20060816 (experimental)

I think that the remaining time difference is due to the strange loop above the innermost one:
gcc-3.2:

        fld     %st(0)
.L16:
        movl    36(%esp), %eax
        fld     %st(0)
        movl    4(%eax,%ebx,4), %ecx
        movl    (%eax,%ebx,4), %edx
        cmpl    %ecx, %edx
        jge     .L23
.L15:
        movl    (%edi,%edx,4), %eax
        fldl    (%ebp,%edx,8)
        addl    $1, %edx
        fmull   (%esi,%eax,8)
        cmpl    %ecx, %edx
        faddp   %st, %st(1)
        jl      .L15
.L23:
        movl    28(%esp), %eax
        fstpl   (%eax,%ebx,8)
        addl    $1, %ebx
        cmpl    24(%esp), %ebx
        jl      .L16

========
gcc-4.2:

.L8:
        movl    36(%esp), %edx
        movl    (%edx,%edi,4), %eax
        movl    4(%edx,%edi,4), %esi
        fldz
        cmpl    %esi, %eax
        jge     .L11
        fstp    %st(0)
        movl    40(%esp), %ebx
        leal    (%ebx,%eax,4), %ecx
        movl    32(%esp), %ebx
        leal    (%ebx,%eax,8), %edx
        fldz
        xorl    %ebx, %ebx
        subl    %eax, %esi
.L12:
        movl    (%ecx), %eax
        fldl    (%ebp,%eax,8)
        fmull   (%edx)
        faddp   %st, %st(1)
        addl    $1, %ebx
        addl    $4, %ecx
        addl    $8, %edx
        cmpl    %esi, %ebx
        jne     .L12
.L11:
        movl    28(%esp), %eax
        fstpl   (%eax,%edi,8)
        addl    $1, %edi
        cmpl    24(%esp), %edi
        jne     .L8

========
and gcc-4.2 -fno-ivopts:

.L8:
        leal    (%ebx,%ebx), %eax
        movl    40(%esp), %edx
        movl    (%edx,%eax,2), %edx
        movl    %edx, (%esp)
        movl    40(%esp), %edx
        movl    4(%edx,%eax,2), %ecx
        fldz
        cmpl    %ecx, (%esp)
        jge     .L11
        fstp    %st(0)
        movl    (%esp), %edx
        fldz
.L12:
        movl    (%edi,%edx,4), %eax
        fldl    (%esi,%eax,8)
        fmull   (%ebp,%edx,8)
        faddp   %st, %st(1)
        addl    $1, %edx
        cmpl    %edx, %ecx
        jg      .L12
.L11:
        movl    32(%esp), %ecx
        fstpl   (%ecx,%ebx,8)
        addl    $1, %ebx
        cmpl    %ebx, 28(%esp)
        jg      .L8


-- 

uros at kss-loka dot si changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |uros at kss-loka dot si
             Status|UNCONFIRMED                 |NEW
     Ever Confirmed|0                           |1
   Last reconfirmed|0000-00-00 00:00:00         |2006-08-16 12:15:56
               date|                            |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21676

Reply via email to