------- Comment #1 from pinskia at gcc dot gnu dot org 2006-11-30 18:38 -------
Here is the loop that is generated:

.L4:
        flds    (%ebx,%edx,4)
        leal    1(%edx), %eax
        fmul    %st(1), %st
        fdivs   (%ecx,%edx,4)
        fsqrt
        fstps   (%esi,%edx,4)
        flds    (%ebx,%eax,4)
        fmul    %st(1), %st
        fdivs   (%ecx,%eax,4)
        fsqrt
        fstps   (%esi,%eax,4)
        leal    2(%edx), %eax
        flds    (%ebx,%eax,4)
        fmul    %st(1), %st
        fdivs   (%ecx,%eax,4)
        fsqrt
        fstps   (%esi,%eax,4)
        leal    3(%edx), %eax
        flds    (%ebx,%eax,4)
        fmul    %st(1), %st
        fdivs   (%ecx,%eax,4)
        fsqrt
        fstps   (%esi,%eax,4)
        leal    4(%edx), %eax
        flds    (%ebx,%eax,4)
        fmul    %st(1), %st
        fdivs   (%ecx,%eax,4)
        fsqrt
        fstps   (%esi,%eax,4)
        leal    5(%edx), %eax
        flds    (%ebx,%eax,4)
        fmul    %st(1), %st
        fdivs   (%ecx,%eax,4)
        fsqrt
        fstps   (%esi,%eax,4)
        leal    6(%edx), %eax
        flds    (%ebx,%eax,4)
        fmul    %st(1), %st
        fdivs   (%ecx,%eax,4)
        fsqrt
        fstps   (%esi,%eax,4)
        leal    7(%edx), %eax
        addl    $8, %edx
        cmpl    %edi, %edx
        flds    (%ebx,%eax,4)
        fmul    %st(1), %st
        fdivs   (%ecx,%eax,4)
        fsqrt
        fstps   (%esi,%eax,4)
        jne     .L4

There are some issues with `leal` instructions that should not be there, but I think what ICC does is calculate only CS[1] instead of all of CS[i].

-- 
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=30032