------- Additional Comments From pinskia at gcc dot gnu dot org 2004-12-20
19:47 -------
(In reply to comment #8)
> The reduced testcase (well, heavily modified) looks like:
Here is the resulting loop asm (on x86, where the problem shows up worse
than on ppc; on ppc the code is the same because the RA does its work
better there):
.L7:
movl %edx, %ecx
.L3:
movl %ecx, %edx
movl %ebx, %eax
imull %edi, %edx
imull %esi, %eax
imull %esi, %ecx
subl %eax, %edx
movl %ebx, %eax
movl %ecx, %ebx
imull %edi, %eax
addl %eax, %ebx
je .L7
Compared to 3.4.0:
.L2:
movl 16(%ebp), %ebx
movl %eax, %ecx
movl %esi, %edx
imull 16(%ebp), %eax
imull %edi, %ecx
imull %esi, %ebx
imull %edi, %edx
movl %ebx, %esi
subl %ecx, %esi
addl %edx, %eax
je .L2
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19038