/* External helper returning an int; only declared here, defined elsewhere.  */
int bar (void);
/* External helper taking a pointer to int; only declared here, defined
   elsewhere.  */
void foo (int *);
/* File-local table of 10 ints (static storage, so zero-initialized).  */
static int s[10];
/* Reduced test case: fills a local 100-element array with a single
   loop-invariant value and hands the array to foo ().

   i1..i6 are used as indices into the file-scope table s; a seventh index
   is obtained from bar ().  Nothing here range-checks the indices against
   the 10 elements of s -- presumably callers pass values in 0..9 (TODO
   confirm).

   NOTE(review): this appears to be a compiler code-generation reproducer,
   so the exact statement shape is likely significant; do not restructure
   without checking the generated assembly.  */
void foobar (int i1, int i2, int i3, int i4, int i5, int i6)
{
int a[100];
int i, i7;
i7 = bar ();
/* Second call to bar (); its return value is discarded.  */
bar ();
/* The right-hand side does not depend on i, so every element of a
   receives the same sum of seven s[] entries.  */
for (i = 0; i < 100; i++)
a[i] = s[i1] + s[i2] + s[i3] + s[i4] + s[i5] + s[i6] + s[i7];
/* Pass the address of the first element of the filled array.  */
foo (&a[0]);
return;
}
Comparing the assembly generated at -O2 by mainline with that generated by the dataflow branch shows:
--- t.i.trunk 2007-02-21 11:31:09.663252586 +0100
+++ t.i.df 2007-02-21 11:31:10.548064364 +0100
@@ -37,7 +37,6 @@
movl s(,%rbx,4), %edx
addl s(,%rcx,4), %edx
movslq %r12d,%r12
- leaq 16(%rsp), %rdi
addl s(,%r13,4), %edx
addl s(,%r14,4), %edx
addl s(,%r15,4), %edx
@@ -46,10 +45,11 @@
addl s(,%r12,4), %edx
.p2align 4,,7
.L2:
- movl %edx, (%rdi,%rax,4)
+ movl %edx, 16(%rsp,%rax,4)
addq $1, %rax
cmpq $100, %rax
jne .L2
+ leaq 16(%rsp), %rdi
call foo
addq $424, %rsp
popq %rbx
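That is, the dataflow branch chooses a more expensive addressing mode inside
the loop, not noticing that the address 16(%rsp) can be (G)CSEd: mainline
computes it once into %rdi before the loop and reuses it both for the stores
and as the argument to foo. This makes the above loop run 33% slower on x86_64.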
--
Summary: [dataflow] Bad interaction with addressing mode
selection and regalloc
Product: gcc
Version: 4.3.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: middle-end
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: rguenth at gcc dot gnu dot org
GCC target triplet: x86_64-*-*
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=30907