------- Comment #64 from lucier at math dot purdue dot edu 2009-05-06 20:43 ------- In answer to comment 60, here's the command line where I added -fforward-propagate -fno-move-loop-invariants:
/pkgs/gcc-mainline/bin/gcc -save-temps -I../include -I. -Wall -W -Wno-unused -O1 -fno-math-errno -fschedule-insns2 -fno-trapping-math -fno-strict-aliasing -fwrapv -fomit-frame-pointer -fPIC -fno-common -mieee-fp -fforward-propagate -fno-move-loop-invariants -DHAVE_CONFIG_H -D___PRIMAL -D___LIBRARY -D___GAMBCDIR="\"/usr/local/Gambit-C/v4.1.2\"" -D___SYS_TYPE_CPU="\"x86_64\"" -D___SYS_TYPE_VENDOR="\"unknown\"" -D___SYS_TYPE_OS="\"linux-gnu\"" -c _num.c

here's the compiler:

/pkgs/gcc-mainline/bin/gcc -v
Using built-in specs.
Target: x86_64-unknown-linux-gnu
Configured with: /tmp/lucier/gcc/mainline/configure --enable-checking=release --prefix=/pkgs/gcc-mainline --enable-languages=c
Thread model: posix
gcc version 4.5.0 20090506 (experimental) [trunk revision 147199] (GCC)

and the runtime didn't change (substantially)

132 ms cpu time (132 user, 0 system)

and the loop looks pretty much just as bad (it's 117 instructions long, by my count):

.L2752:
        movq %rcx, %rdx
        addq 8(%rax), %rdx
        leaq 4(%rcx), %rdi
        movq %rdx, -8(%rax)
        leaq 4(%rdx), %rbx
        addq 8(%rax), %rdx
        movq %rbx, -16(%rax)
        movq %rdx, -24(%rax)
        leaq 4(%rdx), %rbx
        addq 8(%rax), %rdx
        movq %rbx, -32(%rax)
        movq %rdx, -40(%rax)
        leaq 4(%rdx), %rbx
        movq 40(%rax), %rdx
        movq %rbx, -48(%rax)
        movsd 7(%rdx,%rbx,2), %xmm9
        movq -40(%rax), %rbx
        leaq 7(%rdx,%rcx,2), %r8
        addq $8, %rcx
        movsd (%r8), %xmm4
        cmpq %rcx, %r13
        movsd 7(%rdx,%rbx,2), %xmm11
        movq -32(%rax), %rbx
        movsd 7(%rdx,%rbx,2), %xmm5
        movq -24(%rax), %rbx
        movsd 7(%rdx,%rbx,2), %xmm7
        movq -16(%rax), %rbx
        movsd 7(%rdx,%rbx,2), %xmm14
        movq -8(%rax), %rbx
        movsd 7(%rdx,%rbx,2), %xmm6
        leaq (%rdi,%rdi), %rbx
        movsd 7(%rbx,%rdx), %xmm8
        movq 24(%rax), %rdx
        movapd %xmm6, %xmm13
        movsd 15(%rdx), %xmm1
        movsd 7(%rdx), %xmm2
        movapd %xmm1, %xmm10
        movsd 31(%rdx), %xmm3
        movapd %xmm2, %xmm12
        mulsd %xmm11, %xmm10
        mulsd %xmm9, %xmm12
        mulsd %xmm2, %xmm11
        mulsd %xmm1, %xmm9
        movsd 23(%rdx), %xmm0
        addsd %xmm12, %xmm10
        movapd %xmm2, %xmm12
        mulsd %xmm7, %xmm2
        subsd %xmm9, %xmm11
        movapd %xmm1, %xmm9
        mulsd %xmm5, %xmm12
        mulsd %xmm5, %xmm1
        movapd %xmm8, %xmm5
        mulsd %xmm7, %xmm9
        movapd %xmm4, %xmm7
        subsd %xmm11, %xmm13
        addsd %xmm6, %xmm11
        movsd .LC5(%rip), %xmm6
        subsd %xmm1, %xmm2
        movapd %xmm0, %xmm1
        addsd %xmm12, %xmm9
        movapd %xmm14, %xmm12
        xorpd %xmm3, %xmm6
        subsd %xmm10, %xmm12
        mulsd %xmm13, %xmm1
        subsd %xmm2, %xmm7
        addsd %xmm4, %xmm2
        movapd %xmm6, %xmm4
        addsd %xmm14, %xmm10
        mulsd %xmm13, %xmm6
        mulsd %xmm12, %xmm4
        subsd %xmm9, %xmm5
        mulsd %xmm0, %xmm12
        addsd %xmm8, %xmm9
        movapd %xmm0, %xmm8
        mulsd %xmm11, %xmm0
        addsd %xmm1, %xmm4
        movapd %xmm3, %xmm1
        mulsd %xmm10, %xmm3
        subsd %xmm12, %xmm6
        mulsd %xmm11, %xmm1
        mulsd %xmm10, %xmm8
        subsd %xmm3, %xmm0
        addsd %xmm1, %xmm8
        movapd %xmm2, %xmm1
        addsd %xmm0, %xmm1
        subsd %xmm0, %xmm2
        movapd %xmm7, %xmm0
        subsd %xmm6, %xmm7
        addsd %xmm6, %xmm0
        movsd %xmm1, (%r8)
        movapd %xmm9, %xmm1
        movq 40(%rax), %rdx
        subsd %xmm8, %xmm9
        addsd %xmm8, %xmm1
        movsd %xmm1, 7(%rbx,%rdx)
        movq -8(%rax), %rbx
        movq 40(%rax), %rdx
        movsd %xmm2, 7(%rdx,%rbx,2)
        movq -16(%rax), %rbx
        movq 40(%rax), %rdx
        movsd %xmm9, 7(%rdx,%rbx,2)
        movq -24(%rax), %rbx
        movq 40(%rax), %rdx
        movsd %xmm0, 7(%rdx,%rbx,2)
        movapd %xmm5, %xmm0
        movq -32(%rax), %rbx
        movq 40(%rax), %rdx
        subsd %xmm4, %xmm5
        addsd %xmm4, %xmm0
        movsd %xmm0, 7(%rdx,%rbx,2)
        movq -40(%rax), %rbx
        movq 40(%rax), %rdx
        movsd %xmm7, 7(%rdx,%rbx,2)
        movq -48(%rax), %rbx
        movq 40(%rax), %rdx
        movsd %xmm5, 7(%rdx,%rbx,2)
        jg .L2752
        movq %rdi, %r13
.L2751:

--
lucier at math dot purdue dot edu changed:

What |Removed |Added
----------------------------------------------------------------------------
Status|WAITING |NEW

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33928