------- Comment #3 from jacob at math dot jussieu dot fr 2008-02-27 15:49 ------- Thank you for the quick reply. I'm glad to hear that it's fixed and much improved in 4.3.
I tried what you said about __attribute__((flatten)) (I copied and pasted your code) and that didn't change the performance at all (with 4.2.3). I compiled with

    g++ -O3 -DDEPTH18 test.cpp -S -o test.s

and the generated assembly file starts as follows:

        .file "test.cpp"
        .section .ctors,"aw",@progbits
        .align 4
        .long _GLOBAL__I__Z8evaluatev
        .text
        .align 2
        .p2align 4,,15
.globl _Z8evaluatev
        .type _Z8evaluatev, @function
_Z8evaluatev:
.LFB1436:
        pushl %ebp
.LCFI0:
        movl %esp, %ebp
.LCFI1:
        pushl %edi
.LCFI2:
        pushl %esi
.LCFI3:
        xorl %esi, %esi
        pushl %ebx
.LCFI4:
        xorl %ebx, %ebx
        subl $172, %esp
.LCFI5:
        leal -84(%ebp), %edi
        .p2align 4,,7
.L2:
        leal -160(%ebp), %eax
        movl %ebx, -16(%ebp)
        movl %ebx, -20(%ebp)
        movl %ebx, -24(%ebp)
        movl %ebx, -28(%ebp)
        movl %ebx, -32(%ebp)
        movl %ebx, -36(%ebp)
        movl %ebx, -40(%ebp)
        movl %ebx, -44(%ebp)
        movl %ebx, -48(%ebp)
        movl %ebx, -52(%ebp)
        movl %ebx, -56(%ebp)
        movl %ebx, -60(%ebp)
        movl %ebx, -64(%ebp)
        movl %ebx, -68(%ebp)
        movl %ebx, -72(%ebp)
        movl %ebx, -76(%ebp)
        movl %ebx, -80(%ebp)
        movl %ebx, -84(%ebp)
        movl $72, 8(%esp)
        movl %edi, 4(%esp)
        movl %eax, (%esp)
        call memcpy
        movl -92(%ebp), %eax
        addl -96(%ebp), %eax
        addl -100(%ebp), %eax
        addl -104(%ebp), %eax
        addl -108(%ebp), %eax
        addl -112(%ebp), %eax
        addl -116(%ebp), %eax
        addl -120(%ebp), %eax
        addl -124(%ebp), %eax
        addl -128(%ebp), %eax
        addl -132(%ebp), %eax
        addl -136(%ebp), %eax
        addl -140(%ebp), %eax
        addl -144(%ebp), %eax
        addl -148(%ebp), %eax
        addl -152(%ebp), %eax
        addl -156(%ebp), %eax
        addl -160(%ebp), %eax
        addl %esi, %eax
        leal (%eax,%ebx), %esi
        addl $1, %ebx
        cmpl $1000000000, %ebx
        jne .L2
        addl $172, %esp
        movl %esi, %eax
        popl %ebx
        popl %esi
        popl %edi
        popl %ebp
        ret
.LFE1436:
        .size _Z8evaluatev, .-_Z8evaluatev

As you can see, there is a call to memcpy() which isn't present in your version. What can I do (if anything) to improve speed with g++ 4.2.3?

Also, isn't it a bit surprising to have a call in a function that has __attribute__((flatten))? The compilation doesn't produce any warning.
-- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35393