------- Additional Comments From tlm at daimi dot au dot dk 2005-05-31 20:45 ------- (In reply to comment #1) > The first testcase is fixed in 4.0.0. I have not looked > into the full testcase.
Installed gcc 4.0.0 (a bit hard with the current version) OK - I was wrong before (so please do not close this). The simple situation is fixed - however there are still the same problems with the knight-example. int unrolled_knight_count(unsigned char* board) { int count = 0; for (int bp=0;bp<2;bp++) // reduces to 2 just for the example { if (board[bp]==WHITE_KNIGHT) { if (bp%8>1 && bp/8>0) count++; if (bp%8>0 && bp/8>1) count++; if (bp%8<6 && bp/8>0) count++; if (bp%8<7 && bp/8>1) count++; if (bp%8>1 && bp/8<7) count++; if (bp%8>0 && bp/8<6) count++; if (bp%8<6 && bp/8<7) count++; if (bp%8<7 && bp/8<6) count++; } } return count; } is compiled to .text .align 2 .p2align 4,,15 .globl _Z26unrolled_knight_countPh .type _Z26auto_unrolled_knight_countPh, @function _Z26auto_unrolled_knight_countPh: .LFB2: pushl %ebp .LCFI0: xorl %eax, %eax movl %esp, %ebp .LCFI1: movl 8(%ebp), %edx cmpb $5, (%edx) je .L10 .L6: cmpb $5, 1(%edx) je .L11 popl %ebp ret .p2align 4,,7 .L11: popl %ebp addl $3, %eax .p2align 4,,6 ret .p2align 4,,7 .L10: movl $2, %eax .p2align 4,,7 jmp .L6 .LFE2: .size _Z26auto_unrolled_knight_countPh, .-_Z26auto_unrolled_knight_countPh .ident "GCC: (GNU) 4.0.0" .section .note.GNU-stack,"",@progbits Now if I manually expand the loop before compiling int unrolled_knight_count(unsigned char* board) { int count = 0; // for (int bp=0;bp<64;bp++) // We expand 2 as before.. if (board[0]==WHITE_KNIGHT) { if (0%8>1 && 0/8>0) count++; if (0%8>0 && 0/8>1) count++; if (0%8<6 && 0/8>0) count++; if (0%8<7 && 0/8>1) count++; if (0%8>1 && 0/8<7) count++; if (0%8>0 && 0/8<6) count++; if (0%8<6 && 0/8<7) count++; if (0%8<7 && 0/8<6) count++; } if (board[1]==WHITE_KNIGHT) { if (1%8>1 && 1/8>0) count++; if (1%8>0 && 1/8>1) count++; if (1%8<6 && 1/8>0) count++; if (1%8<7 && 1/8>1) count++; if (1%8>1 && 1/8<7) count++; if (1%8>0 && 1/8<6) count++; if (1%8<6 && 1/8<7) count++; if (1%8<7 && 1/8<6) count++; } return count; } The result is much better. 
(Not that I know assembler code) I have WHITE_KNIGHT = 5 (as you might have seen from the assembler code) and when I timed I had knights on positions 24, 44, 55, 56. And the code is 400-500% faster - so it will really improve the speed ... .text .align 2 .p2align 4,,15 .globl _Z26unrolled_knight_countPh .type _Z26auto_unrolled_knight_countPh, @function _Z26unrolled_knight_countPh: .LFB2: pushl %ebp .LCFI0: xorl %eax, %eax movl %esp, %ebp .LCFI1: movl 8(%ebp), %edx cmpb $5, (%edx) sete %al addl %eax, %eax cmpb $5, 1(%edx) je .L9 popl %ebp ret .p2align 4,,7 .L9: popl %ebp addl $3, %eax ret Again thanks. I do not want to sound like an unhappy gcc-user (I admire the work you are doing). -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21827