------- Additional Comments From tlm at daimi dot au dot dk 2005-07-19 17:02 ------- (In reply to comment #1) > The first testcase is fixed in 4.0.0. (Though there is a regression on the mainline). I have not looked > into the full testcase.
There have been no further reactions to this bug / request, so I am giving a bit more information (and hopefully motivation) to move it forward to a solution. I have written the following code: auto_unrolled_knight_count8 and t_auto_unrolled_knight_count9 have only one difference. The first loop goes to 8; the second loop goes to 9. If I manually unroll (meaning replacing the loop index with constants, up to 64 - it is a chess problem), the code is exactly like the code generated in the up-to-eight example. The code generated for the 9 example is, in my opinion, quite bad. (It does work - but I consider the finest task of unrolling to be eliminating what is easily known at compile time to be impossible.) The code is normally at least 4-5 times slower than the above code! The source is like this : #define WHITE_KNIGHT 5 int auto_unrolled_knight_count8(unsigned char* board) { int count = 0; for (int bp=0;bp<8;++bp) { if (board[bp]==WHITE_KNIGHT) { if (bp%8>1 && bp/8>0) count++; if (bp%8>0 && bp/8>1) count++; if (bp%8<6 && bp/8>0) count++; if (bp%8<7 && bp/8>1) count++; if (bp%8>1 && bp/8<7) count++; if (bp%8>0 && bp/8<6) count++; if (bp%8<6 && bp/8<7) count++; if (bp%8<7 && bp/8<6) count++; } } return count; } int t_auto_unrolled_knight_count9(unsigned char* board) { int count = 0; for (int bp=0;bp<9;++bp) { if (board[bp]==WHITE_KNIGHT) { if (bp%8>1 && bp/8>0) count++; if (bp%8>0 && bp/8>1) count++; if (bp%8<6 && bp/8>0) count++; if (bp%8<7 && bp/8>1) count++; if (bp%8>1 && bp/8<7) count++; if (bp%8>0 && bp/8<6) count++; if (bp%8<6 && bp/8<7) count++; if (bp%8<7 && bp/8<6) count++; } } return count; } Assembly : (Compiled with -O3 and -funroll-loops) .file "all_in_one.cpp" .text .align 2 .p2align 4,,15 .globl _Z27auto_unrolled_knight_count8Ph .type _Z27auto_unrolled_knight_count8Ph, @function _Z27auto_unrolled_knight_count8Ph: .LFB2: pushl %ebp .LCFI0: xorl %eax, %eax movl %esp, %ebp .LCFI1: movl 8(%ebp), %edx cmpb $5, (%edx) je .L22 .L6: cmpb $5, 1(%edx) je .L23 .L8: cmpb $5, 2(%edx) je .L24 .L10: cmpb 
$5, 3(%edx) .p2align 4,,5 je .L25 .L12: cmpb $5, 4(%edx) .p2align 4,,5 je .L26 .L14: cmpb $5, 5(%edx) .p2align 4,,5 je .L27 .L16: cmpb $5, 6(%edx) .p2align 4,,5 je .L28 .L18: cmpb $5, 7(%edx) .p2align 4,,5 je .L29 popl %ebp .p2align 4,,6 ret .p2align 4,,7 .L29: popl %ebp addl $2, %eax .p2align 4,,6 ret .p2align 4,,7 .L28: addl $3, %eax .p2align 4,,7 jmp .L18 .p2align 4,,7 .L27: addl $4, %eax .p2align 4,,5 jmp .L16 .p2align 4,,7 .L26: addl $4, %eax .p2align 4,,5 jmp .L14 .p2align 4,,7 .L25: addl $4, %eax .p2align 4,,5 jmp .L12 .p2align 4,,7 .L24: addl $4, %eax .p2align 4,,5 jmp .L10 .p2align 4,,7 .L23: addl $3, %eax .p2align 4,,5 jmp .L8 .p2align 4,,7 .L22: movl $2, %eax .p2align 4,,5 jmp .L6 .LFE2: .size _Z27auto_unrolled_knight_count8Ph, .-_Z27auto_unrolled_knight_count8Ph ----------------------- End of "nice" code ---------------------- .align 2 .p2align 4,,15 .globl _Z29t_auto_unrolled_knight_count9Ph .type _Z29t_auto_unrolled_knight_count9Ph, @function _Z29t_auto_unrolled_knight_count9Ph: .LFB3: pushl %ebp .LCFI2: movl %esp, %ebp .LCFI3: pushl %edi .LCFI4: xorl %edi, %edi pushl %esi .LCFI5: xorl %esi, %esi pushl %ebx .LCFI6: subl $8, %esp .LCFI7: jmp .L31 .p2align 4,,7 .L32: incl %esi movl %esi, -20(%ebp) cmpb $5, (%eax,%esi) je .L64 .L52: incl %esi cmpb $5, (%eax,%esi) je .L60 .L54: movl -20(%ebp), %esi addl $2, %esi cmpl $9, %esi je .L65 .L31: movl 8(%ebp), %eax cmpb $5, (%eax,%esi) jne .L32 movl %esi, %eax cltd shrl $29, %edx leal (%esi,%edx), %ecx andl $7, %ecx subl %edx, %ecx cmpl $1, %ecx setg -15(%ebp) cmpl $7, %esi movzbl -15(%ebp), %edx setg %bl andb %bl, %dl cmpb $1, %dl sbbl $-1, %edi testl %ecx, %ecx setg -14(%ebp) cmpl $15, %esi movzbl -14(%ebp), %edx setg %al andb %al, %dl cmpb $1, %dl sbbl $-1, %edi cmpl $5, %ecx setle -13(%ebp) andb -13(%ebp), %bl cmpb $1, %bl sbbl $-1, %edi cmpl $6, %ecx setle %bl andb %bl, %al cmpb $1, %al movl 8(%ebp), %eax sbbl $-1, %edi cmpl $55, %esi setle %cl andb %cl, -15(%ebp) cmpb $1, -15(%ebp) sbbl $-1, %edi cmpl $47, 
%esi setle %dl andb %dl, -14(%ebp) cmpb $1, -14(%ebp) sbbl $-1, %edi andb %cl, -13(%ebp) cmpb $1, -13(%ebp) sbbl $-1, %edi andb %dl, %bl cmpb $1, %bl sbbl $-1, %edi incl %esi movl %esi, -20(%ebp) cmpb $5, (%eax,%esi) jne .L52 .L64: movl %esi, %eax cltd shrl $29, %edx leal (%esi,%edx), %ecx andl $7, %ecx subl %edx, %ecx cmpl $1, %ecx setg -15(%ebp) cmpl $7, %esi movzbl -15(%ebp), %edx setg %bl andb %bl, %dl cmpb $1, %dl sbbl $-1, %edi testl %ecx, %ecx setg -14(%ebp) cmpl $15, %esi movzbl -14(%ebp), %edx setg %al andb %al, %dl cmpb $1, %dl sbbl $-1, %edi cmpl $5, %ecx setle -13(%ebp) andb -13(%ebp), %bl cmpb $1, %bl sbbl $-1, %edi cmpl $6, %ecx setle %bl andb %bl, %al cmpb $1, %al movl 8(%ebp), %eax sbbl $-1, %edi cmpl $55, %esi setle %cl andb %cl, -15(%ebp) cmpb $1, -15(%ebp) sbbl $-1, %edi cmpl $47, %esi setle %dl andb %dl, -14(%ebp) cmpb $1, -14(%ebp) sbbl $-1, %edi andb %cl, -13(%ebp) cmpb $1, -13(%ebp) sbbl $-1, %edi andb %dl, %bl cmpb $1, %bl sbbl $-1, %edi incl %esi cmpb $5, (%eax,%esi) jne .L54 .L60: movl %esi, %eax cltd shrl $29, %edx leal (%esi,%edx), %ecx andl $7, %ecx subl %edx, %ecx cmpl $1, %ecx setg -15(%ebp) cmpl $7, %esi movzbl -15(%ebp), %edx setg %bl andb %bl, %dl cmpb $1, %dl sbbl $-1, %edi testl %ecx, %ecx setg -14(%ebp) cmpl $15, %esi movzbl -14(%ebp), %edx setg %al andb %al, %dl cmpb $1, %dl sbbl $-1, %edi cmpl $5, %ecx setle -13(%ebp) andb -13(%ebp), %bl cmpb $1, %bl sbbl $-1, %edi cmpl $6, %ecx setle %bl andb %bl, %al cmpb $1, %al sbbl $-1, %edi cmpl $55, %esi setle %cl andb %cl, -15(%ebp) cmpb $1, -15(%ebp) sbbl $-1, %edi cmpl $47, %esi movl -20(%ebp), %esi setle %dl andb %dl, -14(%ebp) cmpb $1, -14(%ebp) sbbl $-1, %edi andb %cl, -13(%ebp) cmpb $1, -13(%ebp) sbbl $-1, %edi andb %dl, %bl cmpb $1, %bl sbbl $-1, %edi addl $2, %esi cmpl $9, %esi jne .L31 .L65: addl $8, %esp movl %edi, %eax popl %ebx popl %esi popl %edi popl %ebp ret .LFE3: .size _Z29t_auto_unrolled_knight_count9Ph, .-_Z29t_auto_unrolled_knight_count9Ph .ident "GCC: (GNU) 4.0.0" 
.section .note.GNU-stack,"",@progbits I hope you will confirm the problem (so it can be solved). It would really improve gcc. Regards Thorbjørn -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21827