------- Additional Comments From tlm at daimi dot au dot dk 2005-05-31 20:45
-------
(In reply to comment #1)
> The first testcase is fixed in 4.0.0. I have not looked
> into the full testcase.
Installed gcc 4.0.0 (a bit hard with the current version)
OK - I was wrong before (so please do not close this).
The simple situation is fixed - however, the same problem still exists
with the knight example.
// Test case, loop form.
// For every index bp in [0, 2) whose board byte equals WHITE_KNIGHT, adds one
// to count for each of the eight range tests on bp%8 and bp/8 that holds, and
// returns the total. WHITE_KNIGHT is defined outside this snippet.
// NOTE(review): presumably bp%8 / bp/8 are the column and row of an 8x8 board
// and each test stands for one knight move that stays on the board - confirm.
int unrolled_knight_count(unsigned char* board)
{
int count = 0;
for (int bp=0;bp<2;bp++) // reduces to 2 just for the example
{
if (board[bp]==WHITE_KNIGHT)
{
// Each test below depends only on bp, so once the loop is unrolled
// every condition has constant operands.
if (bp%8>1 && bp/8>0) count++;
if (bp%8>0 && bp/8>1) count++;
if (bp%8<6 && bp/8>0) count++;
if (bp%8<7 && bp/8>1) count++;
if (bp%8>1 && bp/8<7) count++;
if (bp%8>0 && bp/8<6) count++;
if (bp%8<6 && bp/8<7) count++;
if (bp%8<7 && bp/8<6) count++;
}
}
return count;
}
is compiled to
# Compiler output quoted for the loop form of the test case.
# NOTE(review): the .globl symbol below differs from the .type/label symbol;
# the length prefix 26 matches "auto_unrolled_knight_count", so the listing
# looks partially hand-renamed - confirm against the real compiler output.
.text
.align 2
.p2align 4,,15
.globl _Z26unrolled_knight_countPh
.type _Z26auto_unrolled_knight_countPh, @function
_Z26auto_unrolled_knight_countPh:
.LFB2:
pushl %ebp
.LCFI0:
# eax = 0 (the running count / return value)
xorl %eax, %eax
movl %esp, %ebp
.LCFI1:
# edx = the function's stack argument (board)
movl 8(%ebp), %edx
# board[0] == 5 ? If so, branch out of line to .L10 to set the count to 2.
cmpb $5, (%edx)
je .L10
.L6:
# board[1] == 5 ? If so, branch to .L11 to add 3.
cmpb $5, 1(%edx)
je .L11
popl %ebp
ret
.p2align 4,,7
.L11:
popl %ebp
# index 1 contributes 3 to the count
addl $3, %eax
.p2align 4,,6
ret
.p2align 4,,7
.L10:
# index 0 contributes 2 to the count, then jump back to the board[1] test
movl $2, %eax
.p2align 4,,7
jmp .L6
.LFE2:
.size _Z26auto_unrolled_knight_countPh,
.-_Z26auto_unrolled_knight_countPh
.ident "GCC: (GNU) 4.0.0"
.section .note.GNU-stack,"",@progbits
Now if I manually expand the loop before compiling:
// Test case, hand-expanded form: the same tests as the loop form, written out
// once for index 0 and once for index 1 with the index substituted as a literal.
// Returns the number of tests that hold for each index whose board byte equals
// WHITE_KNIGHT (defined outside this snippet).
int unrolled_knight_count(unsigned char* board)
{
int count = 0;
// for (int bp=0;bp<64;bp++) // We expand 2 as before..
if (board[0]==WHITE_KNIGHT)
{
// 0%8 == 0 and 0/8 == 0: only the last two tests hold, so this adds 2.
if (0%8>1 && 0/8>0) count++;
if (0%8>0 && 0/8>1) count++;
if (0%8<6 && 0/8>0) count++;
if (0%8<7 && 0/8>1) count++;
if (0%8>1 && 0/8<7) count++;
if (0%8>0 && 0/8<6) count++;
if (0%8<6 && 0/8<7) count++;
if (0%8<7 && 0/8<6) count++;
}
if (board[1]==WHITE_KNIGHT)
{
// 1%8 == 1 and 1/8 == 0: only the last three tests hold, so this adds 3.
if (1%8>1 && 1/8>0) count++;
if (1%8>0 && 1/8>1) count++;
if (1%8<6 && 1/8>0) count++;
if (1%8<7 && 1/8>1) count++;
if (1%8>1 && 1/8<7) count++;
if (1%8>0 && 1/8<6) count++;
if (1%8<6 && 1/8<7) count++;
if (1%8<7 && 1/8<6) count++;
}
return count;
}
The result is much better. (Not that I know assembler code.)
I have WHITE_KNIGHT = 5 (as you might have seen from the assembler code),
and when I timed it I had knights on positions 24, 44, 55 and 56. The code is
400-500% faster - so it will really improve the speed ...
# Compiler output quoted for the hand-expanded form of the test case.
# NOTE(review): the .type symbol below differs from the .globl/label symbol;
# the length prefix 26 matches "auto_unrolled_knight_count", so the listing
# looks partially hand-renamed - confirm against the real compiler output.
.text
.align 2
.p2align 4,,15
.globl _Z26unrolled_knight_countPh
.type _Z26auto_unrolled_knight_countPh, @function
_Z26unrolled_knight_countPh:
.LFB2:
pushl %ebp
.LCFI0:
# eax = 0 (the running count / return value)
xorl %eax, %eax
movl %esp, %ebp
.LCFI1:
# edx = the function's stack argument (board)
movl 8(%ebp), %edx
# Branch-free handling of index 0: al = (board[0] == 5), then eax += eax,
# giving 2 when the byte matches and 0 otherwise.
cmpb $5, (%edx)
sete %al
addl %eax, %eax
# board[1] == 5 ? If so, branch to .L9 to add 3.
cmpb $5, 1(%edx)
je .L9
popl %ebp
ret
.p2align 4,,7
.L9:
popl %ebp
# index 1 contributes 3 to the count
addl $3, %eax
ret
Again thanks. I do not want to sound like an unhappy gcc-user
(I admire the work you are doing).
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21827