------- Additional Comments From tlm at daimi dot au dot dk  2005-07-19 17:02 
-------
(In reply to comment #1)
> The first testcase is fixed in 4.0.0.  (Though there is a regression on the
mainline).  I have not looked 
> into the full testcase.

There have been no further reactions to this bug / request, so I am giving a bit
more information (and hopefully motivation) to help move it toward a solution.

I have written the following code :
auto_unrolled_knight_count8 and t_auto_unrolled_knight_count9 differ in only one
respect: the first loop runs to 8, the second loop runs to 9. If I unroll
manually (meaning replacing bp with constants, up to 64 - it is a chess
problem), the code is exactly like the code generated for the up-to-eight example.

The code generated for the 9 example is in my opinion quite bad. 
(It does work - but I consider the finest task of unrolling to be eliminating
what is easily known to be impossible at compile time.) The code is normally at
least 4-5 times slower than the above code!


The source is like this :

#define WHITE_KNIGHT 5

// Scans board[0..7] and, for every byte equal to WHITE_KNIGHT, adds the number
// of the eight (dx,dy) offset tests below that pass to the returned total.
// bp%8 and bp/8 are presumably the file and rank of square bp on an 8x8 board
// (the report calls this a chess problem); each test requires a margin of one
// square in one direction and two in the other.
//
// The trip count is the constant 8, so bp/8 is always 0 here and every test
// is decidable at compile time once the loop is unrolled.
int auto_unrolled_knight_count8(unsigned char* board)
{
  int count = 0;
  for (int bp=0;bp<8;++bp)
  {
    if (board[bp]==WHITE_KNIGHT)
    {
      // Tests that need room towards lower bp/8 (bp/8>0 or bp/8>1).
      if (bp%8>1 && bp/8>0) count++;
      if (bp%8>0 && bp/8>1) count++;
      if (bp%8<6 && bp/8>0) count++;
      if (bp%8<7 && bp/8>1) count++;
      // Tests that need room towards higher bp/8 (bp/8<7 or bp/8<6).
      if (bp%8>1 && bp/8<7) count++;
      if (bp%8>0 && bp/8<6) count++;
      if (bp%8<6 && bp/8<7) count++;
      if (bp%8<7 && bp/8<6) count++;
    }
  }
  return count;
}

// Same body as the 8-iteration variant, but the loop bound is 9: it scans
// board[0..8] (so board must provide at least 9 readable bytes) and, for every
// byte equal to WHITE_KNIGHT, adds the number of passing offset tests to the
// returned total.
// bp%8 and bp/8 are presumably the file and rank of square bp on an 8x8 board
// (the report calls this a chess problem).
//
// The trip count is still a compile-time constant, so each test is decidable
// per iteration after full unrolling; with bp in 0..8, bp/8 is 0 or 1.
int t_auto_unrolled_knight_count9(unsigned char* board)
{
  int count = 0;
  for (int bp=0;bp<9;++bp)
  {
    if (board[bp]==WHITE_KNIGHT)
    {
      // Tests that need room towards lower bp/8 (bp/8>0 or bp/8>1).
      if (bp%8>1 && bp/8>0) count++;
      if (bp%8>0 && bp/8>1) count++;
      if (bp%8<6 && bp/8>0) count++;
      if (bp%8<7 && bp/8>1) count++;
      // Tests that need room towards higher bp/8 (bp/8<7 or bp/8<6).
      if (bp%8>1 && bp/8<7) count++;
      if (bp%8>0 && bp/8<6) count++;
      if (bp%8<6 && bp/8<7) count++;
      if (bp%8<7 && bp/8<6) count++;
    }
  }
  return count;
}

Assembly : (Compiled with -O3 and -funroll-loops) 

        .file   "all_in_one.cpp"
        .text
        .align 2
        .p2align 4,,15
# Compiler output (AT&T syntax, 32-bit x86) for
# auto_unrolled_knight_count8(unsigned char*).
#
# Register use visible below:
#   %eax = running count (zeroed by the xorl, returned as-is)
#   %edx = board pointer, loaded from the stack argument at 8(%ebp)
#
# The loop is fully unrolled: eight straight-line byte compares against 5 at
# offsets 0..7, with no index arithmetic and no %/ / computations left.  Each
# match branches to an out-of-line block that adds a constant to the count
# (2,3,4,4,4,4,3,2 for offsets 0..7) and jumps back to the next compare.
.globl _Z27auto_unrolled_knight_count8Ph
        .type   _Z27auto_unrolled_knight_count8Ph, @function
_Z27auto_unrolled_knight_count8Ph:
.LFB2:
        pushl   %ebp
.LCFI0:
        # count = 0
        xorl    %eax, %eax
        movl    %esp, %ebp
.LCFI1:
        # %edx = board
        movl    8(%ebp), %edx
        # board[0] == 5 ?
        cmpb    $5, (%edx)
        je      .L22
.L6:
        # board[1] == 5 ?
        cmpb    $5, 1(%edx)
        je      .L23
.L8:
        # board[2] == 5 ?
        cmpb    $5, 2(%edx)
        je      .L24
.L10:
        # board[3] == 5 ?
        cmpb    $5, 3(%edx)
        .p2align 4,,5
        je      .L25
.L12:
        # board[4] == 5 ?
        cmpb    $5, 4(%edx)
        .p2align 4,,5
        je      .L26
.L14:
        # board[5] == 5 ?
        cmpb    $5, 5(%edx)
        .p2align 4,,5
        je      .L27
.L16:
        # board[6] == 5 ?
        cmpb    $5, 6(%edx)
        .p2align 4,,5
        je      .L28
.L18:
        # board[7] == 5 ?  If not, return the count accumulated so far.
        cmpb    $5, 7(%edx)
        .p2align 4,,5
        je      .L29
        popl    %ebp
        .p2align 4,,6
        ret
        .p2align 4,,7
.L29:
        # Match at offset 7: count += 2, then return.
        popl    %ebp
        addl    $2, %eax
        .p2align 4,,6
        ret
        .p2align 4,,7
.L28:
        # Match at offset 6: count += 3.
        addl    $3, %eax
        .p2align 4,,7
        jmp     .L18
        .p2align 4,,7
.L27:
        # Match at offset 5: count += 4.
        addl    $4, %eax
        .p2align 4,,5
        jmp     .L16
        .p2align 4,,7
.L26:
        # Match at offset 4: count += 4.
        addl    $4, %eax
        .p2align 4,,5
        jmp     .L14
        .p2align 4,,7
.L25:
        # Match at offset 3: count += 4.
        addl    $4, %eax
        .p2align 4,,5
        jmp     .L12
        .p2align 4,,7
.L24:
        # Match at offset 2: count += 4.
        addl    $4, %eax
        .p2align 4,,5
        jmp     .L10
        .p2align 4,,7
.L23:
        # Match at offset 1: count += 3.
        addl    $3, %eax
        .p2align 4,,5
        jmp     .L8
        .p2align 4,,7
.L22:
        # Match at offset 0: the count is still 0 here, so it is set to 2
        # directly instead of being added to.
        movl    $2, %eax
        .p2align 4,,5
        jmp     .L6
.LFE2:
        # NOTE(review): the .size operand below appears to have been wrapped
        # onto a second line when the listing was pasted -- confirm before
        # feeding this text to an assembler.
        .size   _Z27auto_unrolled_knight_count8Ph, 
.-_Z27auto_unrolled_knight_count8Ph

----------------------- End of "nice" code ----------------------

        .align 2
        .p2align 4,,15
# Compiler output (AT&T syntax, 32-bit x86) for
# t_auto_unrolled_knight_count9(unsigned char*).
#
# Register / stack use visible below:
#   %esi       = loop index bp (starts at 0)
#   %edi       = running count (starts at 0, copied to %eax at exit)
#   8(%ebp)    = board pointer argument, reloaded into %eax when needed
#   -20(%ebp)  = saved copy of the index of the second unrolled copy
#   -13/-14/-15(%ebp) = byte temporaries holding setcc results
#
# Unlike the 8-iteration function, the loop is only unrolled by a factor of
# three (9 = 3 x 3).  Each of the three copies (fall-through after .L31, .L64,
# .L60) recomputes bp % 8 and all range comparisons at run time.
#
# Recurring idioms:
#   movl/cltd/shrl $29/leal/andl $7/subl  -> %ecx = bp % 8 (signed remainder)
#   cmpb $1, r ; sbbl $-1, %edi           -> cmpb sets CF only when r == 0, so
#                                            %edi = %edi + 1 - CF, i.e. count++
#                                            exactly when the flag byte r != 0
.globl _Z29t_auto_unrolled_knight_count9Ph
        .type   _Z29t_auto_unrolled_knight_count9Ph, @function
_Z29t_auto_unrolled_knight_count9Ph:
.LFB3:
        pushl   %ebp
.LCFI2:
        movl    %esp, %ebp
.LCFI3:
        pushl   %edi
.LCFI4:
        # count = 0
        xorl    %edi, %edi
        pushl   %esi
.LCFI5:
        # bp = 0
        xorl    %esi, %esi
        pushl   %ebx
.LCFI6:
        # Room for the byte temporaries and the saved index.
        subl    $8, %esp
.LCFI7:
        jmp     .L31
        .p2align 4,,7
.L32:
        # First copy did not match: advance bp, remember it, test the byte
        # for the second copy.
        incl    %esi
        movl    %esi, -20(%ebp)
        cmpb    $5, (%eax,%esi)
        je      .L64
.L52:
        # Second copy did not match: advance bp and test the byte for the
        # third copy.
        incl    %esi
        cmpb    $5, (%eax,%esi)
        je      .L60
.L54:
        # Third copy did not match: bp = saved index + 2 (start of the next
        # group of three); leave the loop when it reaches 9.
        movl    -20(%ebp), %esi
        addl    $2, %esi
        cmpl    $9, %esi
        je      .L65
.L31:
        # ---- unrolled copy 1 ----
        movl    8(%ebp), %eax
        cmpb    $5, (%eax,%esi)
        jne     .L32
        # %ecx = bp % 8
        movl    %esi, %eax
        cltd
        shrl    $29, %edx
        leal    (%esi,%edx), %ecx
        andl    $7, %ecx
        subl    %edx, %ecx
        # -15(%ebp) = (bp%8 > 1); %bl = (bp > 7), i.e. bp/8 > 0
        cmpl    $1, %ecx
        setg    -15(%ebp)
        cmpl    $7, %esi
        movzbl  -15(%ebp), %edx
        setg    %bl
        andb    %bl, %dl
        cmpb    $1, %dl
        sbbl    $-1, %edi
        # -14(%ebp) = (bp%8 > 0); %al = (bp > 15), i.e. bp/8 > 1
        testl   %ecx, %ecx
        setg    -14(%ebp)
        cmpl    $15, %esi
        movzbl  -14(%ebp), %edx
        setg    %al
        andb    %al, %dl
        cmpb    $1, %dl
        sbbl    $-1, %edi
        # -13(%ebp) = (bp%8 <= 5), combined with bp/8 > 0
        cmpl    $5, %ecx
        setle   -13(%ebp)
        andb    -13(%ebp), %bl
        cmpb    $1, %bl
        sbbl    $-1, %edi
        # %bl = (bp%8 <= 6), combined with bp/8 > 1
        cmpl    $6, %ecx
        setle   %bl
        andb    %bl, %al
        cmpb    $1, %al
        # Reload the board pointer (%eax was used as scratch); movl leaves the
        # flags alone, so the sbbl still consumes the carry from the cmpb.
        movl    8(%ebp), %eax
        sbbl    $-1, %edi
        # %cl = (bp <= 55), i.e. bp/8 < 7
        cmpl    $55, %esi
        setle   %cl
        andb    %cl, -15(%ebp)
        cmpb    $1, -15(%ebp)
        sbbl    $-1, %edi
        # %dl = (bp <= 47), i.e. bp/8 < 6
        cmpl    $47, %esi
        setle   %dl
        andb    %dl, -14(%ebp)
        cmpb    $1, -14(%ebp)
        sbbl    $-1, %edi
        andb    %cl, -13(%ebp)
        cmpb    $1, -13(%ebp)
        sbbl    $-1, %edi
        andb    %dl, %bl
        cmpb    $1, %bl
        sbbl    $-1, %edi
        # Advance to the second copy's index and remember it.
        incl    %esi
        movl    %esi, -20(%ebp)
        cmpb    $5, (%eax,%esi)
        jne     .L52
.L64:
        # ---- unrolled copy 2: same sequence as copy 1 ----
        movl    %esi, %eax
        cltd
        shrl    $29, %edx
        leal    (%esi,%edx), %ecx
        andl    $7, %ecx
        subl    %edx, %ecx
        cmpl    $1, %ecx
        setg    -15(%ebp)
        cmpl    $7, %esi
        movzbl  -15(%ebp), %edx
        setg    %bl
        andb    %bl, %dl
        cmpb    $1, %dl
        sbbl    $-1, %edi
        testl   %ecx, %ecx
        setg    -14(%ebp)
        cmpl    $15, %esi
        movzbl  -14(%ebp), %edx
        setg    %al
        andb    %al, %dl
        cmpb    $1, %dl
        sbbl    $-1, %edi
        cmpl    $5, %ecx
        setle   -13(%ebp)
        andb    -13(%ebp), %bl
        cmpb    $1, %bl
        sbbl    $-1, %edi
        cmpl    $6, %ecx
        setle   %bl
        andb    %bl, %al
        cmpb    $1, %al
        movl    8(%ebp), %eax
        sbbl    $-1, %edi
        cmpl    $55, %esi
        setle   %cl
        andb    %cl, -15(%ebp)
        cmpb    $1, -15(%ebp)
        sbbl    $-1, %edi
        cmpl    $47, %esi
        setle   %dl
        andb    %dl, -14(%ebp)
        cmpb    $1, -14(%ebp)
        sbbl    $-1, %edi
        andb    %cl, -13(%ebp)
        cmpb    $1, -13(%ebp)
        sbbl    $-1, %edi
        andb    %dl, %bl
        cmpb    $1, %bl
        sbbl    $-1, %edi
        # Advance to the third copy's index (not saved this time).
        incl    %esi
        cmpb    $5, (%eax,%esi)
        jne     .L54
.L60:
        # ---- unrolled copy 3: same tests; no board-pointer reload ----
        movl    %esi, %eax
        cltd
        shrl    $29, %edx
        leal    (%esi,%edx), %ecx
        andl    $7, %ecx
        subl    %edx, %ecx
        cmpl    $1, %ecx
        setg    -15(%ebp)
        cmpl    $7, %esi
        movzbl  -15(%ebp), %edx
        setg    %bl
        andb    %bl, %dl
        cmpb    $1, %dl
        sbbl    $-1, %edi
        testl   %ecx, %ecx
        setg    -14(%ebp)
        cmpl    $15, %esi
        movzbl  -14(%ebp), %edx
        setg    %al
        andb    %al, %dl
        cmpb    $1, %dl
        sbbl    $-1, %edi
        cmpl    $5, %ecx
        setle   -13(%ebp)
        andb    -13(%ebp), %bl
        cmpb    $1, %bl
        sbbl    $-1, %edi
        cmpl    $6, %ecx
        setle   %bl
        andb    %bl, %al
        cmpb    $1, %al
        sbbl    $-1, %edi
        cmpl    $55, %esi
        setle   %cl
        andb    %cl, -15(%ebp)
        cmpb    $1, -15(%ebp)
        sbbl    $-1, %edi
        cmpl    $47, %esi
        # Restore the saved index early; movl does not modify the flags, so
        # the setle below still tests the result of the cmpl $47.
        movl    -20(%ebp), %esi
        setle   %dl
        andb    %dl, -14(%ebp)
        cmpb    $1, -14(%ebp)
        sbbl    $-1, %edi
        andb    %cl, -13(%ebp)
        cmpb    $1, -13(%ebp)
        sbbl    $-1, %edi
        andb    %dl, %bl
        cmpb    $1, %bl
        sbbl    $-1, %edi
        # bp = saved index + 2; loop again unless it reached 9.
        addl    $2, %esi
        cmpl    $9, %esi
        jne     .L31
.L65:
        # Release the locals, return the count in %eax, restore saved registers.
        addl    $8, %esp
        movl    %edi, %eax
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
.LFE3:
        # NOTE(review): the .size operand below appears to have been wrapped
        # onto a second line when the listing was pasted -- confirm before
        # feeding this text to an assembler.
        .size   _Z29t_auto_unrolled_knight_count9Ph, 
.-_Z29t_auto_unrolled_knight_count9Ph
        .ident  "GCC: (GNU) 4.0.0"
        .section        .note.GNU-stack,"",@progbits

I hope you will confirm the problem (so it can be solved). It would really
improve gcc.

Regards Thorbjørn



-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21827

Reply via email to