The following test case shows dcbtst (data cache block touch for store)
instructions being inserted too aggressively during FDO (feedback-directed
optimization, i.e. -fprofile-generate followed by -fprofile-use).

Test Case:

/* Test structure: a single union that views the same storage either as a
   flat array of 64 shorts or as an 8x8 matrix of shorts.  Only the flat
   view (arr) is used by main.  */
typedef struct {
  union {
    short arr[64];
    short arr2[8][8];
  }un;
} str, *strPtr;

/* The one and only object of type str; main starts its pointer walk here.  */
str blah;

/* Reproducer: zero-fill str objects through a double pointer.

   The y loop runs 5 times and the x loop 6 times, so the innermost fill
   loop executes 30 times in total, and ptr is advanced after each one.
   argc and argv are unused.  Always returns 0.

   NOTE(review): blah is a single str, yet ptr is incremented 30 times, so
   every fill after the first writes past the end of blah.  The stores also
   access short storage through a double *.  Both are left untouched on
   purpose: this is the exact input the listing in this report was
   generated from.  */
int main (int argc, char *argv[])
{
  
  int i, x, y;
  strPtr ptr;

  ptr = &blah;

  for( y=0; y<5; y++){
    for(x=0; x<=5; x++){
      /* One 8-byte store of +0.0 per iteration; the trip count is the size
         of arr in bytes divided by 8.  */
      for (i = 0; i < (sizeof(ptr->un.arr)/8); i++) {
        *(((double *) ptr->un.arr)+i) = +0.0;
      }
      /* Step to the next str-sized slot (see the NOTE above).  */
      ptr++;
    }
  }

  return 0;
}

Commands (using gcc 4.0):

gcc -O2 -fprofile-generate -funroll-loops -o bug bug.c
./bug
gcc -O2 -fprofile-use -o bug bug.c

Generated code - notice that a dcbtst (cache touch) is issued for every eight bytes stored.

# main -- compiler output for the C test case in this report.
#
# Register roles, as established by the instructions below:
#   r1        stack pointer; a 64-byte frame is allocated on entry
#   r8        base of the region being zeroed; starts at &blah and is
#             advanced by 768 bytes on every trip around .L2
#   r9, r10   secondary store bases derived from r8
#   r11, r12  both hold 0; each "stw 11 / stw 12" pair writes 8 zero bytes
#   r3-r7, r21-r31  addresses passed to dcbtst (r22 also carries r8+768,
#             the next value of r8)
#   CTR       5, the trip count of .L2
# r21-r31 are stored to the frame on entry and reloaded before blr.
#
# The two relocation operands on the lis/la lines were replaced by
# "[EMAIL PROTECTED]" in the archived copy of this report; they are
# restored here as the blah@ha / blah@l pair.
main:
        stwu 1,-64(1)
        li 0,5
        lis 9,blah@ha
        li 11,0
        li 12,0
        mtctr 0
        la 8,blah@l(9)
        stw 21,20(1)
        stw 22,24(1)
        stw 23,28(1)
        stw 24,32(1)
        stw 25,36(1)
        stw 26,40(1)
        stw 27,44(1)
        stw 28,48(1)
        stw 29,52(1)
        stw 30,56(1)
        stw 31,60(1)
        .p2align 4,,15
# Loop body.  The stores cover r8+0 .. r8+767 (six 128-byte blocks, matching
# the six iterations of the x loop in the C source).  The dcbtst targets
# step from r8+128 up to r8+888 in 8-byte increments -- one touch per eight
# bytes, which is the behavior this report complains about.
.L2:
        addi 22,8,128
        addi 23,8,136
        addi 7,8,144
        addi 6,8,152
        addi 5,8,160
        addi 4,8,168
        stw 11,0(8)
        stw 12,4(8)
        stw 11,8(8)
        stw 12,12(8)
        dcbtst 0,23
        dcbtst 0,7
        dcbtst 0,6
        dcbtst 0,22
        addi 3,8,176
        addi 31,8,184
        dcbtst 0,5
        dcbtst 0,4
        addi 30,8,192
        addi 29,8,200
        addi 28,8,208
        addi 27,8,216
        dcbtst 0,3
        dcbtst 0,31
        addi 26,8,224
        addi 25,8,232
        dcbtst 0,30
        dcbtst 0,29
        addi 24,8,240
        addi 21,8,248
        dcbtst 0,28
        dcbtst 0,27
        mr 9,22
        addi 23,8,264
        dcbtst 0,26
        dcbtst 0,25
        addi 22,22,128
        addi 7,8,272
        dcbtst 0,24
        dcbtst 0,21
        addi 6,8,280
        addi 5,8,288
        dcbtst 0,23
        addi 4,8,296
        addi 3,8,304
        dcbtst 0,22
        dcbtst 0,7
        addi 31,8,312
        addi 30,8,320
        dcbtst 0,6
        dcbtst 0,5
        addi 29,8,328
        addi 28,8,336
        dcbtst 0,4
        dcbtst 0,3
        addi 27,8,344
        addi 26,8,352
        dcbtst 0,31
        dcbtst 0,30
        addi 25,8,360
        addi 24,8,368
        dcbtst 0,29
        dcbtst 0,28
        addi 21,8,376
        mr 10,22
        dcbtst 0,27
        dcbtst 0,26
        addi 23,9,264
        addi 7,9,272
        dcbtst 0,25
        dcbtst 0,24
        addi 6,9,280
        addi 5,9,288
        dcbtst 0,21
        addi 4,9,296
        addi 21,9,376
        stw 11,16(8)
        stw 12,20(8)
        dcbtst 0,23
        addi 3,9,304
        stw 11,24(8)
        stw 12,28(8)
        dcbtst 0,7
        dcbtst 0,6
        addi 31,9,312
        dcbtst 0,5
        dcbtst 0,4
        addi 30,9,320
        addi 29,9,328
        dcbtst 0,3
        dcbtst 0,21
        addi 28,9,336
        addi 27,9,344
        dcbtst 0,31
        addi 26,9,352
        addi 25,9,360
        stw 11,32(8)
        stw 12,36(8)
        dcbtst 0,30
        addi 24,9,368
        stw 11,40(8)
        stw 12,44(8)
        dcbtst 0,29
        dcbtst 0,28
        addi 22,22,128
        dcbtst 0,27
        dcbtst 0,26
        addi 23,8,520
        addi 7,8,528
        dcbtst 0,25
        dcbtst 0,24
        addi 6,8,536
        addi 5,8,544
        dcbtst 0,22
        stw 11,48(8)
        stw 12,52(8)
        addi 22,8,512
        addi 4,8,552
        dcbtst 0,23
        stw 11,56(8)
        stw 12,60(8)
        addi 3,8,560
        dcbtst 0,7
        stw 11,64(8)
        stw 12,68(8)
        addi 31,8,568
        dcbtst 0,22
        stw 11,72(8)
        stw 12,76(8)
        addi 30,8,576
        dcbtst 0,6
        stw 11,80(8)
        stw 12,84(8)
        addi 29,8,584
        dcbtst 0,5
        stw 11,88(8)
        stw 12,92(8)
        addi 28,8,592
        dcbtst 0,4
        stw 11,96(8)
        stw 12,100(8)
        addi 27,8,600
        dcbtst 0,3
        stw 11,104(8)
        stw 12,108(8)
        addi 26,8,608
        dcbtst 0,31
        stw 11,112(8)
        stw 12,116(8)
        addi 25,8,616
        dcbtst 0,30
        stw 11,120(8)
        stw 12,124(8)
        addi 24,8,624
        dcbtst 0,29
        stw 11,128(8)
        stw 12,132(8)
        addi 21,8,632
        dcbtst 0,28
        stw 11,8(9)
        stw 12,12(9)
        addi 22,8,640
        dcbtst 0,27
        stw 11,16(9)
        stw 12,20(9)
        addi 23,8,648
        dcbtst 0,26
        stw 11,24(9)
        stw 12,28(9)
        addi 7,8,656
        dcbtst 0,25
        stw 11,32(9)
        stw 12,36(9)
        addi 6,8,664
        dcbtst 0,24
        stw 11,40(9)
        stw 12,44(9)
        addi 5,8,672
        dcbtst 0,21
        stw 11,48(9)
        stw 12,52(9)
        addi 4,8,680
        dcbtst 0,22
        stw 11,56(9)
        stw 12,60(9)
        addi 3,8,688
        dcbtst 0,23
        stw 11,64(9)
        stw 12,68(9)
        addi 31,8,696
        dcbtst 0,7
        stw 11,72(9)
        stw 12,76(9)
        addi 30,8,704
        dcbtst 0,6
        stw 11,80(9)
        stw 12,84(9)
        addi 29,8,712
        dcbtst 0,5
        stw 11,88(9)
        stw 12,92(9)
        addi 28,8,720
        dcbtst 0,4
        stw 11,96(9)
        stw 12,100(9)
        addi 27,8,728
        dcbtst 0,3
        stw 11,104(9)
        stw 12,108(9)
        addi 26,8,736
        dcbtst 0,31
        stw 11,112(9)
        stw 12,116(9)
        addi 25,8,744
        dcbtst 0,30
        stw 11,120(9)
        stw 12,124(9)
        addi 24,8,752
        dcbtst 0,29
        stw 11,128(9)
        stw 12,132(9)
        addi 21,8,760
        dcbtst 0,28
        stw 11,8(10)
        stw 12,12(10)
        addi 22,8,768
        dcbtst 0,27
        stw 11,16(10)
        stw 12,20(10)
        addi 23,8,776
        dcbtst 0,26
        stw 11,120(10)
        stw 12,124(10)
        addi 7,8,784
        dcbtst 0,25
        stw 11,24(10)
        stw 12,28(10)
        addi 6,8,792
        dcbtst 0,24
        stw 11,32(10)
        stw 12,36(10)
        addi 5,8,800
        dcbtst 0,21
        stw 11,40(10)
        stw 12,44(10)
        addi 21,8,888
        dcbtst 0,23
        stw 11,48(10)
        stw 12,52(10)
        addi 4,8,808
        dcbtst 0,7
        stw 11,56(10)
        stw 12,60(10)
        addi 3,8,816
        dcbtst 0,6
        stw 11,64(10)
        stw 12,68(10)
        addi 31,8,824
        dcbtst 0,5
        stw 11,72(10)
        stw 12,76(10)
        addi 30,8,832
        dcbtst 0,4
        stw 11,80(10)
        stw 12,84(10)
        addi 29,8,840
        dcbtst 0,3
        stw 11,88(10)
        stw 12,92(10)
        addi 28,8,848
        dcbtst 0,31
        stw 11,96(10)
        stw 12,100(10)
        addi 27,8,856
        dcbtst 0,30
        stw 11,104(10)
        stw 12,108(10)
        addi 26,8,864
        dcbtst 0,29
        stw 11,112(10)
        stw 12,116(10)
        addi 10,8,384
        dcbtst 0,28
        stw 11,384(8)
        stw 12,388(8)
        addi 25,8,872
        dcbtst 0,27
        stw 11,8(10)
        stw 12,12(10)
        addi 24,8,880
        dcbtst 0,26
        stw 11,16(10)
        stw 12,20(10)
        dcbtst 0,25
        dcbtst 0,21
        dcbtst 0,24
        dcbtst 0,22
        stw 11,120(10)
        stw 12,124(10)
        stw 11,24(10)
        stw 12,28(10)
        stw 11,32(10)
        stw 12,36(10)
        stw 11,40(10)
        stw 12,44(10)
        stw 11,48(10)
        stw 12,52(10)
        stw 11,56(10)
        stw 12,60(10)
        stw 11,64(10)
        stw 12,68(10)
        stw 11,72(10)
        stw 12,76(10)
        stw 11,80(10)
        stw 12,84(10)
        stw 11,88(10)
        stw 12,92(10)
        stw 11,96(10)
        stw 12,100(10)
        stw 11,104(10)
        stw 12,108(10)
        stw 11,112(10)
        stw 12,116(10)
        addi 10,8,512
        stw 11,512(8)
        stw 12,516(8)
        stw 11,8(10)
        stw 12,12(10)
        stw 11,16(10)
        stw 12,20(10)
        stw 11,24(10)
        stw 12,28(10)
        stw 11,120(10)
        stw 12,124(10)
        stw 11,32(10)
        stw 12,36(10)
        stw 11,40(10)
        stw 12,44(10)
        stw 11,48(10)
        stw 12,52(10)
        stw 11,56(10)
        stw 12,60(10)
        stw 11,64(10)
        stw 12,68(10)
        stw 11,72(10)
        stw 12,76(10)
        stw 11,80(10)
        stw 12,84(10)
        stw 11,88(10)
        stw 12,92(10)
        stw 11,96(10)
        stw 12,100(10)
        stw 11,104(10)
        stw 12,108(10)
        stw 11,112(10)
        stw 12,116(10)
        addi 10,8,640
        stw 11,640(8)
        stw 12,644(8)
        stw 11,120(10)
        stw 12,124(10)
        stw 11,8(10)
        stw 12,12(10)
        stw 11,16(10)
        stw 12,20(10)
        stw 11,24(10)
        stw 12,28(10)
        stw 11,32(10)
        stw 12,36(10)
        stw 11,40(10)
        stw 12,44(10)
        stw 11,48(10)
        stw 12,52(10)
        stw 11,56(10)
        stw 12,60(10)
        stw 11,64(10)
        stw 12,68(10)
        stw 11,72(10)
        stw 12,76(10)
        stw 11,80(10)
        stw 12,84(10)
        stw 11,88(10)
        stw 12,92(10)
        stw 11,96(10)
        stw 12,100(10)
        stw 11,104(10)
        stw 12,108(10)
        stw 11,112(10)
        stw 12,116(10)
        mr 8,22
        bdnz .L2
# Loop finished: return 0 in r3, reload r21-r31 and release the frame.
        li 3,0
        lwz 21,20(1)
        lwz 22,24(1)
        lwz 23,28(1)
        lwz 24,32(1)
        lwz 25,36(1)
        lwz 26,40(1)
        lwz 27,44(1)
        lwz 28,48(1)
        lwz 29,52(1)
        lwz 30,56(1)
        lwz 31,60(1)
        addi 1,1,64
        blr

-- 
           Summary: Over Aggressive Use of Data Cache Touch Instructions
                    During FDO
           Product: gcc
           Version: 4.0.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P2
         Component: target
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: steinmtz at us dot ibm dot com
                CC: gcc-bugs at gcc dot gnu dot org,steinmtz at us dot ibm
                    dot com
 GCC build triplet: powerpc64-linux
  GCC host triplet: powerpc64-linux
GCC target triplet: powerpc64-linux


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950

Reply via email to