The following test case shows dcbtst (data cache block touch for store) instructions being inserted too aggressively when compiling with feedback-directed optimization (FDO).
Test Case: typedef struct { union { short arr[64]; short arr2[8][8]; }un; } str, *strPtr; str blah; int main (int argc, char *argv[]) { int i, x, y; strPtr ptr; ptr = &blah; for( y=0; y<5; y++){ for(x=0; x<=5; x++){ for (i = 0; i < (sizeof(ptr->un.arr)/8); i++) { *(((double *) ptr->un.arr)+i) = +0.0; } ptr++; } } return 0; } Commands (using gcc 4.0) gcc -O2 -fprofile-generate -funroll-loops -o bug bug.c ./bug gcc -O2 -fprofile-use -o bug bug.c Generated Code - notice that a touch is done every eight bytes. main: stwu 1,-64(1) li 0,5 lis 9,blah@ha li 11,0 li 12,0 mtctr 0 la 8,blah@l(9) stw 21,20(1) stw 22,24(1) stw 23,28(1) stw 24,32(1) stw 25,36(1) stw 26,40(1) stw 27,44(1) stw 28,48(1) stw 29,52(1) stw 30,56(1) stw 31,60(1) .p2align 4,,15 .L2: addi 22,8,128 addi 23,8,136 addi 7,8,144 addi 6,8,152 addi 5,8,160 addi 4,8,168 stw 11,0(8) stw 12,4(8) stw 11,8(8) stw 12,12(8) dcbtst 0,23 dcbtst 0,7 dcbtst 0,6 dcbtst 0,22 addi 3,8,176 addi 31,8,184 dcbtst 0,5 dcbtst 0,4 addi 30,8,192 addi 29,8,200 addi 28,8,208 addi 27,8,216 dcbtst 0,3 dcbtst 0,31 addi 26,8,224 addi 25,8,232 dcbtst 0,30 dcbtst 0,29 addi 24,8,240 addi 21,8,248 dcbtst 0,28 dcbtst 0,27 mr 9,22 addi 23,8,264 dcbtst 0,26 dcbtst 0,25 addi 22,22,128 addi 7,8,272 dcbtst 0,24 dcbtst 0,21 addi 6,8,280 addi 5,8,288 dcbtst 0,23 addi 4,8,296 addi 3,8,304 dcbtst 0,22 dcbtst 0,7 addi 31,8,312 addi 30,8,320 dcbtst 0,6 dcbtst 0,5 addi 29,8,328 addi 28,8,336 dcbtst 0,4 dcbtst 0,3 addi 27,8,344 addi 26,8,352 dcbtst 0,31 dcbtst 0,30 addi 25,8,360 addi 24,8,368 dcbtst 0,29 dcbtst 0,28 addi 21,8,376 mr 10,22 dcbtst 0,27 dcbtst 0,26 addi 23,9,264 addi 7,9,272 dcbtst 0,25 dcbtst 0,24 addi 6,9,280 addi 5,9,288 dcbtst 0,21 addi 4,9,296 addi 21,9,376 stw 11,16(8) stw 12,20(8) dcbtst 0,23 addi 3,9,304 stw 11,24(8) stw 12,28(8) dcbtst 0,7 dcbtst 0,6 addi 31,9,312 dcbtst 0,5 dcbtst 0,4 addi 30,9,320 addi 29,9,328 dcbtst 0,3 dcbtst 0,21 addi 28,9,336 addi 27,9,344 dcbtst 0,31 addi 26,9,352 addi 25,9,360 stw
11,32(8) stw 12,36(8) dcbtst 0,30 addi 24,9,368 stw 11,40(8) stw 12,44(8) dcbtst 0,29 dcbtst 0,28 addi 22,22,128 dcbtst 0,27 dcbtst 0,26 addi 23,8,520 addi 7,8,528 dcbtst 0,25 dcbtst 0,24 addi 6,8,536 addi 5,8,544 dcbtst 0,22 stw 11,48(8) stw 12,52(8) addi 22,8,512 addi 4,8,552 dcbtst 0,23 stw 11,56(8) stw 12,60(8) addi 3,8,560 dcbtst 0,7 stw 11,64(8) stw 12,68(8) addi 31,8,568 dcbtst 0,22 stw 11,72(8) stw 12,76(8) addi 30,8,576 dcbtst 0,6 stw 11,80(8) stw 12,84(8) addi 29,8,584 dcbtst 0,5 stw 11,88(8) stw 12,92(8) addi 28,8,592 dcbtst 0,4 stw 11,96(8) stw 12,100(8) addi 27,8,600 dcbtst 0,3 stw 11,104(8) stw 12,108(8) addi 26,8,608 dcbtst 0,31 stw 11,112(8) stw 12,116(8) addi 25,8,616 dcbtst 0,30 stw 11,120(8) stw 12,124(8) addi 24,8,624 dcbtst 0,29 stw 11,128(8) stw 12,132(8) addi 21,8,632 dcbtst 0,28 stw 11,8(9) stw 12,12(9) addi 22,8,640 dcbtst 0,27 stw 11,16(9) stw 12,20(9) addi 23,8,648 dcbtst 0,26 stw 11,24(9) stw 12,28(9) addi 7,8,656 dcbtst 0,25 stw 11,32(9) stw 12,36(9) addi 6,8,664 dcbtst 0,24 stw 11,40(9) stw 12,44(9) addi 5,8,672 dcbtst 0,21 stw 11,48(9) stw 12,52(9) addi 4,8,680 dcbtst 0,22 stw 11,56(9) stw 12,60(9) addi 3,8,688 dcbtst 0,23 stw 11,64(9) stw 12,68(9) addi 31,8,696 dcbtst 0,7 stw 11,72(9) stw 12,76(9) addi 30,8,704 dcbtst 0,6 stw 11,80(9) stw 12,84(9) addi 29,8,712 dcbtst 0,5 stw 11,88(9) stw 12,92(9) addi 28,8,720 dcbtst 0,4 stw 11,96(9) stw 12,100(9) addi 27,8,728 dcbtst 0,3 stw 11,104(9) stw 12,108(9) addi 26,8,736 dcbtst 0,31 stw 11,112(9) stw 12,116(9) addi 25,8,744 dcbtst 0,30 stw 11,120(9) stw 12,124(9) addi 24,8,752 dcbtst 0,29 stw 11,128(9) stw 12,132(9) addi 21,8,760 dcbtst 0,28 stw 11,8(10) stw 12,12(10) addi 22,8,768 dcbtst 0,27 stw 11,16(10) stw 12,20(10) addi 23,8,776 dcbtst 0,26 stw 11,120(10) stw 12,124(10) addi 7,8,784 dcbtst 0,25 stw 11,24(10) stw 12,28(10) addi 6,8,792 dcbtst 0,24 stw 11,32(10) stw 12,36(10) addi 5,8,800 dcbtst 0,21 stw 11,40(10) stw 12,44(10) addi 21,8,888 dcbtst 0,23 stw 11,48(10) stw 12,52(10) addi 
4,8,808 dcbtst 0,7 stw 11,56(10) stw 12,60(10) addi 3,8,816 dcbtst 0,6 stw 11,64(10) stw 12,68(10) addi 31,8,824 dcbtst 0,5 stw 11,72(10) stw 12,76(10) addi 30,8,832 dcbtst 0,4 stw 11,80(10) stw 12,84(10) addi 29,8,840 dcbtst 0,3 stw 11,88(10) stw 12,92(10) addi 28,8,848 dcbtst 0,31 stw 11,96(10) stw 12,100(10) addi 27,8,856 dcbtst 0,30 stw 11,104(10) stw 12,108(10) addi 26,8,864 dcbtst 0,29 stw 11,112(10) stw 12,116(10) addi 10,8,384 dcbtst 0,28 stw 11,384(8) stw 12,388(8) addi 25,8,872 dcbtst 0,27 stw 11,8(10) stw 12,12(10) addi 24,8,880 dcbtst 0,26 stw 11,16(10) stw 12,20(10) dcbtst 0,25 dcbtst 0,21 dcbtst 0,24 dcbtst 0,22 stw 11,120(10) stw 12,124(10) stw 11,24(10) stw 12,28(10) stw 11,32(10) stw 12,36(10) stw 11,40(10) stw 12,44(10) stw 11,48(10) stw 12,52(10) stw 11,56(10) stw 12,60(10) stw 11,64(10) stw 12,68(10) stw 11,72(10) stw 12,76(10) stw 11,80(10) stw 12,84(10) stw 11,88(10) stw 12,92(10) stw 11,96(10) stw 12,100(10) stw 11,104(10) stw 12,108(10) stw 11,112(10) stw 12,116(10) addi 10,8,512 stw 11,512(8) stw 12,516(8) stw 11,8(10) stw 12,12(10) stw 11,16(10) stw 12,20(10) stw 11,24(10) stw 12,28(10) stw 11,120(10) stw 12,124(10) stw 11,32(10) stw 12,36(10) stw 11,40(10) stw 12,44(10) stw 11,48(10) stw 12,52(10) stw 11,56(10) stw 12,60(10) stw 11,64(10) stw 12,68(10) stw 11,72(10) stw 12,76(10) stw 11,80(10) stw 12,84(10) stw 11,88(10) stw 12,92(10) stw 11,96(10) stw 12,100(10) stw 11,104(10) stw 12,108(10) stw 11,112(10) stw 12,116(10) addi 10,8,640 stw 11,640(8) stw 12,644(8) stw 11,120(10) stw 12,124(10) stw 11,8(10) stw 12,12(10) stw 11,16(10) stw 12,20(10) stw 11,24(10) stw 12,28(10) stw 11,32(10) stw 12,36(10) stw 11,40(10) stw 12,44(10) stw 11,48(10) stw 12,52(10) stw 11,56(10) stw 12,60(10) stw 11,64(10) stw 12,68(10) stw 11,72(10) stw 12,76(10) stw 11,80(10) stw 12,84(10) stw 11,88(10) stw 12,92(10) stw 11,96(10) stw 12,100(10) stw 11,104(10) stw 12,108(10) stw 11,112(10) stw 12,116(10) mr 8,22 bdnz .L2 li 3,0 lwz 21,20(1) lwz 22,24(1) lwz 
23,28(1) lwz 24,32(1) lwz 25,36(1) lwz 26,40(1) lwz 27,44(1) lwz 28,48(1) lwz 29,52(1) lwz 30,56(1) lwz 31,60(1) addi 1,1,64 blr -- Summary: Over Aggressive Use of Data Cache Touch Instructions During FDO Product: gcc Version: 4.0.0 Status: UNCONFIRMED Severity: normal Priority: P2 Component: target AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: steinmtz at us dot ibm dot com CC: gcc-bugs at gcc dot gnu dot org,steinmtz at us dot ibm dot com GCC build triplet: powerpc64-linux GCC host triplet: powerpc64-linux GCC target triplet: powerpc64-linux http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950