------- Comment #6 from steven at gcc dot gnu dot org 2006-01-02 21:27 ------- Hmm, wouldn't "unsigned char *" alias everything? GCSE doesn't do load PRE for me on the original test case either. And as long as the store "outbuf[outcnt] = bi_buf;" has a V_MAY_DEF for outcnt, you're not going to see any load PRE happening here.
With a changed test case, GCSE _does_ perform load PRE: unsigned long outcnt; extern void flush_outbuf(void); void bi_windup(unsigned int *outbuf, unsigned int bi_buf) { unsigned long t1 = outcnt; outbuf[t1] = bi_buf; unsigned long t2 = outcnt; if (t2 == 16384) flush_outbuf(); unsigned long t3 = outcnt; outbuf[t3] = bi_buf; } This gives me the following asm on x86-64: bi_windup: movq %rbx, -16(%rsp) movq %rbp, -8(%rsp) subq $24, %rsp movq outcnt(%rip), %rax movq %rdi, %rbp movl %esi, %ebx cmpq $16384, %rax movl %esi, (%rbp,%rax,4) je .L6 .L2: movl %ebx, (%rbp,%rax,4) movq 8(%rsp), %rbx movq 16(%rsp), %rbp addq $24, %rsp ret .p2align 4,,7 .L6: call flush_outbuf movq outcnt(%rip), %rax jmp .L2 Looking at the .pre tree dump, there isn't anything obvious in the way of doing load PRE here AFAICT: bi_windup (outbufD.1866, bi_bufD.1867) { unsigned intD.3 storetmp.21D.1918; long unsigned intD.4 t3D.1872; long unsigned intD.4 t2D.1871; long unsigned intD.4 t1D.1870; unsigned intD.3 * D.1878; unsigned intD.3 * D.1877; long unsigned intD.4 D.1876; unsigned intD.3 * D.1875; unsigned intD.3 * D.1874; long unsigned intD.4 D.1873; # BLOCK 2 freq:10000 # PRED: ENTRY [100.0%] (fallthru,exec) # VUSE <outcntD.1863_1>; t1D.1870_2 = outcntD.1863; D.1873_3 = t1D.1870_2 * 4; D.1874_4 = (unsigned intD.3 *) D.1873_3; D.1875_6 = D.1874_4 + outbufD.1866_5; # TMT.4D.1900_16 = V_MAY_DEF <TMT.4D.1900_15>; *D.1875_6 = bi_bufD.1867_7; if (t1D.1870_2 == 16384) goto <L0>; else goto <L2>; # SUCC: 3 [26.2%] (true,exec) 5 [73.8%] (false,exec) # BLOCK 5 freq:7378 # PRED: 2 [73.8%] (false,exec) <L2>:; goto <bb 4> (<L1>); # SUCC: 4 [100.0%] (fallthru) # BLOCK 3 freq:2622 # PRED: 2 [26.2%] (true,exec) <L0>:; # outcntD.1863_18 = V_MAY_DEF <outcntD.1863_1>; # TMT.4D.1900_19 = V_MAY_DEF <TMT.4D.1900_16>; flush_outbuf (); # SUCC: 4 [100.0%] (fallthru,exec) # BLOCK 4 freq:10000 # PRED: 5 [100.0%] (fallthru) 3 [100.0%] (fallthru,exec) # TMT.4D.1900_14 = PHI <TMT.4D.1900_16(5), TMT.4D.1900_19(3)>; # 
outcntD.1863_13 = PHI <outcntD.1863_1(5), outcntD.1863_18(3)>; <L1>:; # VUSE <outcntD.1863_13>; t3D.1872_9 = outcntD.1863; D.1876_10 = t3D.1872_9 * 4; D.1877_11 = (unsigned intD.3 *) D.1876_10; D.1878_12 = D.1877_11 + outbufD.1866_5; # TMT.4D.1900_17 = V_MAY_DEF <TMT.4D.1900_14>; *D.1878_12 = bi_bufD.1867_7; return; # SUCC: EXIT [100.0%] } -- steven at gcc dot gnu dot org changed: What |Removed |Added ---------------------------------------------------------------------------- Keywords| |TREE Summary|load PRE is missing |tree load PRE is not working | |properly http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23455