------- Comment #6 from steven at gcc dot gnu dot org  2006-01-02 21:27 -------
Ehm, wouldn't "unsigned char *" alias everything?  GCSE doesn't do load PRE for
me on the original test case, either.  And, as long as "outbuf[outcnt] =
bi_buf;" has a V_MAY_DEF for outcnt, you're not going to see any load PRE
happening here.

With a changed test case, GCSE _does_ perform load PRE:

/* Global index; bi_windup reads it three times.  */
unsigned long outcnt;
/* Defined elsewhere; called conditionally between the second and third
   reads of outcnt.  */
extern void flush_outbuf(void);

/* Reduced test case: store bi_buf to outbuf[outcnt], call flush_outbuf()
   if outcnt equals 16384, then store bi_buf to outbuf[outcnt] again.
   Each use of outcnt is written as a separate explicit load (t1, t2, t3).
   Note that outbuf is "unsigned int *" in this variant of the test.  */
void
bi_windup(unsigned int *outbuf, unsigned int bi_buf)
{
    /* First load of outcnt; indexes the first store.  */
    unsigned long t1 = outcnt;
    outbuf[t1] = bi_buf;

    /* Second load of outcnt, made after the store through outbuf.  */
    unsigned long t2 = outcnt;
    if (t2 == 16384)
      flush_outbuf();

    /* Third load of outcnt, made after the conditional call; indexes the
       second store.  */
    unsigned long t3 = outcnt;
    outbuf[t3] = bi_buf;
}

This gives me the following asm on x86-64:
bi_windup:
        movq    %rbx, -16(%rsp)
        movq    %rbp, -8(%rsp)
        subq    $24, %rsp
        movq    outcnt(%rip), %rax
        movq    %rdi, %rbp
        movl    %esi, %ebx
        cmpq    $16384, %rax
        movl    %esi, (%rbp,%rax,4)
        je      .L6
.L2:
        movl    %ebx, (%rbp,%rax,4)
        movq    8(%rsp), %rbx
        movq    16(%rsp), %rbp
        addq    $24, %rsp
        ret
        .p2align 4,,7
.L6:
        call    flush_outbuf
        movq    outcnt(%rip), %rax
        jmp     .L2


Looking at the .pre tree dump, I don't see anything obvious that would prevent
load PRE here, AFAICT:

bi_windup (outbufD.1866, bi_bufD.1867)
{
  unsigned intD.3 storetmp.21D.1918;
  long unsigned intD.4 t3D.1872;
  long unsigned intD.4 t2D.1871;
  long unsigned intD.4 t1D.1870;
  unsigned intD.3 * D.1878;
  unsigned intD.3 * D.1877;
  long unsigned intD.4 D.1876;
  unsigned intD.3 * D.1875;
  unsigned intD.3 * D.1874;
  long unsigned intD.4 D.1873;

  # BLOCK 2 freq:10000
  # PRED: ENTRY [100.0%]  (fallthru,exec)
  #   VUSE <outcntD.1863_1>;
  t1D.1870_2 = outcntD.1863;
  D.1873_3 = t1D.1870_2 * 4;
  D.1874_4 = (unsigned intD.3 *) D.1873_3;
  D.1875_6 = D.1874_4 + outbufD.1866_5;
  #   TMT.4D.1900_16 = V_MAY_DEF <TMT.4D.1900_15>;
  *D.1875_6 = bi_bufD.1867_7;
  if (t1D.1870_2 == 16384) goto <L0>; else goto <L2>;
  # SUCC: 3 [26.2%]  (true,exec) 5 [73.8%]  (false,exec)

  # BLOCK 5 freq:7378
  # PRED: 2 [73.8%]  (false,exec)
<L2>:;
  goto <bb 4> (<L1>);
  # SUCC: 4 [100.0%]  (fallthru)

  # BLOCK 3 freq:2622
  # PRED: 2 [26.2%]  (true,exec)
<L0>:;
  #   outcntD.1863_18 = V_MAY_DEF <outcntD.1863_1>;
  #   TMT.4D.1900_19 = V_MAY_DEF <TMT.4D.1900_16>;
  flush_outbuf ();
  # SUCC: 4 [100.0%]  (fallthru,exec)

  # BLOCK 4 freq:10000
  # PRED: 5 [100.0%]  (fallthru) 3 [100.0%]  (fallthru,exec)
  # TMT.4D.1900_14 = PHI <TMT.4D.1900_16(5), TMT.4D.1900_19(3)>;
  # outcntD.1863_13 = PHI <outcntD.1863_1(5), outcntD.1863_18(3)>;
<L1>:;
  #   VUSE <outcntD.1863_13>;
  t3D.1872_9 = outcntD.1863;
  D.1876_10 = t3D.1872_9 * 4;
  D.1877_11 = (unsigned intD.3 *) D.1876_10;
  D.1878_12 = D.1877_11 + outbufD.1866_5;
  #   TMT.4D.1900_17 = V_MAY_DEF <TMT.4D.1900_14>;
  *D.1878_12 = bi_bufD.1867_7;
  return;
  # SUCC: EXIT [100.0%]

}


-- 

steven at gcc dot gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Keywords|                            |TREE
            Summary|load PRE is missing         |tree load PRE is not working
                   |                            |properly


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23455


Reply via email to