------- Comment #3 from rguenth at gcc dot gnu dot org  2010-02-05 13:17 -------
On the (retired) mem-ref branch, bitfield loads/stores were lowered very early
to read-extract-modify-write operations, so the tree level would have optimized
this.

But of course people complained that architectures that can do bitfield
stores would be pessimized if we do not retain the funny BIT_FIELD_REFs
of memory.

Basically, without some form of lowering, the tree level is lost.

The mem-ref branch lowered the function to

  i = 0;
  <D.1562>:;
  D.1564 = (long unsigned int) i;
  D.1565 = D.1564 * 4;
  D.1566 = p + D.1565;
  MEML.0 = IMEM <unsigned int {2}, D.1566>;
  D.1567 = BIT_FIELD_REF <MEML.0, 1, 0>;
  if (D.1567 != 0) goto <D.1574>; else goto <D.1575>;
  <D.1574>:;
  D.1564 = (long unsigned int) i;
  D.1565 = D.1564 * 4;
  D.1566 = p + D.1565;
  D.1564 = (long unsigned int) i;
  D.1565 = D.1564 * 4;
  D.1566 = p + D.1565;
  MEML.1 = IMEM <unsigned int {2}, D.1566>;
  D.1568 = BIT_FIELD_REF <MEML.1, 31, 1>;
  D.1569 = (int) D.1568;
  D.1570 = D.1569 + a;
  D.1571 = (unsigned int) D.1570;
  D.1572 = (<unnamed-unsigned:31>) D.1571;
  MEML.2 = IMEM <unsigned int {2}, D.1566>;
  MEML.2 = BIT_FIELD_EXPR <MEML.2, D.1572, 31, 1>;
  IMEM <unsigned int {2}, D.1566> = MEML.2;
  <D.1575>:;
  i = i + 1;
  if (i < n) goto <D.1562>; else goto <D.1563>;
  <D.1563>:;
  return;

so FRE sees that the loads into MEML.1 and MEML.2 are redundant with MEML.0, and we expand from

<bb 2>:
  ivtmp.20_15 = (unsigned int *) p_5(D);

<bb 3>:
  # ivtmp.20_13 = PHI <ivtmp.20_15(2), ivtmp.20_22(5)>
  # i_1 = PHI <0(2), i_24(5)>
  MEML.0_7 = IMEM <unsigned int {2}, ivtmp.20_13>;
  D.1567_8 = BIT_FIELD_REF <MEML.0_7, 1, 0>;
  if (D.1567_8 != 0)
    goto <bb 4>;
  else
    goto <bb 5>;

<bb 4>:
  D.1568_16 = BIT_FIELD_REF <MEML.0_7, 31, 1>;
  D.1569_17 = (int) D.1568_16;
  D.1570_19 = D.1569_17 + a_18(D);
  D.1571_20 = (unsigned int) D.1570_19;
  D.1572_21 = (<unnamed-unsigned:31>) D.1571_20;
  MEML.2_23 = BIT_FIELD_EXPR <MEML.0_7, D.1572_21, 31, 1>;
  IMEM <unsigned int {2}, ivtmp.20_13> = MEML.2_23;

<bb 5>:
  i_24 = i_1 + 1;
  ivtmp.20_22 = ivtmp.20_13 + 4;
  if (i_24 < n_25(D))
    goto <bb 3>;
  else
    goto <bb 6>;

<bb 6>:
  return;


On x86_64 the generated code was

func:
.LFB2:
        movl    %edx, %r8d
        xorl    %ecx, %ecx
        .p2align 4,,10
        .p2align 3
.L3:
        movl    (%rdi), %eax
        testb   $1, %al
        je      .L2
        movl    %eax, %edx
        shrl    %eax
        addl    %r8d, %eax
        andl    $1, %edx
        addl    %eax, %eax
        orl     %eax, %edx
        movl    %edx, (%rdi)
.L2:
        addl    $1, %ecx
        addq    $4, %rdi
        cmpl    %esi, %ecx
        jl      .L3
        rep
        ret


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42972

Reply via email to