------- Comment #3 from rguenth at gcc dot gnu dot org 2010-02-05 13:17 -------
On the (retired) mem-ref branch, bitfield loads/stores were lowered very early
to read-extract-modify-write operations, so the tree level would have optimized
this.
But of course people complained that architectures that can do bitfield
stores would be pessimized if we did not retain the funny BIT_FIELD_REFs
of memory.
Basically, without some form of lowering, the tree level is lost.
The mem-ref branch lowered the function to:
i = 0;
<D.1562>:;
D.1564 = (long unsigned int) i;
D.1565 = D.1564 * 4;
D.1566 = p + D.1565;
MEML.0 = IMEM <unsigned int {2}, D.1566>;
D.1567 = BIT_FIELD_REF <MEML.0, 1, 0>;
if (D.1567 != 0) goto <D.1574>; else goto <D.1575>;
<D.1574>:;
D.1564 = (long unsigned int) i;
D.1565 = D.1564 * 4;
D.1566 = p + D.1565;
D.1564 = (long unsigned int) i;
D.1565 = D.1564 * 4;
D.1566 = p + D.1565;
MEML.1 = IMEM <unsigned int {2}, D.1566>;
D.1568 = BIT_FIELD_REF <MEML.1, 31, 1>;
D.1569 = (int) D.1568;
D.1570 = D.1569 + a;
D.1571 = (unsigned int) D.1570;
D.1572 = (<unnamed-unsigned:31>) D.1571;
MEML.2 = IMEM <unsigned int {2}, D.1566>;
MEML.2 = BIT_FIELD_EXPR <MEML.2, D.1572, 31, 1>;
IMEM <unsigned int {2}, D.1566> = MEML.2;
<D.1575>:;
i = i + 1;
if (i < n) goto <D.1562>; else goto <D.1563>;
<D.1563>:;
return;
so FRE sees the redundant load, and we expand from:
<bb 2>:
ivtmp.20_15 = (unsigned int *) p_5(D);
<bb 3>:
# ivtmp.20_13 = PHI <ivtmp.20_15(2), ivtmp.20_22(5)>
# i_1 = PHI <0(2), i_24(5)>
MEML.0_7 = IMEM <unsigned int {2}, ivtmp.20_13>;
D.1567_8 = BIT_FIELD_REF <MEML.0_7, 1, 0>;
if (D.1567_8 != 0)
goto <bb 4>;
else
goto <bb 5>;
<bb 4>:
D.1568_16 = BIT_FIELD_REF <MEML.0_7, 31, 1>;
D.1569_17 = (int) D.1568_16;
D.1570_19 = D.1569_17 + a_18(D);
D.1571_20 = (unsigned int) D.1570_19;
D.1572_21 = (<unnamed-unsigned:31>) D.1571_20;
MEML.2_23 = BIT_FIELD_EXPR <MEML.0_7, D.1572_21, 31, 1>;
IMEM <unsigned int {2}, ivtmp.20_13> = MEML.2_23;
<bb 5>:
i_24 = i_1 + 1;
ivtmp.20_22 = ivtmp.20_13 + 4;
if (i_24 < n_25(D))
goto <bb 3>;
else
goto <bb 6>;
<bb 6>:
return;
On x86_64, the generated code was:
func:
.LFB2:
movl %edx, %r8d
xorl %ecx, %ecx
.p2align 4,,10
.p2align 3
.L3:
movl (%rdi), %eax
testb $1, %al
je .L2
movl %eax, %edx
shrl %eax
addl %r8d, %eax
andl $1, %edx
addl %eax, %eax
orl %eax, %edx
movl %edx, (%rdi)
.L2:
addl $1, %ecx
addq $4, %rdi
cmpl %esi, %ecx
jl .L3
rep
ret
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42972