http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23286
--- Comment #40 from Bernhard Reutner-Fischer <aldot at gcc dot gnu.org>
2012-02-23 13:34:37 UTC ---
The ATTRIBUTE_UNUSED of do_hoist_insertion can be removed.
diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index 0f777b4..bfc7a92 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -3865,7 +3865,7 @@ do_pre_partial_partial_insertion (basic_block block,
basic_block dom)
The caller has to make sure that BLOCK has at least two successors. */
static bool
-do_hoist_insertion (basic_block block ATTRIBUTE_UNUSED)
+do_hoist_insertion (basic_block block)
{
edge e;
edge_iterator ei;
@@ -3878,6 +3878,13 @@ do_hoist_insertion (basic_block block ATTRIBUTE_UNUSED)
/* At least two successors, or else... */
gcc_assert (EDGE_COUNT (block->succs) >= 2);
+ /* We cheat about AVAIL_OUT in the first block
+ so pretend we are done in the second iteration. */
+ if (block->prev_bb
+ && block->prev_bb->index == ENTRY_BLOCK
+ && pre_stats.hoist_insert)
+ return false;
+
/* Check that all successors of BLOCK are dominated by block.
We could use dominated_by_p() for this, but actually there is a much
quicker check: any successor that is dominated by BLOCK can't have
@@ -3890,9 +3897,12 @@ do_hoist_insertion (basic_block block ATTRIBUTE_UNUSED)
availout_in_some = BITMAP_ALLOC (&grand_bitmap_obstack);
/* A hoistable value must be in ANTIC_IN(block)
- but not in AVAIL_OUT(BLOCK). */
+ but not in AVAIL_OUT(BLOCK).
+ To give more opportunity to hoisting,
+ cheat by disregarding AVAIL_OUT of the ENTRY_BLOCK. */
bitmap_set_copy (hoistable_set, ANTIC_IN (block));
- bitmap_set_subtract_values (hoistable_set, AVAIL_OUT (block));
+ if (block->prev_bb && block->prev_bb->index != ENTRY_BLOCK)
+ bitmap_set_subtract_values (hoistable_set, AVAIL_OUT (block));
/* Short-cut for a common case: hoistable_set is empty. */
if (bitmap_empty_p (&hoistable_set->values))
So, for a simplified PR5738 testcase:
$ cat pr5738.c
/* Structure used by the reduced test case: it holds a single pointer to
   unsigned short, which func() below reads through and post-increments.  */
struct foo
{
unsigned short *p;
};
/* foo_s expands to the plain identifier `s`, so every use of `foo_s` below
   (including func's first parameter name) refers to `s`.  */
#define foo_s s
/* Reduced test case with two textually identical branches.

   Each arm of the `if` loads the unsigned short that s->p points to,
   post-increments s->p, and adds the loaded value to *coord.  DELTA only
   selects which arm executes; the active statements in both arms are the
   same.  The function returns nothing; its effects are the updates to
   s->p and *coord.  */
void
func (struct foo *foo_s, unsigned int *coord, _Bool delta)
{
unsigned short change;
if (delta)
{
/* Read the current element, then advance s->p by one element.  */
change = *((foo_s)->p++);
*coord += change;
}
else
{
/* Same two statements as the `if` arm above.  */
change = *((foo_s)->p++);
*coord += change;
// *coord += *((foo_s)->p++) << 8;
}
}
we end up slightly better off, with something like:
func (struct foo * sD.1705, unsigned intD.9 * coordD.1706, _BoolD.1685
deltaD.1707)
{
unsigned intD.9 pretmp.6D.1727;
short unsigned intD.16 * pretmp.5D.1726;
short unsigned intD.16 pretmp.4D.1725;
short unsigned intD.16 * pretmp.3D.1724;
short unsigned intD.16 changeD.1710;
unsigned intD.9 D.1718;
unsigned intD.9 D.1717;
unsigned intD.9 D.1716;
short unsigned intD.16 * D.1715;
short unsigned intD.16 * D.1714;
# BLOCK 2 freq:10000
# PRED: ENTRY [100.0%] (fallthru,exec)
# VUSE <.MEMD.1720_17(D)>
# PT = nonlocal escaped
pretmp.3D.1724_22 = sD.1705_2(D)->pD.1704;
# VUSE <.MEMD.1720_17(D)>
pretmp.4D.1725_23 = *pretmp.3D.1724_22;
# PT = nonlocal escaped
pretmp.5D.1726_24 = pretmp.3D.1724_22 + 2;
# VUSE <.MEMD.1720_17(D)>
pretmp.6D.1727_25 = *coordD.1706_6(D);
pretmp.6D.1727_26 = (unsigned intD.9) pretmp.4D.1725_23;
pretmp.6D.1727_27 = pretmp.6D.1727_25 + pretmp.6D.1727_26;
if (deltaD.1707_1(D) != 0)
goto <bb 3>;
else
goto <bb 4>;
# SUCC: 3 [39.0%] (true,exec) 4 [61.0%] (false,exec)
# BLOCK 3 freq:3900
# PRED: 2 [39.0%] (true,exec)
# .MEMD.1720_18 = VDEF <.MEMD.1720_17(D)>
sD.1705_2(D)->pD.1704 = pretmp.5D.1726_24;
# .MEMD.1720_19 = VDEF <.MEMD.1720_18>
*coordD.1706_6(D) = pretmp.6D.1727_27;
goto <bb 5>;
# SUCC: 5 [100.0%] (fallthru,exec)
# BLOCK 4 freq:6100
# PRED: 2 [61.0%] (false,exec)
# .MEMD.1720_20 = VDEF <.MEMD.1720_17(D)>
sD.1705_2(D)->pD.1704 = pretmp.5D.1726_24;
# .MEMD.1720_21 = VDEF <.MEMD.1720_20>
*coordD.1706_6(D) = pretmp.6D.1727_27;
# SUCC: 5 [100.0%] (fallthru,exec)
# BLOCK 5 freq:10000
# PRED: 3 [100.0%] (fallthru,exec) 4 [100.0%] (fallthru,exec)
# .MEMD.1720_16 = PHI <.MEMD.1720_19(3), .MEMD.1720_21(4)>
# VUSE <.MEMD.1720_16>
return;
# SUCC: EXIT [100.0%]
}
which translates to nearly proper code:
func:
.LFB0:
.cfi_startproc
movq (%rdi), %rax # sD.1705_2(D)->pD.1704, pretmp.3D.1724
leaq 2(%rax), %rcx #, pretmp.5D.1726
movzwl (%rax), %eax # *pretmp.3D.1724_22, pretmp.6D.1727
addl (%rsi), %eax # *coordD.1706_6(D), pretmp.6D.1727
testb %dl, %dl # deltaD.1707
movq %rcx, (%rdi) # pretmp.5D.1726, sD.1705_2(D)->pD.1704
movl %eax, (%rsi) # pretmp.6D.1727, *coordD.1706_6(D)
je .L2 #,
ret
.L2:
ret
.cfi_endproc
whereas the expected code would be something like (I think):
func:
.LFB0:
.cfi_startproc
movq (%rdi), %rax # sD.1705_2(D)->pD.1704, D.1714
movzwl (%rax), %edx #* D.1714, changeD.1710
addq $2, %rax #, tmp77
movq %rax, (%rdi) # tmp77, sD.1705_2(D)->pD.1704
addl %edx, (%rsi) # changeD.1710, *coordD.1706_6(D)
ret
.cfi_endproc
.LFE0:
So we just need to recognize that BB3 and BB4 are identical (everything in BB3
can be hoisted and BB4 is dead).