Hi,
This patch treats the new pseudo-to-pseudo copies that
follow hard-reg-to-pseudo copies as having zero cost. The
justification is that each new copy immediately follows
the corresponding hard-reg-to-pseudo copy insn, so register
allocation should be able to coalesce the two and
eliminate the copy.
Currently these copies follow the normal costing scheme, and
the dump below shows the resulting unexpected combination:
``` dump
Trying 3, 2 -> 13:
3: r119:DI=r132:DI
REG_DEAD r132:DI
2: r118:DI=r131:DI
REG_DEAD r131:DI
13: r128:DI=r118:DI&0xffffffff|r119:DI<<0x20
REG_DEAD r119:DI
REG_DEAD r118:DI
Failed to match this instruction:
(set (reg:DI 128)
(ior:DI (ashift:DI (reg:DI 132)
(const_int 32 [0x20]))
(reg:DI 131)))
Successfully matched this instruction:
(set (reg/v:DI 119 [ f2 ])
(ashift:DI (reg:DI 132)
(const_int 32 [0x20])))
Successfully matched this instruction:
(set (reg:DI 128)
(ior:DI (reg/v:DI 119 [ f2 ])
(reg:DI 131)))
allowing combination of insns 2, 3 and 13
original costs 4 + 4 + 4 = 12
replacement costs 4 + 4 = 8
deferring deletion of insn with uid = 2.
modifying insn i2 3: r119:DI=r132:DI<<0x20
REG_DEAD r132:DI
deferring rescan insn with uid = 3.
modifying insn i3 13: r128:DI=r119:DI|r131:DI
REG_DEAD r131:DI
REG_DEAD r119:DI
deferring rescan insn with uid = 13.
``` end dump
The original insn 13 can work well as rotldi3_insert_3,
so the combination with shift/or isn't better, but the
costing does not reflect that.
With this patch, we get the following instead:
rejecting combination of insns 2, 3 and 13
original costs 0 + 0 + 4 = 4
replacement costs 4 + 4 = 8
Bootstrapped/regtested on powerpc64le-linux-gnu P9.
Is it reasonable? Any comments are highly appreciated!
BR,
Kewen
------
gcc/ChangeLog:
* combine.c (new_copies): New static global variable.
(combine_validate_cost): Consider zero costs from new_copies.
(combine_instructions): Set zero cost for insns in new_copies.
(make_more_copies): Record new pseudo-to-pseudo copies to new_copies.
(rest_of_handle_combine): Call bitmap alloc/free for new_copies.
diff --git a/gcc/combine.c b/gcc/combine.c
index ed1ad45de83..6fb2fa82c3f 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -419,6 +419,10 @@ static struct undobuf undobuf;
static int n_occurrences;
+/* Record the newly introduced pseudo-to-pseudo copies in function
+ make_more_copies. */
+static bitmap new_copies = NULL;
+
static rtx reg_nonzero_bits_for_combine (const_rtx, scalar_int_mode,
scalar_int_mode,
unsigned HOST_WIDE_INT *);
@@ -856,30 +860,38 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1,
rtx_insn *i2, rtx_insn *i3,
int i0_cost, i1_cost, i2_cost, i3_cost;
int new_i2_cost, new_i3_cost;
int old_cost, new_cost;
+ bool i0_cost_ok, i1_cost_ok, i2_cost_ok, i3_cost_ok;
/* Lookup the original insn_costs. */
i2_cost = INSN_COST (i2);
i3_cost = INSN_COST (i3);
+ i2_cost_ok = (i2_cost > 0) || bitmap_bit_p (new_copies, INSN_UID (i2));
+ i3_cost_ok = (i3_cost > 0) || bitmap_bit_p (new_copies, INSN_UID (i3));
if (i1)
{
i1_cost = INSN_COST (i1);
+ i1_cost_ok = (i1_cost > 0) || bitmap_bit_p (new_copies, INSN_UID (i1));
if (i0)
{
i0_cost = INSN_COST (i0);
- old_cost = (i0_cost > 0 && i1_cost > 0 && i2_cost > 0 && i3_cost > 0
- ? i0_cost + i1_cost + i2_cost + i3_cost : 0);
+ i0_cost_ok = (i0_cost > 0)
+ || bitmap_bit_p (new_copies, INSN_UID (i0));
+ old_cost = (i0_cost_ok && i1_cost_ok && i2_cost_ok && i3_cost_ok
+ ? i0_cost + i1_cost + i2_cost + i3_cost
+ : 0);
}
else
{
- old_cost = (i1_cost > 0 && i2_cost > 0 && i3_cost > 0
- ? i1_cost + i2_cost + i3_cost : 0);
+ old_cost = (i1_cost_ok && i2_cost_ok && i3_cost_ok
+ ? i1_cost + i2_cost + i3_cost
+ : 0);
i0_cost = 0;
}
}
else
{
- old_cost = (i2_cost > 0 && i3_cost > 0) ? i2_cost + i3_cost : 0;
+ old_cost = (i2_cost_ok && i3_cost_ok) ? i2_cost + i3_cost : 0;
i1_cost = i0_cost = 0;
}
@@ -1233,7 +1245,12 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
insn);
/* Record the current insn_cost of this instruction. */
- INSN_COST (insn) = insn_cost (insn, optimize_this_for_speed_p);
+ if (bitmap_bit_p (new_copies, INSN_UID (insn)))
+ /* Newly added pseudo-to-pseudo copies should not take any
+ costs since they should be able to be coalesced. */
+ INSN_COST (insn) = 0;
+ else
+ INSN_COST (insn) = insn_cost (insn, optimize_this_for_speed_p);
if (dump_file)
{
fprintf (dump_file, "insn_cost %d for ", INSN_COST (insn));
@@ -15068,6 +15085,7 @@ make_more_copies (void)
SET_SRC (set) = new_reg;
emit_insn_before (new_insn, insn);
df_insn_rescan (insn);
+ bitmap_set_bit (new_copies, INSN_UID (insn));
}
}
}
@@ -15076,6 +15094,7 @@ make_more_copies (void)
static unsigned int
rest_of_handle_combine (void)
{
+ new_copies = BITMAP_ALLOC (NULL);
make_more_copies ();
df_set_flags (DF_LR_RUN_DCE + DF_DEFER_INSN_RESCAN);
@@ -15102,6 +15121,7 @@ rest_of_handle_combine (void)
}
regstat_free_n_sets_and_refs ();
+ BITMAP_FREE (new_copies);
return 0;
}