The following patch fixes
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60969
The patch was bootstrapped and tested on x86/x86-64.
Committed as rev. 210519 to gcc 4.9 branch and as rev. 210520 to trunk.
2014-05-16 Vladimir Makarov <[email protected]>
PR rtl-optimization/60969
* ira-costs.c (record_reg_classes): Allow only memory for pseudo.
Calculate costs for this case.
2014-05-16 Vladimir Makarov <[email protected]>
PR rtl-optimization/60969
* g++.dg/pr60969.C: New.
Index: ira-costs.c
===================================================================
--- ira-costs.c (revision 210069)
+++ ira-costs.c (working copy)
@@ -762,10 +762,11 @@ record_reg_classes (int n_alts, int n_op
into that class. */
if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
{
- if (classes[i] == NO_REGS)
+ if (classes[i] == NO_REGS && ! allows_mem[i])
{
/* We must always fail if the operand is a REG, but
- we did not find a suitable class.
+ we did not find a suitable class and memory is
+ not allowed.
Otherwise we may perform an uninitialized read
from this_op_costs after the `continue' statement
@@ -783,50 +784,90 @@ record_reg_classes (int n_alts, int n_op
bool out_p = recog_data.operand_type[i] != OP_IN;
enum reg_class op_class = classes[i];
move_table *move_in_cost, *move_out_cost;
+ short (*mem_cost)[2];
ira_init_register_move_cost_if_necessary (mode);
if (! in_p)
{
ira_assert (out_p);
- move_out_cost = ira_may_move_out_cost[mode];
- for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ if (op_class == NO_REGS)
{
- rclass = cost_classes[k];
- pp_costs[k]
- = move_out_cost[op_class][rclass] * frequency;
+ mem_cost = ira_memory_move_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k] = mem_cost[rclass][0] * frequency;
+ }
+ }
+ else
+ {
+ move_out_cost = ira_may_move_out_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k]
+ = move_out_cost[op_class][rclass] * frequency;
+ }
}
}
else if (! out_p)
{
ira_assert (in_p);
- move_in_cost = ira_may_move_in_cost[mode];
- for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ if (op_class == NO_REGS)
{
- rclass = cost_classes[k];
- pp_costs[k]
- = move_in_cost[rclass][op_class] * frequency;
+ mem_cost = ira_memory_move_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k] = mem_cost[rclass][1] * frequency;
+ }
+ }
+ else
+ {
+ move_in_cost = ira_may_move_in_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k]
+ = move_in_cost[rclass][op_class] * frequency;
+ }
}
}
else
{
- move_in_cost = ira_may_move_in_cost[mode];
- move_out_cost = ira_may_move_out_cost[mode];
- for (k = cost_classes_ptr->num - 1; k >= 0; k--)
- {
- rclass = cost_classes[k];
- pp_costs[k] = ((move_in_cost[rclass][op_class]
- + move_out_cost[op_class][rclass])
- * frequency);
+ if (op_class == NO_REGS)
+ {
+ mem_cost = ira_memory_move_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k] = ((mem_cost[rclass][0]
+ + mem_cost[rclass][1])
+ * frequency);
+ }
+ }
+ else
+ {
+ move_in_cost = ira_may_move_in_cost[mode];
+ move_out_cost = ira_may_move_out_cost[mode];
+ for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+ {
+ rclass = cost_classes[k];
+ pp_costs[k] = ((move_in_cost[rclass][op_class]
+ + move_out_cost[op_class][rclass])
+ * frequency);
+ }
}
}
/* If the alternative actually allows memory, make
things a bit cheaper since we won't need an extra
insn to load it. */
- pp->mem_cost
- = ((out_p ? ira_memory_move_cost[mode][op_class][0] : 0)
- + (in_p ? ira_memory_move_cost[mode][op_class][1] : 0)
- - allows_mem[i]) * frequency;
+ if (op_class != NO_REGS)
+ pp->mem_cost
+ = ((out_p ? ira_memory_move_cost[mode][op_class][0] : 0)
+ + (in_p ? ira_memory_move_cost[mode][op_class][1] : 0)
+ - allows_mem[i]) * frequency;
/* If we have assigned a class to this allocno in
our first pass, add a cost to this alternative
corresponding to what we would add if this
@@ -836,15 +877,28 @@ record_reg_classes (int n_alts, int n_op
enum reg_class pref_class = pref[COST_INDEX (REGNO (op))];
if (pref_class == NO_REGS)
+ {
+ if (op_class != NO_REGS)
+ alt_cost
+ += ((out_p
+ ? ira_memory_move_cost[mode][op_class][0]
+ : 0)
+ + (in_p
+ ? ira_memory_move_cost[mode][op_class][1]
+ : 0));
+ }
+ else if (op_class == NO_REGS)
alt_cost
+= ((out_p
- ? ira_memory_move_cost[mode][op_class][0] : 0)
+ ? ira_memory_move_cost[mode][pref_class][1]
+ : 0)
+ (in_p
- ? ira_memory_move_cost[mode][op_class][1]
+ ? ira_memory_move_cost[mode][pref_class][0]
: 0));
else if (ira_reg_class_intersect[pref_class][op_class]
== NO_REGS)
- alt_cost +=
ira_register_move_cost[mode][pref_class][op_class];
+ alt_cost += (ira_register_move_cost
+ [mode][pref_class][op_class]);
}
}
}
Index: testsuite/g++.dg/pr60969.C
===================================================================
--- testsuite/g++.dg/pr60969.C (revision 0)
+++ testsuite/g++.dg/pr60969.C (working copy)
@@ -0,0 +1,30 @@
+/* { dg-do compile { target i?86-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -march=pentium4" } */
+
+struct A
+{
+ float f, g, h, k;
+ A () {}
+ A (float v0, float x, float y) : f(v0), g(x), h(y), k(0.0f) {}
+ A bar (A &a, float t) { return A (f + a.f * t, g + a.g * t, h + a.h * t); }
+};
+
+A
+baz (A &x, A &y, float t)
+{
+ return x.bar (y, t);
+}
+
+A *
+foo (A &s, A &t, A &u, A &v, int y, int z)
+{
+ A *x = new A[y * z];
+ for (int i = 0; i < 7; i++)
+ {
+ A s = baz (s, u, i / (float) z);
+ A t = baz (t, v, i / (float) z);
+ for (int j = 0; j < 7; j++)
+ x[i * y + j] = baz (s, t, j / (float) y);
+ }
+ return x;
+}