Hi,
While investigating the root cause a testsuite regression for the
ARM/embedded-5-branch GCC in gcc.dg/vect/slp-perm-5.c, we found that the bug
seems to also affect trunk. The bug manifests itself as an ICE in cselib due to
a parallel insn with two SET to the same register. When processing the second
SET in cselib_record_set (), the assert gcc_assert (REG_VALUES (dreg)->elt ==
0) fails because the field was already set when processing the first SET. The
root cause seems to be a register allocation issue in lra-constraints.
When considering an output operand with matching input operand(s),
match_reload does a number of checks to see if it can reuse the first matching
input operand register value or if a new unique value should be given to the
output operand. The current check ignores the case of multiple output operands
(as in neon_vtrn<mode>_insn insn pattern in config/arm/arm.md). This can lead
to cases where multiple output operands share a same register value when the
first matching input operand has the same value as another output operand,
leading to the ICE in cselib. This patch changes match_reload to get
information about other output operands and check whether this case is met or
not.
ChangeLog entry is as follows:
*** gcc/ChangeLog ***
2016-07-01 Thomas Preud'homme <thomas.preudho...@arm.com>
* lra-constraints.c (match_reload): Pass information about other
output operands. Create new unique register value if matching input
operand shares same register value as output operand being considered.
(curr_insn_transform): Record output operands already processed.
Patch passed bootstrap under arm-none-linux-gnueabihf (Cortex-A57 in ARM mode
as well as Thumb mode), aarch64-linux-gnu (Cortex-A57) and x86_64-linux-gnu
and testsuite results does not regress for these and for arm-none-eabi
targeting Cortex-A8.
Is this ok for trunk?
Best regards,
Thomas
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index bf08dce2e0a4c2ef4c339aedbda4dba47cba1645..a3fd6c93c648050f3479dc8aca359a819d24863e 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -871,15 +871,18 @@ regno_val_use_in (unsigned int regno, rtx x)
}
/* Generate reloads for matching OUT and INS (array of input operand
- numbers with end marker -1) with reg class GOAL_CLASS. Add input
- and output reloads correspondingly to the lists *BEFORE and *AFTER.
- OUT might be negative. In this case we generate input reloads for
- matched input operands INS. EARLY_CLOBBER_P is a flag that the
- output operand is early clobbered for chosen alternative. */
+ numbers with end marker -1) with reg class GOAL_CLASS, considering
+ output operands OUTS (similar array to INS) needing to be in different
+ registers. Add input and output reloads correspondingly to the lists
+ *BEFORE and *AFTER. OUT might be negative. In this case we generate
+ input reloads for matched input operands INS. EARLY_CLOBBER_P is a flag
+ that the output operand is early clobbered for chosen alternative. */
static void
-match_reload (signed char out, signed char *ins, enum reg_class goal_class,
- rtx_insn **before, rtx_insn **after, bool early_clobber_p)
+match_reload (signed char out, signed char *ins, signed char *outs,
+ enum reg_class goal_class, rtx_insn **before,
+ rtx_insn **after, bool early_clobber_p)
{
+ bool out_conflict;
int i, in;
rtx new_in_reg, new_out_reg, reg;
machine_mode inmode, outmode;
@@ -968,12 +971,32 @@ match_reload (signed char out, signed char *ins, enum reg_class goal_class,
We don't care about eliminable hard regs here as we are
interesting only in pseudos. */
+ /* Matching input's register value is the same as one of the other
+ output operand. Output operands in a parallel insn must be in
+ different registers. */
+ out_conflict = false;
+ if (REG_P (in_rtx))
+ {
+ for (i = 0; outs[i] >= 0; i++)
+ {
+ rtx other_out_rtx = *curr_id->operand_loc[outs[i]];
+ if (REG_P (other_out_rtx)
+ && (regno_val_use_in (REGNO (in_rtx), other_out_rtx)
+ != NULL_RTX))
+ {
+ out_conflict = true;
+ break;
+ }
+ }
+ }
+
new_in_reg = new_out_reg
= (! early_clobber_p && ins[1] < 0 && REG_P (in_rtx)
&& (int) REGNO (in_rtx) < lra_new_regno_start
&& find_regno_note (curr_insn, REG_DEAD, REGNO (in_rtx))
&& (out < 0
|| regno_val_use_in (REGNO (in_rtx), out_rtx) == NULL_RTX)
+ && !out_conflict
? lra_create_new_reg (inmode, in_rtx, goal_class, "")
: lra_create_new_reg_with_unique_value (outmode, out_rtx,
goal_class, ""));
@@ -3432,9 +3455,11 @@ curr_insn_transform (bool check_only_p)
int i, j, k;
int n_operands;
int n_alternatives;
+ int n_outputs;
int commutative;
signed char goal_alt_matched[MAX_RECOG_OPERANDS][MAX_RECOG_OPERANDS];
signed char match_inputs[MAX_RECOG_OPERANDS + 1];
+ signed char outputs[MAX_RECOG_OPERANDS + 1];
rtx_insn *before, *after;
bool alt_p = false;
/* Flag that the insn has been changed through a transformation. */
@@ -3844,6 +3869,8 @@ curr_insn_transform (bool check_only_p)
}
}
+ n_outputs = 0;
+ outputs[0] = -1;
for (i = 0; i < n_operands; i++)
{
int regno;
@@ -4001,7 +4028,7 @@ curr_insn_transform (bool check_only_p)
/* generate reloads for input and matched outputs. */
match_inputs[0] = i;
match_inputs[1] = -1;
- match_reload (goal_alt_matched[i][0], match_inputs,
+ match_reload (goal_alt_matched[i][0], match_inputs, outputs,
goal_alt[i], &before, &after,
curr_static_id->operand_alternative
[goal_alt_number * n_operands + goal_alt_matched[i][0]]
@@ -4011,9 +4038,9 @@ curr_insn_transform (bool check_only_p)
&& (curr_static_id->operand[goal_alt_matched[i][0]].type
== OP_IN))
/* Generate reloads for output and matched inputs. */
- match_reload (i, goal_alt_matched[i], goal_alt[i], &before, &after,
- curr_static_id->operand_alternative
- [goal_alt_number * n_operands + i].earlyclobber);
+ match_reload (i, goal_alt_matched[i], outputs, goal_alt[i], &before,
+ &after, curr_static_id->operand_alternative
+ [goal_alt_number * n_operands + i].earlyclobber);
else if (curr_static_id->operand[i].type == OP_IN
&& (curr_static_id->operand[goal_alt_matched[i][0]].type
== OP_IN))
@@ -4023,12 +4050,22 @@ curr_insn_transform (bool check_only_p)
for (j = 0; (k = goal_alt_matched[i][j]) >= 0; j++)
match_inputs[j + 1] = k;
match_inputs[j + 1] = -1;
- match_reload (-1, match_inputs, goal_alt[i], &before, &after, false);
+ match_reload (-1, match_inputs, outputs, goal_alt[i], &before,
+ &after, false);
}
else
/* We must generate code in any case when function
process_alt_operands decides that it is possible. */
gcc_unreachable ();
+
+ /* Memorise processed outputs so that output remaining to be processed
+ can avoid using the same register value (see match_reload). */
+ if (curr_static_id->operand[i].type == OP_OUT)
+ {
+ outputs[n_outputs++] = i;
+ outputs[n_outputs] = -1;
+ }
+
if (optional_p)
{
lra_assert (REG_P (op));