Hi,

While investigating the root cause a testsuite regression for the 
ARM/embedded-5-branch GCC in gcc.dg/vect/slp-perm-5.c, we found that the bug 
seems to also affect trunk. The bug manifests itself as an ICE in cselib due to 
a parallel insn with two SET to the same register. When processing the second 
SET in cselib_record_set (), the assert gcc_assert (REG_VALUES (dreg)->elt == 
0) fails because the field was already set when processing the first SET. The 
root cause seems to be a register allocation issue in lra-constraints.

When considering an output operand with matching input operand(s), 
match_reload does a number of checks to see if it can reuse the first matching 
input operand register value or if a new unique value should be given to the 
output operand. The current check ignores the case of multiple output operands 
(as in neon_vtrn<mode>_insn insn pattern in config/arm/arm.md). This can lead 
to cases where multiple output operands share a same register value when the 
first matching input operand has the same value as another output operand, 
leading to the ICE in cselib. This patch changes match_reload to get 
information about other output operands and check whether this case is met or 
not.

ChangeLog entry is as follows:

*** gcc/ChangeLog ***

2016-07-01  Thomas Preud'homme  <thomas.preudho...@arm.com>

        * lra-constraints.c (match_reload): Pass information about other
        output operands.  Create new unique register value if matching input
        operand shares same register value as output operand being considered.
        (curr_insn_transform): Record output operands already processed.


Patch passed bootstrap under arm-none-linux-gnueabihf (Cortex-A57 in ARM mode 
as well as Thumb mode), aarch64-linux-gnu (Cortex-A57) and x86_64-linux-gnu 
and testsuite results does not regress for these and for arm-none-eabi 
targeting Cortex-A8.

Is this ok for trunk?

Best regards,

Thomas
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index bf08dce2e0a4c2ef4c339aedbda4dba47cba1645..a3fd6c93c648050f3479dc8aca359a819d24863e 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -871,15 +871,18 @@ regno_val_use_in (unsigned int regno, rtx x)
 }
 
 /* Generate reloads for matching OUT and INS (array of input operand
-   numbers with end marker -1) with reg class GOAL_CLASS.  Add input
-   and output reloads correspondingly to the lists *BEFORE and *AFTER.
-   OUT might be negative.  In this case we generate input reloads for
-   matched input operands INS.  EARLY_CLOBBER_P is a flag that the
-   output operand is early clobbered for chosen alternative.  */
+   numbers with end marker -1) with reg class GOAL_CLASS, considering
+   output operands OUTS (similar array to INS) needing to be in different
+   registers.  Add input and output reloads correspondingly to the lists
+   *BEFORE and *AFTER.  OUT might be negative.  In this case we generate
+   input reloads for matched input operands INS.  EARLY_CLOBBER_P is a flag
+   that the output operand is early clobbered for chosen alternative.  */
 static void
-match_reload (signed char out, signed char *ins, enum reg_class goal_class,
-	      rtx_insn **before, rtx_insn **after, bool early_clobber_p)
+match_reload (signed char out, signed char *ins, signed char *outs,
+	      enum reg_class goal_class, rtx_insn **before,
+	      rtx_insn **after, bool early_clobber_p)
 {
+  bool out_conflict;
   int i, in;
   rtx new_in_reg, new_out_reg, reg;
   machine_mode inmode, outmode;
@@ -968,12 +971,32 @@ match_reload (signed char out, signed char *ins, enum reg_class goal_class,
 	 We don't care about eliminable hard regs here as we are
 	 interesting only in pseudos.  */
 
+      /* Matching input's register value is the same as one of the other
+	 output operand.  Output operands in a parallel insn must be in
+	 different registers.  */
+      out_conflict = false;
+      if (REG_P (in_rtx))
+	{
+	  for (i = 0; outs[i] >= 0; i++)
+	    {
+	      rtx other_out_rtx = *curr_id->operand_loc[outs[i]];
+	      if (REG_P (other_out_rtx)
+		  && (regno_val_use_in (REGNO (in_rtx), other_out_rtx)
+		      != NULL_RTX))
+		{
+		  out_conflict = true;
+		  break;
+		}
+	    }
+	}
+
       new_in_reg = new_out_reg
 	= (! early_clobber_p && ins[1] < 0 && REG_P (in_rtx)
 	   && (int) REGNO (in_rtx) < lra_new_regno_start
 	   && find_regno_note (curr_insn, REG_DEAD, REGNO (in_rtx))
 	   && (out < 0
 	       || regno_val_use_in (REGNO (in_rtx), out_rtx) == NULL_RTX)
+	   && !out_conflict
 	   ? lra_create_new_reg (inmode, in_rtx, goal_class, "")
 	   : lra_create_new_reg_with_unique_value (outmode, out_rtx,
 						   goal_class, ""));
@@ -3432,9 +3455,11 @@ curr_insn_transform (bool check_only_p)
   int i, j, k;
   int n_operands;
   int n_alternatives;
+  int n_outputs;
   int commutative;
   signed char goal_alt_matched[MAX_RECOG_OPERANDS][MAX_RECOG_OPERANDS];
   signed char match_inputs[MAX_RECOG_OPERANDS + 1];
+  signed char outputs[MAX_RECOG_OPERANDS + 1];
   rtx_insn *before, *after;
   bool alt_p = false;
   /* Flag that the insn has been changed through a transformation.  */
@@ -3844,6 +3869,8 @@ curr_insn_transform (bool check_only_p)
 	  }
       }
 
+  n_outputs = 0;
+  outputs[0] = -1;
   for (i = 0; i < n_operands; i++)
     {
       int regno;
@@ -4001,7 +4028,7 @@ curr_insn_transform (bool check_only_p)
 	  /* generate reloads for input and matched outputs.  */
 	  match_inputs[0] = i;
 	  match_inputs[1] = -1;
-	  match_reload (goal_alt_matched[i][0], match_inputs,
+	  match_reload (goal_alt_matched[i][0], match_inputs, outputs,
 			goal_alt[i], &before, &after,
 			curr_static_id->operand_alternative
 			[goal_alt_number * n_operands + goal_alt_matched[i][0]]
@@ -4011,9 +4038,9 @@ curr_insn_transform (bool check_only_p)
 	       && (curr_static_id->operand[goal_alt_matched[i][0]].type
 		   == OP_IN))
 	/* Generate reloads for output and matched inputs.  */
-	match_reload (i, goal_alt_matched[i], goal_alt[i], &before, &after,
-		      curr_static_id->operand_alternative
-		      [goal_alt_number * n_operands + i].earlyclobber);
+	match_reload (i, goal_alt_matched[i], outputs, goal_alt[i], &before,
+		      &after, curr_static_id->operand_alternative
+			      [goal_alt_number * n_operands + i].earlyclobber);
       else if (curr_static_id->operand[i].type == OP_IN
 	       && (curr_static_id->operand[goal_alt_matched[i][0]].type
 		   == OP_IN))
@@ -4023,12 +4050,22 @@ curr_insn_transform (bool check_only_p)
 	  for (j = 0; (k = goal_alt_matched[i][j]) >= 0; j++)
 	    match_inputs[j + 1] = k;
 	  match_inputs[j + 1] = -1;
-	  match_reload (-1, match_inputs, goal_alt[i], &before, &after, false);
+	  match_reload (-1, match_inputs, outputs, goal_alt[i], &before,
+			&after, false);
 	}
       else
 	/* We must generate code in any case when function
 	   process_alt_operands decides that it is possible.  */
 	gcc_unreachable ();
+
+      /* Memorise processed outputs so that output remaining to be processed
+	 can avoid using the same register value (see match_reload).  */
+      if (curr_static_id->operand[i].type == OP_OUT)
+	{
+	  outputs[n_outputs++] = i;
+	  outputs[n_outputs] = -1;
+	}
+
       if (optional_p)
 	{
 	  lra_assert (REG_P (op));

Reply via email to