As explained in https://gcc.gnu.org/ml/gcc-patches/2014-12/msg01052.html, the
conversion of the original cc0-based Visium port into a CC_MODE port went
smoothly and didn't affect run-time performance, except for a single but
notable case: the reorg.c pass cannot put an insn that clobbers the CC reg
into a conditional branch's delay slot if it comes from before the branch.
I guess this is negligible on most architectures with delay slots, but not on
Visium, where it alone costs 3% on CoreMark.

The attached patch fixes the pessimization very selectively, using the same
trigger as the compare-elim pass (a valid TARGET_FLAGS_REGNUM), which means
that only aarch64, mn10300, rx and visium are potentially affected; among
them only visium has delay slots, so the patch actually affects visium only.

Tested on visium-elf.  Any objections to me applying it now?


2015-01-16  Eric Botcazou  <ebotca...@adacore.com>

        * reorg.c (fill_simple_delay_slots): If TARGET_FLAGS_REGNUM is valid,
        implement a more precise life analysis for it during backward scan.


-- 
Eric Botcazou
Index: reorg.c
===================================================================
--- reorg.c	(revision 219714)
+++ reorg.c	(working copy)
@@ -2072,9 +2072,24 @@ fill_simple_delay_slots (int non_jumps_p
 
       if (slots_filled < slots_to_fill)
 	{
+	  /* If the flags register is dead after the insn, then we want to be
+	     able to accept a candidate that clobbers it.  For this purpose,
+	     we need to filter the flags register during life analysis, so
+	     that it doesn't create RAW and WAW dependencies, while still
+	     creating the necessary WAR dependencies.  */
+	  bool filter_flags
+	    = (slots_to_fill == 1
+	       && targetm.flags_regnum != INVALID_REGNUM
+	       && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
+	  struct resources fset;
 	  CLEAR_RESOURCE (&needed);
 	  CLEAR_RESOURCE (&set);
 	  mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
+	  if (filter_flags)
+	    {
+	      CLEAR_RESOURCE (&fset);
+	      mark_set_resources (insn, &fset, 0, MARK_SRC_DEST);
+	    }
 	  mark_referenced_resources (insn, &needed, false);
 
 	  for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
@@ -2092,7 +2107,9 @@ fill_simple_delay_slots (int non_jumps_p
 	      /* Check for resource conflict first, to avoid unnecessary
 		 splitting.  */
 	      if (! insn_references_resource_p (trial, &set, true)
-		  && ! insn_sets_resource_p (trial, &set, true)
+		  && ! insn_sets_resource_p (trial,
+					     filter_flags ? &fset : &set,
+					     true)
 		  && ! insn_sets_resource_p (trial, &needed, true)
 #ifdef HAVE_cc0
 		  /* Can't separate set of cc0 from its use.  */
@@ -2121,6 +2138,18 @@ fill_simple_delay_slots (int non_jumps_p
 		}
 
 	      mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
+	      if (filter_flags)
+		{
+		  mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
+		  /* If the flags register is set, then it doesn't create RAW
+		     dependencies any longer and it also doesn't create WAW
+		     dependencies since it's dead after the original insn.  */
+		  if (TEST_HARD_REG_BIT (fset.regs, targetm.flags_regnum))
+		    {
+		      CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
+		      CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
+		    }
+		}
 	      mark_referenced_resources (trial, &needed, true);
 	    }
 	}

Reply via email to