From 0968bac2c3c9b26fd36bd03f327f374501865373 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Fri, 15 Jan 2016 13:34:23 +0100
Subject: [PATCH] [ARC] Add basic support for double load and store
 instructions

gcc/
2016-01-19  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
	(arc_init): Check validity of the mll64 option.
	(arc_save_restore): Use double load/store instruction.
	(arc_expand_movmem): Likewise.
	(arc_split_move): Don't split if we have double load/store
	instructions. Returns a boolean.
	(arc_process_double_reg_moves): Change function to return boolean
	instead of a sequence of instructions.
	(arc_dwarf_register_span): New function.
	* config/arc/arc-protos.h (arc_split_move): Change prototype.
	* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
	* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
	(*movdf_insn): Likewise.
	* config/arc/arc.opt (mll64): New option.
	* config/arc/predicates.md (even_register_operand): New predicate.
	* doc/invoke.texi (ARC Options): Add mll64 documentation.
---
 gcc/config/arc/arc-protos.h  |   2 +-
 gcc/config/arc/arc.c         | 107 +++++++++++++++++++++++++++++++----------
 gcc/config/arc/arc.h         |   4 ++
 gcc/config/arc/arc.md        | 111 ++++++++++++++++++++++++-------------------
 gcc/config/arc/arc.opt       |   4 ++
 gcc/config/arc/predicates.md |  13 ++++-
 gcc/doc/invoke.texi          |   6 ++-
 7 files changed, 169 insertions(+), 78 deletions(-)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 3f96455..7d44840 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -104,7 +104,7 @@ extern void arc_toggle_unalign (void);
 extern void split_addsi (rtx *);
 extern void split_subsi (rtx *);
 extern void arc_pad_return (void);
-extern rtx arc_split_move (rtx *);
+extern bool arc_split_move (rtx *);
 extern int arc_verify_short (rtx_insn *insn, int unalign, int);
 extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
 extern rtx arc_regno_use_in (unsigned int, rtx);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index f636534..b308fef 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -420,6 +420,9 @@ static void arc_finalize_pic (void);
 #undef TARGET_ASM_ALIGNED_SI_OP
 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
 
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
+
 /* Try to keep the (mov:DF _, reg) as early as possible so
    that the d<add/sub/mul>h-lr insns appear together and can
    use the peephole2 pattern.  */
@@ -736,6 +739,10 @@ arc_init (void)
   if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
     error ("-matomic is only supported for ARC700 or ARC HS cores");
 
+  /* ll64 ops only available for HS.  */
+  if (TARGET_LL64 && !TARGET_HS)
+    error ("-mll64 is only supported for ARC HS cores");
+
   arc_init_reg_tables ();
 
   /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
@@ -2175,9 +2182,26 @@ arc_save_restore (rtx base_reg,
 
       for (regno = 0; regno <= 31; regno++)
 	{
-	  if ((gmask & (1L << regno)) != 0)
+	  enum machine_mode mode = SImode;
+	  bool found = false;
+
+	  if (TARGET_LL64
+	      && (regno % 2 == 0)
+	      && ((gmask & (1L << regno)) != 0)
+	      && ((gmask & (1L << (regno+1))) != 0))
+	    {
+	      found = true;
+	      mode  = DImode;
+	    }
+	  else if ((gmask & (1L << regno)) != 0)
 	    {
-	      rtx reg = gen_rtx_REG (SImode, regno);
+	      found = true;
+	      mode  = SImode;
+	    }
+
+	  if (found)
+	    {
+	      rtx reg = gen_rtx_REG (mode, regno);
 	      rtx addr, mem;
 	      int cfa_adjust = *first_offset;
 
@@ -2193,7 +2217,7 @@ arc_save_restore (rtx base_reg,
 		  gcc_assert (SMALL_INT (offset));
 		  addr = plus_constant (Pmode, base_reg, offset);
 		}
-	      mem = gen_frame_mem (SImode, addr);
+	      mem = gen_frame_mem (mode, addr);
 	      if (epilogue_p)
 		{
 		  rtx insn =
@@ -2212,6 +2236,11 @@ arc_save_restore (rtx base_reg,
 	      else
 		frame_move_inc (mem, reg, base_reg, addr);
 	      offset += UNITS_PER_WORD;
+	      if (mode == DImode)
+		{
+		  offset += UNITS_PER_WORD;
+		  ++regno;
+		}
 	    } /* if */
 	} /* for */
     }/* if */
@@ -7009,14 +7038,23 @@ arc_expand_movmem (rtx *operands)
   size = INTVAL (operands[2]);
   /* move_by_pieces_ninsns is static, so we can't use it.  */
   if (align >= 4)
-    n_pieces = (size + 2) / 4U + (size & 1);
+    {
+      /* Whole pieces plus one move per 4- (LL64), 2- and 1-byte tail.  */
+      n_pieces = (TARGET_LL64
+		  ? (size + 4) / 8U + ((size >> 1) & 1) + (size & 1)
+		  : (size + 2) / 4U + (size & 1));
+    }
   else if (align == 2)
     n_pieces = (size + 1) / 2U;
   else
     n_pieces = size;
   if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
     return false;
-  if (piece > 4)
+  /* Force 32 bit aligned and larger datum to use 64 bit transfers, if
+     possible.  */
+  if (TARGET_LL64 && (piece >= 4))
+    piece = 8;
+  else if (piece > 4)
     piece = 4;
   dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
   src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
@@ -8463,12 +8501,11 @@ split_subsi (rtx *operands)
    Operand 0: destination register
    Operand 1: source register  */
 
-static rtx
+static bool
 arc_process_double_reg_moves (rtx *operands)
 {
   rtx dest = operands[0];
   rtx src  = operands[1];
-  rtx val;
 
   enum usesDxState { none, srcDx, destDx, maxDx };
   enum usesDxState state = none;
@@ -8483,9 +8520,7 @@ arc_process_double_reg_moves (rtx *operands)
     }
 
   if (state == none)
-    return NULL_RTX;
-
-  start_sequence ();
+    return false;
 
   if (state == srcDx)
     {
@@ -8532,30 +8567,36 @@ arc_process_double_reg_moves (rtx *operands)
   else
     gcc_unreachable ();
 
-  val = get_insns ();
-  end_sequence ();
-  return val;
+  return true;
 }
 
 /* operands 0..1 are the operands of a 64 bit move instruction.
    split it into two moves with operands 2/3 and 4/5.  */
 
-rtx
+bool
 arc_split_move (rtx *operands)
 {
   machine_mode mode = GET_MODE (operands[0]);
   int i;
   int swap = 0;
   rtx xop[4];
-  rtx val;
 
   if (TARGET_DPFP)
   {
-    val = arc_process_double_reg_moves (operands);
-    if (val)
-      return val;
+    if (arc_process_double_reg_moves (operands))
+      return true;
   }
 
+  if (TARGET_LL64
+      && ((memory_operand (operands[0], mode)
+	   && even_register_operand (operands[1], mode))
+	  || (memory_operand (operands[1], mode)
+	      && even_register_operand (operands[0], mode))))
+    {
+      emit_move_insn (operands[0], operands[1]);
+      return true;
+    }
+
   for (i = 0; i < 2; i++)
     {
       if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
@@ -8608,13 +8649,7 @@ arc_split_move (rtx *operands)
   operands[4-swap] = xop[2];
   operands[5-swap] = xop[3];
 
-  start_sequence ();
-  emit_insn (gen_rtx_SET (operands[2], operands[3]));
-  emit_insn (gen_rtx_SET (operands[4], operands[5]));
-  val = get_insns ();
-  end_sequence ();
-
-  return val;
+  return false;
 }
 
 /* Select between the instruction output templates s_tmpl (for short INSNs)
@@ -9329,6 +9364,28 @@ arc_no_speculation_in_delay_slots_p ()
   return true;
 }
 
+/* Implement TARGET_DWARF_REGISTER_SPAN.  For an 8-byte RTL, return a PARALLEL
+   of the SImode registers REGNO (RTL) and REGNO (RTL) + 1, else NULL_RTX.  */
+
+static rtx
+arc_dwarf_register_span (rtx rtl)
+{
+   enum machine_mode mode = GET_MODE (rtl);
+   unsigned regno;
+   rtx p;
+
+   if (GET_MODE_SIZE (mode) != 8)
+     return NULL_RTX;
+
+   p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+   regno = REGNO (rtl);
+   XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
+   XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
+
+   return p;
+}
+
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-arc.h"
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index 70a2b1d..27665b0 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -97,6 +97,10 @@ along with GCC; see the file COPYING3.  If not see
 	builtin_define ("__ARC_NORM__");\
 	builtin_define ("__Xnorm");	\
       }					\
+    if (TARGET_LL64)			\
+      {					\
+	builtin_define ("__ARC_LL64__");\
+      }					\
     if (TARGET_MUL64_SET)		\
       builtin_define ("__ARC_MUL64__");\
     if (TARGET_MULMAC_32BY16_SET)	\
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 80f1daa..6072756 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -984,7 +984,7 @@
 }")
 
 (define_insn_and_split "*movdi_insn"
-  [(set (match_operand:DI 0 "move_dest_operand" "=w,w,r,m")
+  [(set (match_operand:DI 0 "move_dest_operand"      "=w, w,r,m")
 	(match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))]
   "register_operand (operands[0], DImode)
    || register_operand (operands[1], DImode)"
@@ -993,50 +993,37 @@
   switch (which_alternative)
     {
     default:
-    case 0 :
-      /* We normally copy the low-numbered register first.  However, if
-	 the first register operand 0 is the same as the second register of
-	 operand 1, we must copy in the opposite order.  */
-      if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
-	return \"mov%? %R0,%R1\;mov%? %0,%1\";
-      else
-      return \"mov%? %0,%1\;mov%? %R0,%R1\";
-    case 1 :
-      return \"mov%? %L0,%L1\;mov%? %H0,%H1\";
-    case 2 :
-      /* If the low-address word is used in the address, we must load it
-	 last.  Otherwise, load it first.  Note that we cannot have
-	 auto-increment in that case since the address register is known to be
-	 dead.  */
-      if (refers_to_regno_p (REGNO (operands[0]), operands[1]))
-	return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
-      else switch (GET_CODE (XEXP(operands[1], 0)))
-	{
-	case POST_MODIFY: case POST_INC: case POST_DEC:
-	  return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\";
-	case PRE_MODIFY: case PRE_INC: case PRE_DEC:
-	  return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\";
-	default:
-	  return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\";
-	}
-    case 3 :
-      switch (GET_CODE (XEXP(operands[0], 0)))
-	{
-	case POST_MODIFY: case POST_INC: case POST_DEC:
-     	  return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\";
-	case PRE_MODIFY: case PRE_INC: case PRE_DEC:
-     	  return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\";
-	default:
-     	  return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\";
-	}
+      return \"#\";
+
+    case 2:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DImode)
+	     && memory_operand (operands[1], DImode))
+	    || (memory_operand (operands[0], DImode)
+	        && even_register_operand (operands[1], DImode))))
+      return \"ldd%U1%V1 %0,%1%&\";
+    return \"#\";
+
+    case 3:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DImode)
+	     && memory_operand (operands[1], DImode))
+	    || (memory_operand (operands[0], DImode)
+	        && even_register_operand (operands[1], DImode))))
+     return \"std%U0%V0 %1,%0\";
+    return \"#\";
     }
 }"
-  "&& reload_completed && optimize"
-  [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
-  "arc_split_move (operands);"
+  "reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+  {
+    if (arc_split_move (operands))
+      DONE;
+  }
   [(set_attr "type" "move,move,load,store")
    ;; ??? The ld/st values could be 4 if it's [reg,bignum].
-   (set_attr "length" "8,16,16,16")])
+   (set_attr "length" "8,16,*,*")])
 
 
 ;; Floating point move insns.
@@ -1066,23 +1053,47 @@
   ""
   "if (prepare_move_operands (operands, DFmode)) DONE;")
 
-(define_insn "*movdf_insn"
+(define_insn_and_split "*movdf_insn"
   [(set (match_operand:DF 0 "move_dest_operand"      "=D,r,c,c,r,m")
 	(match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))]
   "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
-  "#"
+  "*
+{
+ switch (which_alternative)
+   {
+    default:
+      return \"#\";
+    case 4:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DFmode)
+	     && memory_operand (operands[1], DFmode))
+	    || (memory_operand (operands[0], DFmode)
+	        && even_register_operand (operands[1], DFmode))))
+      return \"ldd%U1%V1 %0,%1%&\";
+    return \"#\";
+
+    case 5:
+    if (TARGET_LL64
+	&& ((even_register_operand (operands[0], DFmode)
+	     && memory_operand (operands[1], DFmode))
+	    || (memory_operand (operands[0], DFmode)
+		&& even_register_operand (operands[1], DFmode))))
+     return \"std%U0%V0 %1,%0\";
+    return \"#\";
+   }
+}"
+  "reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+  {
+    if (arc_split_move (operands))
+      DONE;
+  }
   [(set_attr "type" "move,move,move,move,load,store")
    (set_attr "predicable" "no,no,yes,yes,no,no")
    ;; ??? The ld/st values could be 16 if it's [reg,bignum].
    (set_attr "length" "4,16,8,16,16,16")])
 
-(define_split
-  [(set (match_operand:DF 0 "move_dest_operand" "")
-	(match_operand:DF 1 "move_double_src_operand" ""))]
-  "reload_completed"
-  [(match_dup 2)]
-  "operands[2] = arc_split_move (operands);")
-
 (define_insn_and_split "*movdf_insn_nolrsr"
   [(set (match_operand:DF 0 "register_operand"       "=r")
 	(match_operand:DF 1 "arc_double_register_operand" "D"))
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 79113a5..00b98d5 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -409,3 +409,7 @@ Target Joined
 matomic
 Target Report Mask(ATOMIC)
 Enable atomic instructions.
+
+mll64
+Target Report Mask(LL64)
+Enable double load/store instructions for ARC HS.
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
index fba878b..52ac2ac 100644
--- a/gcc/config/arc/predicates.md
+++ b/gcc/config/arc/predicates.md
@@ -783,4 +783,15 @@
        (match_code "reg" "0")))
 
 (define_predicate "any_mem_operand"
-  (match_code "mem"))
\ No newline at end of file
+  (match_code "mem"))
+
+; Match the first reg of an even-odd pair: any pseudo or an even hard register.
+(define_predicate "even_register_operand"
+  (match_code "reg")
+  {
+   if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+      return 0;
+
+   return (REG_P (op) && ((REGNO (op) >= FIRST_PSEUDO_REGISTER)
+			  || ((REGNO (op) & 1) == 0)));
+  })
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 070a516..6bb7330 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -585,7 +585,7 @@ Objective-C and Objective-C++ Dialects}.
 -mmixed-code -mq-class -mRcq -mRcw -msize-level=@var{level} @gol
 -mtune=@var{cpu} -mmultcost=@var{num} @gol
 -munalign-prob-threshold=@var{probability} -mmpy-option=@var{multo} @gol
--mdiv-rem -mcode-density}
+-mdiv-rem -mcode-density -mll64}
 
 @emph{ARM Options}
 @gccoptlist{-mapcs-frame  -mno-apcs-frame @gol
@@ -13173,6 +13173,10 @@ Enable DIV/REM instructions for ARCv2 cores.
 @opindex mcode-density
 Enable code density instructions for ARC EM, default on for ARC HS.
 
+@item -mll64
+@opindex mll64
+Enable double load/store operations for ARC HS cores.
+
 @item -mmpy-option=@var{multo}
 @opindex mmpy-option
 Compile ARCv2 code with a multiplier design option.  @samp{wlh1} is
-- 
1.9.1

