From 544372a14a1948e059777d3e9bf3a44c2bf69ed0 Mon Sep 17 00:00:00 2001
From: Maxim Kuvyrkov <maxim@codesourcery.com>
Date: Tue, 12 Jun 2012 21:48:20 -0700
Subject: [PATCH 2/3] Add XLP-specific atomic instructions and tweaks.

2012-06-13  Tom de Vries  <vries@codesourcery.com>
            Maxim Kuvyrkov  <maxim@codesourcery.com>

	* config/mips/mips.h (TARGET_XLP): Define.
	(TARGET_SYNC_AFTER_SC): Update.
	(ISA_HAS_SWAP, ISA_HAS_LDADD): Define.
	* config/mips/sync.md (atomic_exchange, atomic_fetch_add): Use
	XLP-specific swap and ldadd patterns.  Work around a reload problem
	in handling a complex address.
	(atomic_exchange_swap, atomic_fetch_add_ldadd): New patterns.
---
 gcc/config/mips/mips.h  |    6 +++-
 gcc/config/mips/sync.md |   85 ++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index bcb7c04..4eafd26 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -223,6 +223,7 @@ struct mips_cpu_info {
 #define TARGET_SB1                  (mips_arch == PROCESSOR_SB1		\
 				     || mips_arch == PROCESSOR_SB1A)
 #define TARGET_SR71K                (mips_arch == PROCESSOR_SR71000)
+#define TARGET_XLP                  (mips_arch == PROCESSOR_XLP)
 
 /* Scheduling target defines.  */
 #define TUNE_20KC		    (mips_tune == PROCESSOR_20KC)
@@ -311,7 +312,7 @@ struct mips_cpu_info {
    stores.  It does not tell anything about ordering of loads and
    stores prior to and following the SC, only about the SC itself and
    those loads and stores follow it.  */
-#define TARGET_SYNC_AFTER_SC (!TARGET_OCTEON)
+#define TARGET_SYNC_AFTER_SC (!TARGET_OCTEON && !TARGET_XLP)
 
 /* Define preprocessor macros for the -march and -mtune options.
    PREFIX is either _MIPS_ARCH or _MIPS_TUNE, INFO is the selected
@@ -1054,6 +1055,9 @@ struct mips_cpu_info {
    ? TARGET_LLSC && !TARGET_MIPS16	\
    : ISA_HAS_LL_SC)
 
+#define ISA_HAS_SWAP (TARGET_XLP)
+#define ISA_HAS_LDADD (TARGET_XLP)
+
 /* ISA includes the baddu instruction.  */
 #define ISA_HAS_BADDU		(TARGET_OCTEON && !TARGET_MIPS16)
 
diff --git a/gcc/config/mips/sync.md b/gcc/config/mips/sync.md
index 604aefa..ac953b5 100644
--- a/gcc/config/mips/sync.md
+++ b/gcc/config/mips/sync.md
@@ -607,10 +607,32 @@
    (match_operand:GPR 1 "memory_operand")
    (match_operand:GPR 2 "arith_operand")
    (match_operand:SI 3 "const_int_operand")]
-  "GENERATE_LL_SC"
+  "GENERATE_LL_SC || ISA_HAS_SWAP"
 {
+  if (!ISA_HAS_SWAP)
     emit_insn (gen_atomic_exchange<mode>_llsc (operands[0], operands[1],
 					       operands[2], operands[3]));
+  else
+    {
+      rtx addr;
+
+      gcc_assert (MEM_P (operands[1]));
+      addr = XEXP (operands[1], 0);
+      if (!REG_P (addr) && can_create_pseudo_p ())
+        /* Work around a reload bug that hits (lo_sum (reg) (symbol_ref))
+	   addresses.  Spill the address to a register up front to simplify
+	   reload's job.  */
+        addr = force_reg (GET_MODE (addr), addr);
+      if (Pmode == SImode)
+        emit_insn (gen_atomic_exchange<mode>_swap_si (operands[0], addr,
+						      operands[2],
+						      operands[3]));
+      else
+        emit_insn (gen_atomic_exchange<mode>_swap_di (operands[0], addr,
+						      operands[2],
+						      operands[3]));
+
+    }
   DONE;
 })
 
@@ -623,7 +645,7 @@
 	 UNSPEC_ATOMIC_EXCHANGE))
    (unspec_volatile:GPR [(match_operand:SI 3 "const_int_operand")]
     UNSPEC_ATOMIC_EXCHANGE)]
-  "GENERATE_LL_SC"
+  "GENERATE_LL_SC && !ISA_HAS_SWAP"
   { return mips_output_sync_loop (insn, operands); }
   [(set_attr "sync_insn1" "li,move")
    (set_attr "sync_oldval" "0")
@@ -631,15 +653,52 @@
    (set_attr "sync_insn1_op2" "2")
    (set_attr "sync_memmodel" "3")])
 
+;; Swap/ldadd instruction accepts only register, no offset, for the address.
+;; Therefore, we spell out the MEM verbatim and constrain its address to "d".
+;; XLP issues implicit sync for swap/ldadd, so no need for an explicit one.
+(define_insn "atomic_exchange<GPR:mode>_swap_<P:mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=d")
+	(unspec_volatile:GPR
+	 [(mem:GPR (match_operand:P 1 "address_operand" "d"))]
+	 UNSPEC_ATOMIC_EXCHANGE))
+   (set (mem:GPR (match_dup 1))
+	(unspec_volatile:GPR [(match_operand:GPR 2 "arith_operand" "0")]
+	 UNSPEC_ATOMIC_EXCHANGE))
+   (unspec_volatile:GPR [(match_operand:SI 3 "const_int_operand")]
+    UNSPEC_ATOMIC_EXCHANGE)]
+  "ISA_HAS_SWAP"
+  "swap<GPR:size>\t%0,%1")
+
 (define_expand "atomic_fetch_add<mode>"
   [(match_operand:GPR 0 "register_operand")
    (match_operand:GPR 1 "memory_operand")
    (match_operand:GPR 2 "arith_operand")
    (match_operand:SI 3 "const_int_operand")]
-  "GENERATE_LL_SC"
+  "GENERATE_LL_SC || ISA_HAS_LDADD"
 {
+  if (!ISA_HAS_LDADD)
     emit_insn (gen_atomic_fetch_add<mode>_llsc (operands[0], operands[1],
 						operands[2], operands[3]));
+  else
+    {
+      rtx addr;
+
+      gcc_assert (MEM_P (operands[1]));
+      addr = XEXP (operands[1], 0);
+      if (!REG_P (addr) && can_create_pseudo_p ())
+        /* Work around a reload bug that hits (lo_sum (reg) (symbol_ref))
+	   addresses.  Spill the address to a register up front to simplify
+	   reload's job.  */
+        addr = force_reg (GET_MODE (addr), addr);
+      if (Pmode == SImode)
+        emit_insn (gen_atomic_fetch_add<mode>_ldadd_si (operands[0], addr,
+							operands[2],
+							operands[3]));
+      else
+        emit_insn (gen_atomic_fetch_add<mode>_ldadd_di (operands[0], addr,
+							operands[2],
+							operands[3]));
+    }
   DONE;
 })
 
@@ -654,10 +713,28 @@
 	 UNSPEC_ATOMIC_FETCH_OP))
    (unspec_volatile:GPR [(match_operand:SI 3 "const_int_operand")]
     UNSPEC_ATOMIC_FETCH_OP)]
-  "GENERATE_LL_SC"
+  "GENERATE_LL_SC && !ISA_HAS_LDADD"
   { return mips_output_sync_loop (insn, operands); }
   [(set_attr "sync_insn1" "addiu,addu")
    (set_attr "sync_oldval" "0")
    (set_attr "sync_mem" "1")
    (set_attr "sync_insn1_op2" "2")
    (set_attr "sync_memmodel" "3")])
+
+;; Swap/ldadd instruction accepts only register, no offset, for the address.
+;; Therefore, we spell out the MEM verbatim and constrain its address to "d".
+;; XLP issues implicit sync for swap/ldadd, so no need for an explicit one.
+(define_insn "atomic_fetch_add<GPR:mode>_ldadd_<P:mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=d")
+	(unspec_volatile:GPR
+	 [(mem:GPR (match_operand:P 1 "address_operand" "d"))]
+	 UNSPEC_ATOMIC_FETCH_OP))
+   (set (mem:GPR (match_dup 1))
+	(unspec_volatile:GPR
+	 [(plus:GPR (mem:GPR (match_dup 1))
+		    (match_operand:GPR 2 "arith_operand" "0"))]
+	 UNSPEC_ATOMIC_FETCH_OP))
+   (unspec_volatile:GPR [(match_operand:SI 3 "const_int_operand")]
+    UNSPEC_ATOMIC_FETCH_OP)]
+  "ISA_HAS_LDADD"
+  "ldadd<GPR:size>\t%0,%1")
-- 
1.7.4.1

