diff --git a/contrib/gcc_update b/contrib/gcc_update
index c719502..35db695 100755
--- a/contrib/gcc_update
+++ b/contrib/gcc_update
@@ -81,6 +81,8 @@ gcc/config.in: gcc/cstamp-h.in
 gcc/fixinc/fixincl.x: gcc/fixinc/fixincl.tpl gcc/fixinc/inclhack.def
 gcc/config/arm/arm-tune.md: gcc/config/arm/arm-cores.def gcc/config/arm/gentune.sh
 gcc/config/arm/arm-tables.opt: gcc/config/arm/arm-arches.def gcc/config/arm/arm-cores.def gcc/config/arm/arm-fpus.def gcc/config/arm/genopt.sh
+gcc/config/arm/ldrdstrd0.md: gcc/config/arm/ldrdstrd.md.in
+gcc/config/arm/ldrdstrd1.md: gcc/config/arm/ldrdstrd.md.in
 gcc/config/avr/avr-tables.opt: gcc/config/avr/avr-mcus.def gcc/config/avr/genopt.sh
 gcc/config/c6x/c6x-tables.opt: gcc/config/c6x/c6x-isas.def gcc/config/c6x/genopt.sh
 gcc/config/c6x/c6x-sched.md: gcc/config/c6x/c6x-sched.md.in gcc/config/c6x/gensched.sh
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index a78ba88..4497ebf 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -11298,5 +11298,11 @@
 ;; Load the load/store multiple patterns
 (include "ldmstm.md")
+;; If we failed to merge individual loads/stores into ldm/stm,
+;; (e.g., the base register offset is not appropriate for ldm/stm)
+;; try generating ldrd/strd.
+;; The same patterns as in ldrdstrd0.md, different conditions.
+;; TODO: How to avoid duplication?
+(include "ldrdstrd1.md")
 ;; Load the FPA co-processor patterns
 (include "fpa.md")
 ;; Load the Maverick co-processor patterns
diff --git a/gcc/config/arm/ldrdstrd.md.in b/gcc/config/arm/ldrdstrd.md.in
new file mode 100644
index 0000000..d6d57fc
--- /dev/null
+++ b/gcc/config/arm/ldrdstrd.md.in
@@ -0,0 +1,365 @@
+;; ARM ldrd/strd instruction patterns. 
+;;
+;; Please do not edit the files ldrdstrd0.md and ldrdstrd1.md;
+;; they are automatically generated from ldrdstrd.md.in. 
+
+;; Copyright (C) 2011 Free Software Foundation, Inc.
+;; Contributed by ARM Ltd.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License and
+;; a copy of the GCC Runtime Library Exception along with this program;
+;; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+;; <http://www.gnu.org/licenses/>. 
+
+(define_insn "*ldrd_after_ldmstm" ; Thumb-2 LDRD: two SI loads from base %1 at offsets %2 and %4
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "=q")
+        (mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk")
+                         (match_operand:SI 2 "const_int_operand" ""))))
+   (set (match_operand:SI 3 "arm_hard_core_register_operand" "=q")
+        (mem:SI (plus:SI (match_dup 1)
+                         (match_operand:SI 4 "const_int_operand" ""))))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+        || after_ldmstm)
+     && ((INTVAL (operands[2]) + 4) == INTVAL (operands[4]))
+     && (!bad_operands_ldrd_strd (operands[0], operands[3], 
+                                  operands[1], INTVAL (operands[2]), 
+                                  false, true))" ; requires %4 == %2 + 4; NOTE(review): helper is defined elsewhere, confirm its flag meanings
+  "ldrd%?\t%0, %3, [%1, %2]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*ldrd_base_after_ldmstm" ; Thumb-2 LDRD: two SI loads from [%1] and [%1, #4]
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "=q")
+        (mem:SI (match_operand:SI 1 "s_register_operand" "rk")))
+   (set (match_operand:SI 2 "arm_hard_core_register_operand" "=q")
+        (mem:SI (plus:SI (match_dup 1)
+                         (const_int 4))))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)
+     && (!bad_operands_ldrd_strd (operands[0], operands[2], 
+                                  operands[1], 0, false, true))" ; helper is given offset 0
+  "ldrd%?\t%0, %2, [%1]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*ldrd_base_neg_after_ldmstm" ; Thumb-2 LDRD: two SI loads from [%1, #-4] and [%1]
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "=q")
+	(mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk")
+                         (const_int -4))))
+   (set (match_operand:SI 2 "arm_hard_core_register_operand" "=q")
+        (mem:SI (match_dup 1)))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)
+     && (!bad_operands_ldrd_strd (operands[0], operands[2], 
+                                  operands[1], -4, false, true))" ; helper is given offset -4
+  "ldrd%?\t%0, %2, [%1, #-4]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*strd_after_ldmstm" ; Thumb-2 STRD: two SI stores to base %0 at offsets %1 and %3
+  [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk")
+                         (match_operand:SI 1 "const_int_operand" "")))
+        (match_operand:SI 2 "arm_hard_core_register_operand" "q"))
+   (set (mem:SI (plus:SI (match_dup 0)
+                         (match_operand:SI 3 "const_int_operand" "")))
+        (match_operand:SI 4 "arm_hard_core_register_operand" "q"))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)
+     && ((INTVAL (operands[1]) + 4) == INTVAL (operands[3]))
+     && (!bad_operands_ldrd_strd (operands[2], operands[4], 
+                                  operands[0], INTVAL (operands[1]), 
+                                  false, false))" ; requires %3 == %1 + 4; NOTE(review): helper is defined elsewhere, confirm its flag meanings
+  "strd%?\t%2, %4, [%0, %1]"
+  [(set_attr "type" "store2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*strd_base_after_ldmstm" ; Thumb-2 STRD: two SI stores to [%0] and [%0, #4]
+  [(set (mem:SI (match_operand:SI 0 "s_register_operand" "rk"))
+        (match_operand:SI 1 "arm_hard_core_register_operand" "q"))
+   (set (mem:SI (plus:SI (match_dup 0)
+                         (const_int 4))) 
+        (match_operand:SI 2 "arm_hard_core_register_operand" "q"))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)
+     && (!bad_operands_ldrd_strd (operands[1], operands[2], 
+                                  operands[0], 0, false, false))" ; helper is given offset 0
+  "strd%?\t%1, %2, [%0]"
+  [(set_attr "type" "store2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*strd_base_neg_after_ldmstm" ; Thumb-2 STRD: two SI stores to [%0, #-4] and [%0]
+  [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk")
+                         (const_int -4)))
+        (match_operand:SI 1 "arm_hard_core_register_operand" "q"))
+   (set (mem:SI (match_dup 0))
+        (match_operand:SI 2 "arm_hard_core_register_operand" "q"))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)
+     && (!bad_operands_ldrd_strd (operands[1], operands[2], 
+                                  operands[0], -4, false, false))" ; helper is given offset -4
+  "strd%?\t%1, %2, [%0, #-4]"
+  [(set_attr "type" "store2")
+   (set_attr "predicable" "yes")])
+
+;; The following peephole optimizations identify consecutive memory accesses,
+;; and try to rearrange the operands to enable generation of ldrd/strd.
+
+(define_peephole2 ; ldrd: rewrite two SI loads as one parallel of both sets (Thumb-2 only)
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "")
+        (match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "arm_hard_core_register_operand" "")
+        (match_operand:SI 3 "memory_operand" ""))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+        || after_ldmstm)"
+  [(parallel [(set (match_dup 0) (match_dup 2))
+	      (set (match_dup 1) (match_dup 3))])]
+{
+  if (!gen_operands_ldrd_strd (operands, 2, true, false))
+    FAIL; /* Helper returned false: leave the two loads untouched.  */
+})
+
+(define_peephole2 ; strd: rewrite two SI stores as one parallel of both sets (Thumb-2 only)
+  [(set (match_operand:SI 2 "memory_operand" "")
+	(match_operand:SI 0 "arm_hard_core_register_operand" ""))
+   (set (match_operand:SI 3 "memory_operand" "")
+	(match_operand:SI 1 "arm_hard_core_register_operand" ""))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+        || after_ldmstm)"
+  [(parallel [(set (match_dup 2) (match_dup 0))
+	      (set (match_dup 3) (match_dup 1))])]
+{
+  if (!gen_operands_ldrd_strd (operands, 2, false, false))
+    FAIL; /* Helper returned false: leave the two stores untouched.  */
+})
+
+;; In ARM state, the destination registers of LDRD/STRD must be consecutive.
+;; This condition holds when the peephole is matched, but subsequent passes
+;; might rename the registers. To preserve this condition even when registers
+;; are renamed, we replace two parallel SI loads with a single DI load.
+
+(define_peephole2 ; ldrd (movdi) for arm: two SI loads become one DImode load
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "")
+	(match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "arm_hard_core_register_operand" "")
+	(match_operand:SI 3 "memory_operand" ""))]
+   "TARGET_LDRD && TARGET_ARM
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)"
+  [(set (match_dup 0) (match_dup 2))]
+{
+  if (!gen_operands_ldrd_strd (operands, 2, true, false))
+   {
+     FAIL; /* Helper returned false: leave the two loads untouched.  */
+   }
+  else 
+   {
+     operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); /* DImode reg with the same REGNO.  */
+     operands[2] = adjust_address (operands[2], DImode, 0); /* Same address, DImode access.  */
+   }
+})
+
+(define_peephole2 ; strd (movdi) for arm: two SI stores become one DImode store
+  [(set (match_operand:SI 2 "memory_operand" "")
+	(match_operand:SI 0 "arm_hard_core_register_operand" ""))
+   (set (match_operand:SI 3 "memory_operand" "")
+	(match_operand:SI 1 "arm_hard_core_register_operand" ""))]
+  "TARGET_LDRD && TARGET_ARM
+    && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+        || after_ldmstm)"
+  [(set (match_dup 2) (match_dup 0))]
+{
+  if (!gen_operands_ldrd_strd (operands, 2, false, false))
+   {
+     FAIL; /* Helper returned false: leave the two stores untouched.  */
+   }
+  else
+   {
+     operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); /* DImode reg with the same REGNO.  */
+     operands[2] = adjust_address (operands[2], DImode, 0); /* Same address, DImode access.  */
+   }
+})
+;; The following peepholes reorder registers to enable LDRD/STRD.
+(define_peephole2 ; strd of constants for thumb: matches const/store/const/store order
+  [(set (match_operand:SI 0 "s_register_operand" "")
+        (match_operand:SI 4 "const_int_operand" ""))
+   (set (match_operand:SI 2 "memory_operand" "")
+        (match_dup 0))
+   (set (match_operand:SI 1 "s_register_operand" "")
+        (match_operand:SI 5 "const_int_operand" ""))
+   (set (match_operand:SI 3 "memory_operand" "")
+        (match_dup 1))]
+ "TARGET_LDRD && TARGET_THUMB2
+  && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)"
+  [(set (match_dup 0) (match_dup 4)) ; both constant loads are emitted first,
+   (set (match_dup 1) (match_dup 5))
+   (parallel [(set (match_dup 2) (match_dup 0)) ; then both stores as one parallel
+	      (set (match_dup 3) (match_dup 1))])]
+{
+  if (!gen_operands_const_strd (operands, 4))
+    FAIL; /* Helper returned false: leave the four insns untouched.  */
+})
+
+(define_peephole2 ; strd of constants for thumb: matches const/const/store/store order
+  [(set (match_operand:SI 0 "s_register_operand" "")
+        (match_operand:SI 4 "const_int_operand" ""))
+   (set (match_operand:SI 1 "s_register_operand" "")
+        (match_operand:SI 5 "const_int_operand" ""))
+   (set (match_operand:SI 2 "memory_operand" "")
+        (match_dup 0))
+   (set (match_operand:SI 3 "memory_operand" "")
+        (match_dup 1))]
+ "TARGET_LDRD && TARGET_THUMB2
+  && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)"
+  [(set (match_dup 0) (match_dup 4)) ; both constant loads are kept,
+   (set (match_dup 1) (match_dup 5))
+   (parallel [(set (match_dup 2) (match_dup 0)) ; the two stores become one parallel
+	      (set (match_dup 3) (match_dup 1))])]
+{
+  if (!gen_operands_const_strd (operands, 4))
+     FAIL; /* Helper returned false: leave the four insns untouched.  */
+})
+
+(define_peephole2 ; strd of constants for arm: matches const/store/const/store order
+  [(set (match_operand:SI 0 "s_register_operand" "")
+        (match_operand:SI 4 "const_int_operand" ""))
+   (set (match_operand:SI 2 "memory_operand" "")
+        (match_dup 0))
+   (set (match_operand:SI 1 "s_register_operand" "")
+        (match_operand:SI 5 "const_int_operand" ""))
+   (set (match_operand:SI 3 "memory_operand" "")
+        (match_dup 1))]
+ "TARGET_LDRD && TARGET_ARM
+  && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)"
+  [(set (match_dup 0) (match_dup 4))  ; SImode constant loads are kept as they are
+   (set (match_dup 1) (match_dup 5))
+   (set (match_dup 6) (match_dup 7))] ; one DImode store replaces both SI stores
+{
+  if (!gen_operands_const_strd (operands, 4))
+    FAIL; /* Helper returned false: leave the four insns untouched.  */
+
+  /* This code runs before the replacement RTL is built, so operands[0]
+     must stay SImode for the constant load above.  Put the DImode
+     register and the DImode memory reference in the otherwise unused
+     operands 7 and 6 rather than overwriting operands 0 and 2.  */
+  operands[7] = gen_rtx_REG (DImode, REGNO (operands[0]));
+  operands[6] = adjust_address (operands[2], DImode, 0);
+})
+
+(define_peephole2 ; strd of constants for arm: matches const/const/store/store order
+  [(set (match_operand:SI 0 "s_register_operand" "")
+        (match_operand:SI 4 "const_int_operand" ""))
+   (set (match_operand:SI 1 "s_register_operand" "")
+        (match_operand:SI 5 "const_int_operand" ""))
+   (set (match_operand:SI 2 "memory_operand" "")
+        (match_dup 0))
+   (set (match_operand:SI 3 "memory_operand" "")
+        (match_dup 1))]
+ "TARGET_LDRD && TARGET_ARM
+  && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)"
+  [(set (match_dup 0) (match_dup 4))  ; SImode constant loads are kept as they are
+   (set (match_dup 1) (match_dup 5))
+   (set (match_dup 6) (match_dup 7))] ; one DImode store replaces both SI stores
+{
+  if (!gen_operands_const_strd (operands, 4))
+    FAIL; /* Helper returned false: leave the four insns untouched.  */
+
+  /* This code runs before the replacement RTL is built, so operands[0]
+     must stay SImode for the constant load above.  Put the DImode
+     register and the DImode memory reference in the otherwise unused
+     operands 7 and 6 rather than overwriting operands 0 and 2.  */
+  operands[7] = gen_rtx_REG (DImode, REGNO (operands[0]));
+  operands[6] = adjust_address (operands[2], DImode, 0);
+})
+
+(define_peephole2 ; swap the destination registers of two loads
+		  ; before a commutative operation (ARM state only).
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "")
+        (match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "arm_hard_core_register_operand" "")
+        (match_operand:SI 3 "memory_operand" ""))
+   (set (match_operand:SI 4 "s_register_operand" "")
+        (match_operator:SI 5 "commutative_binary_operator"
+			   [(match_operand 6 "s_register_operand" "")
+			    (match_operand 7 "s_register_operand" "") ]))]
+  "TARGET_LDRD && TARGET_ARM 
+   && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)
+   && (  ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
+       ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
+   && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
+   && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" ; the loaded regs must be the two inputs of operator 5, in either order
+  [(set (match_dup 0) (match_dup 2)) ; single DImode load, see the else branch below
+   (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
+  { 
+    if (!gen_operands_ldrd_strd (operands, 3, true, true))
+     {
+        FAIL; /* Helper returned false: leave the three insns untouched.  */
+     }
+    else
+     {
+        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); /* DImode reg with the same REGNO.  */
+        operands[2] = adjust_address (operands[2], DImode, 0); /* Same address, DImode access.  */
+     }
+   }
+)
+
+(define_peephole2 ; swap the destination registers of two loads
+		  ; before a commutative operation that sets the flags (ARM state only).
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "")
+        (match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "arm_hard_core_register_operand" "")
+        (match_operand:SI 3 "memory_operand" ""))
+   (parallel
+      [(set (match_operand:SI 4 "s_register_operand" "")
+	    (match_operator:SI 5 "commutative_binary_operator"		
+			       [(match_operand 6 "s_register_operand" "")
+				(match_operand 7 "s_register_operand" "") ]))
+       (clobber (reg:CC CC_REGNUM))])]
+  "TARGET_LDRD && TARGET_ARM 
+   && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || after_ldmstm)
+   && (  ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
+       ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
+   && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
+   && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" ; the loaded regs must be the two inputs of operator 5, in either order
+  [(set (match_dup 0) (match_dup 2))   ; single DImode load, see the else branch below
+   (parallel
+      [(set (match_dup 4) 
+	    (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
+       (clobber (reg:CC CC_REGNUM))])]
+  {
+    if (!gen_operands_ldrd_strd (operands, 3, true, true))
+     {
+        FAIL; /* Helper returned false: leave the three insns untouched.  */
+     }
+    else
+     {
+        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); /* DImode reg with the same REGNO.  */
+        operands[2] = adjust_address (operands[2], DImode, 0); /* Same address, DImode access.  */
+     }
+   }
+)
diff --git a/gcc/config/arm/ldrdstrd1.md b/gcc/config/arm/ldrdstrd1.md
new file mode 100644
index 0000000..2f9f964
--- /dev/null
+++ b/gcc/config/arm/ldrdstrd1.md
@@ -0,0 +1,365 @@
+;; ARM ldrd/strd instruction patterns. 
+;;
+;; Please do not edit the files ldrdstrd0.md and ldrdstrd1.md;
+;; they are automatically generated from ldrdstrd.md.in. 
+
+;; Copyright (C) 2011 Free Software Foundation, Inc.
+;; Contributed by ARM Ltd.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License and
+;; a copy of the GCC Runtime Library Exception along with this program;
+;; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+;; <http://www.gnu.org/licenses/>. 
+
+(define_insn "*ldrd_1" ; Thumb-2 LDRD: two SI loads from base %1 at offsets %2 and %4
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "=q")
+        (mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk")
+                         (match_operand:SI 2 "const_int_operand" ""))))
+   (set (match_operand:SI 3 "arm_hard_core_register_operand" "=q")
+        (mem:SI (plus:SI (match_dup 1)
+                         (match_operand:SI 4 "const_int_operand" ""))))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+        || 1)
+     && ((INTVAL (operands[2]) + 4) == INTVAL (operands[4]))
+     && (!bad_operands_ldrd_strd (operands[0], operands[3], 
+                                  operands[1], INTVAL (operands[2]), 
+                                  false, true))" ; requires %4 == %2 + 4; NOTE(review): helper is defined elsewhere, confirm its flag meanings
+  "ldrd%?\t%0, %3, [%1, %2]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*ldrd_base_1" ; Thumb-2 LDRD: two SI loads from [%1] and [%1, #4]
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "=q")
+        (mem:SI (match_operand:SI 1 "s_register_operand" "rk")))
+   (set (match_operand:SI 2 "arm_hard_core_register_operand" "=q")
+        (mem:SI (plus:SI (match_dup 1)
+                         (const_int 4))))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)
+     && (!bad_operands_ldrd_strd (operands[0], operands[2], 
+                                  operands[1], 0, false, true))" ; helper is given offset 0
+  "ldrd%?\t%0, %2, [%1]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*ldrd_base_neg_1" ; Thumb-2 LDRD: two SI loads from [%1, #-4] and [%1]
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "=q")
+	(mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk")
+                         (const_int -4))))
+   (set (match_operand:SI 2 "arm_hard_core_register_operand" "=q")
+        (mem:SI (match_dup 1)))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)
+     && (!bad_operands_ldrd_strd (operands[0], operands[2], 
+                                  operands[1], -4, false, true))" ; helper is given offset -4
+  "ldrd%?\t%0, %2, [%1, #-4]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*strd_1" ; Thumb-2 STRD: two SI stores to base %0 at offsets %1 and %3
+  [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk")
+                         (match_operand:SI 1 "const_int_operand" "")))
+        (match_operand:SI 2 "arm_hard_core_register_operand" "q"))
+   (set (mem:SI (plus:SI (match_dup 0)
+                         (match_operand:SI 3 "const_int_operand" "")))
+        (match_operand:SI 4 "arm_hard_core_register_operand" "q"))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)
+     && ((INTVAL (operands[1]) + 4) == INTVAL (operands[3]))
+     && (!bad_operands_ldrd_strd (operands[2], operands[4], 
+                                  operands[0], INTVAL (operands[1]), 
+                                  false, false))" ; requires %3 == %1 + 4; NOTE(review): helper is defined elsewhere, confirm its flag meanings
+  "strd%?\t%2, %4, [%0, %1]"
+  [(set_attr "type" "store2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*strd_base_1" ; Thumb-2 STRD: two SI stores to [%0] and [%0, #4]
+  [(set (mem:SI (match_operand:SI 0 "s_register_operand" "rk"))
+        (match_operand:SI 1 "arm_hard_core_register_operand" "q"))
+   (set (mem:SI (plus:SI (match_dup 0)
+                         (const_int 4))) 
+        (match_operand:SI 2 "arm_hard_core_register_operand" "q"))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)
+     && (!bad_operands_ldrd_strd (operands[1], operands[2], 
+                                  operands[0], 0, false, false))" ; helper is given offset 0
+  "strd%?\t%1, %2, [%0]"
+  [(set_attr "type" "store2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*strd_base_neg_1" ; Thumb-2 STRD: two SI stores to [%0, #-4] and [%0]
+  [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk")
+                         (const_int -4)))
+        (match_operand:SI 1 "arm_hard_core_register_operand" "q"))
+   (set (mem:SI (match_dup 0))
+        (match_operand:SI 2 "arm_hard_core_register_operand" "q"))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)
+     && (!bad_operands_ldrd_strd (operands[1], operands[2], 
+                                  operands[0], -4, false, false))" ; helper is given offset -4
+  "strd%?\t%1, %2, [%0, #-4]"
+  [(set_attr "type" "store2")
+   (set_attr "predicable" "yes")])
+
+;; The following peephole optimizations identify consecutive memory accesses,
+;; and try to rearrange the operands to enable generation of ldrd/strd.
+
+(define_peephole2 ; ldrd: rewrite two SI loads as one parallel of both sets (Thumb-2 only)
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "")
+        (match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "arm_hard_core_register_operand" "")
+        (match_operand:SI 3 "memory_operand" ""))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+        || 1)"
+  [(parallel [(set (match_dup 0) (match_dup 2))
+	      (set (match_dup 1) (match_dup 3))])]
+{
+  if (!gen_operands_ldrd_strd (operands, 2, true, false))
+    FAIL; /* Helper returned false: leave the two loads untouched.  */
+})
+
+(define_peephole2 ; strd: rewrite two SI stores as one parallel of both sets (Thumb-2 only)
+  [(set (match_operand:SI 2 "memory_operand" "")
+	(match_operand:SI 0 "arm_hard_core_register_operand" ""))
+   (set (match_operand:SI 3 "memory_operand" "")
+	(match_operand:SI 1 "arm_hard_core_register_operand" ""))]
+  "TARGET_LDRD && TARGET_THUMB2 
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+        || 1)"
+  [(parallel [(set (match_dup 2) (match_dup 0))
+	      (set (match_dup 3) (match_dup 1))])]
+{
+  if (!gen_operands_ldrd_strd (operands, 2, false, false))
+    FAIL; /* Helper returned false: leave the two stores untouched.  */
+})
+
+;; In ARM state, the destination registers of LDRD/STRD must be consecutive.
+;; This condition holds when the peephole is matched, but subsequent passes
+;; might rename the registers. To preserve this condition even when registers
+;; are renamed, we replace two parallel SI loads with a single DI load.
+
+(define_peephole2 ; ldrd (movdi) for arm: two SI loads become one DImode load
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "")
+	(match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "arm_hard_core_register_operand" "")
+	(match_operand:SI 3 "memory_operand" ""))]
+   "TARGET_LDRD && TARGET_ARM
+     && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)"
+  [(set (match_dup 0) (match_dup 2))]
+{
+  if (!gen_operands_ldrd_strd (operands, 2, true, false))
+   {
+     FAIL; /* Helper returned false: leave the two loads untouched.  */
+   }
+  else 
+   {
+     operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); /* DImode reg with the same REGNO.  */
+     operands[2] = adjust_address (operands[2], DImode, 0); /* Same address, DImode access.  */
+   }
+})
+
+(define_peephole2 ; strd (movdi) for arm: two SI stores become one DImode store
+  [(set (match_operand:SI 2 "memory_operand" "")
+	(match_operand:SI 0 "arm_hard_core_register_operand" ""))
+   (set (match_operand:SI 3 "memory_operand" "")
+	(match_operand:SI 1 "arm_hard_core_register_operand" ""))]
+  "TARGET_LDRD && TARGET_ARM
+    && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+        || 1)"
+  [(set (match_dup 2) (match_dup 0))]
+{
+  if (!gen_operands_ldrd_strd (operands, 2, false, false))
+   {
+     FAIL; /* Helper returned false: leave the two stores untouched.  */
+   }
+  else
+   {
+     operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); /* DImode reg with the same REGNO.  */
+     operands[2] = adjust_address (operands[2], DImode, 0); /* Same address, DImode access.  */
+   }
+})
+;; The following peepholes reorder registers to enable LDRD/STRD.
+(define_peephole2 ; strd of constants for thumb: matches const/store/const/store order
+  [(set (match_operand:SI 0 "s_register_operand" "")
+        (match_operand:SI 4 "const_int_operand" ""))
+   (set (match_operand:SI 2 "memory_operand" "")
+        (match_dup 0))
+   (set (match_operand:SI 1 "s_register_operand" "")
+        (match_operand:SI 5 "const_int_operand" ""))
+   (set (match_operand:SI 3 "memory_operand" "")
+        (match_dup 1))]
+ "TARGET_LDRD && TARGET_THUMB2
+  && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)"
+  [(set (match_dup 0) (match_dup 4)) ; both constant loads are emitted first,
+   (set (match_dup 1) (match_dup 5))
+   (parallel [(set (match_dup 2) (match_dup 0)) ; then both stores as one parallel
+	      (set (match_dup 3) (match_dup 1))])]
+{
+  if (!gen_operands_const_strd (operands, 4))
+    FAIL; /* Helper returned false: leave the four insns untouched.  */
+})
+
+(define_peephole2 ; strd of constants for thumb: matches const/const/store/store order
+  [(set (match_operand:SI 0 "s_register_operand" "")
+        (match_operand:SI 4 "const_int_operand" ""))
+   (set (match_operand:SI 1 "s_register_operand" "")
+        (match_operand:SI 5 "const_int_operand" ""))
+   (set (match_operand:SI 2 "memory_operand" "")
+        (match_dup 0))
+   (set (match_operand:SI 3 "memory_operand" "")
+        (match_dup 1))]
+ "TARGET_LDRD && TARGET_THUMB2
+  && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)"
+  [(set (match_dup 0) (match_dup 4)) ; both constant loads are kept,
+   (set (match_dup 1) (match_dup 5))
+   (parallel [(set (match_dup 2) (match_dup 0)) ; the two stores become one parallel
+	      (set (match_dup 3) (match_dup 1))])]
+{
+  if (!gen_operands_const_strd (operands, 4))
+     FAIL; /* Helper returned false: leave the four insns untouched.  */
+})
+
+(define_peephole2 ; strd of constants for arm: matches const/store/const/store order
+  [(set (match_operand:SI 0 "s_register_operand" "")
+        (match_operand:SI 4 "const_int_operand" ""))
+   (set (match_operand:SI 2 "memory_operand" "")
+        (match_dup 0))
+   (set (match_operand:SI 1 "s_register_operand" "")
+        (match_operand:SI 5 "const_int_operand" ""))
+   (set (match_operand:SI 3 "memory_operand" "")
+        (match_dup 1))]
+ "TARGET_LDRD && TARGET_ARM
+  && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)"
+  [(set (match_dup 0) (match_dup 4))  ; SImode constant loads are kept as they are
+   (set (match_dup 1) (match_dup 5))
+   (set (match_dup 6) (match_dup 7))] ; one DImode store replaces both SI stores
+{
+  if (!gen_operands_const_strd (operands, 4))
+    FAIL; /* Helper returned false: leave the four insns untouched.  */
+
+  /* This code runs before the replacement RTL is built, so operands[0]
+     must stay SImode for the constant load above.  Put the DImode
+     register and the DImode memory reference in the otherwise unused
+     operands 7 and 6 rather than overwriting operands 0 and 2.  */
+  operands[7] = gen_rtx_REG (DImode, REGNO (operands[0]));
+  operands[6] = adjust_address (operands[2], DImode, 0);
+})
+
+(define_peephole2 ; strd of constants for arm: matches const/const/store/store order
+  [(set (match_operand:SI 0 "s_register_operand" "")
+        (match_operand:SI 4 "const_int_operand" ""))
+   (set (match_operand:SI 1 "s_register_operand" "")
+        (match_operand:SI 5 "const_int_operand" ""))
+   (set (match_operand:SI 2 "memory_operand" "")
+        (match_dup 0))
+   (set (match_operand:SI 3 "memory_operand" "")
+        (match_dup 1))]
+ "TARGET_LDRD && TARGET_ARM
+  && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)"
+  [(set (match_dup 0) (match_dup 4))  ; SImode constant loads are kept as they are
+   (set (match_dup 1) (match_dup 5))
+   (set (match_dup 6) (match_dup 7))] ; one DImode store replaces both SI stores
+{
+  if (!gen_operands_const_strd (operands, 4))
+    FAIL; /* Helper returned false: leave the four insns untouched.  */
+
+  /* This code runs before the replacement RTL is built, so operands[0]
+     must stay SImode for the constant load above.  Put the DImode
+     register and the DImode memory reference in the otherwise unused
+     operands 7 and 6 rather than overwriting operands 0 and 2.  */
+  operands[7] = gen_rtx_REG (DImode, REGNO (operands[0]));
+  operands[6] = adjust_address (operands[2], DImode, 0);
+})
+
+(define_peephole2 ; swap the destination registers of two loads
+		  ; before a commutative operation (ARM state only).
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "")
+        (match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "arm_hard_core_register_operand" "")
+        (match_operand:SI 3 "memory_operand" ""))
+   (set (match_operand:SI 4 "s_register_operand" "")
+        (match_operator:SI 5 "commutative_binary_operator"
+			   [(match_operand 6 "s_register_operand" "")
+			    (match_operand 7 "s_register_operand" "") ]))]
+  "TARGET_LDRD && TARGET_ARM 
+   && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)
+   && (  ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
+       ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
+   && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
+   && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" ; the loaded regs must be the two inputs of operator 5, in either order
+  [(set (match_dup 0) (match_dup 2)) ; single DImode load, see the else branch below
+   (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
+  { 
+    if (!gen_operands_ldrd_strd (operands, 3, true, true))
+     {
+        FAIL; /* Helper returned false: leave the three insns untouched.  */
+     }
+    else
+     {
+        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); /* DImode reg with the same REGNO.  */
+        operands[2] = adjust_address (operands[2], DImode, 0); /* Same address, DImode access.  */
+     }
+   }
+)
+
+(define_peephole2 ; swap the destination registers of two loads
+		  ; before a commutative operation that sets the flags (ARM state only).
+  [(set (match_operand:SI 0 "arm_hard_core_register_operand" "")
+        (match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "arm_hard_core_register_operand" "")
+        (match_operand:SI 3 "memory_operand" ""))
+   (parallel
+      [(set (match_operand:SI 4 "s_register_operand" "")
+	    (match_operator:SI 5 "commutative_binary_operator"		
+			       [(match_operand 6 "s_register_operand" "")
+				(match_operand 7 "s_register_operand" "") ]))
+       (clobber (reg:CC CC_REGNUM))])]
+  "TARGET_LDRD && TARGET_ARM 
+   && ((current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun))
+         || 1)
+   && (  ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
+       ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
+   && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
+   && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" ; the loaded regs must be the two inputs of operator 5, in either order
+  [(set (match_dup 0) (match_dup 2))   ; single DImode load, see the else branch below
+   (parallel
+      [(set (match_dup 4) 
+	    (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
+       (clobber (reg:CC CC_REGNUM))])]
+  {
+    if (!gen_operands_ldrd_strd (operands, 3, true, true))
+     {
+        FAIL; /* Helper returned false: leave the three insns untouched.  */
+     }
+    else
+     {
+        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); /* DImode reg with the same REGNO.  */
+        operands[2] = adjust_address (operands[2], DImode, 0); /* Same address, DImode access.  */
+     }
+   }
+)
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm
index b970ec2..13dbd2b 100644
--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -38,7 +38,10 @@ MD_INCLUDES= 	$(srcdir)/config/arm/arm-tune.md \
 		$(srcdir)/config/arm/vfp.md \
 		$(srcdir)/config/arm/neon.md \
 		$(srcdir)/config/arm/thumb2.md \
-		$(srcdir)/config/arm/arm-fixed.md
+		$(srcdir)/config/arm/arm-fixed.md \
+		$(srcdir)/config/arm/ldmstm.md \
+		$(srcdir)/config/arm/ldrdstrd0.md \
+		$(srcdir)/config/arm/ldrdstrd1.md
 
 LIB1ASMSRC = arm/lib1funcs.asm
 LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \
@@ -46,6 +49,16 @@ LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \
 s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \
 	s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES)
 
+$(srcdir)/config/arm/ldrdstrd0.md: $(srcdir)/config/arm/ldrdstrd.md.in # every after_ldmstm -> 0, written via a temp file so a failed sed cannot leave a truncated target
+	sed 's/after_ldmstm/0/g' \
+	     < $(srcdir)/config/arm/ldrdstrd.md.in > $@.tmp \
+	     && mv -f $@.tmp $@
+
+$(srcdir)/config/arm/ldrdstrd1.md: $(srcdir)/config/arm/ldrdstrd.md.in # every after_ldmstm -> 1, written via a temp file so a failed sed cannot leave a truncated target
+	sed 's/after_ldmstm/1/g' \
+	     < $(srcdir)/config/arm/ldrdstrd.md.in > $@.tmp \
+	     && mv -f $@.tmp $@
+
 $(srcdir)/config/arm/arm-tune.md: $(srcdir)/config/arm/gentune.sh \
 	$(srcdir)/config/arm/arm-cores.def
 	$(SHELL) $(srcdir)/config/arm/gentune.sh \
