diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 76e1abd..169259b 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -93,6 +93,7 @@ extern void neon_split_vcombine (rtx op[3]);
 extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx,
 						     bool);
 extern bool arm_tls_referenced_p (rtx);
+extern bool is_const_symbol_ref_p (rtx);
 
 extern int cirrus_memory_offset (rtx);
 extern int arm_coproc_mem_operand (rtx, bool);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 627b436..df53bd7 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -156,6 +156,7 @@ static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
 static bool arm_have_conditional_execution (void);
 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
+bool is_const_symbol_ref_p (rtx);
 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
@@ -6925,6 +6926,28 @@ arm_tls_referenced_p (rtx x)
   return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
 }
 
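+/* Return TRUE if X is a SYMBOL_REF, a LABEL_REF, or a (CONST (PLUS
+   (SYMBOL_REF) (CONST_INT))) whose offset fits in the range [0, 0x7fff],
+   i.e. a constant address that can be built with a movw/movt pair.  */
+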
+bool
+is_const_symbol_ref_p (rtx x)
+{
+  if (GET_CODE (x) == SYMBOL_REF)
+    return true;
+
+  if (GET_CODE (x) == LABEL_REF)
+    return true;
+
+  if (GET_CODE (x) == CONST
+      && GET_CODE (XEXP (x, 0)) == PLUS
+      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+      && (INTVAL (XEXP (XEXP (x, 0), 1)) & ~0x7fff) == 0)
+    return true;
+  return false;
+}
+
 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
 
    On the ARM, allow any integer (invalid ones are removed later by insn
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index f4ced7b..bb37b98 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -5447,14 +5447,6 @@
 			       optimize && can_create_pseudo_p ());
           DONE;
         }
-
-      if (TARGET_USE_MOVT && !target_word_relocations
-	  && GET_CODE (operands[1]) == SYMBOL_REF
-	  && !flag_pic && !arm_tls_referenced_p (operands[1]))
-	{
-	  arm_emit_movpair (operands[0], operands[1]);
-	  DONE;
-	}
     }
   else /* TARGET_THUMB1...  */
     {
@@ -5563,6 +5555,24 @@
   "
 )
 
+;; Split symbol_refs at a later stage (after cprop) instead of generating
+;; movt/movw at expand time; this prevents cprop from merging them back
+;; together.  A movt/movw pair should be faster than a constant-pool load.
+(define_split
+  [(set (match_operand:SI 0 "arm_general_register_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  "TARGET_32BIT
+   && TARGET_USE_MOVT && is_const_symbol_ref_p (operands[1])
+   && !flag_pic && !target_word_relocations
+   && !arm_tls_referenced_p (operands[1])"
+  [(clobber (const_int 0))]
+{
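+  /* Emit the movw/movt pair directly; DONE means the dummy clobber
+     pattern above is never used.  */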
+  arm_emit_movpair (operands[0], operands[1]);
+  DONE;
+})
+
 (define_insn "*thumb1_movsi_insn"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,l,l,l,>,l, m,*l*h*k")
 	(match_operand:SI 1 "general_operand"      "l, I,J,K,>,l,mi,l,*l*h*k"))]
