On 14/02/12 18:00, Andrew Stubbs wrote:
> On Tue 14 Feb 2012 17:35:36 GMT, Richard Earnshaw wrote:
>> Bernds checked in a patch last year (or maybe even before that) to make
>> the selection of flags clobbered insns run very late (certainly after
>> condexec had run), so I'm not sure why you're not seeing this.
>>
>> Hm, you mentioned some peepholes. Try removing them....
>
> Hmmm, it seems you're right. The machine reorg pass now takes care of
> this. Well ... that was easy!
>
> Here's a patch to remove the obsolete peepholes. I've confirmed it works
> with the testcase.
Testing revealed that the new thumb2_reorg code was not as thorough as
the old peepholes at converting insns to 16-bit encodings.

This updated patch rewrites thumb2_reorg to handle all the missing
cases, and adds a testcase to make sure it's all good. The parts that
were in the old patch are unchanged.
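
For the record, the conversion itself is the same as before: when the
condition flags are dead at the insn, the plain SET gets wrapped in a
PARALLEL with a CC clobber so that it matches the 16-bit flag-setting
patterns. Roughly, in illustrative RTL (register numbers invented):

  ;; Before: matches only the 32-bit patterns.
  (set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))

  ;; After: matches *thumb2_addsi_short and assembles to 16-bit "adds".
  (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
             (clobber (reg:CC CC_REGNUM))])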
I did initially try to insert the new cases into the old code, but the
if conditions got way out of hand, and I ended up duplicating the
conversion code. I've therefore recast the code as a switch over the
RTX code of the source, which is easier to extend, but it boils down
to exactly the same transformation.
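
The one action that does more than add the clobber is SWAP_CONV, for
the commutative operations where only the second operand matches the
destination; there the operands are swapped before the clobber goes
in. Sketched on AND (again illustrative RTL):

  ;; Only the second operand matches the destination...
  (set (reg:SI 0) (and:SI (reg:SI 1) (reg:SI 0)))

  ;; ...so swap it into the ANDS <Rdn>,<Rm> shape, then add the clobber.
  (parallel [(set (reg:SI 0) (and:SI (reg:SI 0) (reg:SI 1)))
             (clobber (reg:CC CC_REGNUM))])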
I've got a full test run going again.
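
For anyone who wants to poke at the new tests in isolation, something
like this should do from a built gcc directory (exact flags depend on
your target/multilib setup):

  make check-gcc RUNTESTFLAGS="arm.exp=thumb-16bit-ops.c"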
OK for 4.8, again?
Andrew
2012-02-17  Andrew Stubbs  <a...@codesourcery.com>

	gcc/
	* config/arm/arm.c (thumb2_reorg): Add complete support
	for 16-bit instructions.
	* config/arm/thumb2.md: Delete obsolete flag-clobbering peepholes.

	gcc/testsuite/
	* gcc.target/arm/thumb-16bit-ops.c: New file.
	* gcc.target/arm/thumb-ifcvt.c: New file.
---
gcc/config/arm/arm.c | 132 +++++++++++++++----
gcc/config/arm/thumb2.md | 107 ----------------
gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c | 164 ++++++++++++++++++++++++
gcc/testsuite/gcc.target/arm/thumb-ifcvt.c | 19 +++
4 files changed, 286 insertions(+), 136 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c
create mode 100644 gcc/testsuite/gcc.target/arm/thumb-ifcvt.c
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 0bded8d..c3a19e4 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -13246,47 +13246,121 @@ thumb2_reorg (void)
FOR_BB_INSNS_REVERSE (bb, insn)
{
if (NONJUMP_INSN_P (insn)
- && !REGNO_REG_SET_P (&live, CC_REGNUM))
+ && !REGNO_REG_SET_P (&live, CC_REGNUM)
+ && GET_CODE (PATTERN (insn)) == SET)
{
+ enum {SKIP, CONV, SWAP_CONV} action = SKIP;
rtx pat = PATTERN (insn);
- if (GET_CODE (pat) == SET
- && low_register_operand (XEXP (pat, 0), SImode)
- && thumb_16bit_operator (XEXP (pat, 1), SImode)
- && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
- && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
+ rtx dst = XEXP (pat, 0);
+ rtx src = XEXP (pat, 1);
+ rtx op0 = NULL_RTX, op1 = NULL_RTX;
+
+ if (!OBJECT_P (src))
+ op0 = XEXP (src, 0);
+
+ if (BINARY_P (src))
+ op1 = XEXP (src, 1);
+
+ if (low_register_operand (dst, SImode))
{
- rtx dst = XEXP (pat, 0);
- rtx src = XEXP (pat, 1);
- rtx op0 = XEXP (src, 0);
- rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
- ? XEXP (src, 1) : NULL);
-
- if (rtx_equal_p (dst, op0)
- || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+ switch (GET_CODE (src))
{
- rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
- rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
- rtvec vec = gen_rtvec (2, pat, clobber);
-
- PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
- INSN_CODE (insn) = -1;
+ case PLUS:
+ case MINUS:
+ if (low_register_operand (op0, SImode))
+ {
+ /* ADDS <Rd>,<Rn>,<Rm> */
+ if (low_register_operand (op1, SImode))
+ action = CONV;
+ /* ADDS <Rdn>,#<imm8> */
+ else if (rtx_equal_p (dst, op0)
+ && CONST_INT_P (op1)
+ && IN_RANGE (INTVAL (op1), 0, 255))
+ action = CONV;
+ /* ADDS <Rd>,<Rn>,#<imm3> */
+ else if (CONST_INT_P (op1)
+ && IN_RANGE (INTVAL (op1), 0, 7))
+ action = CONV;
+ }
+ break;
+ case MULT:
+ /* MULS <Rdm>,<Rn>,<Rdm>
+ As an exception to the rule, this is only used
+ when optimizing for size since MULS is slow on all
+ known implementations. */
+ if (!optimize_function_for_size_p (cfun))
+ break;
+ /* else fall through. */
+ case AND:
+ case IOR:
+ case XOR:
+ /* ANDS <Rdn>,<Rm> */
+ if (rtx_equal_p (dst, op0)
+ && low_register_operand (op1, SImode))
+ action = CONV;
+ else if (rtx_equal_p (dst, op1)
+ && low_register_operand (op0, SImode))
+ action = SWAP_CONV;
+ break;
+ case ASHIFTRT:
+ case ASHIFT:
+ case LSHIFTRT:
+ /* ASRS <Rdn>,<Rm> */
+ if (rtx_equal_p (dst, op0)
+ && low_register_operand (op1, SImode))
+ action = CONV;
+ /* ASRS <Rd>,<Rm>,#<imm5> */
+ else if (low_register_operand (op0, SImode)
+ && CONST_INT_P (op1)
+ && IN_RANGE (INTVAL (op1), 0, 31))
+ action = CONV;
+ break;
+ case ROTATERT:
+ /* RORS <Rdn>,<Rm> */
+ if (rtx_equal_p (dst, op0)
+ && low_register_operand (op1, SImode))
+ action = CONV;
+ break;
+ case NOT:
+ case NEG:
+ /* MVNS <Rd>,<Rm> */
+ if (low_register_operand (op0, SImode))
+ action = CONV;
+ break;
+ case CONST_INT:
+ /* MOVS <Rd>,#<imm8> */
+ if (CONST_INT_P (src)
+ && IN_RANGE (INTVAL (src), 0, 255))
+ action = CONV;
+ break;
+ case REG:
+ /* MOVS and MOV<c> with registers have different
+ encodings, so are not relevant here. */
+ break;
+ default:
+ break;
}
- /* We can also handle a commutative operation where the
- second operand matches the destination. */
- else if (op1 && rtx_equal_p (dst, op1))
- {
- rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
- rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
- rtvec vec;
+ }
+
+ if (action != SKIP)
+ {
+ rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
+ rtvec vec;
+ if (action == SWAP_CONV)
+ {
src = copy_rtx (src);
XEXP (src, 0) = op1;
XEXP (src, 1) = op0;
pat = gen_rtx_SET (VOIDmode, dst, src);
vec = gen_rtvec (2, pat, clobber);
- PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
- INSN_CODE (insn) = -1;
}
+ else /* action == CONV */
+ vec = gen_rtvec (2, pat, clobber);
+
+ PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
+ INSN_CODE (insn) = -1;
}
}
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 05585da..799a3df 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -677,26 +677,6 @@
(set_attr "length" "2")]
)
-;; Similarly for 16-bit shift instructions
-;; There is no 16-bit rotate by immediate instruction.
-(define_peephole2
- [(set (match_operand:SI 0 "low_register_operand" "")
- (match_operator:SI 3 "shift_operator"
- [(match_operand:SI 1 "low_register_operand" "")
- (match_operand:SI 2 "low_reg_or_int_operand" "")]))]
- "TARGET_THUMB2
- && peep2_regno_dead_p(0, CC_REGNUM)
- && ((GET_CODE(operands[3]) != ROTATE && GET_CODE(operands[3]) != ROTATERT)
- || REG_P(operands[2]))"
- [(parallel
- [(set (match_dup 0)
- (match_op_dup 3
- [(match_dup 1)
- (match_dup 2)]))
- (clobber (reg:CC CC_REGNUM))])]
- ""
-)
-
(define_insn "*thumb2_shiftsi3_short"
[(set (match_operand:SI 0 "low_register_operand" "=l")
(match_operator:SI 3 "shift_operator"
@@ -715,20 +695,6 @@
(const_string "alu_shift_reg")))]
)
-;; 16-bit load immediate
-(define_peephole2
- [(set (match_operand:QHSI 0 "low_register_operand" "")
- (match_operand:QHSI 1 "const_int_operand" ""))]
- "TARGET_THUMB2
- && peep2_regno_dead_p(0, CC_REGNUM)
- && (unsigned HOST_WIDE_INT) INTVAL(operands[1]) < 256"
- [(parallel
- [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC CC_REGNUM))])]
- ""
-)
-
(define_insn "*thumb2_mov<mode>_shortim"
[(set (match_operand:QHSI 0 "low_register_operand" "=l")
(match_operand:QHSI 1 "const_int_operand" "I"))
@@ -739,24 +705,6 @@
(set_attr "length" "2")]
)
-;; 16-bit add/sub immediate
-(define_peephole2
- [(set (match_operand:SI 0 "low_register_operand" "")
- (plus:SI (match_operand:SI 1 "low_register_operand" "")
- (match_operand:SI 2 "const_int_operand" "")))]
- "TARGET_THUMB2
- && peep2_regno_dead_p(0, CC_REGNUM)
- && ((rtx_equal_p(operands[0], operands[1])
- && INTVAL(operands[2]) > -256 && INTVAL(operands[2]) < 256)
- || (INTVAL(operands[2]) > -8 && INTVAL(operands[2]) < 8))"
- [(parallel
- [(set (match_dup 0)
- (plus:SI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC CC_REGNUM))])]
- ""
-)
-
(define_insn "*thumb2_addsi_short"
[(set (match_operand:SI 0 "low_register_operand" "=l,l")
(plus:SI (match_operand:SI 1 "low_register_operand" "l,0")
@@ -868,35 +816,6 @@
(set_attr "length" "2,4")]
)
-;; 16-bit encodings of "muls" and "mul<c>". We only use these when
-;; optimizing for size since "muls" is slow on all known
-;; implementations and since "mul<c>" will be generated by
-;; "*arm_mulsi3_v6" anyhow. The assembler will use a 16-bit encoding
-;; for "mul<c>" whenever possible anyhow.
-(define_peephole2
- [(set (match_operand:SI 0 "low_register_operand" "")
- (mult:SI (match_operand:SI 1 "low_register_operand" "")
- (match_dup 0)))]
- "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)"
- [(parallel
- [(set (match_dup 0)
- (mult:SI (match_dup 0) (match_dup 1)))
- (clobber (reg:CC CC_REGNUM))])]
- ""
-)
-
-(define_peephole2
- [(set (match_operand:SI 0 "low_register_operand" "")
- (mult:SI (match_dup 0)
- (match_operand:SI 1 "low_register_operand" "")))]
- "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)"
- [(parallel
- [(set (match_dup 0)
- (mult:SI (match_dup 0) (match_dup 1)))
- (clobber (reg:CC CC_REGNUM))])]
- ""
-)
-
(define_insn "*thumb2_mulsi_short"
[(set (match_operand:SI 0 "low_register_operand" "=l")
(mult:SI (match_operand:SI 1 "low_register_operand" "%0")
@@ -979,19 +898,6 @@
(const_int 8)))]
)
-;; 16-bit complement
-(define_peephole2
- [(set (match_operand:SI 0 "low_register_operand" "")
- (not:SI (match_operand:SI 1 "low_register_operand" "")))]
- "TARGET_THUMB2
- && peep2_regno_dead_p(0, CC_REGNUM)"
- [(parallel
- [(set (match_dup 0)
- (not:SI (match_dup 1)))
- (clobber (reg:CC CC_REGNUM))])]
- ""
-)
-
(define_insn "*thumb2_one_cmplsi2_short"
[(set (match_operand:SI 0 "low_register_operand" "=l")
(not:SI (match_operand:SI 1 "low_register_operand" "l")))
@@ -1002,19 +908,6 @@
(set_attr "length" "2")]
)
-;; 16-bit negate
-(define_peephole2
- [(set (match_operand:SI 0 "low_register_operand" "")
- (neg:SI (match_operand:SI 1 "low_register_operand" "")))]
- "TARGET_THUMB2
- && peep2_regno_dead_p(0, CC_REGNUM)"
- [(parallel
- [(set (match_dup 0)
- (neg:SI (match_dup 1)))
- (clobber (reg:CC CC_REGNUM))])]
- ""
-)
-
(define_insn "*thumb2_negsi2_short"
[(set (match_operand:SI 0 "low_register_operand" "=l")
(neg:SI (match_operand:SI 1 "low_register_operand" "l")))
diff --git a/gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c b/gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c
new file mode 100644
index 0000000..3e43e4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c
@@ -0,0 +1,164 @@
+/* Check that the compiler properly uses 16-bit encodings where available. */
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-options "-Os -fno-builtin" } */
+
+int
+f (int a, int b)
+{
+ return a + b;
+}
+
+/* { dg-final { scan-assembler "adds r0, r0, r1" } } */
+
+int
+g1 (int a)
+{
+ return a + 255;
+}
+
+/* { dg-final { scan-assembler "adds r0, r0, #255" } } */
+
+int
+g2 (int a)
+{
+ return a + 256;
+}
+
+/* { dg-final { scan-assembler "add r0, r0, #256" } } */
+
+int
+h1 (int a, int b)
+{
+ return b + 7;
+}
+
+/* { dg-final { scan-assembler "adds r0, r1, #7" } } */
+
+int
+h2 (int a, int b)
+{
+ return b + 8;
+}
+
+/* { dg-final { scan-assembler "add r0, r1, #8" } } */
+
+int
+i (int a, int b)
+{
+ return b;
+}
+
+/* { dg-final { scan-assembler "mov r0, r1" } } */
+
+int
+j1 ()
+{
+ return 255;
+}
+
+/* { dg-final { scan-assembler "movs r0, #255" } } */
+
+int
+j2 ()
+{
+ return 256;
+}
+
+/* { dg-final { scan-assembler "mov r0, #256" } } */
+
+int
+k (int a, int b)
+{
+ return b << 15;
+}
+
+/* { dg-final { scan-assembler "lsls r0, r1, #15" } } */
+
+int
+l1 (int a, int b)
+{
+ return a << b;
+}
+
+/* { dg-final { scan-assembler "lsls r0, r0, r1" } } */
+
+int
+l2 (int a, int b, int c)
+{
+ return b << c;
+}
+
+/* { dg-final { scan-assembler "lsl r0, r1, r2" } } */
+
+int
+m (int a, int b)
+{
+ return b >> 15;
+}
+
+/* { dg-final { scan-assembler "asrs r0, r1, #15" } } */
+
+int
+n1 (int a, int b)
+{
+ return a >> b;
+}
+
+/* { dg-final { scan-assembler "asrs r0, r0, r1" } } */
+
+int
+n2 (int a, int b, int c)
+{
+ return b >> c;
+}
+
+/* { dg-final { scan-assembler "asr r0, r1, r2" } } */
+
+unsigned int
+o (unsigned int a, unsigned int b)
+{
+ return b >> 15;
+}
+
+/* { dg-final { scan-assembler "lsrs r0, r1, #15" } } */
+
+unsigned int
+p1 (unsigned int a, unsigned int b)
+{
+ return a >> b;
+}
+
+/* { dg-final { scan-assembler "lsrs r0, r0, r1" } } */
+
+unsigned int
+p2 (unsigned int a, unsigned int b, unsigned int c)
+{
+ return b >> c;
+}
+
+/* { dg-final { scan-assembler "lsr r0, r1, r2" } } */
+
+int
+q (int a, int b)
+{
+ return b * a;
+}
+
+/* { dg-final { scan-assembler "muls r0, r1, r0" } } */
+
+int
+r (int a, int b)
+{
+ return ~b;
+}
+
+/* { dg-final { scan-assembler "mvns r0, r1" } } */
+
+int
+s (int a, int b)
+{
+ return -b;
+}
+
+/* { dg-final { scan-assembler "negs r0, r1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c b/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c
new file mode 100644
index 0000000..b03bbce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c
@@ -0,0 +1,19 @@
+/* Check that Thumb 16-bit shifts can be if-converted. */
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-options "-O2" } */
+
+int
+foo (int a, int b)
+{
+ if (a != b)
+ {
+ a = a << b;
+ a = a >> 1;
+ }
+
+ return a + b;
+}
+
+/* { dg-final { scan-assembler "lslne" } } */
+/* { dg-final { scan-assembler "asrne" } } */