GCC currently generates unnecessary uxtb instructions when compiling
gcc.target/arm/unsigned-extend-1.c for Thumb1 targets such as the Cortex-M0.
This test case is as follows:

unsigned char foo (unsigned char c)
{
  return (c >= '0') && (c <= '9');
}
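
For reference, GCC folds this range check into a single unsigned
comparison early on, and the narrowing cast in the folded form is what
materializes as the uxtb instructions. A C sketch of the equivalent
form (foo_equiv is a hypothetical name, not part of the patch):

unsigned char foo_equiv (unsigned char c)
{
  /* Subtracting '0' wraps values below '0' to large numbers, so a
     single unsigned comparison covers both bounds.  The truncation
     to unsigned char is the source of the zero extension seen in the
     RTL below.  */
  return (unsigned char) (c - '0') <= 9;
}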

The assembly output generated with "-mcpu=cortex-m0 -O2":

foo:
        mov     r3, r0
        mov     r2, #9
        mov     r0, #0
        sub     r3, r3, #48
        uxtb    r3, r3
        cmp     r2, r3
        adc     r0, r0, r0
        @ sp needed
        uxtb    r0, r0
        bx      lr

The uxtb instructions are supposed to be optimized out in the RTL
combine pass, but the current thumb1_addsi3_addgeu insn pattern is not
in a form that combine can work with. On entry to the combine pass, we
have this RTL:

(insn 6 3 7 2 (set (reg:SI 116)
        (plus:SI (reg/v:SI 114 [ c ])
            (const_int -48 [0xffffffffffffffd0])))

(insn 7 6 8 2 (set (reg:SI 117)
        (zero_extend:SI (subreg:QI (reg:SI 116) 0)))

(insn 8 7 9 2 (set (reg:SI 119)
        (const_int 9 [0x9]))

(insn 9 8 10 2 (set (reg:SI 120)
        (const_int 0 [0]))

(insn 10 9 12 2 (set (reg:SI 118)
        (plus:SI (plus:SI (reg:SI 120)
                (reg:SI 120))
            (geu:SI (reg:SI 119)
                (reg:SI 117))))

During the combine pass, operand 0 of the outer plus operator is
(plus:SI (reg:SI 120) (reg:SI 120)), which is simplified to
(const_int 0) because reg 120 is known to be zero after insn 9. The
outer plus is then eliminated, as it is an addition of zero, and with
the constant 9 from insn 8 substituted, the remaining geu comparison
is canonicalized to a leu. Finally, insn 10 is turned into:

(insn 10 9 12 2 (set (reg:SI 118)
                     (leu:SI (reg:SI 116) (const_int 9))))

The rewrite to a leu comparison matches our expectation: reg 116 is
used directly instead of reg 117, which would eliminate the uxtb in
insn 7. Unfortunately, this new insn 10 is not a recognizable insn for
Thumb1, so the change made by the combine pass is undone and the
unnecessary uxtb instructions are kept.
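
Modelled in C, the rewrite combine attempts looks like this (the
function names are hypothetical and the parameters mirror the pseudo
registers above; this is illustrative only):

/* insn 10 as combine first sees it: r120 is 0 (insn 9), r119 is 9
   (insn 8), and r117 is the zero-extended low byte of r116
   (insn 7).  */
unsigned int insn10_before (unsigned int r120, unsigned int r119,
                            unsigned int r117)
{
  return (r120 + r120) + (r119 >= r117);
}

/* insn 10 after simplification: the addition of zero is gone,
   (geu 9 x) has been flipped to (leu x 9), and reg 116 is compared
   directly, leaving the zero extension in insn 7 dead.  */
unsigned int insn10_after (unsigned int r116)
{
  return r116 <= 9;
}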

This patch implements a veneer for the thumb1_addsi3_addgeu insn
pattern. The veneer pattern has the same shape as the new insn 10 in
the example above, which allows the combine pass to optimize out the
unnecessary uxtb instructions. At RTL expansion time, GIMPLE is
expanded to this veneer pattern rather than to thumb1_addsi3_addgeu
directly. Later, in the split1 pass, the veneer is split into the
normal thumb1_addsi3_addgeu pattern for code generation.
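
In C terms, the split recreates the adc-based sequence from the leu
form roughly as follows (leu_via_addgeu is a hypothetical name; this
is a sketch of the semantics, not the actual RTL):

/* op1 <= op2 is computed as geu (op2, op1) added onto a zeroed
   scratch register, matching the shape of thumb1_addsi3_addgeu.  */
unsigned int leu_via_addgeu (unsigned int op1, unsigned int op2)
{
  unsigned int scratch = 0;
  return (scratch + scratch) + (op2 >= op1);
}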

gcc/ChangeLog:

2017-06-05  Prakhar Bahuguna  <prakhar.bahug...@arm.com>

        * config/arm/arm.md (cstoresi_leu_thumb1): New define_insn_and_split
        pattern.
        (cstoresi4): Use above new pattern.

gcc/testsuite/ChangeLog:

2017-06-05  Prakhar Bahuguna  <prakhar.bahug...@arm.com>

        * gcc.target/arm/thumb1-unsigned-extend-1.c: New file.

Testing done: ran regression tests and added a new test to confirm
that the uxtb instructions are optimized out for Thumb1 targets.

Okay for stage1?

-- 

Prakhar Bahuguna
From c0c5d410536238bf57967cbc50bdc8e7f7f0ab59 Mon Sep 17 00:00:00 2001
From: Prakhar Bahuguna <prakhar.bahug...@arm.com>
Date: Fri, 24 Mar 2017 10:48:03 +0000
Subject: [PATCH] New define_insn_and_split pattern to optimize out unnecessary
 uxtb instructions

---
 gcc/config/arm/arm.md                              | 36 ++++++++++++++++++----
 .../gcc.target/arm/thumb1-unsigned-extend-1.c      | 10 ++++++
 2 files changed, 40 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/thumb1-unsigned-extend-1.c

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index e6e1ac54a85..656ede7c2f5 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -7691,18 +7691,14 @@
 
     case LEU:
       op3 = force_reg (SImode, operands[3]);
-      scratch = force_reg (SImode, const0_rtx);
-      emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch,
-                                         op3, operands[2]));
+      emit_insn (gen_cstoresi_leu_thumb1 (operands[0], operands[2], op3));
       break;
 
     case GEU:
       op3 = operands[3];
       if (!thumb1_cmp_operand (op3, SImode))
         op3 = force_reg (SImode, op3);
-      scratch = force_reg (SImode, const0_rtx);
-      emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch,
-                                         operands[2], op3));
+      emit_insn (gen_cstoresi_leu_thumb1 (operands[0], op3, operands[2]));
       break;
 
     case LTU:
@@ -7781,6 +7777,34 @@
    }"
 )
 
+(define_insn_and_split "cstoresi_leu_thumb1"
+  [(set (match_operand:SI 0 "s_register_operand" "=l")
+       (leu:SI (match_operand:SI 1 "s_register_operand" "l")
+               (match_operand:SI 2 "thumb1_cmp_operand" "lI")))]
+  "TARGET_THUMB1"
+  "#"
+  "TARGET_THUMB1 && !reload_completed"
+  [(set (match_dup 3) (const_int 0))
+   (set (match_dup 0)
+       (plus:SI (plus:SI (match_dup 3)
+                         (match_dup 3))
+                (geu:SI (match_dup 4)
+                        (match_dup 1))))]
+  "
+    operands[3] = gen_reg_rtx (SImode);
+
+    if (CONST_INT_P (operands[2]))
+      {
+       operands[4] = gen_reg_rtx (SImode);
+       emit_move_insn (operands[4], operands[2]);
+      }
+    else
+      operands[4] = operands[2];
+  "
+  [(set_attr "length" "4")
+   (set_attr "type" "multiple")]
+)
+
 
 ;; Conditional move insns
 
diff --git a/gcc/testsuite/gcc.target/arm/thumb1-unsigned-extend-1.c b/gcc/testsuite/gcc.target/arm/thumb1-unsigned-extend-1.c
new file mode 100644
index 00000000000..e6066b65d60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb1-unsigned-extend-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-options "-mcpu=cortex-m0 -O2" } */
+
+unsigned char foo (unsigned char c)
+{
+  return (c >= '0') && (c <= '9');
+}
+
+/* { dg-final { scan-assembler-not "uxtb" } } */
-- 
2.13.0
