Hi all,
This is an optimization patch which combines a "ubfiz" and an "orr"
insn into a single "bfi" insn when certain conditions are met.
tmp = (x & m) | ((y & n) << lsb) can be represented using
and tmp, x, m
bfi tmp, y, #lsb, #width
if (n + 1) == 2^width and (m & (n << lsb)) == 0.
The original codegen is
ubfiz tmp1, y, #lsb, #width
and tmp, x, m
orr tmp, tmp1, tmp
A small test case is also added to verify it.
Is this Okay for trunk?
Kind regards,
Renlin Li
gcc/ChangeLog:
2014-02-25 Renlin Li <renlin...@arm.com>
* config/aarch64/aarch64.md (*combine_bfi2<GPI:mode><SHORT:mode>,
*combine_bfi3<mode>): New define_insn_and_split patterns.
gcc/testsuite:
2014-02-25 Renlin Li <renlin...@arm.com>
* gcc.target/aarch64/combine-and-orr.c: New test.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 99a6ac8..2307f43 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3115,6 +3115,62 @@
[(set_attr "type" "bfm")]
)
+(define_insn_and_split "*combine_bfi2<GPI:mode><SHORT:mode>"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "n"))
+ (match_operand 3 "const_int_operand" "n"))
+ (zero_extend:GPI (match_operand:SHORT 4 "register_operand" "0"))))]
+ "exact_log2 ((INTVAL (operands[3]) >> INTVAL (operands[2])) + 1) >= 0
+ && (INTVAL (operands[3]) & (((HOST_WIDE_INT) 1 << INTVAL (operands[2])) - 1)) == 0
+ && <SHORT:sizen> <= INTVAL (operands[2])"
+ "#"
+ ""
+ [(set (match_dup 0)
+ (zero_extend:GPI (match_dup 4)))
+ (set (zero_extract:GPI (match_dup 0)
+ (match_dup 3)
+ (match_dup 2))
+ (match_dup 1))]
+ "{
+ HOST_WIDE_INT tmp = (INTVAL (operands[3]) >> INTVAL (operands[2])) + 1;
+ operands[3] = GEN_INT (exact_log2 (tmp));
+ }"
+ [(set_attr "type" "bfm")]
+)
+
+(define_insn_and_split "*combine_bfi3<mode>"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand" "n"))
+ (and:GPI (ashift:GPI (match_operand:GPI 3 "register_operand" "r")
+ (match_operand 4 "const_int_operand" "n"))
+ (match_operand 5 "const_int_operand" "n"))))]
+ "exact_log2 ((INTVAL (operands[5]) >> INTVAL (operands[4])) + 1) >= 0
+ && (INTVAL (operands[5]) & (((HOST_WIDE_INT) 1 << INTVAL (operands[4])) - 1)) == 0
+ && (INTVAL (operands[2]) & INTVAL (operands[5])) == 0"
+ "#"
+ ""
+ [(set (match_dup 0)
+ (and:GPI (match_dup 1) (match_dup 6)))
+ (set (zero_extract:GPI (match_dup 0)
+ (match_dup 5)
+ (match_dup 4))
+ (match_dup 3))]
+ "{
+ HOST_WIDE_INT tmp = (INTVAL (operands[5]) >> INTVAL (operands[4])) + 1;
+ operands[5] = GEN_INT (exact_log2 (tmp));
+
+ enum machine_mode mode = GET_MODE (operands[0]);
+ /* The "0" constraint ties op1 to op0, so op0 must not hold the mask.  */
+ if (aarch64_bitmask_imm (INTVAL (operands[2]), mode))
+ operands[6] = operands[2];
+ else
+ emit_move_insn (operands[6] = gen_reg_rtx (mode), operands[2]);
+ }"
+ [(set_attr "type" "bfm")]
+)
+
(define_insn "*extr_insv_lower_reg<mode>"
[(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
(match_operand 1 "const_int_operand" "n")
diff --git a/gcc/testsuite/gcc.target/aarch64/combine-and-orr.c b/gcc/testsuite/gcc.target/aarch64/combine-and-orr.c
new file mode 100644
index 0000000..97d8d5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/combine-and-orr.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fexpensive-optimizations" } */
+
+unsigned int
+foo1 (unsigned int major, unsigned int minor)
+{
+ unsigned int tmp = (minor & 0xff) | ((major & 0xfff) << 8);
+ return tmp;
+}
+
+unsigned int
+foo2 (unsigned int major, unsigned int minor)
+{
+ unsigned int tmp = (minor & 0x1f) | ((major & 0xfff) << 8);
+ return tmp;
+}
+
+unsigned int
+foo3 (unsigned int major, unsigned int minor)
+{
+ unsigned int tmp = (minor & 0x12) | ((major & 0xfff) << 5);
+ return tmp;
+}
+
+/* { dg-final { scan-assembler-times "bfi\tw\[0-9\]+, w\[0-9\]+, (8|5), 12" 3} } */