Hi all,
Thank you for your suggestions, Richard. I have updated the patch
accordingly.
This is an optimization patch which will combine "ubfiz" and "orr"
insns with a single "bfi" when certain conditions meet.
tmp = (x & m) | ( (y & n) << lsb) can be presented using
and tmp, x, m
bfi tmp, y, #lsb, #width
if ((n+1) == 2^width) && (m & n << lsb) == 0.
A small test case is also added to verify it.
Is this Okay for stage-1?
Kind regards,
Renlin Li
gcc/ChangeLog:
2014-03-14 Renlin Li <renlin...@arm.com>
* config/aarch64/aarch64.md (*combine_bfi2<GPI:mode><SHORT:mode>,
*combine_bfi3<mode>): New.
gcc/testsuite:
2014-03-14 Renlin Li <renlin...@arm.com>
* gcc.target/aarch64/combine_and_orr_1.c: New.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 99a6ac8..6c2798b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3115,6 +3115,53 @@
[(set_attr "type" "bfm")]
)
+(define_insn_and_split "*combine_bfi2<GPI:mode><SHORT:mode>"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "n"))
+ (match_operand 3 "const_int_operand" "n"))
+ (zero_extend:GPI (match_operand:SHORT 4 "register_operand" "0"))))]
+ "exact_log2 ((INTVAL (operands[3]) >> INTVAL (operands[2])) + 1) >= 0
+ && <SHORT:sizen> <= INTVAL (operands[2])"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:GPI (match_dup 4)))
+ (set (zero_extract:GPI (match_dup 0)
+ (match_dup 3)
+ (match_dup 2))
+ (match_dup 1))]
+ {
+ int tmp = (INTVAL (operands[3]) >> INTVAL (operands[2])) + 1;
+ operands[3] = GEN_INT (exact_log2 (tmp));
+ }
+ [(set_attr "type" "multiple")]
+)
+
+(define_insn_and_split "*combine_bfi3<mode>"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+ (match_operand 2 "aarch64_logical_immediate" "n"))
+ (and:GPI (ashift:GPI (match_operand:GPI 3 "register_operand" "r")
+ (match_operand 4 "const_int_operand" "n"))
+ (match_operand 5 "const_int_operand" "n"))))]
+ "exact_log2 ((INTVAL (operands[5]) >> INTVAL (operands[4])) + 1) >= 0
+ && (INTVAL (operands[2]) & INTVAL (operands[5])) == 0"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (and:GPI (match_dup 1) (match_dup 2)))
+ (set (zero_extract:GPI (match_dup 0)
+ (match_dup 5)
+ (match_dup 4))
+ (match_dup 3))]
+ {
+ int tmp = (INTVAL (operands[5]) >> INTVAL (operands[4])) + 1;
+ operands[5] = GEN_INT (exact_log2 (tmp));
+ }
+ [(set_attr "type" "multiple")]
+)
+
(define_insn "*extr_insv_lower_reg<mode>"
[(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
(match_operand 1 "const_int_operand" "n")
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_and_orr_1.c b/gcc/testsuite/gcc.target/aarch64/combine_and_orr_1.c
new file mode 100644
index 0000000..b2c0194
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/combine_and_orr_1.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2" } */
+
+extern void abort (void);
+
+unsigned int __attribute__ ((noinline))
+foo1 (unsigned int major, unsigned int minor)
+{
+ unsigned int tmp = (minor & 0xff) | ((major & 0xfff) << 8);
+ return tmp;
+}
+
+unsigned int __attribute__ ((noinline))
+foo2 (unsigned int major, unsigned int minor)
+{
+ unsigned int tmp = (minor & 0x1f) | ((major & 0xfff) << 8);
+ return tmp;
+}
+
+int
+main (void)
+{
+ unsigned int major[10] = {1947662, 484254, 193508, 4219233, 2211215,
+ 3998162, 4240676, 1034099, 54412, 3195572};
+ unsigned int minor[10] = {1027568, 21481, 2746675, 3121857, 2471080,
+ 3158801, 237587, 813307, 4073168, 1503494};
+
+ unsigned int result1[10] = {528112, 237289, 255027, 90561, 888744,
+ 119313, 336915, 488443, 298192, 177158};
+ unsigned int result2[10] = {527888, 237065, 254995, 90369, 888584,
+ 119313, 336915, 488219, 298000, 177158};
+
+ unsigned int index = 0;
+ unsigned result = 0;
+ for (index; index < 10; ++index)
+ {
+
+ result = foo1 (major[index], minor[index]);
+ if (result != result1[index])
+ abort ();
+
+ result = foo2 (major[index], minor[index]);
+ if (result != result2[index])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "bfi\tw\[0-9\]+, w\[0-9\]+, 8|5, 12" 2 } } */
+/* { dg-final { cleanup-saved-temps } } */