This patch adds the ability to spot opportunities to use the UBFX
instruction (bit-field extract) to perform AND operations with an immediate
of the form 2^N - 1.  On some benchmarks this can lead to a significant win
over the existing behaviour on Cortex-A15.

Tested on arm-eabi with some additional testing on arm-linux-gnueabi
platforms.

R.

2012-08-02  Richard Earnshaw  <rearn...@arm.com>

        * arm.c (arm_gen_constant): Use UBFX for some AND operations when
        available.
--- arm.c       (revision 190101)
+++ arm.c       (local)
@@ -2982,6 +2982,31 @@ arm_gen_constant (enum rtx_code code, en
       return 1;
     }
 
+  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
+     insn.  */
+  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
+      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
+    {
+      if (generate)
+       {
+         if (mode == SImode && i == 16)
+           /* Use UXTH in preference to UBFX, since on Thumb2 it's a 
+              smaller insn.  */
+           emit_constant_insn (cond,
+                               gen_zero_extendhisi2
+                               (target, gen_lowpart (HImode, source)));
+         else
+           /* Extz only supports SImode, but we can coerce the operands
+              into that mode.  */
+           emit_constant_insn (cond,
+                               gen_extzv_t2 (gen_lowpart (mode, target),
+                                             gen_lowpart (mode, source),
+                                             GEN_INT (i), const0_rtx));
+       }
+
+      return 1;
+    }
+
   /* Calculate a few attributes that may be useful for specific
      optimizations.  */
   /* Count number of leading zeros.  */

Reply via email to