https://gcc.gnu.org/g:46077992180d6d86c86544df5e8cb943492d3b01

commit r15-390-g46077992180d6d86c86544df5e8cb943492d3b01
Author: Roger Sayle <ro...@nextmovesoftware.com>
Date:   Sun May 12 16:27:22 2024 +0100

    arm: Use uxtb rN, rM, ror #8 to implement zero_extract on armv6.
    
    Examining the code generated for the following C snippet on a
    raspberry pi:
    
    int popcount_lut8(unsigned *buf, int n)
    {
      int cnt=0;
      unsigned int i;
      do {
        i = *buf;
        cnt += lut[i&255];
        cnt += lut[i>>8&255];
        cnt += lut[i>>16&255];
        cnt += lut[i>>24];
        buf++;
      } while(--n);
      return cnt;
    }
    
    I was surprised to see the following instruction sequence generated by
    the compiler:
    
      mov    r5, r2, lsr #8
      uxtb   r5, r5
    
    This sequence can be performed by a single ARM instruction:
    
      uxtb   r5, r2, ror #8
    
    The attached patch allows GCC's combine pass to take advantage of ARM's
    uxtb with rotate functionality to implement the above zero_extract, and
    likewise to use the sxtb with rotate to implement sign_extract.  ARM's
    uxtb and sxtb can only be used with rotates of 0, 8, 16 and 24, and of
    these only 8 and 16 are useful [ror #0 is a nop, and extends with
    ror #24 can be implemented using regular shifts], so the approach here
    is to add the six missing but useful instructions as six separate
    define_insn patterns in arm.md, rather than try to be clever with new
    predicates.
    
    Later ARM hardware has advanced bit field instructions, and earlier
    ARM cores didn't support extend-with-rotate, so this appears to only
    benefit armv6 era CPUs (e.g. the raspberry pi).
    
    Patch posted:
    https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01339.html
    Approved by Kyrill Tkachov:
    https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01881.html
    
    2024-05-12  Roger Sayle  <ro...@nextmovesoftware.com>
                Kyrill Tkachov  <kyrylo.tkac...@foss.arm.com>
    
            * config/arm/arm.md (*arm_zeroextractsi2_8_8, *arm_signextractsi2_8_8,
            *arm_zeroextractsi2_8_16, *arm_signextractsi2_8_16,
            *arm_zeroextractsi2_16_8, *arm_signextractsi2_16_8): New.
    
    2024-05-12  Roger Sayle  <ro...@nextmovesoftware.com>
                Kyrill Tkachov  <kyrylo.tkac...@foss.arm.com>
    
            * gcc.target/arm/extend-ror.c: New test.

Diff:
---
 gcc/config/arm/arm.md                     | 66 +++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/arm/extend-ror.c | 38 ++++++++++++++++++
 2 files changed, 104 insertions(+)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 1fd00146ca9e..f47e036a8034 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -12647,6 +12647,72 @@
 ""
 )
 
+;; Implement zero_extract using uxtb/uxth instruction with 
+;; the ror #N qualifier when applicable.
+
+(define_insn "*arm_zeroextractsi2_8_8"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+       (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r")
+                        (const_int 8) (const_int 8)))]
+  "TARGET_ARM && arm_arch6"
+  "uxtb%?\\t%0, %1, ror #8"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "extend")]
+)
+
+(define_insn "*arm_zeroextractsi2_8_16"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+       (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r")
+                        (const_int 8) (const_int 16)))]
+  "TARGET_ARM && arm_arch6"
+  "uxtb%?\\t%0, %1, ror #16"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "extend")]
+)
+
+(define_insn "*arm_zeroextractsi2_16_8"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+       (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r")
+                        (const_int 16) (const_int 8)))]
+  "TARGET_ARM && arm_arch6"
+  "uxth%?\\t%0, %1, ror #8"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "extend")]
+)
+
+;; Implement sign_extract using sxtb/sxth instruction with 
+;; the ror #N qualifier when applicable.
+
+(define_insn "*arm_signextractsi2_8_8"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+       (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
+                        (const_int 8) (const_int 8)))]
+  "TARGET_ARM && arm_arch6"
+  "sxtb%?\\t%0, %1, ror #8"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "extend")]
+)
+
+(define_insn "*arm_signextractsi2_8_16"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+       (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
+                        (const_int 8) (const_int 16)))]
+  "TARGET_ARM && arm_arch6"
+  "sxtb%?\\t%0, %1, ror #16"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "extend")]
+)
+
+(define_insn "*arm_signextractsi2_16_8"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+       (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
+                        (const_int 16) (const_int 8)))]
+  "TARGET_ARM && arm_arch6"
+  "sxth%?\\t%0, %1, ror #8"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "extend")]
+)
+
 ;; Patterns for LDRD/STRD in Thumb2 mode
 
 (define_insn "*thumb2_ldrd"
diff --git a/gcc/testsuite/gcc.target/arm/extend-ror.c b/gcc/testsuite/gcc.target/arm/extend-ror.c
new file mode 100644
index 000000000000..8b52a93e253a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/extend-ror.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-march=*" } { "-march=armv6" } } */
+/* { dg-require-effective-target arm_arm_ok } */
+/* { dg-add-options arm_arch_v6 } */
+/* { dg-additional-options "-O -marm" } */
+
+unsigned int zeroextractsi2_8_8(unsigned int x)
+{
+  return (unsigned char)(x>>8);
+}
+
+unsigned int zeroextractsi2_8_16(unsigned int x)
+{
+  return (unsigned char)(x>>16);
+}
+
+unsigned int signextractsi2_8_8(unsigned int x)
+{
+  return (int)(signed char)(x>>8);
+}
+
+unsigned int signextractsi2_8_16(unsigned int x)
+{
+  return (int)(signed char)(x>>16);
+}
+
+unsigned int zeroextractsi2_16_8(unsigned int x)
+{
+  return (unsigned short)(x>>8);
+}
+
+unsigned int signextractsi2_16_8(unsigned int x)
+{
+  return (int)(short)(x>>8);
+}
+
+/* { dg-final { scan-assembler-times ", ror #8" 4 } } */
+/* { dg-final { scan-assembler-times ", ror #16" 2 } } */

Reply via email to