https://gcc.gnu.org/g:7fdfeb27c6945c98cff3edf399c855c6df44fa1e

commit r16-5676-g7fdfeb27c6945c98cff3edf399c855c6df44fa1e
Author: Wilco Dijkstra <[email protected]>
Date:   Thu Nov 6 20:49:22 2025 +0000

    AArch64: Improve ctz and ffs
    
    Use the ctz insn in the ffs expansion so it uses ctz if CSSC
    is available. Rather than splitting, keep ctz as a single
    insn for simplicity and possible fusion opportunities.
    Move clz, ctz, clrsb, rbit and ffs instructions together.
    
    gcc:
            * config/aarch64/aarch64.md (ffs<mode>2): Use gen_ctz.
            (ctz<mode>2): Model ctz as a single target instruction.
    
    gcc/testsuite:
            * gcc.target/aarch64/ffs.c: Improve test.

Diff:
---
 gcc/config/aarch64/aarch64.md          | 74 +++++++++++++++++-----------------
 gcc/testsuite/gcc.target/aarch64/ffs.c | 65 +++++++++++++++++++++++++----
 2 files changed, 95 insertions(+), 44 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 8dcb5e3f0ecb..f62247f3e39c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5697,6 +5697,8 @@
   [(set_attr "type" "logics_shift_imm")]
 )
 
+;; CLZ, CTZ, CLS, RBIT instructions.
+
 (define_insn "clz<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
        (clz:GPI (match_operand:GPI 1 "register_operand" "r")))]
@@ -5705,6 +5707,40 @@
   [(set_attr "type" "clz")]
 )
 
+;; Model ctz as a target instruction.
+;; If TARGET_CSSC is not available, emit rbit and clz.
+
+(define_insn "ctz<mode>2"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (ctz:GPI (match_operand:GPI 1 "register_operand" "r")))]
+  ""
+  {
+    if (TARGET_CSSC)
+      return "ctz\\t%<w>0, %<w>1";
+    return "rbit\\t%<w>0, %<w>1\;clz\\t%<w>0, %<w>0";
+  }
+  [(set_attr "type" "clz")
+   (set (attr "length") (if_then_else (match_test "TARGET_CSSC")
+                                     (const_int 4) (const_int 8)))
+  ]
+)
+
+(define_insn "clrsb<mode>2"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
+  ""
+  "cls\\t%<w>0, %<w>1"
+  [(set_attr "type" "clz")]
+)
+
+(define_insn "@aarch64_rbit<mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (bitreverse:GPI (match_operand:GPI 1 "register_operand" "r")))]
+  ""
+  "rbit\\t%<w>0, %<w>1"
+  [(set_attr "type" "rbit")]
+)
+
 (define_expand "ffs<mode>2"
   [(match_operand:GPI 0 "register_operand")
    (match_operand:GPI 1 "register_operand")]
@@ -5712,9 +5748,7 @@
   {
     rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx);
     rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
-
-    emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1]));
-    emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
+    emit_insn (gen_ctz<mode>2 (operands[0], operands[1]));
     emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx));
     DONE;
   }
@@ -5809,40 +5843,6 @@
   DONE;
 })
 
-(define_insn "clrsb<mode>2"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-        (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
-  ""
-  "cls\\t%<w>0, %<w>1"
-  [(set_attr "type" "clz")]
-)
-
-(define_insn "@aarch64_rbit<mode>"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-       (bitreverse:GPI (match_operand:GPI 1 "register_operand" "r")))]
-  ""
-  "rbit\\t%<w>0, %<w>1"
-  [(set_attr "type" "rbit")]
-)
-
-;; Split after reload into RBIT + CLZ.  Since RBIT is represented as an UNSPEC
-;; it is unlikely to fold with any other operation, so keep this as a CTZ
-;; expression and split after reload to enable scheduling them apart if
-;; needed.  For TARGET_CSSC we have a single CTZ instruction that can do this.
-
-(define_insn_and_split "ctz<mode>2"
- [(set (match_operand:GPI           0 "register_operand" "=r")
-       (ctz:GPI (match_operand:GPI  1 "register_operand" "r")))]
-  ""
-  { return TARGET_CSSC ? "ctz\\t%<w>0, %<w>1" : "#"; }
-  "reload_completed && !TARGET_CSSC"
-  [(const_int 0)]
-  "
-  emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1]));
-  emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
-  DONE;
-")
-
 (define_insn "*and<mode>_compare0"
   [(set (reg:CC_Z CC_REGNUM)
        (compare:CC_Z
diff --git a/gcc/testsuite/gcc.target/aarch64/ffs.c b/gcc/testsuite/gcc.target/aarch64/ffs.c
index a3447619d235..a303bee5fd47 100644
--- a/gcc/testsuite/gcc.target/aarch64/ffs.c
+++ b/gcc/testsuite/gcc.target/aarch64/ffs.c
@@ -1,12 +1,63 @@
 /* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-additional-options "--save-temps -O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
-unsigned int functest(unsigned int x)
+#include <stdint.h>
+
+#pragma GCC target "+nocssc"
+
+/*
+** ffsw1:
+**     cmp     w1, 0
+**     rbit    w0, w1
+**     clz     w0, w0
+**     csinc   w0, wzr, w0, eq
+**     ret
+*/
+
+int ffsw1 (int y, uint32_t x)
+{
+  return __builtin_ffs (x);
+}
+
+/*
+** ffsx1:
+**     cmp     x1, 0
+**     rbit    x0, x1
+**     clz     x0, x0
+**     csinc   x0, xzr, x0, eq
+**     ret
+*/
+
+int ffsx1 (int y, uint64_t x)
 {
-  return __builtin_ffs(x);
+  return __builtin_ffsll (x);
 }
 
-/* { dg-final { scan-assembler "cmp\tw" } } */
-/* { dg-final { scan-assembler "rbit\tw" } } */
-/* { dg-final { scan-assembler "clz\tw" } } */
-/* { dg-final { scan-assembler "csinc\tw" } } */
+#pragma GCC target "+cssc"
+
+/*
+** ffsw2:
+**     cmp     w1, 0
+**     ctz     w0, w1
+**     csinc   w0, wzr, w0, eq
+**     ret
+*/
+
+int ffsw2 (int y, uint32_t x)
+{
+  return __builtin_ffs (x);
+}
+
+/*
+** ffsx2:
+**     cmp     x1, 0
+**     ctz     x0, x1
+**     csinc   x0, xzr, x0, eq
+**     ret
+*/
+
+int ffsx2 (int y, uint64_t x)
+{
+  return __builtin_ffsll (x);
+}

Reply via email to