> -----Original Message-----
> From: Wilco Dijkstra <[email protected]>
> Sent: 12 November 2025 17:03
> To: GCC Patches <[email protected]>
> Cc: Alex Coplan <[email protected]>; Alice Carlotti
> <[email protected]>; Andrew Pinski
> <[email protected]>; Kyrylo Tkachov
> <[email protected]>; Tamar Christina <[email protected]>
> Subject: [PATCH] AArch64: Improve ctz and ffs
> 
> 
> Use the ctz insn in the ffs expansion so it uses ctz if CSSC
> is available. Rather than splitting, keep ctz as a single
> insn for simplicity and possible fusion opportunities.
> Move clz, ctz, clrsb, rbit and ffs instructions together.
> 
> Passes regress, OK for commit?
> 
> gcc:
>       * config/aarch64/aarch64.md (ffs<mode>2): Use gen_ctz.
>       (ctz<mode>2): Model ctz as a single target instruction.
> 
> testsuite:
>       * gcc.target/aarch64/ffs.c: Improve test.
> 
> ---
> 
> diff --git a/gcc/config/aarch64/aarch64.md
> b/gcc/config/aarch64/aarch64.md
> index
> 98c65a74c8ed113dcfa1601fc18393c203b04c1c..245d80a35d4138de15a55
> f5f5e4f55caf15c6149 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -5687,6 +5687,8 @@ (define_insn
> "*and_one_cmpl_<SHIFT:optab><mode>3_compare0_no_reuse"
>    [(set_attr "type" "logics_shift_imm")]
>  )
> 
> +;; CLZ, CTZ, CLS, RBIT instructions.
> +
>  (define_insn "clz<mode>2"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>       (clz:GPI (match_operand:GPI 1 "register_operand" "r")))]
> @@ -5695,6 +5697,40 @@ (define_insn "clz<mode>2"
>    [(set_attr "type" "clz")]
>  )
> 
> +;; Model ctz as a target instruction.
> +;; If TARGET_CSSC is not available, emit rbit and clz.
> +
> +(define_insn "ctz<mode>2"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +     (ctz:GPI (match_operand:GPI 1 "register_operand" "r")))]
> +  ""
> +  {
> +    if (TARGET_CSSC)
> +      return "ctz\\t%<w>0, %<w>1";
> +    return "rbit\\t%<w>0, %<w>1\;clz\\t%<w>0, %<w>0";
> +  }
> +  [(set_attr "type" "clz")
> +   (set (attr "length") (if_then_else (match_test "TARGET_CSSC")
> +                                   (const_int 4) (const_int 8)))
> +  ]
> +)

OK,

Note that if you wanted to clean this up more, you can add "cssc" and
TARGET_CSSC to

(define_attr "arch_enabled" "no,yes"
  (if_then_else
    (and

In aarch64.md

And then you can remote the runtime checks from the patterns.

But since this mess already existed before your patch it's OK
without this cleanup.  But would appreciate if it you did do it.

If so that cleanup patch is pre-approved.

Thanks,
Tamar

> +
> +(define_insn "clrsb<mode>2"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +     (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
> +  ""
> +  "cls\\t%<w>0, %<w>1"
> +  [(set_attr "type" "clz")]
> +)
> +
> +(define_insn "@aarch64_rbit<mode>"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +     (bitreverse:GPI (match_operand:GPI 1 "register_operand" "r")))]
> +  ""
> +  "rbit\\t%<w>0, %<w>1"
> +  [(set_attr "type" "rbit")]
> +)
> +
>  (define_expand "ffs<mode>2"
>    [(match_operand:GPI 0 "register_operand")
>     (match_operand:GPI 1 "register_operand")]
> @@ -5702,9 +5738,7 @@ (define_expand "ffs<mode>2"
>    {
>      rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx);
>      rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
> -
> -    emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1]));
> -    emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
> +    emit_insn (gen_ctz<mode>2 (operands[0], operands[1]));
>      emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0],
> const0_rtx));
>      DONE;
>    }
> @@ -5799,40 +5833,6 @@ (define_expand "popcountti2"
>    DONE;
>  })
> 
> -(define_insn "clrsb<mode>2"
> -  [(set (match_operand:GPI 0 "register_operand" "=r")
> -        (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
> -  ""
> -  "cls\\t%<w>0, %<w>1"
> -  [(set_attr "type" "clz")]
> -)
> -
> -(define_insn "@aarch64_rbit<mode>"
> -  [(set (match_operand:GPI 0 "register_operand" "=r")
> -     (bitreverse:GPI (match_operand:GPI 1 "register_operand" "r")))]
> -  ""
> -  "rbit\\t%<w>0, %<w>1"
> -  [(set_attr "type" "rbit")]
> -)
> -
> -;; Split after reload into RBIT + CLZ.  Since RBIT is represented as an 
> UNSPEC
> -;; it is unlikely to fold with any other operation, so keep this as a CTZ
> -;; expression and split after reload to enable scheduling them apart if
> -;; needed.  For TARGET_CSSC we have a single CTZ instruction that can do 
> this.
> -
> -(define_insn_and_split "ctz<mode>2"
> - [(set (match_operand:GPI           0 "register_operand" "=r")
> -       (ctz:GPI (match_operand:GPI  1 "register_operand" "r")))]
> -  ""
> -  { return TARGET_CSSC ? "ctz\\t%<w>0, %<w>1" : "#"; }
> -  "reload_completed && !TARGET_CSSC"
> -  [(const_int 0)]
> -  "
> -  emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1]));
> -  emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
> -  DONE;
> -")
> -
>  (define_insn "*and<mode>_compare0"
>    [(set (reg:CC_Z CC_REGNUM)
>       (compare:CC_Z
> diff --git a/gcc/testsuite/gcc.target/aarch64/ffs.c
> b/gcc/testsuite/gcc.target/aarch64/ffs.c
> index
> a3447619d235adda6800e7c197d181ab9615a10e..a303bee5fd47df82bd82
> e83e5b8b0a0c62103b30 100644
> --- a/gcc/testsuite/gcc.target/aarch64/ffs.c
> +++ b/gcc/testsuite/gcc.target/aarch64/ffs.c
> @@ -1,12 +1,63 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2" } */
> +/* { dg-additional-options "--save-temps -O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> 
> -unsigned int functest(unsigned int x)
> +#include <stdint.h>
> +
> +#pragma GCC target "+nocssc"
> +
> +/*
> +** ffsw1:
> +**   cmp     w1, 0
> +**   rbit    w0, w1
> +**   clz     w0, w0
> +**   csinc   w0, wzr, w0, eq
> +**   ret
> +*/
> +
> +int ffsw1 (int y, uint32_t x)
> +{
> +  return __builtin_ffs (x);
> +}
> +
> +/*
> +** ffsx1:
> +**   cmp     x1, 0
> +**   rbit    x0, x1
> +**   clz     x0, x0
> +**   csinc   x0, xzr, x0, eq
> +**   ret
> +*/
> +
> +int ffsx1 (int y, uint64_t x)
>  {
> -  return __builtin_ffs(x);
> +  return __builtin_ffsll (x);
>  }
> 
> -/* { dg-final { scan-assembler "cmp\tw" } } */
> -/* { dg-final { scan-assembler "rbit\tw" } } */
> -/* { dg-final { scan-assembler "clz\tw" } } */
> -/* { dg-final { scan-assembler "csinc\tw" } } */
> +#pragma GCC target "+cssc"
> +
> +/*
> +** ffsw2:
> +**   cmp     w1, 0
> +**   ctz     w0, w1
> +**   csinc   w0, wzr, w0, eq
> +**   ret
> +*/
> +
> +int ffsw2 (int y, uint32_t x)
> +{
> +  return __builtin_ffs (x);
> +}
> +
> +/*
> +** ffsx2:
> +**   cmp     x1, 0
> +**   ctz     x0, x1
> +**   csinc   x0, xzr, x0, eq
> +**   ret
> +*/
> +
> +int ffsx2 (int y, uint64_t x)
> +{
> +  return __builtin_ffsll (x);
> +}

Reply via email to