https://gcc.gnu.org/g:7fdfeb27c6945c98cff3edf399c855c6df44fa1e
commit r16-5676-g7fdfeb27c6945c98cff3edf399c855c6df44fa1e
Author: Wilco Dijkstra <[email protected]>
Date: Thu Nov 6 20:49:22 2025 +0000

    AArch64: Improve ctz and ffs

    Use the ctz insn in the ffs expansion so it uses ctz if CSSC is available.
    Rather than splitting, keep ctz as a single insn for simplicity and possible
    fusion opportunities. Move clz, ctz, clrsb, rbit and ffs instructions together.

    gcc:
            * config/aarch64/aarch64.md (ffs<mode>2): Use gen_ctz.
            (ctz<mode>2): Model ctz as a single target instruction.

    gcc/testsuite:
            * gcc.target/aarch64/ffs.c: Improve test.

Diff:
---
 gcc/config/aarch64/aarch64.md          | 74 +++++++++++++++++-----------------
 gcc/testsuite/gcc.target/aarch64/ffs.c | 65 +++++++++++++++++++++++++----
 2 files changed, 95 insertions(+), 44 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 8dcb5e3f0ecb..f62247f3e39c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5697,6 +5697,8 @@
   [(set_attr "type" "logics_shift_imm")]
 )
 
+;; CLZ, CTZ, CLS, RBIT instructions.
+
 (define_insn "clz<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (clz:GPI (match_operand:GPI 1 "register_operand" "r")))]
@@ -5705,6 +5707,40 @@
   [(set_attr "type" "clz")]
 )
 
+;; Model ctz as a target instruction.
+;; If TARGET_CSSC is not available, emit rbit and clz.
+
+(define_insn "ctz<mode>2"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ctz:GPI (match_operand:GPI 1 "register_operand" "r")))]
+  ""
+  {
+    if (TARGET_CSSC)
+      return "ctz\\t%<w>0, %<w>1";
+    return "rbit\\t%<w>0, %<w>1\;clz\\t%<w>0, %<w>0";
+  }
+  [(set_attr "type" "clz")
+   (set (attr "length") (if_then_else (match_test "TARGET_CSSC")
+                                      (const_int 4) (const_int 8)))
+  ]
+)
+
+(define_insn "clrsb<mode>2"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
+  ""
+  "cls\\t%<w>0, %<w>1"
+  [(set_attr "type" "clz")]
+)
+
+(define_insn "@aarch64_rbit<mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (bitreverse:GPI (match_operand:GPI 1 "register_operand" "r")))]
+  ""
+  "rbit\\t%<w>0, %<w>1"
+  [(set_attr "type" "rbit")]
+)
+
 (define_expand "ffs<mode>2"
   [(match_operand:GPI 0 "register_operand")
    (match_operand:GPI 1 "register_operand")]
@@ -5712,9 +5748,7 @@
   {
     rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx);
     rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
-
-    emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1]));
-    emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
+    emit_insn (gen_ctz<mode>2 (operands[0], operands[1]));
     emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx));
     DONE;
   }
@@ -5809,40 +5843,6 @@
   DONE;
 })
 
-(define_insn "clrsb<mode>2"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-        (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
-  ""
-  "cls\\t%<w>0, %<w>1"
-  [(set_attr "type" "clz")]
-)
-
-(define_insn "@aarch64_rbit<mode>"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-        (bitreverse:GPI (match_operand:GPI 1 "register_operand" "r")))]
-  ""
-  "rbit\\t%<w>0, %<w>1"
-  [(set_attr "type" "rbit")]
-)
-
-;; Split after reload into RBIT + CLZ. Since RBIT is represented as an UNSPEC
-;; it is unlikely to fold with any other operation, so keep this as a CTZ
-;; expression and split after reload to enable scheduling them apart if
-;; needed. For TARGET_CSSC we have a single CTZ instruction that can do this.
-
-(define_insn_and_split "ctz<mode>2"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-        (ctz:GPI (match_operand:GPI 1 "register_operand" "r")))]
-  ""
-  { return TARGET_CSSC ? "ctz\\t%<w>0, %<w>1" : "#"; }
-  "reload_completed && !TARGET_CSSC"
-  [(const_int 0)]
-  "
-  emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1]));
-  emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
-  DONE;
-")
-
 (define_insn "*and<mode>_compare0"
   [(set (reg:CC_Z CC_REGNUM)
         (compare:CC_Z
diff --git a/gcc/testsuite/gcc.target/aarch64/ffs.c b/gcc/testsuite/gcc.target/aarch64/ffs.c
index a3447619d235..a303bee5fd47 100644
--- a/gcc/testsuite/gcc.target/aarch64/ffs.c
+++ b/gcc/testsuite/gcc.target/aarch64/ffs.c
@@ -1,12 +1,63 @@
 /* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-additional-options "--save-temps -O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
-unsigned int functest(unsigned int x)
+#include <stdint.h>
+
+#pragma GCC target "+nocssc"
+
+/*
+** ffsw1:
+**	cmp	w1, 0
+**	rbit	w0, w1
+**	clz	w0, w0
+**	csinc	w0, wzr, w0, eq
+**	ret
+*/
+
+int ffsw1 (int y, uint32_t x)
+{
+  return __builtin_ffs (x);
+}
+
+/*
+** ffsx1:
+**	cmp	x1, 0
+**	rbit	x0, x1
+**	clz	x0, x0
+**	csinc	x0, xzr, x0, eq
+**	ret
+*/
+
+int ffsx1 (int y, uint64_t x)
 {
-  return __builtin_ffs(x);
+  return __builtin_ffsll (x);
 }
 
-/* { dg-final { scan-assembler "cmp\tw" } } */
-/* { dg-final { scan-assembler "rbit\tw" } } */
-/* { dg-final { scan-assembler "clz\tw" } } */
-/* { dg-final { scan-assembler "csinc\tw" } } */
+#pragma GCC target "+cssc"
+
+/*
+** ffsw2:
+**	cmp	w1, 0
+**	ctz	w0, w1
+**	csinc	w0, wzr, w0, eq
+**	ret
+*/
+
+int ffsw2 (int y, uint32_t x)
+{
+  return __builtin_ffs (x);
+}
+
+/*
+** ffsx2:
+**	cmp	x1, 0
+**	ctz	x0, x1
+**	csinc	x0, xzr, x0, eq
+**	ret
+*/
+
+int ffsx2 (int y, uint64_t x)
+{
+  return __builtin_ffsll (x);
+}
