Zbb support has introduced ctz and clz to the backend, but some transformations in GCC need to know the value that clz and ctz produce for a zero input. Whether that value is defined affects how the optab is generated, and leaving it undefined may suppress use of CLZ/CTZ in tree passes.
Among other things, this is needed for the transformation of table-based ctz-implementations, such as in deepsjeng, to work (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838). Prior to this change, the test case from PR90838 would compile to the following on RISC-V targets with Zbb: myctz: lui a4,%hi(.LC0) ld a4,%lo(.LC0)(a4) neg a5,a0 and a5,a5,a0 mul a5,a5,a4 lui a4,%hi(.LANCHOR0) addi a4,a4,%lo(.LANCHOR0) srli a5,a5,58 sh2add a5,a5,a4 lw a0,0(a5) ret After this change, we get: myctz: ctz a0,a0 andi a0,a0,63 ret Testing this with deepsjeng_r (from SPEC 2017) on QEMU shows a clear reduction in dynamic instruction count: - before 1961888067076 - after 1907928279874 (2.75% reduction) gcc/ChangeLog: * config/riscv/riscv.h (CLZ_DEFINED_VALUE_AT_ZERO): Implement. (CTZ_DEFINED_VALUE_AT_ZERO): Same. gcc/testsuite/ChangeLog: * gcc.dg/pr90838.c: Add additional flags (dg-additional-options) when compiling for riscv64. * gcc.target/riscv/zbb-ctz-32.c: New test. * gcc.target/riscv/zbb-ctz.c: New test. Signed-off-by: Philipp Tomsich <philipp.toms...@vrull.eu> Signed-off-by: Manolis Tsamis <manolis.tsa...@vrull.eu> Co-developed-by: Manolis Tsamis <manolis.tsa...@vrull.eu> --- gcc/config/riscv/riscv.h | 5 ++ gcc/testsuite/gcc.dg/pr90838.c | 2 + gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c | 65 ++++++++++++++++++++ gcc/testsuite/gcc.target/riscv/zbb-ctz.c | 66 +++++++++++++++++++++ 4 files changed, 138 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-ctz.c diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 4210e252255..95f72e2fd3f 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -1019,4 +1019,9 @@ extern void riscv_remove_unneeded_save_restore_calls (void); #define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, TO) +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = 
GET_MODE_UNIT_BITSIZE (MODE), 2) + #endif /* ! GCC_RISCV_H */ diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c index 41c5dab9a5c..162bd6f51d0 100644 --- a/gcc/testsuite/gcc.dg/pr90838.c +++ b/gcc/testsuite/gcc.dg/pr90838.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-forwprop2-details" } */ +/* { dg-additional-options "-march=rv64gc_zbb" { target riscv64*-*-* } } */ int ctz1 (unsigned x) { @@ -57,3 +58,4 @@ int ctz4 (unsigned long x) } /* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target riscv64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c new file mode 100644 index 00000000000..b903517197a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz-32.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ + +int ctz1 (unsigned x) +{ + static const char table[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; +} + +int ctz2 (unsigned x) +{ +#define u 0 + static short table[64] = + { + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u + }; + + x = (x & -x) * 0x0450FBAF; + return table[x >> 26]; +} + +int ctz3 (unsigned x) +{ + static int table[32] = + { + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 + }; + + if (x == 0) return 32; + x = (x & -x) * 0x04D7651F; + return table[x >> 27]; +} + +static const unsigned long long magic = 0x03f08c5392f756cdULL; + +static const char table[64] = { + 0, 1, 
12, 2, 13, 22, 17, 3, + 14, 33, 23, 36, 18, 58, 28, 4, + 62, 15, 34, 26, 24, 48, 50, 37, + 19, 55, 59, 52, 29, 44, 39, 5, + 63, 11, 21, 16, 32, 35, 57, 27, + 61, 25, 47, 49, 54, 51, 43, 38, + 10, 20, 31, 56, 60, 46, 53, 42, + 9, 30, 45, 41, 8, 40, 7, 6, +}; + +int ctz4 (unsigned long x) +{ + unsigned long lsb = x & -x; + return table[(lsb * magic) >> 58]; +} + +/* { dg-final { scan-assembler-times "ctz\t" 3 } } */ +/* { dg-final { scan-assembler-times "andi\t" 1 } } */ +/* { dg-final { scan-assembler-times "mul\t" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zbb-ctz.c b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c new file mode 100644 index 00000000000..f9fbcb38dee --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-ctz.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ + +int ctz1 (unsigned x) +{ + static const char table[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; +} + +int ctz2 (unsigned x) +{ +#define u 0 + static short table[64] = + { + 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14, + 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15, + 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26, + 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u + }; + + x = (x & -x) * 0x0450FBAF; + return table[x >> 26]; +} + +int ctz3 (unsigned x) +{ + static int table[32] = + { + 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26, + 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27 + }; + + if (x == 0) return 32; + x = (x & -x) * 0x04D7651F; + return table[x >> 27]; +} + +static const unsigned long long magic = 0x03f08c5392f756cdULL; + +static const char table[64] = { + 0, 1, 12, 2, 13, 22, 17, 3, + 14, 33, 23, 36, 18, 58, 28, 4, + 62, 15, 34, 26, 24, 48, 50, 37, + 19, 55, 59, 52, 29, 44, 39, 5, + 63, 11, 21, 16, 32, 35, 57, 27, + 61, 
25, 47, 49, 54, 51, 43, 38, + 10, 20, 31, 56, 60, 46, 53, 42, + 9, 30, 45, 41, 8, 40, 7, 6, +}; + +int ctz4 (unsigned long x) +{ + unsigned long lsb = x & -x; + return table[(lsb * magic) >> 58]; +} + +/* { dg-final { scan-assembler-times "ctzw\t" 3 } } */ +/* { dg-final { scan-assembler-times "ctz\t" 1 } } */ +/* { dg-final { scan-assembler-times "andi\t" 2 } } */ +/* { dg-final { scan-assembler-not "mul" } } */ -- 2.34.1