Hi all,

This patch replaces the umov instruction in the AArch64 popcount expansion with the less expensive fmov instruction.
Example:

    int foo (int a) { return __builtin_popcount (a); }

previously generated:

    foo:
        uxtw    x0, w0
        fmov    d0, x0
        cnt     v0.8b, v0.8b
        addv    b0, v0.8b
        umov    w0, v0.b[0]
        ret

but now generates:

    foo:
        uxtw    x0, w0
        fmov    d0, x0
        cnt     v0.8b, v0.8b
        addv    b0, v0.8b
        fmov    w0, s0
        ret

Using __builtin_popcountl on a long previously generated:

    foo:
        fmov    d0, x0
        cnt     v0.8b, v0.8b
        addv    b0, v0.8b
        umov    w0, v0.b[0]
        ret

but with this patch generates:

    foo:
        fmov    d0, x0
        cnt     v0.8b, v0.8b
        addv    b0, v0.8b
        fmov    w0, s0
        ret

Bootstrapped successfully and tested on aarch64-none-elf and aarch64_be-none-elf with no regressions.

OK for trunk?

gcc/
2018-10-22  Sam Tebbs  <sam.te...@arm.com>

    * config/aarch64/aarch64.md (popcount<mode>2): Replace zero_extend generation with move generation.

gcc/testsuite
2018-10-22  Sam Tebbs  <sam.te...@arm.com>

    * gcc.target/aarch64/popcnt2.c: New file.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index d7473418a8eb62b2757017cd1675493f86e41ef4..77e6f75cc15f06733df7b47906ee00580bea8d29 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4489,7 +4489,7 @@ emit_move_insn (v, gen_lowpart (V8QImode, in)); emit_insn (gen_popcountv8qi2 (v1, v)); emit_insn (gen_reduc_plus_scal_v8qi (r, v1)); - emit_insn (gen_zero_extendqi<mode>2 (out, r)); + emit_move_insn (out, gen_lowpart_SUBREG (GET_MODE (out), r)); DONE; }) diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt2.c b/gcc/testsuite/gcc.target/aarch64/popcnt2.c new file mode 100644 index 0000000000000000000000000000000000000000..9c595f09222c24eefb4b00e8823e4c02f6eaf3b9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/popcnt2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo0 (int a) +{ + return __builtin_popcount (a); +} + +int +foo1 (long a) +{ + return __builtin_popcountl (a); +} + +/* { dg-final { scan-assembler-not "umov\\t" } } */ +/* { dg-final { scan-assembler-times "fmov\\t" 4 } } */