Hi all,

This patch replaces the umov instruction in the aarch64 popcount expansion with
the less expensive fmov instruction.

Example:

int foo (int a) {
  return __builtin_popcount (a);
}

would generate:

foo:
  uxtw  x0, w0
  fmov  d0, x0
  cnt   v0.8b, v0.8b
  addv  b0, v0.8b
  umov  w0, v0.b[0]
  ret

but now generates:

foo:
  uxtw  x0, w0
  fmov  d0, x0
  cnt   v0.8b, v0.8b
  addv  b0, v0.8b
  fmov  w0, s0
  ret

Using __builtin_popcountl on a long generates

foo:
  fmov  d0, x0
  cnt   v0.8b, v0.8b
  addv  b0, v0.8b
  umov  w0, v0.b[0]
  ret

but with this patch generates:

foo:
  fmov  d0, x0
  cnt   v0.8b, v0.8b
  addv  b0, v0.8b
  fmov  w0, s0
  ret

Bootstrapped successfully and tested on aarch64-none-elf and 
aarch64_be-none-elf with no regressions.

OK for trunk?

gcc/
2018-10-22  Sam Tebbs<sam.te...@arm.com>

        * config/aarch64/aarch64.md (popcount<mode>2): Replaced zero_extend
        generation with move generation.

gcc/testsuite
2018-10-22  Sam Tebbs<sam.te...@arm.com>

        * gcc.target/aarch64/popcnt2.c: New file.

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index d7473418a8eb62b2757017cd1675493f86e41ef4..77e6f75cc15f06733df7b47906ee00580bea8d29 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4489,7 +4489,7 @@
   emit_move_insn (v, gen_lowpart (V8QImode, in));
   emit_insn (gen_popcountv8qi2 (v1, v));
   emit_insn (gen_reduc_plus_scal_v8qi (r, v1));
-  emit_insn (gen_zero_extendqi<mode>2 (out, r));
+  emit_move_insn (out, gen_lowpart_SUBREG (GET_MODE (out), r));
   DONE;
 })
 
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt2.c b/gcc/testsuite/gcc.target/aarch64/popcnt2.c
new file mode 100644
index 0000000000000000000000000000000000000000..9c595f09222c24eefb4b00e8823e4c02f6eaf3b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo0 (int a)
+{
+  return __builtin_popcount (a);
+}
+
+int
+foo1 (long a)
+{
+  return __builtin_popcountl (a);
+}
+
+/* { dg-final { scan-assembler-not "umov\\t" } } */
+/* { dg-final { scan-assembler-times "fmov\\t" 4 } } */

Reply via email to