Hi!

expand_unop already special-cases several bitop builtins (strangely, e.g.,
only clz and not ctz, which could be handled in pretty much the same way
apart from comparing the other subreg), but it does not handle
popcount/parity this way.

The popcount of a double-word value can be computed as
popcount (hi) + popcount (lo), and the parity of a double-word value can be
computed as parity (hi ^ lo).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2015-12-02  Jakub Jelinek  <ja...@redhat.com>

        PR target/68647
        * optabs.c (expand_doubleword_popcount, expand_doubleword_parity):
        New functions.
        (expand_unop): Use them.

        * gcc.target/i386/pr68647.c: New test.

--- gcc/optabs.c.jj     2015-11-27 10:01:03.000000000 +0100
+++ gcc/optabs.c        2015-12-02 09:50:46.774907703 +0100
@@ -2223,6 +2223,58 @@ expand_doubleword_clz (machine_mode mode
   return 0;
 }
 
+/* Try calculating popcount of the double-word MODE value OP0 as the sum of
+   the word_mode popcounts of its two words.  Return NULL_RTX if one fails.  */
+static rtx
+expand_doubleword_popcount (machine_mode mode, rtx op0, rtx target)
+{
+  rtx t0, t1, t;
+  rtx_insn *seq;
+
+  start_sequence ();
+  /* Expand a word_mode popcount of subword 0 and of subword 1 of OP0.  */
+  t0 = expand_unop_direct (word_mode, popcount_optab,
+                          operand_subword_force (op0, 0, mode), NULL_RTX,
+                          true);
+  t1 = expand_unop_direct (word_mode, popcount_optab,
+                          operand_subword_force (op0, 1, mode), NULL_RTX,
+                          true);
+  if (!t0 || !t1)
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+
+  /* If we were not given a target, use a word_mode register, not a
+     'mode' register.  The result will fit, and nobody is expecting
+     anything bigger (the return type of __builtin_popcount* is int).  */
+  if (!target)
+    target = gen_reg_rtx (word_mode);
+
+  t = expand_binop (word_mode, add_optab, t0, t1, target, 0, OPTAB_DIRECT);
+
+  seq = get_insns ();
+  end_sequence ();
+
+  add_equal_note (seq, t, POPCOUNT, op0, 0);
+  emit_insn (seq);
+  return t;
+}
+
+/* Try calculating (parity:wide x) for the double-word MODE value OP0 as
+       (parity:narrow (low (x) ^ high (x)))
+   i.e. XOR subwords 0 and 1 of OP0 in word_mode, then expand a word_mode
+   parity of the result, passing TARGET through to expand_unop.  */
+static rtx
+expand_doubleword_parity (machine_mode mode, rtx op0, rtx target)
+{
+  rtx t = expand_binop (word_mode, xor_optab,
+                       operand_subword_force (op0, 0, mode),
+                       operand_subword_force (op0, 1, mode),
+                       NULL_RTX, 0, OPTAB_DIRECT);
+  return expand_unop (word_mode, parity_optab, t, target, true);
+}
+
 /* Try calculating
        (bswap:narrow x)
    as
@@ -2582,7 +2634,7 @@ expand_absneg_bit (enum rtx_code code, m
    different mode or with a libcall.  */
 static rtx
 expand_unop_direct (machine_mode mode, optab unoptab, rtx op0, rtx target,
-            int unsignedp)
+                   int unsignedp)
 {
   if (optab_handler (unoptab, mode) != CODE_FOR_nothing)
     {
@@ -2665,6 +2717,27 @@ expand_unop (machine_mode mode, optab un
       goto try_libcall;
     }
 
+  if (unoptab == popcount_optab
+      && GET_MODE_SIZE (mode) == 2 * UNITS_PER_WORD
+      && optab_handler (unoptab, word_mode) != CODE_FOR_nothing
+      && optimize_insn_for_speed_p ())
+    {
+      temp = expand_doubleword_popcount (mode, op0, target);
+      if (temp)
+       return temp;
+    }
+
+  if (unoptab == parity_optab
+      && GET_MODE_SIZE (mode) == 2 * UNITS_PER_WORD
+      && (optab_handler (unoptab, word_mode) != CODE_FOR_nothing
+         || optab_handler (popcount_optab, word_mode) != CODE_FOR_nothing)
+      && optimize_insn_for_speed_p ())
+    {
+      temp = expand_doubleword_parity (mode, op0, target);
+      if (temp)
+       return temp;
+    }
+
   /* Widening (or narrowing) bswap needs special treatment.  */
   if (unoptab == bswap_optab)
     {
--- gcc/testsuite/gcc.target/i386/pr68647.c.jj  2015-12-02 10:01:25.621671528 +0100
+++ gcc/testsuite/gcc.target/i386/pr68647.c     2015-12-02 10:01:56.451226044 +0100
@@ -0,0 +1,18 @@
+/* PR target/68647 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mpopcnt" } */
+
+int
+f1 (unsigned long long a)
+{
+  return __builtin_popcountll (a);  /* dg-final: no __popcountdi2 in asm.  */
+}
+
+int
+f2 (unsigned long long a)
+{
+  return __builtin_parityll (a);  /* dg-final: no __paritydi2 in asm.  */
+}
+
+/* { dg-final { scan-assembler-not "__popcountdi2" } } */
+/* { dg-final { scan-assembler-not "__paritydi2" } } */

        Jakub

Reply via email to