optabs.c:expand_unop_direct can expand a popcount builtin without a call
under certain conditions even without a popcount pattern of the required
data width:
if (unoptab == popcount_optab
&& is_a <scalar_int_mode> (mode, &int_mode)
&& GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
&& optab_handler (unoptab, word_mode) != CODE_FOR_nothing
&& optimize_insn_for_speed_p ())
{
temp = expand_doubleword_popcount (int_mode, op0, target);
if (temp)
return temp;
}
However, the match.pd recognition of popcount arithmetic using & / + is
tied to having an exactly matching operation. This causes a failure for
gcc.dg/tree-ssa/popcount4l.c for 16-bit targets that have a 16 bit
popcount operation (and no wider).
Likewise, the failure to recognize a 64 bit popcount on a 32 bit target
that has a 32 bit popcount can be rectified by synthesizing the wide
popcount operation from two narrower popcount operations.
The attached patch implements this.
2020-07-30 Joern Rennecke <[email protected]>
gcc:
* gimple-match-head.c (langhooks.h): Include.
* match.pd <popcount & / + pattern matching>:
When generating popcount directly fails, try doing it in two halves.
testsuite:
* gcc.dg/tree-ssa/popcount4ll.c: Remove lp64 condition.
Adjust scanning pattern for !lp64.
* gcc.dg/tree-ssa/popcount5ll.c: Likewise.
* gcc.dg/tree-ssa/popcount4l.c: Adjust scanning pattern
for ! int32plus.
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index d941b8b..e3342e3 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see
#include "optabs-tree.h"
#include "tree-eh.h"
#include "dbgcnt.h"
+#include "langhooks.h"
/* Forward declarations of the private auto-generated matchers.
They expect valueized operands in canonical order and do not
diff --git a/gcc/match.pd b/gcc/match.pd
index 17c35ee4..fa2e93e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6437,10 +6437,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& tree_to_uhwi (@3) == c2
&& tree_to_uhwi (@9) == c3
&& tree_to_uhwi (@7) == c3
- && tree_to_uhwi (@11) == c4
- && direct_internal_fn_supported_p (IFN_POPCOUNT, type,
- OPTIMIZE_FOR_BOTH))
- (convert (IFN_POPCOUNT:type @0)))))
+ && tree_to_uhwi (@11) == c4)
+ (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
+ OPTIMIZE_FOR_BOTH))
+ (convert (IFN_POPCOUNT:type @0))
+ /* Try to do popcount in two halves. PREC must be even, and at least
+ six bits for this to work without extension before adding.
+ If popcount is available, it should probably be available for
+ BITS_PER_WORD, so don't bother with smaller halves. */
+ (with { tree half_type = (prec <= BITS_PER_WORD || (prec & 1) ? NULL_TREE
+ : lang_hooks.types.type_for_size (prec/2, 1));
+ gcc_assert (prec > 2 || half_type == NULL_TREE);
+ }
+ (if (half_type != NULL_TREE
+ && direct_internal_fn_supported_p (IFN_POPCOUNT, half_type,
+ OPTIMIZE_FOR_BOTH))
+ (convert (plus
+ (IFN_POPCOUNT:half_type (convert @0))
+ (IFN_POPCOUNT:half_type (convert (rshift @0
+ { wide_int_to_tree (half_type, prec/2); } )))))))))))
/* __builtin_ffs needs to deal on many targets with the possible zero
argument. If we know the argument is always non-zero, __builtin_ctz + 1
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
index 69fb2d1..269e56e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
@@ -25,6 +25,7 @@ int popcount64c(unsigned long x)
return (x * h01) >> shift;
}
-/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target int32plus } } } */
+/* { dg-final { scan-tree-dump "\.POPCOUNT" "optimized" { target { ! int32plus } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
index c1588be..7abadf6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { lp64 } } } */
+/* { dg-do compile } */
/* { dg-require-effective-target popcountll } */
/* { dg-options "-O2 -fdump-tree-optimized" } */
@@ -16,4 +16,5 @@ int popcount64c(unsigned long long x)
return (x * h01) >> shift;
}
-/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target { lp64 } } } } */
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 2 "optimized" { target { ! lp64 } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c
index edb191b..2afe081 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c
@@ -1,5 +1,5 @@
/* PR tree-optimization/94800 */
-/* { dg-do compile { target { lp64 } } } */
+/* { dg-do compile } */
/* { dg-require-effective-target popcountll } */
/* { dg-options "-O2 -fdump-tree-optimized" } */
@@ -19,4 +19,5 @@ int popcount64c(unsigned long long x)
return x >> shift;
}
-/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target { lp64 } } } } */
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 2 "optimized" { target { ! lp64 } } } } */