Add simplifications for popcount (x) > 1 to (x & (x-1)) != 0 and popcount (x) == 1 into (x-1) <u (x & -x). These trigger only for single-use cases and support an optional convert. A microbenchmark shows a speedup of 2-2.5x on both x64 and AArch64.
Bootstrap OK, OK for commit? ChangeLog: 2019-08-13 Wilco Dijkstra <wdijk...@arm.com> gcc/ PR middle-end/90693 * match.pd: Add popcount simplifications. testsuite/ PR middle-end/90693 * gcc.dg/fold-popcount-5.c: Add new test. --- diff --git a/gcc/match.pd b/gcc/match.pd index 0317bc704f771f626ab72189b3a54de00087ad5a..bf4351a330f45f3a1424d9792cefc3da6267597d 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5356,7 +5356,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) rep (eq eq ne ne) (simplify (cmp (popcount @0) integer_zerop) - (rep @0 { build_zero_cst (TREE_TYPE (@0)); })))) + (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))) + /* popcount(X) == 1 -> (X-1) <u (X & -X). */ + (for cmp (eq ne) + rep (lt ge) + (simplify + (cmp (convert? (popcount:s @0)) integer_onep) + (with { + tree utype = unsigned_type_for (TREE_TYPE (@0)); + tree a0 = fold_convert (utype, @0); } + (rep (plus { a0; } { build_minus_one_cst (utype); }) + (bit_and (negate { a0; }) { a0; }))))) + /* popcount(X) > 1 -> (X & (X-1)) != 0. */ + (for cmp (gt le) + rep (ne eq) + (simplify + (cmp (convert? (popcount:s @0)) integer_onep) + (rep (bit_and (plus @0 { build_minus_one_cst (TREE_TYPE (@0)); }) @0) + { build_zero_cst (TREE_TYPE (@0)); })))) /* Simplify: diff --git a/gcc/testsuite/gcc.dg/fold-popcount-5.c b/gcc/testsuite/gcc.dg/fold-popcount-5.c new file mode 100644 index 0000000000000000000000000000000000000000..fcf3910587caacb8e39cf437dc3971df892f405a --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-popcount-5.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +/* Test popcount (x) > 1 -> (x & (x-1)) != 0. */ + +int test_1 (long x) +{ + return __builtin_popcountl (x) >= 2; +} + +int test_2 (int x) +{ + return (unsigned) __builtin_popcount (x) <= 1u; +} + +int test_3 (unsigned x) +{ + return __builtin_popcount (x) > 1u; +} + +int test_4 (unsigned long x) +{ + return (unsigned char) __builtin_popcountl (x) > 1; +} + +int test_5 (unsigned long x) +{ + return (signed char) __builtin_popcountl (x) <= (signed char)1; +} + +int test_6 (unsigned long long x) +{ + return 2u <= __builtin_popcountll (x); +} + +/* Test popcount (x) == 1 -> (x-1) <u (x & -x). */ + +int test_7 (unsigned long x) +{ + return __builtin_popcountl (x) != 1; +} + +int test_8 (long long x) +{ + return (unsigned) __builtin_popcountll (x) == 1u; +} + +int test_9 (int x) +{ + return (unsigned char) __builtin_popcount (x) != 1u; +} + +int test_10 (unsigned x) +{ + return (unsigned char) __builtin_popcount (x) == 1; +} + +int test_11 (long x) +{ + return (signed char) __builtin_popcountl (x) == 1; +} + +int test_12 (long x) +{ + return 1u == __builtin_popcountl (x); +} + +/* { dg-final { scan-tree-dump-times "popcount" 0 "optimized" } } */ +