Erm, ignore this - I just rediscovered the approval in a different mail folder. I forgot that Outlook's automatic email deduplication means that messages CC'd to me end up in one of two different folders at random when I want them in both.
On Mon, Jan 16, 2023 at 02:03:29PM +0000, Andrew Carlotti via Gcc-patches wrote: > Hi Richard > > I accidentally pushed this patch earlier in the mistaken belief that > you'd already approved it. It looks uncontroversial to me - it just adds > IFN support to build_popcount_expr, analogous to the changes you > suggested and approved for build_cltz_expr (and adjusts testcases > accordingly). I might have incorporated it into an earlier patch in this > series, if I hadn't already pushed that earlier patch. > > Is this OK to leave in master now? > > Thanks, > Andrew > > On Thu, Dec 22, 2022 at 05:43:21PM +0000, Andrew Carlotti via Gcc-patches > wrote: > > Bootstrapped and regression tested on aarch64-unknown-linux-gnu and > > x86_64-pc-linux-gnu - ok to merge? > > > > gcc/ChangeLog: > > > > * tree-ssa-loop-niter.cc (build_popcount_expr): Add IFN support. > > > > gcc/testsuite/ChangeLog: > > > > * g++.dg/tree-ssa/pr86544.C: Add .POPCOUNT to tree scan regex. > > * gcc.dg/tree-ssa/popcount.c: Likewise. > > * gcc.dg/tree-ssa/popcount2.c: Likewise. > > * gcc.dg/tree-ssa/popcount3.c: Likewise. > > * gcc.target/aarch64/popcount4.c: Likewise. > > * gcc.target/i386/pr95771.c: Likewise, and... > > * gcc.target/i386/pr95771-2.c: ...split int128 test from above, > > since this would emit just a single IFN if a TI optab is added. 
> > > > --- > > > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr86544.C > > b/gcc/testsuite/g++.dg/tree-ssa/pr86544.C > > index > > ef438916a8019320564f444ace08e2f4b4190684..50befb36bac75de1cfa282e38358278b3288bd1c > > 100644 > > --- a/gcc/testsuite/g++.dg/tree-ssa/pr86544.C > > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr86544.C > > @@ -12,5 +12,5 @@ int PopCount (long b) { > > return c; > > } > > > > -/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } > > } */ > > +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 > > "optimized" } } */ > > /* { dg-final { scan-tree-dump-times "if" 0 "phiopt4" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount.c > > b/gcc/testsuite/gcc.dg/tree-ssa/popcount.c > > index > > b4694109411a4631697463519acbe7d9df65bf6e..efd906a0f5447f0beb3752eded3756999b02e6e6 > > 100644 > > --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount.c > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount.c > > @@ -39,4 +39,4 @@ void PopCount3 (long b1) { > > } > > } > > > > -/* { dg-final { scan-tree-dump-times "__builtin_popcount" 3 "optimized" } > > } */ > > +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 3 > > "optimized" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount2.c > > b/gcc/testsuite/gcc.dg/tree-ssa/popcount2.c > > index > > ef73e345573de721833e98e89c252640a55f7c60..ae38a329bd4d868a762300d3218d68864c0fc4be > > 100644 > > --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount2.c > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount2.c > > @@ -26,4 +26,4 @@ int main() > > return 0; > > } > > > > -/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } > > } */ > > +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 > > "optimized" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount3.c > > b/gcc/testsuite/gcc.dg/tree-ssa/popcount3.c > > index > > ef438916a8019320564f444ace08e2f4b4190684..50befb36bac75de1cfa282e38358278b3288bd1c > > 100644 > 
> --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount3.c > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount3.c > > @@ -12,5 +12,5 @@ int PopCount (long b) { > > return c; > > } > > > > -/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } > > } */ > > +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 > > "optimized" } } */ > > /* { dg-final { scan-tree-dump-times "if" 0 "phiopt4" } } */ > > diff --git a/gcc/testsuite/gcc.target/aarch64/popcount4.c > > b/gcc/testsuite/gcc.target/aarch64/popcount4.c > > index > > ee55b2e335223053ca024e95b7a13aa4af32550e..8aa15ff018d4b5fc6bb59e52af20d5c33cea2ee0 > > 100644 > > --- a/gcc/testsuite/gcc.target/aarch64/popcount4.c > > +++ b/gcc/testsuite/gcc.target/aarch64/popcount4.c > > @@ -11,4 +11,4 @@ int PopCount (long b) { > > return c; > > } > > > > -/* { dg-final { scan-tree-dump-times "__builtin_popcount" 0 "optimized" } > > } */ > > +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 0 > > "optimized" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr95771-2.c > > b/gcc/testsuite/gcc.target/i386/pr95771-2.c > > new file mode 100644 > > index > > 0000000000000000000000000000000000000000..1db9dc94d0b66477667624012221d6844c141a26 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr95771-2.c > > @@ -0,0 +1,17 @@ > > +/* PR tree-optimization/95771 */ > > +/* { dg-do compile } */ > > +/* { dg-require-effective-target int128 } */ > > +/* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */ > > +/* { dg-final { scan-tree-dump " = __builtin_popcount| = \\.POPCOUNT" > > "optimized" } } */ > > + > > +int > > +corge (unsigned __int128 x) > > +{ > > + int i = 0; > > + while (x) > > + { > > + x &= x - 1; > > + ++i; > > + } > > + return i; > > +} > > diff --git a/gcc/testsuite/gcc.target/i386/pr95771.c > > b/gcc/testsuite/gcc.target/i386/pr95771.c > > index > > d7b67017800b705b9854f561916c20901ea76803..d41be445f4a68613a082b8956fea3ceaf33d7e0f > > 100644 > > --- 
a/gcc/testsuite/gcc.target/i386/pr95771.c > > +++ b/gcc/testsuite/gcc.target/i386/pr95771.c > > @@ -1,8 +1,7 @@ > > /* PR tree-optimization/95771 */ > > /* { dg-do compile } */ > > /* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */ > > -/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 6 "optimized" > > { target int128 } } } */ > > -/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 4 "optimized" > > { target { ! int128 } } } } */ > > +/* { dg-final { scan-tree-dump-times " = __builtin_popcount| = > > \\.POPCOUNT" 4 "optimized" } } */ > > > > int > > foo (unsigned char x) > > @@ -51,17 +50,3 @@ qux (unsigned long long x) > > } > > return i; > > } > > - > > -#ifdef __SIZEOF_INT128__ > > -int > > -corge (unsigned __int128 x) > > -{ > > - int i = 0; > > - while (x) > > - { > > - x &= x - 1; > > - ++i; > > - } > > - return i; > > -} > > -#endif > > diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc > > index > > 9c2f9f3d5f6205bb5e7f490257800c660fdd0b8d..cc53b27329f8518bc2cacef1830768a140331b31 > > 100644 > > --- a/gcc/tree-ssa-loop-niter.cc > > +++ b/gcc/tree-ssa-loop-niter.cc > > @@ -2033,11 +2033,18 @@ static tree > > build_popcount_expr (tree src) > > { > > tree fn; > > + bool use_ifn = false; > > int prec = TYPE_PRECISION (TREE_TYPE (src)); > > int i_prec = TYPE_PRECISION (integer_type_node); > > int li_prec = TYPE_PRECISION (long_integer_type_node); > > int lli_prec = TYPE_PRECISION (long_long_integer_type_node); > > - if (prec <= i_prec) > > + > > + tree utype = unsigned_type_for (TREE_TYPE (src)); > > + src = fold_convert (utype, src); > > + > > + if (direct_internal_fn_supported_p (IFN_POPCOUNT, utype, > > OPTIMIZE_FOR_BOTH)) > > + use_ifn = true; > > + else if (prec <= i_prec) > > fn = builtin_decl_implicit (BUILT_IN_POPCOUNT); > > else if (prec == li_prec) > > fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL); > > @@ -2046,12 +2053,11 @@ build_popcount_expr (tree src) > > else > > return NULL_TREE; > > > > - tree 
utype = unsigned_type_for (TREE_TYPE (src)); > > - src = fold_convert (utype, src); > > - if (prec < i_prec) > > - src = fold_convert (unsigned_type_node, src); > > tree call; > > - if (prec == 2 * lli_prec) > > + if (use_ifn) > > + call = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_POPCOUNT, > > + integer_type_node, 1, src); > > + else if (prec == 2 * lli_prec) > > { > > tree src1 = fold_convert (long_long_unsigned_type_node, > > fold_build2 (RSHIFT_EXPR, TREE_TYPE (src), > > @@ -2064,7 +2070,12 @@ build_popcount_expr (tree src) > > call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2); > > } > > else > > - call = build_call_expr (fn, 1, src); > > + { > > + if (prec < i_prec) > > + src = fold_convert (unsigned_type_node, src); > > + > > + call = build_call_expr (fn, 1, src); > > + } > > > > return call; > > }