On Fri, Sep 6, 2024 at 12:07 AM Filip Kastl <fka...@suse.cz> wrote: > > Hi, > > bootstrapped and regtested on x86_64-linux. Ok to push? > > Thanks, > Filip Kastl > > > ---- 8< ---- > > > Switch exponential transformation in the switch conversion pass > currently generates > > tmp1 = __builtin_popcount (var); > tmp2 = tmp1 == 1; > > when inserting code to determine if var is power of two. If the target > doesn't support expanding the builtin as special instructions switch > conversion relies on this whole pattern being expanded as bitmagic. > However, it is possible that other GIMPLE optimizations move the two > statements of the pattern apart. In that case the builtin becomes a > libgcc call in the final binary. The call is slow and in case of > freestanding programs can result in linking error (this bug was > originally found while compiling Linux kernel). > > This patch modifies switch conversion to insert the bitmagic > (var ^ (var - 1)) > (var - 1) instead of the builtin. > > gcc/ChangeLog: > > PR tree-optimization/116616 > * tree-switch-conversion.cc (can_pow2p): Remove this function. > (gen_pow2p): Generate bitmagic instead of a builtin. Remove the > TYPE parameter. > (switch_conversion::is_exp_index_transform_viable): Don't call > can_pow2p. > (switch_conversion::exp_index_transform): Call gen_pow2p without > the TYPE parameter. > * tree-switch-conversion.h: Remove > m_exp_index_transform_pow2p_type. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/116616 > * gcc.target/i386/switch-exp-transform-1.c: Don't test for > presence of the POPCOUNT internal fn call. > > Signed-off-by: Filip Kastl <fka...@suse.cz> > --- > .../gcc.target/i386/switch-exp-transform-1.c | 7 +- > gcc/tree-switch-conversion.cc | 82 ++++--------------- > gcc/tree-switch-conversion.h | 6 +- > 3 files changed, 19 insertions(+), 76 deletions(-) > > diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c > b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c > index a8c9e03e515..4832f5b52c3 100644 > --- a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c > +++ b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c > @@ -1,10 +1,8 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-widening_mul > -mpopcnt -mbmi" } */ > +/* { dg-options "-O2 -fdump-tree-switchconv -mbmi" } */ > > /* Checks that exponential index transform enables switch conversion to > convert > - this switch into an array lookup. Also checks that the "index variable > is a > - power of two" check has been generated and that it has been later expanded > - into an internal function. */ > + this switch into an array lookup. */ > > int foo(unsigned bar) > { > @@ -30,4 +28,3 @@ int foo(unsigned bar) > } > > /* { dg-final { scan-tree-dump "CSWTCH" "switchconv" } } */ > -/* { dg-final { scan-tree-dump "POPCOUNT" "widening_mul" } } */ > diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc > index c1332a26094..2e42611f9fd 100644 > --- a/gcc/tree-switch-conversion.cc > +++ b/gcc/tree-switch-conversion.cc > @@ -133,75 +133,27 @@ gen_log2 (tree op, location_t loc, tree *result, tree > type) > return stmts; > } > > -/* Is it possible to efficiently check that a value of TYPE is a power of 2? > - > - If yes, returns TYPE. If no, returns NULL_TREE. May also return another > - type. This indicates that logarithm of the variable can be computed but > - only after it is converted to this type. > - > - Also see gen_pow2p. */ > - > -static tree > -can_pow2p (tree type) > -{ > - /* __builtin_popcount supports the unsigned type or its long and long long > - variants. Choose the smallest out of those that can still fit TYPE. */ > - int prec = TYPE_PRECISION (type); > - int i_prec = TYPE_PRECISION (unsigned_type_node); > - int li_prec = TYPE_PRECISION (long_unsigned_type_node); > - int lli_prec = TYPE_PRECISION (long_long_unsigned_type_node); > - > - if (prec <= i_prec) > - return unsigned_type_node; > - else if (prec <= li_prec) > - return long_unsigned_type_node; > - else if (prec <= lli_prec) > - return long_long_unsigned_type_node; > - else > - return NULL_TREE; > -} > - > -/* Build a sequence of gimple statements checking that OP is a power of 2. > Use > - special optabs if target supports them. Return the result as a > - boolean_type_node ssa name through RESULT. Assumes that OP's value will > - be non-negative. The generated check may give arbitrary answer for > negative > - values. > - > - Before computing the check, OP may have to be converted to another type. > - This should be specified in TYPE. Use can_pow2p to decide what this type > - should be. > - > - Should only be used if can_pow2p returns true for type of OP. */ > +/* Build a sequence of gimple statements checking that OP is a power of 2. > + Return the result as a boolean_type_node ssa name through RESULT. Assumes > + that OP's value will be non-negative. The generated check may give > + arbitrary answer for negative values. */ > > static gimple_seq > -gen_pow2p (tree op, location_t loc, tree *result, tree type) > +gen_pow2p (tree op, location_t loc, tree *result) > { > gimple_seq stmts = NULL; > gimple_stmt_iterator gsi = gsi_last (stmts); > > - built_in_function fn; > - if (type == unsigned_type_node) > - fn = BUILT_IN_POPCOUNT; > - else if (type == long_unsigned_type_node) > - fn = BUILT_IN_POPCOUNTL; > - else > - { > - fn = BUILT_IN_POPCOUNTLL; > - gcc_checking_assert (type == long_long_unsigned_type_node); > - } > + tree type = TREE_TYPE (op); > + > + /* Build (op ^ (op - 1)) > (op - 1). */ > + tree tmp1 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, MINUS_EXPR, type, > + op, build_one_cst (type)); > + tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, BIT_XOR_EXPR, > type, > + op, tmp1); > + *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, GT_EXPR, > + boolean_type_node, tmp2, tmp1);
You need to do this in an unsigned types. Otherwise you get the wrong answer and also introduce undefined code. So you need to use: tree utype = unsigned_type_for (type); tree tmp3; if (types_compatible_p (type, utype) tmp3 = op; else tmp3 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, CONVERT_EXPR, utype, op); And then use utype and tmp3 instead of op. Thanks, Andrew Pinski > > - tree orig_type = TREE_TYPE (op); > - tree tmp1; > - if (type != orig_type) > - tmp1 = gimple_convert (&gsi, false, GSI_NEW_STMT, loc, type, op); > - else > - tmp1 = op; > - /* Build __builtin_popcount{l,ll} (op) == 1. */ > - tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, > - as_combined_fn (fn), integer_type_node, tmp1); > - *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, EQ_EXPR, > - boolean_type_node, tmp2, > - build_one_cst (integer_type_node)); > return stmts; > } > > @@ -371,9 +323,6 @@ switch_conversion::is_exp_index_transform_viable (gswitch > *swtch) > m_exp_index_transform_log2_type = can_log2 (index_type, opt_type); > if (!m_exp_index_transform_log2_type) > return false; > - m_exp_index_transform_pow2p_type = can_pow2p (index_type); > - if (!m_exp_index_transform_pow2p_type) > - return false; > > /* Check that each case label corresponds only to one value > (no case 1..3). */ > @@ -467,8 +416,7 @@ switch_conversion::exp_index_transform (gswitch *swtch) > new_edge2->probability = profile_probability::even (); > > tree tmp; > - gimple_seq stmts = gen_pow2p (index, UNKNOWN_LOCATION, &tmp, > - m_exp_index_transform_pow2p_type); > + gimple_seq stmts = gen_pow2p (index, UNKNOWN_LOCATION, &tmp); > gsi = gsi_last_bb (cond_bb); > gsi_insert_seq_after (&gsi, stmts, GSI_LAST_NEW_STMT); > gcond *stmt_cond = gimple_build_cond (NE_EXPR, tmp, boolean_false_node, > diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h > index 14610499e5f..6468995eb31 100644 > --- a/gcc/tree-switch-conversion.h > +++ b/gcc/tree-switch-conversion.h > @@ -920,11 +920,9 @@ public: > bool m_exp_index_transform_applied; > > /* If switch conversion decided exponential index transform is viable, here > - will be stored the types to which index variable has to be converted > - before the logarithm and the "is power of 2" operations which are part > of > - the transform. */ > + will be stored the type to which index variable has to be converted > + before the logarithm operation which is a part of the transform. */ > tree m_exp_index_transform_log2_type; > - tree m_exp_index_transform_pow2p_type; > }; > > void > -- > 2.46.0 >