On Thu, 17 May 2012, William J. Schmidt wrote: > This patch gives up on using the reassociation rank algorithm to > correctly place __builtin_powi calls and their feeding multiplies. In > the end this proved to introduce more complexity than it saved, due in > part to the poor fit of introducing DAG expressions into the > reassociated operand tree. This patch returns to generating explicit > multiplies to bind the builtin calls together and to the results of the > expression tree rewrite. I feel this version is smaller, easier to > understand, and less fragile than the existing code. > > Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new > regressions. Ok for trunk?
Ok. Thanks, Richard. > Thanks, > Bill > > > 2012-05-17 Bill Schmidt <wschm...@linux.vnet.ibm.com> > > * tree-ssa-reassoc.c (bip_map): Remove decl. > (completely_remove_stmt): Remove function. > (remove_def_if_absorbed_call): Remove function. > (remove_visited_stmt_chain): Remove __builtin_powi handling. > (possibly_move_powi): Remove function. > (rewrite_expr_tree): Remove calls to possibly_move_powi. > (rewrite_expr_tree_parallel): Likewise. > (attempt_builtin_powi): Build multiplies explicitly rather than > relying on the ops vector and rank system. > (transform_stmt_to_copy): New function. > (transform_stmt_to_multiply): Likewise. > (reassociate_bb): Handle leftover operations after __builtin_powi > optimization; build a final multiply if necessary. > > > Index: gcc/tree-ssa-reassoc.c > =================================================================== > --- gcc/tree-ssa-reassoc.c (revision 187626) > +++ gcc/tree-ssa-reassoc.c (working copy) > @@ -200,10 +200,6 @@ static long *bb_rank; > /* Operand->rank hashtable. */ > static struct pointer_map_t *operand_rank; > > -/* Map from inserted __builtin_powi calls to multiply chains that > - feed them. */ > -static struct pointer_map_t *bip_map; > - > /* Forward decls. */ > static long get_rank (tree); > > @@ -2184,32 +2180,6 @@ is_phi_for_stmt (gimple stmt, tree operand) > return false; > } > > -/* Remove STMT, unlink its virtual defs, and release its SSA defs. */ > - > -static inline void > -completely_remove_stmt (gimple stmt) > -{ > - gimple_stmt_iterator gsi = gsi_for_stmt (stmt); > - gsi_remove (&gsi, true); > - unlink_stmt_vdef (stmt); > - release_defs (stmt); > -} > - > -/* If OP is defined by a builtin call that has been absorbed by > - reassociation, remove its defining statement completely. */ > - > -static inline void > -remove_def_if_absorbed_call (tree op) > -{ > - gimple stmt; > - > - if (TREE_CODE (op) == SSA_NAME > - && has_zero_uses (op) > - && is_gimple_call ((stmt = SSA_NAME_DEF_STMT (op))) > - && gimple_visited_p (stmt)) > - completely_remove_stmt (stmt); > -} > - > /* Remove def stmt of VAR if VAR has zero uses and recurse > on rhs1 operand if so. */ > > @@ -2218,7 +2188,6 @@ remove_visited_stmt_chain (tree var) > { > gimple stmt; > gimple_stmt_iterator gsi; > - tree var2; > > while (1) > { > @@ -2228,95 +2197,15 @@ remove_visited_stmt_chain (tree var) > if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) > { > var = gimple_assign_rhs1 (stmt); > - var2 = gimple_assign_rhs2 (stmt); > gsi = gsi_for_stmt (stmt); > gsi_remove (&gsi, true); > release_defs (stmt); > - /* A multiply whose operands are both fed by builtin pow/powi > - calls must check whether to remove rhs2 as well. */ > - remove_def_if_absorbed_call (var2); > } > - else if (is_gimple_call (stmt) && gimple_visited_p (stmt)) > - { > - completely_remove_stmt (stmt); > - return; > - } > else > return; > } > } > > -/* If OP is an SSA name, find its definition and determine whether it > - is a call to __builtin_powi. If so, move the definition prior to > - STMT. Only do this during early reassociation. */ > - > -static void > -possibly_move_powi (gimple stmt, tree op) > -{ > - gimple stmt2, *mpy; > - tree fndecl; > - gimple_stmt_iterator gsi1, gsi2; > - > - if (!first_pass_instance > - || !flag_unsafe_math_optimizations > - || TREE_CODE (op) != SSA_NAME) > - return; > - > - stmt2 = SSA_NAME_DEF_STMT (op); > - > - if (!is_gimple_call (stmt2) > - || !has_single_use (gimple_call_lhs (stmt2))) > - return; > - > - fndecl = gimple_call_fndecl (stmt2); > - > - if (!fndecl > - || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) > - return; > - > - switch (DECL_FUNCTION_CODE (fndecl)) > - { > - CASE_FLT_FN (BUILT_IN_POWI): > - break; > - default: > - return; > - } > - > - /* Move the __builtin_powi. */ > - gsi1 = gsi_for_stmt (stmt); > - gsi2 = gsi_for_stmt (stmt2); > - gsi_move_before (&gsi2, &gsi1); > - > - /* See if there are multiplies feeding the __builtin_powi base > - argument that must also be moved. */ > - while ((mpy = (gimple *) pointer_map_contains (bip_map, stmt2)) != NULL) > - { > - /* If we've already moved this statement, we're done. This is > - identified by a NULL entry for the statement in bip_map. */ > - gimple *next = (gimple *) pointer_map_contains (bip_map, *mpy); > - if (next && !*next) > - return; > - > - stmt = stmt2; > - stmt2 = *mpy; > - gsi1 = gsi_for_stmt (stmt); > - gsi2 = gsi_for_stmt (stmt2); > - gsi_move_before (&gsi2, &gsi1); > - > - /* The moved multiply may be DAG'd from multiple calls if it > - was the result of a cached multiply. Only move it once. > - Rank order ensures we move it to the right place the first > - time. */ > - if (next) > - *next = NULL; > - else > - { > - next = (gimple *) pointer_map_insert (bip_map, *mpy); > - *next = NULL; > - } > - } > -} > - > /* This function checks three consequtive operands in > passed operands vector OPS starting from OPINDEX and > swaps two operands if it is profitable for binary operation > @@ -2421,9 +2310,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind > fprintf (dump_file, " into "); > print_gimple_stmt (dump_file, stmt, 0, 0); > } > - > - possibly_move_powi (stmt, oe1->op); > - possibly_move_powi (stmt, oe2->op); > } > return; > } > @@ -2469,8 +2355,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind > fprintf (dump_file, " into "); > print_gimple_stmt (dump_file, stmt, 0, 0); > } > - > - possibly_move_powi (stmt, oe->op); > } > /* Recurse on the LHS of the binary operator, which is guaranteed to > be the non-leaf side. */ > @@ -2644,9 +2528,6 @@ rewrite_expr_tree_parallel (gimple stmt, int width > fprintf (dump_file, " into "); > print_gimple_stmt (dump_file, stmts[i], 0, 0); > } > - > - possibly_move_powi (stmts[i], op1); > - possibly_move_powi (stmts[i], op2); > } > > remove_visited_stmt_chain (last_rhs1); > @@ -3222,11 +3103,10 @@ get_reassoc_pow_ssa_name (tree *target, tree type) > > /* Look for repeated operands in OPS in the multiply tree rooted at > STMT. Replace them with an optimal sequence of multiplies and powi > - builtin calls, and remove the used operands from OPS. Push new > - SSA names onto OPS that represent the introduced multiplies and > - builtin calls. */ > + builtin calls, and remove the used operands from OPS. Return an > + SSA name representing the value of the replacement sequence. */ > > -static void > +static tree > attempt_builtin_powi (gimple stmt, VEC(operand_entry_t, heap) **ops, > tree *target) > { > @@ -3235,6 +3115,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent > operand_entry_t oe; > repeat_factor_t rf1, rf2; > repeat_factor rfnew; > + tree result = NULL_TREE; > tree target_ssa, iter_result; > tree type = TREE_TYPE (gimple_get_lhs (stmt)); > tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); > @@ -3244,7 +3125,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent > /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and > target. */ > if (!powi_fndecl) > - return; > + return NULL_TREE; > > /* Allocate the repeated factor vector. */ > repeat_factor_vec = VEC_alloc (repeat_factor, heap, 10); > @@ -3315,7 +3196,6 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent > while (true) > { > HOST_WIDE_INT power; > - gimple last_mul = NULL; > > /* First look for the largest cached product of factors from > preceding iterations. If found, create a builtin_powi for > @@ -3353,25 +3233,14 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent > } > else > { > - gimple *value; > - > iter_result = get_reassoc_pow_ssa_name (target, type); > pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, > build_int_cst (integer_type_node, > power)); > gimple_call_set_lhs (pow_stmt, iter_result); > gimple_set_location (pow_stmt, gimple_location (stmt)); > - /* Temporarily place the call; we will move it and its > - feeding multiplies to the correct place during > - rewrite_expr. */ > gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); > > - if (!operand_equal_p (rf1->repr, rf1->factor, 0)) > - { > - value = (gimple *) pointer_map_insert (bip_map, pow_stmt); > - *value = SSA_NAME_DEF_STMT (rf1->repr); > - } > - > if (dump_file && (dump_flags & TDF_DETAILS)) > { > unsigned elt; > @@ -3457,15 +3326,6 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent > gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); > rf1->repr = target_ssa; > > - /* Chain multiplies together for later movement. */ > - if (last_mul) > - { > - gimple *value > - = (gimple *) pointer_map_insert (bip_map, mul_stmt); > - *value = last_mul; > - } > - last_mul = mul_stmt; > - > /* Don't reprocess the multiply we just introduced. */ > gimple_set_visited (mul_stmt, true); > } > @@ -3481,20 +3341,23 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent > gimple_call_set_lhs (pow_stmt, iter_result); > gimple_set_location (pow_stmt, gimple_location (stmt)); > gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); > + } > > - /* If we inserted a chain of multiplies before the pow_stmt, > - record that fact so we can move it later when we move the > - pow_stmt. */ > - if (last_mul) > - { > - gimple *value = (gimple *) pointer_map_insert (bip_map, pow_stmt); > - *value = last_mul; > - } > + /* If we previously formed at least one other builtin_powi call, > + form the product of this one and those others. */ > + if (result) > + { > + tree new_result = get_reassoc_pow_ssa_name (target, type); > + mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result, > + result, iter_result); > + gimple_set_location (mul_stmt, gimple_location (stmt)); > + gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); > + gimple_set_visited (mul_stmt, true); > + result = new_result; > } > + else > + result = iter_result; > > - /* Append the result of this iteration to the ops vector. */ > - add_to_ops_vec (ops, iter_result); > - > /* Decrement the occurrence count of each element in the product > by the count found above, and remove this many copies of each > factor from OPS. */ > @@ -3534,8 +3397,61 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent > clean up. */ > VEC_qsort (operand_entry_t, *ops, sort_by_operand_rank); > VEC_free (repeat_factor, heap, repeat_factor_vec); > + > + /* Return the final product computed herein. Note that there may > + still be some elements with single occurrence count left in OPS; > + those will be handled by the normal reassociation logic. */ > + return result; > } > > +/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */ > + > +static void > +transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs) > +{ > + tree rhs1; > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "Transforming "); > + print_gimple_stmt (dump_file, stmt, 0, 0); > + } > + > + rhs1 = gimple_assign_rhs1 (stmt); > + gimple_assign_set_rhs_from_tree (gsi, new_rhs); > + update_stmt (stmt); > + remove_visited_stmt_chain (rhs1); > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, " into "); > + print_gimple_stmt (dump_file, stmt, 0, 0); > + } > +} > + > +/* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ > + > +static void > +transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt, > + tree rhs1, tree rhs2) > +{ > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "Transforming "); > + print_gimple_stmt (dump_file, stmt, 0, 0); > + } > + > + gimple_assign_set_rhs_with_ops_1 (gsi, MULT_EXPR, rhs1, rhs2, NULL_TREE); > + update_stmt (gsi_stmt (*gsi)); > + remove_visited_stmt_chain (rhs1); > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, " into "); > + print_gimple_stmt (dump_file, stmt, 0, 0); > + } > +} > + > /* Reassociate expressions in basic block BB and its post-dominator as > children. */ > > @@ -3606,7 +3522,7 @@ reassociate_bb (basic_block bb) > if (associative_tree_code (rhs_code)) > { > VEC(operand_entry_t, heap) *ops = NULL; > - bip_map = pointer_map_create (); > + tree powi_result = NULL_TREE; > > /* There may be no immediate uses left by the time we > get here because we may have eliminated them all. */ > @@ -3630,28 +3546,21 @@ reassociate_bb (basic_block bb) > if (first_pass_instance > && rhs_code == MULT_EXPR > && flag_unsafe_math_optimizations) > - attempt_builtin_powi (stmt, &ops, &target); > + powi_result = attempt_builtin_powi (stmt, &ops, &target); > > - if (VEC_length (operand_entry_t, ops) == 1) > + /* If the operand vector is now empty, all operands were > + consumed by the __builtin_powi optimization. */ > + if (VEC_length (operand_entry_t, ops) == 0) > + transform_stmt_to_copy (&gsi, stmt, powi_result); > + else if (VEC_length (operand_entry_t, ops) == 1) > { > - if (dump_file && (dump_flags & TDF_DETAILS)) > - { > - fprintf (dump_file, "Transforming "); > - print_gimple_stmt (dump_file, stmt, 0, 0); > - } > - > - rhs1 = gimple_assign_rhs1 (stmt); > - gimple_assign_set_rhs_from_tree (&gsi, > - VEC_last (operand_entry_t, > - ops)->op); > - update_stmt (stmt); > - remove_visited_stmt_chain (rhs1); > - > - if (dump_file && (dump_flags & TDF_DETAILS)) > - { > - fprintf (dump_file, " into "); > - print_gimple_stmt (dump_file, stmt, 0, 0); > - } > + tree last_op = VEC_last (operand_entry_t, ops)->op; > + > + if (powi_result) > + transform_stmt_to_multiply (&gsi, stmt, last_op, > + powi_result); > + else > + transform_stmt_to_copy (&gsi, stmt, last_op); > } > else > { > @@ -3668,10 +3577,27 @@ reassociate_bb (basic_block bb) > rewrite_expr_tree_parallel (stmt, width, ops); > else > rewrite_expr_tree (stmt, 0, ops, false); > + > + /* If we combined some repeated factors into a > + __builtin_powi call, multiply that result by the > + reassociated operands. */ > + if (powi_result) > + { > + gimple mul_stmt; > + tree type = TREE_TYPE (gimple_get_lhs (stmt)); > + tree target_ssa = get_reassoc_pow_ssa_name (&target, > + type); > + gimple_set_lhs (stmt, target_ssa); > + update_stmt (stmt); > + mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs, > + powi_result, > + target_ssa); > + gimple_set_location (mul_stmt, gimple_location (stmt)); > + gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT); > + } > } > > VEC_free (operand_entry_t, heap, ops); > - pointer_map_destroy (bip_map); > } > } > } > > > -- Richard Guenther <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer