On Thu, 25 Oct 2012, Jan Hubicka wrote:
> Hi,
> while looking into cunroll issues I noticed that we miss quite a lot
> of important unrolling at -O2 for EON because we think it will
> increase code size.  This is because we do not account for the fact
> that making an array index constant is a good thing.
>
> This tracks back to the fact that estimate_num_insns does not
> consider refs to be expensive at all.  This patch adds a simple cost
> model for them, accounting for the address arithmetic, and updates
> the inliner to take this into account when inlining eliminates them.
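
[ A minimal illustration of the effect being costed here; the snippet
  is not part of the patch and the function name is made up.  While i
  is a variable, every access to a[i][j] needs address arithmetic of
  the form a + i*16 + j*4 (assuming 4-byte int):

    int a[4][4];

    int
    sum_col (int j)
    {
      int i, s = 0;
      for (i = 0; i < 4; i++)
        s += a[i][j];       /* one multiply-add per access */
      return s;
    }

  Once cunroll fully unrolls the loop, the accesses become a[0][j]
  through a[3][j], whose constant row offsets fold into the address,
  which is exactly the saving the new cost model tries to credit. ]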
>
> Bootstrapped/regtested x86_64-linux, and tested on a few benchmarks
> (tramp3d, eon, mozilla); it generally decreases code size without
> measurable slowdowns.
>
> OK?
> Honza
> 	* tree-inline.c (estimate_ref_cost): New function.
> 	(estimate_operand_cost): New function.
> 	(estimate_num_insns): Estimate operand costs for calls, returns,
> 	moves and asm statements.
> 	* tree-inline.h (estimate_ref_cost): Declare.
> 	* ipa-inline-analysis.c (account_address_costs): New function.
> 	(estimate_function_body_sizes): Use it.
> Index: tree-inline.c
> ===================================================================
> --- tree-inline.c (revision 192761)
> +++ tree-inline.c (working copy)
> @@ -3322,6 +3322,69 @@ estimate_move_cost (tree type)
>    return ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
>  }
>
> +/* Return the cost of the reference REF.  This is intended primarily
> +   to handle address computations hidden in ARRAY_REFs and
> +   TARGET_MEM_REFs.  Do not dive recursively; the caller is supposed
> +   to walk all the handled components.  This makes it easy for other
> +   code to compensate the cost when some of the references become
> +   constant.  */
> +
> +int
> +estimate_ref_cost (tree ref)
> +{
> +  int cost = 0;
> +
> +  switch (TREE_CODE (ref))
> +    {
> +    case MEM_REF:
> +      /* One variable addition may be needed.  */
> +      if (TREE_CODE (TREE_OPERAND (ref, 1)) != INTEGER_CST)
> +        cost++;
> +      return cost;
> +
> +    case TARGET_MEM_REF:
> +      /* TMR_INDEX and TMR_STEP may be NULL; check that before
> +         looking at their codes.  */
> +      if ((TMR_INDEX (ref)
> +           && TREE_CODE (TMR_INDEX (ref)) != INTEGER_CST)
> +          || (TMR_STEP (ref)
> +              && TREE_CODE (TMR_STEP (ref)) != INTEGER_CST))
> +        cost++;
> +      if ((TMR_INDEX2 (ref)
> +           && TREE_CODE (TMR_INDEX2 (ref)) != INTEGER_CST)
> +          || TREE_CODE (TMR_OFFSET (ref)) != INTEGER_CST)
> +        cost++;
> +      return cost;
> +
> +    case ARRAY_REF:
> +    case ARRAY_RANGE_REF:
> +      if (TREE_CODE (TREE_OPERAND (ref, 1)) == INTEGER_CST
> +          && TREE_CODE (array_ref_low_bound (ref)) == INTEGER_CST
> +          && TREE_CODE (array_ref_element_size (ref)) == INTEGER_CST)
> +        return 0;
> +      /* Arrays of variable-length objects are more costly because
> +         they need an arbitrary multiply.  */
> +      if (TREE_CODE (TYPE_SIZE (TREE_TYPE (ref))) != INTEGER_CST)
> +        return 2;
> +      else
> +        return 1;
> +
> +    default:
> +      return 0;
> +    }
> +}
> +
> +/* For operands of loads/stores, estimate the cost of the address
> +   computations involved.  */
> +
> +static int
> +estimate_operand_cost (tree op)
> +{
> +  int cost = 0;
> +
> +  while (handled_component_p (op))
> +    {
> +      cost += estimate_ref_cost (op);
> +      op = TREE_OPERAND (op, 0);
> +    }
> +  /* Account for the outermost (TARGET_)MEM_REF.  */
> +  return cost + estimate_ref_cost (op);
ICK ...
Why not something as simple as

  return num_ssa_operands (stmt, SSA_OP_USE);

?  a[1][2] and b[2] really have the same cost; variable-length
objects have extra SSA operands in ARRAY_REF/COMPONENT_REF for
the size.  Thus, the stmt cost should somehow reflect the number
of dependent stmts.
So in estimate_num_insns I'd try

  int
  estimate_num_insns (gimple stmt, eni_weights *weights)
  {
    unsigned cost, i;
    enum gimple_code code = gimple_code (stmt);
    tree lhs;
    tree rhs;

    switch (code)
      {
      case GIMPLE_ASSIGN:
        /* Initialize with the number of SSA uses, one is free.  */
        cost = num_ssa_operands (stmt, SSA_OP_USE);
        if (cost > 1)
          --cost;
    ...
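
[ Worked through on concrete statements (made up for illustration),
  this counting behaves as described:

    tmp_4 = a[1][2];          no SSA uses      -> cost 0
    tmp_4 = b[i_1];           uses i_1         -> cost 1
    tmp_4 = a[i_1][j_2];      uses i_1, j_2    -> cost 2 - 1 = 1

  so constant-indexed accesses are free, and a two-dimensional access
  with variable indices costs the same as a one-dimensional one, one
  index computation being assumed to fold into the addressing mode. ]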
Richard.
> +}
> +
> /* Returns cost of operation CODE, according to WEIGHTS */
>
> static int
> @@ -3520,6 +3583,9 @@ estimate_num_insns (gimple stmt, eni_wei
>        if (!is_gimple_reg (rhs) && !is_gimple_min_invariant (rhs))
>          cost += estimate_move_cost (TREE_TYPE (rhs));
>
> +      cost += estimate_operand_cost (lhs);
> +      cost += estimate_operand_cost (rhs);
> +
>        cost += estimate_operator_cost (gimple_assign_rhs_code (stmt), weights,
>                                        gimple_assign_rhs1 (stmt),
>                                        get_gimple_rhs_class
>                                          (gimple_assign_rhs_code (stmt))
> @@ -3585,17 +3651,24 @@ estimate_num_insns (gimple stmt, eni_wei
>
>        cost = node ? weights->call_cost : weights->indirect_call_cost;
>        if (gimple_call_lhs (stmt))
> -        cost += estimate_move_cost (TREE_TYPE (gimple_call_lhs (stmt)));
> +        {
> +          cost += estimate_move_cost (TREE_TYPE (gimple_call_lhs (stmt)));
> +          cost += estimate_operand_cost (gimple_call_lhs (stmt));
> +        }
>        for (i = 0; i < gimple_call_num_args (stmt); i++)
>          {
>            tree arg = gimple_call_arg (stmt, i);
> +          cost += estimate_operand_cost (arg);
>            cost += estimate_move_cost (TREE_TYPE (arg));
>          }
>        break;
>      }
>
>      case GIMPLE_RETURN:
> -      return weights->return_cost;
> +      return (weights->return_cost
> +              + (gimple_return_retval (stmt)
> +                 ? estimate_operand_cost (gimple_return_retval (stmt))
> +                 : 0));
>
>      case GIMPLE_GOTO:
>      case GIMPLE_LABEL:
> @@ -3606,7 +3679,18 @@ estimate_num_insns (gimple stmt, eni_wei
>        return 0;
>
>      case GIMPLE_ASM:
> -      return asm_str_count (gimple_asm_string (stmt));
> +      {
> +        int cost = asm_str_count (gimple_asm_string (stmt));
> +        unsigned int i;
> +
> +        for (i = 0; i < gimple_asm_ninputs (stmt); i++)
> +          cost += estimate_operand_cost
> +                    (TREE_VALUE (gimple_asm_input_op (stmt, i)));
> +        for (i = 0; i < gimple_asm_noutputs (stmt); i++)
> +          cost += estimate_operand_cost
> +                    (TREE_VALUE (gimple_asm_output_op (stmt, i)));
> +        return cost;
> +      }
>
>      case GIMPLE_RESX:
>        /* This is either going to be an external function call with one
> Index: tree-inline.h
> ===================================================================
> --- tree-inline.h (revision 192761)
> +++ tree-inline.h (working copy)
> @@ -186,6 +186,7 @@ tree copy_decl_no_change (tree decl, cop
> int estimate_move_cost (tree type);
> int estimate_num_insns (gimple, eni_weights *);
> int estimate_num_insns_fn (tree, eni_weights *);
> +int estimate_ref_cost (tree);
> int count_insns_seq (gimple_seq, eni_weights *);
> bool tree_versionable_function_p (tree);
>
> Index: ipa-inline-analysis.c
> ===================================================================
> --- ipa-inline-analysis.c (revision 192761)
> +++ ipa-inline-analysis.c (working copy)
> @@ -2240,6 +2240,77 @@ predicate_for_phi_result (struct inline_
>                          SSA_NAME_VERSION (gimple_phi_result (phi)), *p);
>  }
>
> +/* Account the costs of the address computation of OP, executed with
> +   frequency FREQ under predicate P.  INFO and PARMS_INFO describe
> +   the function being analyzed; NONCONSTANT_NAMES is the vector of
> +   known SSA names.  THIS_TIME/THIS_SIZE are updated accordingly.  */
> +
> +static void
> +account_address_costs (tree op, int freq,
> +                       struct inline_summary *info,
> +                       struct ipa_node_params *parms_info,
> +                       struct predicate *p,
> +                       VEC (predicate_t, heap) *nonconstant_names,
> +                       int &this_time, int &this_size)
> +{
> +  int cost;
> +
> +  while (handled_component_p (op))
> +    {
> +      if ((TREE_CODE (op) == ARRAY_REF
> +           || TREE_CODE (op) == ARRAY_RANGE_REF)
> +          && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME
> +          && TREE_CODE (array_ref_low_bound (op)) == INTEGER_CST
> +          && TREE_CODE (array_ref_element_size (op)) == INTEGER_CST
> +          && (cost = estimate_ref_cost (op)) != 0)
> +        {
> +          predicate ref_will_be_nonconstant;
> +          if (parms_info)
> +            {
> +              ref_will_be_nonconstant
> +                = will_be_nonconstant_expr_predicate
> +                    (parms_info, info, TREE_OPERAND (op, 1),
> +                     nonconstant_names);
> +              ref_will_be_nonconstant
> +                = and_predicates (info->conds,
> +                                  &ref_will_be_nonconstant, p);
> +            }
> +          else
> +            ref_will_be_nonconstant = *p;
> +          this_time -= cost;
> +          this_size -= cost;
> +          account_size_time (info, cost * 2, cost * freq * 2,
> +                             &ref_will_be_nonconstant);
> +        }
> +      op = TREE_OPERAND (op, 0);
> +    }
> +  if (TREE_CODE (op) == MEM_REF
> +      && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME
> +      && (cost = estimate_ref_cost (op)) != 0)
> +    {
> +      predicate ref_will_be_nonconstant;
> +      if (parms_info)
> +        {
> +          ref_will_be_nonconstant
> +            = will_be_nonconstant_expr_predicate
> +                (parms_info, info, TREE_OPERAND (op, 1),
> +                 nonconstant_names);
> +          ref_will_be_nonconstant
> +            = and_predicates (info->conds,
> +                              &ref_will_be_nonconstant, p);
> +        }
> +      else
> +        ref_will_be_nonconstant = *p;
> +      this_time -= cost;
> +      this_size -= cost;
> +      account_size_time (info, cost * 2, cost * freq * 2,
> +                         &ref_will_be_nonconstant);
> +    }
> +  gcc_checking_assert (this_time >= 0);
> +  gcc_checking_assert (this_size >= 0);
> +}
> +
> /* Compute function body size parameters for NODE.
> When EARLY is true, we compute only simple summaries without
> non-trivial predicates to drive the early inliner. */
> @@ -2351,11 +2422,14 @@ estimate_function_body_sizes (struct cgr
>            fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
>                     ((double)freq)/CGRAPH_FREQ_BASE, this_size, this_time);
>          }
> +      time += this_time * freq;
> +      size += this_size;
>
>        if (is_gimple_call (stmt))
>          {
>            struct cgraph_edge *edge = cgraph_edge (node, stmt);
>            struct inline_edge_summary *es = inline_edge_summary (edge);
> +          int i;
>
> /* Special case: results of BUILT_IN_CONSTANT_P will be always
> resolved as constant. We however don't want to optimize
> @@ -2387,13 +2461,26 @@ estimate_function_body_sizes (struct cgr
> }
> }
>
> +          /* Account address costs separately.  */
> +          if (gimple_call_lhs (stmt))
> +            account_address_costs (gimple_call_lhs (stmt), freq, info,
> +                                   parms_info, &bb_predicate,
> +                                   nonconstant_names,
> +                                   this_time, this_size);
> +          for (i = 0; i < (int) gimple_call_num_args (stmt); i++)
> +            account_address_costs (gimple_call_arg (stmt, i), freq,
> +                                   info, parms_info, &bb_predicate,
> +                                   nonconstant_names,
> +                                   this_time, this_size);
> +
>            es->call_stmt_size = this_size;
>            es->call_stmt_time = this_time;
>            es->loop_depth = bb_loop_depth (bb);
>            edge_set_predicate (edge, &bb_predicate);
> +          continue;
>          }
>
> -      /* TODO: When conditional jump or swithc is known to be constant, but
> +      /* TODO: When conditional jump or switch is known to be constant, but
>           we did not translate it into the predicates, we really can account
>           just maximum of the possible paths.  */
> if (parms_info)
> @@ -2403,10 +2490,7 @@ estimate_function_body_sizes (struct cgr
>        if (this_time || this_size)
>          {
>            struct predicate p;
> -
> -          this_time *= freq;
> -          time += this_time;
> -          size += this_size;
> +          unsigned int i;
>
>            prob = eliminated_by_inlining_prob (stmt);
>            if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
> @@ -2420,6 +2504,42 @@ estimate_function_body_sizes (struct cgr
>            else
>              p = true_predicate ();
>
> +          switch (gimple_code (stmt))
> +            {
> +            case GIMPLE_ASSIGN:
> +              account_address_costs (gimple_assign_lhs (stmt), freq, info,
> +                                     parms_info, &p, nonconstant_names,
> +                                     this_time, this_size);
> +              if (gimple_assign_single_p (stmt))
> +                account_address_costs (gimple_assign_rhs1 (stmt), freq,
> +                                       info, parms_info, &p,
> +                                       nonconstant_names,
> +                                       this_time, this_size);
> +              break;
> +            case GIMPLE_RETURN:
> +              if (gimple_return_retval (stmt))
> +                account_address_costs (gimple_return_retval (stmt), freq,
> +                                       info, parms_info, &p,
> +                                       nonconstant_names,
> +                                       this_time, this_size);
> +              break;
> +            case GIMPLE_ASM:
> +              for (i = 0; i < gimple_asm_ninputs (stmt); i++)
> +                account_address_costs (TREE_VALUE (gimple_asm_input_op
> +                                                     (stmt, i)),
> +                                       freq, info, parms_info, &p,
> +                                       nonconstant_names,
> +                                       this_time, this_size);
> +              for (i = 0; i < gimple_asm_noutputs (stmt); i++)
> +                account_address_costs (TREE_VALUE (gimple_asm_output_op
> +                                                     (stmt, i)),
> +                                       freq, info, parms_info, &p,
> +                                       nonconstant_names,
> +                                       this_time, this_size);
> +              break;
> +            default:
> +              break;
> +            }
> +
> +          this_time *= freq;
> +
>            /* We account everything but the calls.  Calls have their own
>               size/time info attached to cgraph edges.  This is necessary
>               in order to make the cost disappear after inlining.  */
>
>
--
Richard Biener <[email protected]>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer