On Thu, 25 Oct 2012, Jan Hubicka wrote:

> Hi,
> while looking into cunroll issues I noticed that we miss quite a lot of
> important unrolling at -O2 for EON because we think it will increase
> code size.  This is because we do not account for the fact that making
> an array index constant is a good thing.
>
> This tracks back to the fact that estimate_num_insns does not consider
> refs to be expensive at all.  This patch adds a simple cost model for
> those, accounting for the address arithmetic, and updates the inliner
> to take into account when inlining eliminates them.
>
> Bootstrapped/regtested x86_64-linux, tested on a few benchmarks
> (tramp3d, eon, mozilla); it generally decreases code size without
> measurable slowdowns.
>
> OK?
> Honza
>
> 	* tree-inline.c (estimate_ref_cost): New function.
> 	(estimate_operand_cost): New function.
> 	(estimate_num_insns): Estimate operand costs for calls, returns,
> 	moves and asm statements.
> 	* tree-inline.h (estimate_ref_cost): Declare.
> 	* ipa-inline-analysis.c (account_address_costs): New function.
> 	(estimate_function_body_sizes): Use it.
>
> Index: tree-inline.c
> ===================================================================
> --- tree-inline.c	(revision 192761)
> +++ tree-inline.c	(working copy)
> @@ -3322,6 +3322,69 @@ estimate_move_cost (tree type)
>    return ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
>  }
>  
> +/* Return cost of the reference REF.  This is intended primarily to
> +   handle address computations hidden in ARRAY_REFs and TARGET_MEM_REFs.
> +   Do not dive recursively; the caller is supposed to walk all the
> +   handled components.  This is to make it easy for other code to
> +   compensate the cost when some of the references become constant.  */
> +
> +int
> +estimate_ref_cost (tree ref)
> +{
> +  int cost = 0;
> +
> +  switch (TREE_CODE (ref))
> +    {
> +    case MEM_REF:
> +      /* One variable addition may be needed.  */
> +      if (TREE_CODE (TREE_OPERAND (ref, 1)) != INTEGER_CST)
> +	cost++;
> +      return cost;
> +
> +    case TARGET_MEM_REF:
> +      if (TREE_CODE (TMR_INDEX (ref)) != INTEGER_CST
> +	  || TREE_CODE (TMR_STEP (ref)) != INTEGER_CST)
> +	cost++;
> +      if ((TMR_INDEX2 (ref)
> +	   && TREE_CODE (TMR_INDEX2 (ref)) != INTEGER_CST)
> +	  || TREE_CODE (TMR_OFFSET (ref)) != INTEGER_CST)
> +	cost++;
> +      return cost;
> +
> +    case ARRAY_REF:
> +    case ARRAY_RANGE_REF:
> +      if (TREE_CODE (TREE_OPERAND (ref, 1)) == INTEGER_CST
> +	  && TREE_CODE (array_ref_low_bound (ref)) == INTEGER_CST
> +	  && TREE_CODE (array_ref_element_size (ref)) == INTEGER_CST)
> +	return 0;
> +      /* Arrays of variable length objects are more costly by needing
> +	 an arbitrary multiply.  */
> +      if (TREE_CODE (TYPE_SIZE (TREE_TYPE (ref))) != INTEGER_CST)
> +	return 2;
> +      else
> +	return 1;
> +
> +    default:
> +      return 0;
> +    }
> +}
> +
> +/* For operands of loads/stores estimate the cost of the address
> +   computations involved.  */
> +
> +static int
> +estimate_operand_cost (tree op)
> +{
> +  int cost = 0;
> +  while (handled_component_p (op))
> +    {
> +      cost += estimate_ref_cost (op);
> +      op = TREE_OPERAND (op, 0);
> +    }
> +  /* Account (TARGET_)MEM_REF.  */
> +  return cost + estimate_ref_cost (op);
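To make the motivation concrete, a hypothetical example (mine, not from
the patch): an ARRAY_REF with a variable index is charged for its
address arithmetic, while one whose index, low bound and element size
are all constant is free, so complete unrolling makes the charge
disappear:

  /* Illustration of the proposed cost model.  */
  int a[4];

  int
  sum (void)
  {
    int s = 0, i;
    for (i = 0; i < 4; i++)
      s += a[i];	/* ARRAY_REF with SSA-name index: cost 1.  */
    return s;
  }

  /* After complete unrolling the body is effectively
       s = a[0] + a[1] + a[2] + a[3];
     and estimate_ref_cost returns 0 for each ARRAY_REF.  */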
ICK ...

Why not something as simple as

  return num_ssa_operands (stmt, SSA_OP_USE);

?  a[1][2] and b[2] really have the same cost, and variable-length
objects have extra SSA operands in the ARRAY_REF/COMPONENT_REF for the
size.  Thus, the stmt cost should somehow reflect the number of
dependent stmts.

So in estimate_num_insns I'd try

  int
  estimate_num_insns (gimple stmt, eni_weights *weights)
  {
    unsigned cost, i;
    enum gimple_code code = gimple_code (stmt);
    tree lhs;
    tree rhs;

    switch (code)
      {
      case GIMPLE_ASSIGN:
	/* Initialize with the number of SSA uses, one is free.  */
	cost = num_ssa_operands (stmt, SSA_OP_USE);
	if (cost > 1)
	  --cost;
  ...
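For illustration, a rough sketch of what that counting would charge (my
reading, untested; the exact operand counts depend on how the refs are
lowered to GIMPLE):

  /* Hypothetical example: constant indices leave no SSA uses, so
     a[1][2] and b[2] come out equally free, while each variable
     index adds a use and therefore a unit of cost.  */
  int a[3][3], b[9];

  int
  f (int i)
  {
    int x = a[1][2];	/* 0 SSA uses on the RHS: free.  */
    int y = b[2];	/* 0 SSA uses: the same (free) cost.  */
    int z = a[i][i];	/* 2 uses of i: cost 1 after the free one.  */
    int w = b[i];	/* 1 use: cost 1.  */
    return x + y + z + w;
  }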
Richard.

> +}
> +
>  /* Returns cost of operation CODE, according to WEIGHTS */
>  
>  static int
> @@ -3520,6 +3583,9 @@ estimate_num_insns (gimple stmt, eni_wei
>        if (!is_gimple_reg (rhs) && !is_gimple_min_invariant (rhs))
>  	cost += estimate_move_cost (TREE_TYPE (rhs));
>  
> +      cost += estimate_operand_cost (lhs);
> +      cost += estimate_operand_cost (rhs);
> +
>        cost += estimate_operator_cost (gimple_assign_rhs_code (stmt), weights,
>  				      gimple_assign_rhs1 (stmt),
>  				      get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
> @@ -3585,17 +3651,24 @@ estimate_num_insns (gimple stmt, eni_wei
>  
>        cost = node ? weights->call_cost : weights->indirect_call_cost;
>        if (gimple_call_lhs (stmt))
> -	cost += estimate_move_cost (TREE_TYPE (gimple_call_lhs (stmt)));
> +	{
> +	  cost += estimate_move_cost (TREE_TYPE (gimple_call_lhs (stmt)));
> +	  cost += estimate_operand_cost (gimple_call_lhs (stmt));
> +	}
>        for (i = 0; i < gimple_call_num_args (stmt); i++)
>  	{
>  	  tree arg = gimple_call_arg (stmt, i);
> +	  cost += estimate_operand_cost (arg);
>  	  cost += estimate_move_cost (TREE_TYPE (arg));
>  	}
>        break;
>      }
>  
>      case GIMPLE_RETURN:
> -      return weights->return_cost;
> +      return (weights->return_cost
> +	      + (gimple_return_retval (stmt)
> +		 ? estimate_operand_cost (gimple_return_retval (stmt))
> +		 : 0));
>  
>      case GIMPLE_GOTO:
>      case GIMPLE_LABEL:
> @@ -3606,7 +3679,18 @@ estimate_num_insns (gimple stmt, eni_wei
>        return 0;
>  
>      case GIMPLE_ASM:
> -      return asm_str_count (gimple_asm_string (stmt));
> +      {
> +	int cost = asm_str_count (gimple_asm_string (stmt));
> +	unsigned int i;
> +
> +	for (i = 0; i < gimple_asm_ninputs (stmt); i++)
> +	  cost += estimate_operand_cost
> +		    (TREE_VALUE (gimple_asm_input_op (stmt, i)));
> +	for (i = 0; i < gimple_asm_noutputs (stmt); i++)
> +	  cost += estimate_operand_cost
> +		    (TREE_VALUE (gimple_asm_output_op (stmt, i)));
> +	return cost;
> +      }
>  
>      case GIMPLE_RESX:
>        /* This is either going to be an external function call with one
> Index: tree-inline.h
> ===================================================================
> --- tree-inline.h	(revision 192761)
> +++ tree-inline.h	(working copy)
> @@ -186,6 +186,7 @@ tree copy_decl_no_change (tree decl, cop
>  int estimate_move_cost (tree type);
>  int estimate_num_insns (gimple, eni_weights *);
>  int estimate_num_insns_fn (tree, eni_weights *);
> +int estimate_ref_cost (tree);
>  int count_insns_seq (gimple_seq, eni_weights *);
>  bool tree_versionable_function_p (tree);
>  
> Index: ipa-inline-analysis.c
> ===================================================================
> --- ipa-inline-analysis.c	(revision 192761)
> +++ ipa-inline-analysis.c	(working copy)
> @@ -2240,6 +2240,77 @@ predicate_for_phi_result (struct inline_
>  		     SSA_NAME_VERSION (gimple_phi_result (phi)), *p);
>  }
>  
> +/* Account costs related to the address operations of OP, executed with
> +   frequency FREQ under predicate P.  INFO and PARMS_INFO describe the
> +   function being analyzed; NONCONSTANT_NAMES is the vector of known
> +   SSA names.  THIS_TIME/THIS_SIZE are updated accordingly.  */
> +
> +static void
> +account_address_costs (tree op, int freq,
> +		       struct inline_summary *info,
> +		       struct ipa_node_params *parms_info,
> +		       struct predicate *p,
> +		       VEC (predicate_t, heap) *nonconstant_names,
> +		       int &this_time, int &this_size)
> +{
> +  int cost;
> +  while (handled_component_p (op))
> +    {
> +      if ((TREE_CODE (op) == ARRAY_REF
> +	   || TREE_CODE (op) == ARRAY_RANGE_REF)
> +	  && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME
> +	  && TREE_CODE (array_ref_low_bound (op)) == INTEGER_CST
> +	  && TREE_CODE (array_ref_element_size (op)) == INTEGER_CST
> +	  && (cost = estimate_ref_cost (op)) != 0)
> +	{
> +	  predicate ref_will_be_nonconstant;
> +	  if (parms_info)
> +	    {
> +	      ref_will_be_nonconstant
> +		= will_be_nonconstant_expr_predicate
> +		    (parms_info, info, TREE_OPERAND (op, 1),
> +		     nonconstant_names);
> +	      ref_will_be_nonconstant
> +		= and_predicates (info->conds,
> +				  &ref_will_be_nonconstant, p);
> +	    }
> +	  else
> +	    ref_will_be_nonconstant = *p;
> +	  this_time -= cost;
> +	  this_size -= cost;
> +	  account_size_time (info, cost * 2,
> +			     cost * freq * 2, &ref_will_be_nonconstant);
> +	}
> +      op = TREE_OPERAND (op, 0);
> +    }
> +  if (TREE_CODE (op) == MEM_REF
> +      && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME
> +      && (cost = estimate_ref_cost (op)) != 0)
> +    {
> +      predicate ref_will_be_nonconstant;
> +      if (parms_info)
> +	{
> +	  ref_will_be_nonconstant
> +	    = will_be_nonconstant_expr_predicate
> +		(parms_info, info, TREE_OPERAND (op, 1),
> +		 nonconstant_names);
> +	  ref_will_be_nonconstant
> +	    = and_predicates (info->conds,
> +			      &ref_will_be_nonconstant, p);
> +	}
> +      else
> +	ref_will_be_nonconstant = *p;
> +      this_time -= cost;
> +      this_size -= cost;
> +      account_size_time (info, cost * 2,
> +			 cost * freq * 2, &ref_will_be_nonconstant);
> +    }
> +  gcc_checking_assert (this_time >= 0);
> +  gcc_checking_assert (this_size >= 0);
> +}
> +
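To see what this bookkeeping buys at a call site, consider a
hypothetical callee (my illustration, not part of the patch): the
address cost of a[i] is subtracted from the unconditional
this_time/this_size and re-entered via account_size_time under the
"i will be nonconstant" predicate, so an inline copy that sees a
constant argument does not pay it:

  int a[16];

  static int
  get (int i)
  {
    return a[i];	/* address cost guarded by the predicate
			   that I is nonconstant.  */
  }

  int
  user (void)
  {
    return get (5);	/* after inlining, a[5] has a constant index
			   and the accounted cost disappears.  */
  }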
>  /* Compute function body size parameters for NODE.
>     When EARLY is true, we compute only simple summaries without
>     non-trivial predicates to drive the early inliner.  */
> @@ -2351,11 +2422,14 @@ estimate_function_body_sizes (struct cgr
>  	    fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
>  		     ((double)freq)/CGRAPH_FREQ_BASE, this_size, this_time);
>  	}
> +      time += this_time * freq;
> +      size += this_size;
>  
>        if (is_gimple_call (stmt))
>  	{
>  	  struct cgraph_edge *edge = cgraph_edge (node, stmt);
>  	  struct inline_edge_summary *es = inline_edge_summary (edge);
> +	  int i;
>  
>  	  /* Special case: results of BUILT_IN_CONSTANT_P will be always
>  	     resolved as constant.  We however don't want to optimize
> @@ -2387,13 +2461,26 @@ estimate_function_body_sizes (struct cgr
>  	    }
>  	  }
>  
> +	  /* Account address costs separately.  */
> +	  if (gimple_call_lhs (stmt))
> +	    account_address_costs (gimple_call_lhs (stmt), freq, info,
> +				   parms_info, &bb_predicate,
> +				   nonconstant_names,
> +				   this_time, this_size);
> +	  for (i = 0; i < (int) gimple_call_num_args (stmt); i++)
> +	    account_address_costs (gimple_call_arg (stmt, i), freq,
> +				   info, parms_info, &bb_predicate,
> +				   nonconstant_names,
> +				   this_time, this_size);
> +
>  	  es->call_stmt_size = this_size;
>  	  es->call_stmt_time = this_time;
>  	  es->loop_depth = bb_loop_depth (bb);
>  	  edge_set_predicate (edge, &bb_predicate);
> +	  continue;
>  	}
>  
> -      /* TODO: When conditional jump or swithc is known to be constant, but
> +      /* TODO: When conditional jump or switch is known to be constant, but
>  	 we did not translate it into the predicates, we really can account
>  	 just maximum of the possible paths.  */
>        if (parms_info)
> @@ -2403,10 +2490,7 @@ estimate_function_body_sizes (struct cgr
>        if (this_time || this_size)
>  	{
>  	  struct predicate p;
> -
> -	  this_time *= freq;
> -	  time += this_time;
> -	  size += this_size;
> +	  unsigned int i;
>  
>  	  prob = eliminated_by_inlining_prob (stmt);
>  	  if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
> @@ -2420,6 +2504,42 @@ estimate_function_body_sizes (struct cgr
>  	  else
>  	    p = true_predicate ();
>  
> +	  switch (gimple_code (stmt))
> +	    {
> +	    case GIMPLE_ASSIGN:
> +	      account_address_costs (gimple_assign_lhs (stmt), freq, info,
> +				     parms_info, &p, nonconstant_names,
> +				     this_time, this_size);
> +	      if (gimple_assign_single_p (stmt))
> +		account_address_costs (gimple_assign_rhs1 (stmt), freq, info,
> +				       parms_info, &p, nonconstant_names,
> +				       this_time, this_size);
> +	      break;
> +	    case GIMPLE_RETURN:
> +	      if (gimple_return_retval (stmt))
> +		account_address_costs (gimple_return_retval (stmt), freq, info,
> +				       parms_info, &p, nonconstant_names,
> +				       this_time, this_size);
> +	      break;
> +	    case GIMPLE_ASM:
> +	      for (i = 0; i < gimple_asm_ninputs (stmt); i++)
> +		account_address_costs (TREE_VALUE (gimple_asm_input_op
> +						     (stmt, i)),
> +				       freq, info,
> +				       parms_info, &p, nonconstant_names,
> +				       this_time, this_size);
> +	      for (i = 0; i < gimple_asm_noutputs (stmt); i++)
> +		account_address_costs (TREE_VALUE (gimple_asm_output_op
> +						     (stmt, i)),
> +				       freq, info,
> +				       parms_info, &p, nonconstant_names,
> +				       this_time, this_size);
> +	    default:
> +	      break;
> +	    }
> +
> +	  this_time *= freq;
> +
>  	  /* We account everything but the calls.  Calls have their own
>  	     size/time info attached to cgraph edges.  This is necessary
>  	     in order to make the cost disappear after inlining.  */

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend