On Thu, 25 Oct 2012, Jan Hubicka wrote:

> Hi,
> while looking into cunroll issues I noticed that we miss quite lot of 
> important unrolling
> at -O2 for EON because we think it will increase code size.  This is because 
> we do not
> account the fact that making array index constant is good thing.
> 
> This tracks back to the fact that estimate_num_insns do not consider refs to 
> be expensive
> thing at all.  This patch adds simple costmodel for those accounting the 
> address arithmetics
> + updates inliner to take into account when inlining eliminate them.
> 
> Bootstrapped/regtested x86_64-linux, tested on few benchmarks (tramp3d, eon,
> mozilla) that it generally decrease code size without measurable slowdowns
> 
> OK?
> Honza
>       * tree-inline.c (estimate_ref_cost): New function.
>       (estimate_operand_cost): New function.
>       (estimate_num_insns): Estimate operand costs for calls, returns,
>       moves and asm statements.
>       * tree-inline.h (estimate_ref_cost): Declare.
>       * ipa-inline-analysis.c (account_address_costs): New function.
>       (estimate_function_body_sizes): Use it.
> Index: tree-inline.c
> ===================================================================
> --- tree-inline.c     (revision 192761)
> +++ tree-inline.c     (working copy)
> @@ -3322,6 +3322,69 @@ estimate_move_cost (tree type)
>      return ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
>  }
>  
> +/* Return cost of the reference REF.  This is intended to primarily
> +   handle address computations hidden in ARRAY_REFs and TARGET_MEM_REFs.
> +   Do not dive recursively, the caller is supposed to walk all the
> +   handled components.  This is so to make it easy for other code to
> +   compensate the cost when some of references becomes constant.  */
> +
> +int
> +estimate_ref_cost (tree ref)
> +{
> +  int cost = 0;
> +
> +  switch (TREE_CODE (ref))
> +    {
> +      case MEM_REF:
> +     /* One variable addition may be needed.  */
> +     if (TREE_CODE (TREE_OPERAND (ref, 1)) != INTEGER_CST)
> +       cost++;
> +     return cost;
> +
> +      case TARGET_MEM_REF:
> +     if (TREE_CODE (TMR_INDEX (ref)) != INTEGER_CST
> +         || TREE_CODE (TMR_STEP (ref)) != INTEGER_CST)
> +       cost++;
> +     if ((TMR_INDEX2 (ref)
> +          && TREE_CODE (TMR_INDEX2 (ref)) != INTEGER_CST)
> +         || TREE_CODE (TMR_OFFSET (ref)) != INTEGER_CST)
> +       cost++;
> +     return cost;
> +
> +      case ARRAY_REF:
> +      case ARRAY_RANGE_REF:
> +     if (TREE_CODE (TREE_OPERAND (ref, 1)) == INTEGER_CST
> +         && (TREE_CODE (array_ref_low_bound (ref)) == INTEGER_CST)
> +         && (TREE_CODE (array_ref_element_size (ref)) == INTEGER_CST))
> +       return 0;
> +     /* Arrays of variable length objects are more costly by needing
> +        arbitrary multiply.  */
> +     if (TREE_CODE (TYPE_SIZE (TREE_TYPE (ref)))
> +         == INTEGER_CST)
> +       return 2;
> +     else
> +       return 1;
> +      default:
> +     return 0;
> +    }
> +}
> +
> +/* For operands of load/stores estimate cost of the address computations
> +   involved.  */
> +
> +static int
> +estimate_operand_cost (tree op)
> +{
> +  int cost = 0;
> +  while (handled_component_p (op))
> +    {
> +      cost += estimate_ref_cost (op);
> +      op = TREE_OPERAND (op, 0);
> +    }
> +  /* account (TARGET_)MEM_REF.  */
> +  return cost + estimate_ref_cost (op);

ICK ...

Why not sth as simple as

     return num_ssa_operands (stmt, SSA_OP_USE);

?  a[1][2] and b[2] really have the same cost, variable length
objects have extra SSA operands in ARRAY_REF/COMPONENT_REF for
the size.  Thus, stmt cost somehow should reflect the number
of dependent stmts.

So in estimate_num_insns I'd try

int
estimate_num_insns (gimple stmt, eni_weights *weights)
{
  unsigned cost, i;
  enum gimple_code code = gimple_code (stmt);
  tree lhs;
  tree rhs;

  switch (code)
  {
   case GIMPLE_ASSIGN:
    /* Initialize with the number of SSA uses, one is free.  */
    cost = num_ssa_operands (stmt, SSA_OP_USE);
    if (cost > 1)
      --cost;

  ...

Richard.


> +}
> +
>  /* Returns cost of operation CODE, according to WEIGHTS  */
>  
>  static int
> @@ -3520,6 +3583,9 @@ estimate_num_insns (gimple stmt, eni_wei
>        if (!is_gimple_reg (rhs) && !is_gimple_min_invariant (rhs))
>       cost += estimate_move_cost (TREE_TYPE (rhs));
>  
> +      cost += estimate_operand_cost (lhs);
> +      cost += estimate_operand_cost (rhs);
> +
>        cost += estimate_operator_cost (gimple_assign_rhs_code (stmt), weights,
>                                             gimple_assign_rhs1 (stmt),
>                                     get_gimple_rhs_class 
> (gimple_assign_rhs_code (stmt))
> @@ -3585,17 +3651,24 @@ estimate_num_insns (gimple stmt, eni_wei
>  
>       cost = node ? weights->call_cost : weights->indirect_call_cost;
>       if (gimple_call_lhs (stmt))
> -       cost += estimate_move_cost (TREE_TYPE (gimple_call_lhs (stmt)));
> +       {
> +         cost += estimate_move_cost (TREE_TYPE (gimple_call_lhs (stmt)));
> +         cost += estimate_operand_cost (gimple_call_lhs (stmt));
> +       }
>       for (i = 0; i < gimple_call_num_args (stmt); i++)
>         {
>           tree arg = gimple_call_arg (stmt, i);
> +         cost += estimate_operand_cost (arg);
>           cost += estimate_move_cost (TREE_TYPE (arg));
>         }
>       break;
>        }
>  
>      case GIMPLE_RETURN:
> -      return weights->return_cost;
> +      return (weights->return_cost
> +           + (gimple_return_retval (stmt)
> +              ? estimate_operand_cost (gimple_return_retval (stmt))
> +              : 0));
>  
>      case GIMPLE_GOTO:
>      case GIMPLE_LABEL:
> @@ -3606,7 +3679,18 @@ estimate_num_insns (gimple stmt, eni_wei
>        return 0;
>  
>      case GIMPLE_ASM:
> -      return asm_str_count (gimple_asm_string (stmt));
> +      {
> +     int cost = asm_str_count (gimple_asm_string (stmt));
> +     unsigned int i;
> +
> +     for (i = 0; i < gimple_asm_ninputs (stmt); i++)
> +       cost += estimate_operand_cost
> +                 (TREE_VALUE (gimple_asm_input_op (stmt, i)));
> +     for (i = 0; i < gimple_asm_noutputs (stmt); i++)
> +       cost += estimate_operand_cost
> +                 (TREE_VALUE (gimple_asm_output_op (stmt, i)));
> +        return cost;
> +      }
>  
>      case GIMPLE_RESX:
>        /* This is either going to be an external function call with one
> Index: tree-inline.h
> ===================================================================
> --- tree-inline.h     (revision 192761)
> +++ tree-inline.h     (working copy)
> @@ -186,6 +186,7 @@ tree copy_decl_no_change (tree decl, cop
>  int estimate_move_cost (tree type);
>  int estimate_num_insns (gimple, eni_weights *);
>  int estimate_num_insns_fn (tree, eni_weights *);
> +int estimate_ref_cost (tree);
>  int count_insns_seq (gimple_seq, eni_weights *);
>  bool tree_versionable_function_p (tree);
>  
> Index: ipa-inline-analysis.c
> ===================================================================
> --- ipa-inline-analysis.c     (revision 192761)
> +++ ipa-inline-analysis.c     (working copy)
> @@ -2240,6 +2240,77 @@ predicate_for_phi_result (struct inline_
>              SSA_NAME_VERSION (gimple_phi_result (phi)), *p);
>  }
>  
> +/* Account costs related to the address operation 
> +   of OP executed with frequency FREQ with predicate P.
> +   INFO and PARMS_INFO hold the function analyzed, NONCONSTANT_NAMES
> +   the vector of known SSA names.
> +   THIS_TIME/THIS_SIZE will be updated accordingly.  */
> +static void
> +account_address_costs (tree op, int freq,
> +                    struct inline_summary *info,
> +                    struct ipa_node_params *parms_info,
> +                    struct predicate *p,
> +                    VEC (predicate_t, heap) *nonconstant_names,
> +                    int &this_time, int &this_size)
> +{
> +  int cost;
> +  while (handled_component_p (op))
> +    {
> +      if ((TREE_CODE (op) == ARRAY_REF
> +        || TREE_CODE (op) == ARRAY_RANGE_REF)
> +       && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME
> +       && (TREE_CODE (array_ref_low_bound (op)) == INTEGER_CST)
> +       && (TREE_CODE (array_ref_element_size (op)) == INTEGER_CST)
> +       && (cost = estimate_ref_cost (op)) != 0)
> +     {
> +        predicate ref_will_be_nonconstant;
> +        if (parms_info)
> +          {
> +            ref_will_be_nonconstant
> +               = will_be_nonconstant_expr_predicate
> +                     (parms_info, info, TREE_OPERAND (op, 1),
> +                      nonconstant_names);
> +            ref_will_be_nonconstant
> +               = and_predicates (info->conds,
> +                                 &ref_will_be_nonconstant,
> +                                 p);
> +          }
> +        else
> +          ref_will_be_nonconstant = *p;
> +        this_time -= cost;
> +        this_size -= cost;
> +        account_size_time (info, cost * 2,
> +                           cost * freq * 2, &ref_will_be_nonconstant);
> +     }
> +      op = TREE_OPERAND (op, 0);
> +    }
> +  if (TREE_CODE (op) == MEM_REF
> +      && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME
> +      && (cost = estimate_ref_cost (op)) != 0)
> +    {
> +       predicate ref_will_be_nonconstant;
> +       if (parms_info)
> +      {
> +        ref_will_be_nonconstant
> +           = will_be_nonconstant_expr_predicate
> +                 (parms_info, info, TREE_OPERAND (op, 1),
> +                  nonconstant_names);
> +        ref_will_be_nonconstant
> +           = and_predicates (info->conds,
> +                             &ref_will_be_nonconstant,
> +                             p);
> +      }
> +       else
> +      ref_will_be_nonconstant = *p;
> +       this_time -= cost;
> +       this_size -= cost;
> +       account_size_time (info, cost * 2,
> +                       cost * freq * 2, &ref_will_be_nonconstant);
> +    }
> +  gcc_checking_assert (this_time >= 0);
> +  gcc_checking_assert (this_size >= 0);
> +}
> +
>  /* Compute function body size parameters for NODE.
>     When EARLY is true, we compute only simple summaries without
>     non-trivial predicates to drive the early inliner.  */
> @@ -2351,11 +2422,14 @@ estimate_function_body_sizes (struct cgr
>             fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
>                      ((double)freq)/CGRAPH_FREQ_BASE, this_size, this_time);
>           }
> +          time += this_time * freq;
> +       size += this_size;
>  
>         if (is_gimple_call (stmt))
>           {
>             struct cgraph_edge *edge = cgraph_edge (node, stmt);
>             struct inline_edge_summary *es = inline_edge_summary (edge);
> +           int i;
>  
>             /* Special case: results of BUILT_IN_CONSTANT_P will be always
>                resolved as constant.  We however don't want to optimize
> @@ -2387,13 +2461,26 @@ estimate_function_body_sizes (struct cgr
>                   }
>               }
>  
> +           /* Account address costs separately.  */
> +           if (gimple_call_lhs (stmt))
> +             account_address_costs (gimple_call_lhs (stmt), freq, info,
> +                                    parms_info, &bb_predicate,
> +                                    nonconstant_names,
> +                                    this_time, this_size);
> +           for (i = 0; i < (int) gimple_call_num_args (stmt); i++)
> +             account_address_costs (gimple_call_arg (stmt, i), freq,
> +                                    info, parms_info, &bb_predicate,
> +                                    nonconstant_names,
> +                                    this_time, this_size);
> +
>             es->call_stmt_size = this_size;
>             es->call_stmt_time = this_time;
>             es->loop_depth = bb_loop_depth (bb);
>             edge_set_predicate (edge, &bb_predicate);
> +           continue;
>           }
>  
> -       /* TODO: When conditional jump or swithc is known to be constant, but
> +       /* TODO: When conditional jump or switch is known to be constant, but
>            we did not translate it into the predicates, we really can account
>            just maximum of the possible paths.  */
>         if (parms_info)
> @@ -2403,10 +2490,7 @@ estimate_function_body_sizes (struct cgr
>         if (this_time || this_size)
>           {
>             struct predicate p;
> -
> -           this_time *= freq;
> -           time += this_time;
> -           size += this_size;
> +           unsigned int i;
>  
>             prob = eliminated_by_inlining_prob (stmt);
>             if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
> @@ -2420,6 +2504,42 @@ estimate_function_body_sizes (struct cgr
>             else
>               p = true_predicate ();
>  
> +           switch (gimple_code (stmt))
> +             {
> +             case GIMPLE_ASSIGN:
> +               account_address_costs (gimple_assign_lhs (stmt), freq, info,
> +                                      parms_info, &p, nonconstant_names,
> +                                      this_time, this_size);
> +               if (gimple_assign_single_p (stmt))
> +                 account_address_costs (gimple_assign_rhs1 (stmt), freq, 
> info,
> +                                        parms_info, &p, nonconstant_names,
> +                                        this_time, this_size);
> +               break;
> +             case GIMPLE_RETURN:
> +               if (gimple_return_retval (stmt))
> +                 account_address_costs (gimple_return_retval (stmt), freq, 
> info,
> +                                        parms_info, &p, nonconstant_names,
> +                                        this_time, this_size);
> +               break;
> +             case GIMPLE_ASM:
> +               for (i = 0; i < gimple_asm_ninputs (stmt); i++)
> +                 account_address_costs (TREE_VALUE (gimple_asm_input_op
> +                                                      (stmt, i)),
> +                                        freq, info,
> +                                        parms_info, &p, nonconstant_names,
> +                                        this_time, this_size);
> +               for (i = 0; i < gimple_asm_noutputs (stmt); i++)
> +                 account_address_costs (TREE_VALUE (gimple_asm_output_op
> +                                                      (stmt, i)),
> +                                        freq, info,
> +                                        parms_info, &p, nonconstant_names,
> +                                        this_time, this_size);
> +             default:
> +               break;
> +             }
> +
> +           this_time *= freq;
> +
>             /* We account everything but the calls.  Calls have their own
>                size/time info attached to cgraph edges.  This is necessary
>                in order to make the cost disappear after inlining.  */
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend

Reply via email to