This patch implements indirect call promotion for AutoFDO. Bootstrapped and passed gcc regression tests.
Is it okay for gcc-4_7 branch? Thanks, Dehao http://codereview.appspot.com/8814043 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index d17b250..c217846 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3039,7 +3039,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h $(TARGET_H) $(AUTO_PROFILE_H) auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \ $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) $(PROFILE_H) \ - $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H) \ + $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H) value-prof.h \ $(AUTO_PROFILE_H) cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \ diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c index cf17370..38f4209 100644 --- a/gcc/auto-profile.c +++ b/gcc/auto-profile.c @@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see #include "gimple.h" #include "cgraph.h" #include "tree-flow.h" +#include "value-prof.h" #include "auto-profile.h" /* The following routines implements AutoFDO optimization. @@ -109,6 +110,8 @@ struct gcov_stack const char *callee_name; struct gcov_callsite_pos *stack; gcov_unsigned_t size; + struct gcov_hist *hist; + gcov_unsigned_t hist_size; gcov_type num_inst; gcov_type count; gcov_type max_count; @@ -150,6 +153,24 @@ struct afdo_module char **strings; }; +enum afdo_histogram_type +{ + CALL_HIST = 1, + STRINGOP_HIST, + DIVMOD_HIST +}; + +struct gcov_hist +{ + enum afdo_histogram_type type; + union + { + const char *func_name; + unsigned long long value; + } value; + int count; +}; + /* Store the file name strings read from the profile data file. */ static const char **file_names; @@ -493,6 +514,64 @@ read_aux_modules (void) } } +/* From AutoFDO profiles, find values inside STMT for that we want to measure + histograms for indirect-call optimization. */ +static void +afdo_indirect_call (gimple stmt, struct gcov_hist *values, int hist_size) +{ + tree callee; + int i, total = 0; + int actual_count = 0; + histogram_value hist; + + if (gimple_code (stmt) != GIMPLE_CALL + || gimple_call_fndecl (stmt) != NULL_TREE) + return; + + callee = gimple_call_fn (stmt); + + for (i = 0; i < hist_size; i++) + if (values[i].type == CALL_HIST) + break; + + if (i == hist_size) + return; + + hist = gimple_alloc_histogram_value (cfun, HIST_TYPE_INDIR_CALL_TOPN, + stmt, callee); + hist->n_counters = (GCOV_ICALL_TOPN_VAL << 2) + 1; + hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); + gimple_add_histogram_value (cfun, stmt, hist); + + for (i = 0; i < hist_size; i++) + if (values[i].type == CALL_HIST) + { + total += values[i].count; + if (actual_count < 2) + { + hist->hvalue.counters[actual_count * 2 + 1] = + (unsigned long long) values[i].value.func_name; + hist->hvalue.counters[actual_count * 2 + 2] = values[i].count; + actual_count ++; + } + } + + hist->hvalue.counters[0] = total; + + if (actual_count == 1) + { + hist->hvalue.counters[3] = 0; + hist->hvalue.counters[4] = 0; + } +} + +/* From AutoFDO profiles, find values inside STMT for that we want to measure + histograms and adds them to list VALUES. */ +static void afdo_vpt (gimple stmt, struct gcov_hist *v, int hist_size) +{ + afdo_indirect_call (stmt, v, hist_size); +} + /* Return the size of the inline stack of the STMT. */ static int @@ -838,7 +917,8 @@ static bool get_stack_count (struct gcov_callsite_pos *pos_stack, const char *callee_name, int size, - gcov_type *count, gcov_type *max_count, gcov_type *num_inst) + gcov_type *count, gcov_type *max_count, gcov_type *num_inst, + gcov_unsigned_t *hist_size, struct gcov_hist **hist) { int i; @@ -858,6 +938,11 @@ get_stack_count (struct gcov_callsite_pos *pos_stack, *num_inst = entry->num_inst; if (max_count) *max_count = entry->max_count; + if (hist_size) + { + *hist_size = entry->hist_size; + *hist = entry->hist; + } return true; } else @@ -876,6 +961,11 @@ get_stack_count (struct gcov_callsite_pos *pos_stack, *num_inst = entry->num_inst; if (max_count) *max_count = entry->max_count; + if (hist_size) + { + *hist_size = entry->hist_size; + *hist = entry->hist; + } return true; } } @@ -885,6 +975,11 @@ get_stack_count (struct gcov_callsite_pos *pos_stack, *num_inst = 0; if (max_count) *max_count = 0; + if (hist_size) + { + *hist_size = 0; + *hist = 0; + } return false; } @@ -892,7 +987,8 @@ get_stack_count (struct gcov_callsite_pos *pos_stack, Return FALSE if profile is not found for STMT. */ static bool -get_stmt_count (gimple stmt, gcov_type *count, gcov_type *num_inst) +get_stmt_count (gimple stmt, gcov_type *count, gcov_type *num_inst, + gcov_unsigned_t *hist_size, struct gcov_hist **hist) { struct gcov_callsite_pos *pos_stack; int size; @@ -910,7 +1006,8 @@ get_stmt_count (gimple stmt, gcov_type *count, gcov_type *num_inst) get_inline_stack_by_stmt (stmt, current_function_decl, pos_stack, true); - return get_stack_count (pos_stack, NULL, size, count, NULL, num_inst); + return get_stack_count (pos_stack, NULL, size, count, NULL, num_inst, + hist_size, hist); } /* For a given EDGE, if IS_TOTAL is true, save EDGE->callee's total count @@ -938,13 +1035,13 @@ afdo_get_callsite_count (struct cgraph_edge *edge, gcov_type *count, get_discriminator_from_locus(gimple_location(edge->call_stmt)); return get_stack_count (pos_stack, callee_name, - size, count, max_count, &num_inst); + size, count, max_count, &num_inst, NULL, NULL); } /* For a given BB, return its execution count. */ gcov_type -afdo_get_bb_count (basic_block bb) +afdo_get_bb_count (basic_block bb, bool annotate_vpt) { gimple_stmt_iterator gsi; gcov_type max_count = 0; @@ -953,12 +1050,16 @@ afdo_get_bb_count (basic_block bb) for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { gcov_type count, num_inst; + gcov_unsigned_t hist_size; + struct gcov_hist *hist; gimple stmt = gsi_stmt (gsi); - if (get_stmt_count (stmt, &count, &num_inst)) + if (get_stmt_count (stmt, &count, &num_inst, &hist_size, &hist)) { if (count > max_count) max_count = count; has_annotated = true; + if (annotate_vpt && hist_size > 0) + afdo_vpt (stmt, hist, hist_size); } } if (has_annotated) @@ -980,7 +1081,7 @@ afdo_annotate_cfg (void) FOR_EACH_BB (bb) { - bb->count = afdo_get_bb_count (bb); + bb->count = afdo_get_bb_count (bb, true); if (bb->count > max_count) max_count = bb->count; } @@ -992,6 +1093,8 @@ afdo_annotate_cfg (void) afdo_calculate_branch_prob (); profile_status = PROFILE_READ; } + if (flag_value_profile_transformations) + gimple_value_profile_transformations (); } extern gcov_working_set_t *gcov_working_sets; @@ -1055,7 +1158,7 @@ read_profile (void) xmalloc (function_num * sizeof (struct gcov_function)); for (i = 0; i < function_num; i++) { - gcov_functions[i].name = xstrdup (gcov_read_string ()); + gcov_functions[i].name = file_names[gcov_read_unsigned ()]; gcov_functions[i].file = file_names[gcov_read_unsigned ()]; gcov_functions[i].total_count = gcov_read_counter (); gcov_functions[i].entry_count = gcov_read_counter (); @@ -1087,6 +1190,22 @@ read_profile (void) } gcov_functions[i].stacks[j].count = gcov_read_counter (); gcov_functions[i].stacks[j].num_inst = gcov_read_counter (); + gcov_functions[i].stacks[j].hist_size = gcov_read_unsigned (); + gcov_functions[i].stacks[j].hist = (struct gcov_hist *) + xmalloc (gcov_functions[i].stacks[j].hist_size + * sizeof (struct gcov_hist)); + for (k = 0; k < gcov_functions[i].stacks[j].hist_size; k++) + { + gcov_functions[i].stacks[j].hist[k].type = + (enum afdo_histogram_type) gcov_read_unsigned(); + if (gcov_functions[i].stacks[j].hist[k].type == CALL_HIST) + gcov_functions[i].stacks[j].hist[k].value.func_name = + file_names[gcov_read_counter()]; + else + gcov_functions[i].stacks[j].hist[k].value.value = + gcov_read_counter(); + gcov_functions[i].stacks[j].hist[k].count = gcov_read_counter(); + } } } @@ -1698,6 +1817,7 @@ auto_profile (void) afdo_annotate_cfg (); compute_function_frequency (); + update_ssa (TODO_update_ssa); current_function_decl = NULL; pop_cfun (); diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h index 8e0a670..f7175ff 100644 --- a/gcc/auto-profile.h +++ b/gcc/auto-profile.h @@ -43,5 +43,5 @@ extern bool afdo_get_callsite_count (struct cgraph_edge *, gcov_type *, gcov_type *, bool); /* Calculate basic block count. */ -extern gcov_type afdo_get_bb_count (basic_block); +extern gcov_type afdo_get_bb_count (basic_block, bool); #endif /* AUTO_PROFILE_H */ diff --git a/gcc/opts.c b/gcc/opts.c index 4657d6a..dab2564 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -1665,6 +1665,8 @@ common_handle_option (struct gcc_options *opts, opts->x_flag_unroll_loops = value; if (!opts_set->x_flag_peel_loops) opts->x_flag_peel_loops = value; + if (!opts_set->x_flag_value_profile_transformations) + opts->x_flag_value_profile_transformations = value; if (!opts_set->x_flag_inline_functions) opts->x_flag_inline_functions = value; if (!opts_set->x_flag_ipa_cp) diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index f09f201..62180fc 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -1830,7 +1830,7 @@ copy_bb (copy_body_data *id, basic_block bb, int frequency_scale, { /* If the same inline happens in the profile-collection binary, use that instance's profile count. Otherwise use the scaled count. */ - gcov_type count = afdo_get_bb_count (copy_basic_block); + gcov_type count = afdo_get_bb_count (copy_basic_block, false); if (copy_basic_block->flags & BB_ANNOTATED) copy_basic_block->count = count; else if (bb->flags & BB_ANNOTATED) diff --git a/gcc/value-prof.c b/gcc/value-prof.c index 81465bc..c20d13a 100644 --- a/gcc/value-prof.c +++ b/gcc/value-prof.c @@ -94,7 +94,7 @@ static bool gimple_ic_transform (gimple); /* Allocate histogram value. */ -static histogram_value +histogram_value gimple_alloc_histogram_value (struct function *fun ATTRIBUTE_UNUSED, enum hist_type type, gimple stmt, tree value) { @@ -1205,6 +1205,33 @@ find_func_by_funcdef_no (int func_id) return VEC_index (cgraph_node_ptr, cgraph_node_map, func_id); } +/* Return cgraph node for function with name. We need this because + AutoFDO doesn't record the function id for each function + in the profile. + TODO: need to transform this lookup method to hash table. */ + +static struct cgraph_node * +find_func_by_name (char *name) +{ + struct cgraph_node *n; + struct cgraph_node *ret = NULL; + int match = 0; + + for (n = cgraph_nodes; n; n = n->next) + { + const char *fname = IDENTIFIER_POINTER (decl_assembler_name (n->decl)); + if (!strcmp(fname, name)) + { + match ++; + ret = n; + } + } + if (match == 1) + return ret; + else + return NULL; +} + /* Initialize map of gids (gid -> cgraph node) */ static htab_t gid_map = NULL; @@ -1562,7 +1589,8 @@ gimple_ic_transform_mult_targ (gimple stmt, histogram_value histogram) count1 = histogram->hvalue.counters [2]; val2 = histogram->hvalue.counters [3]; count2 = histogram->hvalue.counters [4]; - bb_all = gimple_bb (stmt)->count; + bb_all = flag_auto_profile ? histogram->hvalue.counters[0] + : gimple_bb (stmt)->count; all = bb_all; gimple_remove_histogram_value (cfun, stmt, histogram); @@ -1592,18 +1620,26 @@ gimple_ic_transform_mult_targ (gimple stmt, histogram_value histogram) else prob1 = prob2 = 0; - direct_call1 = find_func_by_global_id (val1); + if (flag_auto_profile) + direct_call1 = find_func_by_name ((char *) val1); + else + direct_call1 = find_func_by_global_id (val1); if (val2 && (100 * count2 >= all * perc_threshold) && count2 > count_threshold) - direct_call2 = find_func_by_global_id (val2); + { + if (flag_auto_profile) + direct_call2 = find_func_by_name ((char *) val2); + else + direct_call2 = find_func_by_global_id (val2); + } locus = (stmt != NULL) ? gimple_location (stmt) : DECL_SOURCE_LOCATION (current_function_decl); if (direct_call1 == NULL || !check_ic_target (stmt, direct_call1)) { - if (flag_opt_info >= OPT_INFO_MAX) + if (flag_opt_info >= OPT_INFO_MAX && !flag_auto_profile) { if (!direct_call1) inform (locus, "Can not find indirect call target decl " @@ -1645,9 +1681,12 @@ gimple_ic_transform_mult_targ (gimple stmt, histogram_value histogram) print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM); fprintf (dump_file, "=> "); print_generic_expr (dump_file, direct_call1->decl, TDF_SLIM); - fprintf (dump_file, " (module_id:%d, func_id:%d)\n", - EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val1), - EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val1)); + if (flag_auto_profile) + fprintf (dump_file, " (%s)\n", (char *) val1); + else + fprintf (dump_file, " (module_id:%d, func_id:%d)\n", + EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val1), + EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val1)); fprintf (dump_file, "Transformation on insn:\n"); print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); fprintf (dump_file, "==>\n"); @@ -1683,9 +1722,12 @@ gimple_ic_transform_mult_targ (gimple stmt, histogram_value histogram) print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM); fprintf (dump_file, "=> "); print_generic_expr (dump_file, direct_call2->decl, TDF_SLIM); - fprintf (dump_file, " (module_id:%d, func_id:%d)\n", - EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val2), - EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val2)); + if (flag_auto_profile) + fprintf (dump_file, " (%s)\n", (char *) val2); + else + fprintf (dump_file, " (module_id:%d, func_id:%d)\n", + EXTRACT_MODULE_ID_FROM_GLOBAL_ID (val2), + EXTRACT_FUNC_ID_FROM_GLOBAL_ID (val2)); fprintf (dump_file, "Transformation on insn\n"); print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); fprintf (dump_file, "=>\n"); diff --git a/gcc/value-prof.h b/gcc/value-prof.h index 9425e25..2d84fe9 100644 --- a/gcc/value-prof.h +++ b/gcc/value-prof.h @@ -77,6 +77,8 @@ typedef VEC(histogram_value,heap) *histogram_values; extern void gimple_find_values_to_profile (histogram_values *); extern bool gimple_value_profile_transformations (void); +histogram_value gimple_alloc_histogram_value (struct function *, enum hist_type, + gimple stmt, tree); histogram_value gimple_histogram_value (struct function *, gimple); histogram_value gimple_histogram_value_of_type (struct function *, gimple, enum hist_type);