Ping. Any review comments? Thanks, bin
On Wed, Oct 1, 2014 at 6:31 AM, Sebastian Pop <seb...@gmail.com> wrote: > Bin Cheng wrote: >> Hi, >> As analyzed in PR62178, IVOPT can't find the optimal iv set for that case. >> The problem with current heuristic algorithm is it only replaces candidate >> with ones not in current solution one by one, starting from small solution. >> This patch adds another heuristic which starts from assigning the best >> candidate for each iv use, then replaces candidate with ones in the current >> solution. >> Before this patch, there are two runs of find_optimal_set_1 to find the >> optimal iv sets, we name them as set_a and set_b. After this patch we will >> have set_c. At last, IVOPT chooses the best one from set_a/set_b/set_c. To >> prove that this patch is necessary, I collected instrumental data for gcc >> bootstrap, spec2k, eembc and can confirm for some cases only the newly added >> heuristic can find the optimal iv set. The number of these cases in which >> set_c is the optimal one is on the same level of set_b. >> As for the compilation time, the newly added function actually is one >> iteration of previous selection algorithm, it should be much faster than >> previous process. >> >> I also added one target dependent test case. >> Bootstrap and test on x86_64, test on aarch64. Any comments? > > I verified that the patch fixes the performance regression on intmm. I have > seen improvements to other benchmarks, and very small degradations that could > very well be noise. > > Thanks for fixing this perf issue! > Sebastian > >> >> 2014-09-30 Bin Cheng <bin.ch...@arm.com> >> >> PR tree-optimization/62178 >> * tree-ssa-loop-ivopts.c (enum sel_type): New. >> (iv_ca_add_use): Add parameter RELATED_P and find the best cand >> for iv use if it's true. >> (try_add_cand_for, get_initial_solution): Change paramter ORIGINALP >> to SELECT_TYPE and handle it. >> (find_optimal_iv_set_1): Ditto. >> (try_prune_iv_set, find_optimal_iv_set_2): New functions. >> (find_optimal_iv_set): Call find_optimal_iv_set_2 and choose the >> best candidate set. >> >> gcc/testsuite/ChangeLog >> 2014-09-30 Bin Cheng <bin.ch...@arm.com> >> >> PR tree-optimization/62178 >> * gcc.target/aarch64/pr62178.c: New test. > >> Index: gcc/testsuite/gcc.target/aarch64/pr62178.c >> =================================================================== >> --- gcc/testsuite/gcc.target/aarch64/pr62178.c (revision 0) >> +++ gcc/testsuite/gcc.target/aarch64/pr62178.c (revision 0) >> @@ -0,0 +1,17 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O3" } */ >> + >> +int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1]; >> + >> +void Intmm (int run) { >> + int i, j, k; >> + >> + for ( i = 1; i <= 30; i++ ) >> + for ( j = 1; j <= 30; j++ ) { >> + r[i][j] = 0; >> + for(k = 1; k <= 30; k++ ) >> + r[i][j] += a[i][k]*b[k][j]; >> + } >> +} >> + >> +/* { dg-final { scan-assembler "ld1r\\t\{v\[0-9\]+\."} } */ >> Index: gcc/tree-ssa-loop-ivopts.c >> =================================================================== >> --- gcc/tree-ssa-loop-ivopts.c (revision 215113) >> +++ gcc/tree-ssa-loop-ivopts.c (working copy) >> @@ -254,6 +254,14 @@ struct iv_inv_expr_ent >> hashval_t hash; >> }; >> >> +/* Types used to start selecting the candidate for each IV use. */ >> +enum sel_type >> +{ >> + SEL_ORIGINAL, /* Start selecting from original cands. */ >> + SEL_IMPORTANT, /* Start selecting from important cands. */ >> + SEL_RELATED /* Start selecting from related cands. */ >> +}; >> + >> /* The data used by the induction variable optimizations. */ >> >> typedef struct iv_use *iv_use_p; >> @@ -5417,22 +5425,51 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_ >> } >> >> /* Extend set IVS by expressing USE by some of the candidates in it >> - if possible. Consider all important candidates if candidates in >> - set IVS don't give any result. */ >> + if possible. If RELATED_P is FALSE, consider all important >> + candidates if candidates in set IVS don't give any result; >> + otherwise, try to find the best one from related or all candidates, >> + depending on consider_all_candidates. */ >> >> static void >> iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs, >> - struct iv_use *use) >> + struct iv_use *use, bool related_p) >> { >> struct cost_pair *best_cp = NULL, *cp; >> bitmap_iterator bi; >> unsigned i; >> struct iv_cand *cand; >> >> - gcc_assert (ivs->upto >= use->id); >> + gcc_assert (ivs->upto == use->id); >> ivs->upto++; >> ivs->bad_uses++; >> >> + if (related_p) >> + { >> + if (data->consider_all_candidates) >> + { >> + for (i = 0; i < n_iv_cands (data); i++) >> + { >> + cand = iv_cand (data, i); >> + cp = get_use_iv_cost (data, use, cand); >> + if (cheaper_cost_pair (cp, best_cp)) >> + best_cp = cp; >> + } >> + } >> + else >> + { >> + EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi) >> + { >> + cand = iv_cand (data, i); >> + cp = get_use_iv_cost (data, use, cand); >> + if (cheaper_cost_pair (cp, best_cp)) >> + best_cp = cp; >> + } >> + } >> + >> + iv_ca_set_cp (data, ivs, use, best_cp); >> + return; >> + } >> + >> EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) >> { >> cand = iv_cand (data, i); >> @@ -5440,7 +5477,7 @@ iv_ca_add_use (struct ivopts_data *data, struct iv >> if (cheaper_cost_pair (cp, best_cp)) >> best_cp = cp; >> } >> - >> + >> if (best_cp == NULL) >> { >> EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) >> @@ -5869,13 +5906,15 @@ iv_ca_prune (struct ivopts_data *data, struct iv_c >> } >> >> /* Tries to extend the sets IVS in the best possible way in order >> - to express the USE. If ORIGINALP is true, prefer candidates from >> - the original set of IVs, otherwise favor important candidates not >> - based on any memory object. */ >> + to express the USE. If SELECT_TYPE is SEL_ORIGINAL, prefer >> + candidates from the original set of IVs; if it's SEL_IMPORTANT, >> + favor important candidates not based on any memory object; >> + if it's SEL_RELATED, assign the best candidate to each use if >> + possible. */ >> >> static bool >> try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, >> - struct iv_use *use, bool originalp) >> + struct iv_use *use, enum sel_type select_type) >> { >> comp_cost best_cost, act_cost; >> unsigned i; >> @@ -5883,8 +5922,9 @@ try_add_cand_for (struct ivopts_data *data, struct >> struct iv_cand *cand; >> struct iv_ca_delta *best_delta = NULL, *act_delta; >> struct cost_pair *cp; >> + bool originalp = (select_type == SEL_ORIGINAL); >> >> - iv_ca_add_use (data, ivs, use); >> + iv_ca_add_use (data, ivs, use, select_type == SEL_RELATED); >> best_cost = iv_ca_cost (ivs); >> cp = iv_ca_cand_for_use (ivs, use); >> if (cp) >> @@ -5892,7 +5932,16 @@ try_add_cand_for (struct ivopts_data *data, struct >> best_delta = iv_ca_delta_add (use, NULL, cp, NULL); >> iv_ca_set_no_cp (data, ivs, use); >> } >> + if (select_type == SEL_RELATED) >> + { >> + /* We should have selected the best candidate if possible. */ >> + gcc_assert (cp != NULL || infinite_cost_p (best_cost)); >> + iv_ca_delta_commit (data, ivs, best_delta, true); >> + iv_ca_delta_free (&best_delta); >> >> + return !infinite_cost_p (best_cost); >> + } >> + >> /* If ORIGINALP is true, try to find the original IV for the use. >> Otherwise >> first try important candidates not based on any memory object. Only if >> this fails, try the specific ones. Rationale -- in loops with many >> @@ -5983,16 +6032,17 @@ try_add_cand_for (struct ivopts_data *data, struct >> return !infinite_cost_p (best_cost); >> } >> >> -/* Finds an initial assignment of candidates to uses. */ >> +/* Finds an initial assignment of candidates to uses according to >> + SELECT_TYPE. */ >> >> static struct iv_ca * >> -get_initial_solution (struct ivopts_data *data, bool originalp) >> +get_initial_solution (struct ivopts_data *data, enum sel_type select_type) >> { >> struct iv_ca *ivs = iv_ca_new (data); >> unsigned i; >> >> for (i = 0; i < n_iv_uses (data); i++) >> - if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp)) >> + if (!try_add_cand_for (data, ivs, iv_use (data, i), select_type)) >> { >> iv_ca_free (&ivs); >> return NULL; >> @@ -6059,17 +6109,43 @@ try_improve_iv_set (struct ivopts_data *data, stru >> return true; >> } >> >> +/* Tries to prune set of induction variables IVS. */ >> + >> +static bool >> +try_prune_iv_set (struct ivopts_data *data, struct iv_ca *ivs) >> +{ >> + comp_cost new_cost, old_cost; >> + struct iv_ca_delta *delta = NULL; >> + >> + if (iv_ca_n_cands (ivs) > ALWAYS_PRUNE_CAND_SET_BOUND) >> + return false; >> + >> + old_cost = iv_ca_cost (ivs); >> + /* Try pruning the candidates from the set. */ >> + new_cost = iv_ca_prune (data, ivs, NULL, &delta); >> + >> + /* Nothing more we can do. */ >> + if (!delta >> + || compare_costs (new_cost, old_cost) >= 0) >> + return false; >> + >> + iv_ca_delta_commit (data, ivs, delta, true); >> + gcc_assert (compare_costs (new_cost, iv_ca_cost (ivs)) == 0); >> + iv_ca_delta_free (&delta); >> + return true; >> +} >> + >> /* Attempts to find the optimal set of induction variables. We do simple >> greedy heuristic -- we try to replace at most one candidate in the >> selected >> solution and remove the unused ivs while this improves the cost. */ >> >> static struct iv_ca * >> -find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) >> +find_optimal_iv_set_1 (struct ivopts_data *data, enum sel_type select_type) >> { >> struct iv_ca *set; >> >> /* Get the initial solution. */ >> - set = get_initial_solution (data, originalp); >> + set = get_initial_solution (data, select_type); >> if (!set) >> { >> if (dump_file && (dump_flags & TDF_DETAILS)) >> @@ -6095,25 +6171,72 @@ static struct iv_ca * >> return set; >> } >> >> +/* Attempts to find the optimal set of induction variables. Different >> + with find_optimal_iv_set_1 -- this function uses greedy heuristic >> + that firstly assigns the best candidate to each use, then tries to >> + prune the solution. */ >> + >> static struct iv_ca * >> +find_optimal_iv_set_2 (struct ivopts_data *data, enum sel_type select_type) >> +{ >> + struct iv_ca *set; >> + >> + /* Get the initial solution. */ >> + set = get_initial_solution (data, select_type); >> + if (!set) >> + { >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + fprintf (dump_file, "Unable to substitute for ivs, failed.\n"); >> + return NULL; >> + } >> + >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + { >> + fprintf (dump_file, "Initial set of candidates:\n"); >> + iv_ca_dump (data, dump_file, set); >> + } >> + >> + while (try_prune_iv_set (data, set)) >> + { >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + { >> + fprintf (dump_file, "Pruned to:\n"); >> + iv_ca_dump (data, dump_file, set); >> + } >> + } >> + >> + return set; >> +} >> + >> +/* Find the optimal IV set by using two different heuristic strategies. >> + The first strategy starts with small IV solution, tries to replace at >> + most one candidate with others not in the current solution at one >> + time. The second strategy starts with large IV set, tries to replace >> + candidates with others in the current solution. */ >> + >> +static struct iv_ca * >> find_optimal_iv_set (struct ivopts_data *data) >> { >> unsigned i; >> - struct iv_ca *set, *origset; >> + struct iv_ca *set, *origset, *pruneset; >> struct iv_use *use; >> - comp_cost cost, origcost; >> + comp_cost cost, origcost, prunecost; >> >> - /* Determine the cost based on a strategy that starts with original IVs, >> - and try again using a strategy that prefers candidates not based >> - on any IVs. */ >> - origset = find_optimal_iv_set_1 (data, true); >> - set = find_optimal_iv_set_1 (data, false); >> + /* Try to find optimal IV set using the first strategy and starting >> + with original IVS. */ >> + origset = find_optimal_iv_set_1 (data, SEL_ORIGINAL); >> + /* Try to find optimal IV set using the first strategy and starting >> + with important IVS. */ >> + set = find_optimal_iv_set_1 (data, SEL_IMPORTANT); >> + /* Try to find optimal IV set using the second strategy. */ >> + pruneset = find_optimal_iv_set_2 (data, SEL_RELATED); >> >> - if (!origset && !set) >> + if (!origset && !set && !pruneset) >> return NULL; >> >> origcost = origset ? iv_ca_cost (origset) : infinite_cost; >> cost = set ? iv_ca_cost (set) : infinite_cost; >> + prunecost = pruneset ? iv_ca_cost (pruneset) : infinite_cost; >> >> if (dump_file && (dump_flags & TDF_DETAILS)) >> { >> @@ -6121,9 +6244,12 @@ find_optimal_iv_set (struct ivopts_data *data) >> origcost.cost, origcost.complexity); >> fprintf (dump_file, "Final cost %d (complexity %d)\n\n", >> cost.cost, cost.complexity); >> + fprintf (dump_file, "Pruned cost %d (complexity %d)\n\n", >> + prunecost.cost, prunecost.complexity); >> } >> >> - /* Choose the one with the best cost. */ >> + /* Choose the one with the best cost among the three candidates. */ >> + >> if (compare_costs (origcost, cost) <= 0) >> { >> if (set) >> @@ -6133,6 +6259,15 @@ find_optimal_iv_set (struct ivopts_data *data) >> else if (origset) >> iv_ca_free (&origset); >> >> + if (compare_costs (prunecost, cost) < 0) >> + { >> + if (set) >> + iv_ca_free (&set); >> + set = pruneset; >> + } >> + else if (pruneset) >> + iv_ca_free (&pruneset); >> + >> for (i = 0; i < n_iv_uses (data); i++) >> { >> use = iv_use (data, i); >