> It is somewhat hard to write a testcase to show role of range info only with > this patch. If another patch "Generalized predicate/condition for parameter > reference in IPA (PR ipa/91088)" is accepted, it will become easy and I will > update this testcase to show that. > > And this new version also resolves a problem that we might generate very > complicated predicate for basic block in convergence point reached from > all switch cases. > > switch (index) > / | \ > case1 case2 ... default > \ | / > convergence point > > For switch/if statement, current algorithm synthesizes predicate of > convergence point by OR-combining predicates of all its cases/branches, which > might give us a very complicated one. Actually, we can get simplified > predicate > by using the fact that predicate for a basic block must also hold true for > its post > dominators. To be specific, convergence point should include (at least equal > to) > predicate of the switch/if statement. > > Honza & Martin, > > Would please review this new patch when you have time? Thanks. > > Feng > > ----- > diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c > index 278bf606661..dc5752fc005 100644 > --- a/gcc/ipa-fnsummary.c > +++ b/gcc/ipa-fnsummary.c > @@ -1198,7 +1198,8 @@ set_cond_stmt_execution_predicate (struct > ipa_func_body_info *fbi, > /* invert_tree_comparison will return ERROR_MARK on FP > comparsions that are not EQ/NE instead of returning proper > unordered one. Be sure it is not confused with NON_CONSTANT. */ > - if (this_code != ERROR_MARK) > + if (this_code != ERROR_MARK > + && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) > { > predicate p > = add_condition (summary, index, size, &aggpos, this_code,
So this change is handling the diamond conditional you describe above? This is bit of hack since it leaves the edge predicate unnecesarily conservative though I see it saves some conditions to be inserted and makes things to go smoother. Please add a comment that explain this and reffers to the other places where we do this (in the switch handling below). > @@ -1269,13 +1270,31 @@ set_switch_stmt_execution_predicate (struct > ipa_func_body_info *fbi, > if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, > &aggpos)) > return; > > + auto_vec<std::pair<tree, tree> > ranges; > + tree type = TREE_TYPE (op); > + tree one_cst = build_one_cst (type); > + int bound_limit = PARAM_VALUE (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS); > + int bound_count = 0; > + value_range_base vr; > + > + get_range_info (op, vr); > + > FOR_EACH_EDGE (e, ei, bb->succs) > { > e->aux = edge_predicate_pool.allocate (); > *(predicate *) e->aux = false; > } > + > + e = gimple_switch_edge (cfun, last, 0); > + /* Set BOUND_COUNT to maximum count to bypass computing predicate for > + default case if its target basic block is in convergence point of all > + switch cases, which can be determined by checking whether it > + post-dominates the switch statement. */ > + if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) > + bound_count = INT_MAX; > + > n = gimple_switch_num_labels (last); > - for (case_idx = 0; case_idx < n; ++case_idx) > + for (case_idx = 1; case_idx < n; ++case_idx) > { > tree cl = gimple_switch_label (last, case_idx); > tree min, max; > @@ -1285,10 +1304,10 @@ set_switch_stmt_execution_predicate (struct > ipa_func_body_info *fbi, > min = CASE_LOW (cl); > max = CASE_HIGH (cl); > > - /* For default we might want to construct predicate that none > - of cases is met, but it is bit hard to do not having negations > - of conditionals handy. */ > - if (!min && !max) > + /* The case's target basic block is in convergence point of all switch > + cases, its predicate should be at least as that of the switch > + statement. */ > + if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) > p = true; > else if (!max) > p = add_condition (summary, index, size, &aggpos, EQ_EXPR, > @@ -1304,7 +1323,111 @@ set_switch_stmt_execution_predicate (struct > ipa_func_body_info *fbi, > } > *(class predicate *) e->aux > = p.or_with (summary->conds, *(class predicate *) e->aux); > + > + /* If there are too many disjoint case ranges, predicate for default > + case might become too complicated. So add a limit here. */ > + if (bound_count > bound_limit) > + continue; > + > + bool new_range = true; > + > + if (!ranges.is_empty ()) > + { > + tree last_max_plus_one > + = int_const_binop (PLUS_EXPR, ranges.last ().second, one_cst); > + > + /* Merge case ranges if they are continuous. */ > + if (tree_int_cst_equal (min, last_max_plus_one)) > + new_range = false; > + else if (vr.kind () == VR_ANTI_RANGE) > + { > + tree min_minus_one = int_const_binop (MINUS_EXPR, min, one_cst); > + > + /* If two disjoint case ranges can be connected by anti-range > + of switch index, combine them to one range. */ > + if (tree_int_cst_lt (vr.max (), min_minus_one)) > + vr.set_undefined (); > + else if (tree_int_cst_le (vr.min (), last_max_plus_one)) > + new_range = false; > + } I believe the case ranges always has to be INTEGER_CST. In this case all this can be written using wide ints and produce a lot better code (avoiding need to build & lookup & share all temporary tree codes). You can take a look at the tree-switch-conversion code which does quite some of this wide int handling. Richard may have an opinion on this. > @@ -1376,6 +1499,34 @@ compute_bb_predicates (struct ipa_func_body_info *fbi, > *((predicate *) bb->aux) = p; > } > } > + > + /* For switch/if statement, we can OR-combine predicates of all > + its cases/branches to get predicate for basic block in their > + convergence point, but sometimes this will generate very > + complicated predicate. Actually, we can get simplified > + predicate in another way by using the fact that predicate > + for a basic block must also hold true for its post dominators. > + To be specific, basic block in convergence point of > + conditional statement should include predicate of the > + statement. */ > + pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb); > + if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb) > + ; > + else if (!pdom_bb->aux) > + { > + done = false; > + pdom_bb->aux = edge_predicate_pool.allocate (); > + *((predicate *) pdom_bb->aux) = p; > + } > + else if (p != *(predicate *) pdom_bb->aux) > + { > + p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux); > + if (p != *(predicate *) pdom_bb->aux) > + { > + done = false; > + *((predicate *) pdom_bb->aux) = p; > + } > + } So this basically the idea is to or BB's predicate to the post-dominator predicate. This seems safe to do (we need to be careful so that the dataflow still converges), but I would preffer to get this in separately possibly with a testcase that shows an improvement in the dataflow answer. Honza > } > } > } > @@ -1980,6 +2131,7 @@ analyze_function_body (struct cgraph_node *node, bool > early) > if (opt_for_fn (node->decl, optimize)) > { > calculate_dominance_info (CDI_DOMINATORS); > + calculate_dominance_info (CDI_POST_DOMINATORS); > if (!early) > loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); > else > @@ -2360,6 +2512,7 @@ analyze_function_body (struct cgraph_node *node, bool > early) > else if (!ipa_edge_args_sum) > ipa_free_all_node_params (); > free_dominance_info (CDI_DOMINATORS); > + free_dominance_info (CDI_POST_DOMINATORS); > } > if (dump_file) > { > diff --git a/gcc/params.def b/gcc/params.def > index 13001a7bb2d..1461aa4849d 100644 > --- a/gcc/params.def > +++ b/gcc/params.def > @@ -1123,6 +1123,14 @@ DEFPARAM (PARAM_IPA_MAX_AA_STEPS, > "parameter analysis based on alias analysis in any given function.", > 25000, 0, 0) > > +DEFPARAM (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS, > + "ipa-max-switch-predicate-bounds", > + "Maximal number of boundary endpoints of case ranges of switch " > + "statement. For switch exceeding this limit, do not construct cost-" > + "evaluating predicate for default case during IPA function summary" > + "generation.", > + 5, 0, 0) > + > /* WHOPR partitioning configuration. */ > > DEFPARAM (PARAM_LTO_PARTITIONS, > diff --git a/gcc/testsuite/gcc.dg/ipa/pr91089.c > b/gcc/testsuite/gcc.dg/ipa/pr91089.c > new file mode 100644 > index 00000000000..dbd9b8c94fe > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/ipa/pr91089.c > @@ -0,0 +1,111 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-ipa-cp-details -fdump-ipa-fnsummary-details > --param ipa-max-switch-predicate-bounds=10 -fno-inline" } */ > + > +int fn(); > + > +int data; > + > +int callee1(int i) > +{ > + switch (i) > + { > + case -126: return i + 13; > + case -127: return i + 5; > + case -8: return i * i; > + case 0: return i % 9; > + case 5: > + case 7: > + case 6: return 3; > + default: > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + } > + > + return data += i; > +} > + > +int fn2(); > + > +int callee2(int i) > +{ > + switch (i ) > + { > + case 0: > + fn(); > + fn(); > + fn(); > + case 1: > + fn(); > + fn(); > + case -1: > + fn(); > + case -2: > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + data += i; > + break; > + } > + > + if (i == 1000) > + { > + int j; > + > + for (j = 0; j < 100; j++) > + fn2(); > + } > + return i + 3; > +} > + > +int caller() > +{ > + return callee1 (-127) + > + callee1 (-126) + > + callee1 (-8) + > + callee1 (0) + > + callee1 (5) + > + callee1 (6) + > + callee1 (7) + > + callee1 (100) + > + callee2 (9); > + callee2 (13); > +} > + > +/* { dg-final { scan-ipa-dump-times "Creating a specialized node of callee" > 7 "cp" } } */ > +/* { dg-final { scan-ipa-dump "op0 < -127" "fnsummary" } } */ > +/* { dg-final { scan-ipa-dump "op0 > -126" "fnsummary" } } */ > +/* { dg-final { scan-ipa-dump "op0 != -8" "fnsummary" } } */ > +/* { dg-final { scan-ipa-dump "op0 != 0" "fnsummary" } } */ > +/* { dg-final { scan-ipa-dump "op0 < 5" "fnsummary" } } */ > +/* { dg-final { scan-ipa-dump "op0 > 7" "fnsummary" } } */ > +/* { dg-final { scan-ipa-dump "loop depth: 1 .+ time:\[ \]*\[0-9\]+ > predicate: \\(op0 == 1000\\)" "fnsummary" } } */ > -- > 2.17.1