Hi, Actually, this patch is not final one. Since the previous cp-by-ref patch is still on the way to the trunk, I tailored it to only handle by-value recursion, so that it is decoupled with the previous patch, and can be reviewed independently. I attached the final patch, you can have a try.
CP propagation stage might generate values outside of recursion ranges, but cloning stage will not make a duplicate for invalid recursion value, with which we do not need extra code for recursion range analysis. Feng ________________________________________ From: luoxhu <luo...@linux.ibm.com> Sent: Thursday, October 24, 2019 1:44 PM To: Feng Xue OS; gcc-patches@gcc.gnu.org; Jan Hubicka; Martin Jambor Subject: Re: [PATCH] Support multi-versioning on self-recursive function (ipa/92133) Hi, On 2019/10/17 16:23, Feng Xue OS wrote: > IPA does not allow constant propagation on parameter that is used to control > function recursion. > > recur_fn (i) > { > if ( !terminate_recursion (i)) > { > ... > recur_fn (i + 1); > ... > } > ... > } > > This patch is composed to enable multi-versioning for self-recursive function, > and versioning copies is limited by a specified option. > > Feng > --- > diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c > index 045072e02ec..6255a766e4d 100644 > --- a/gcc/ipa-cp.c > +++ b/gcc/ipa-cp.c > @@ -229,7 +229,9 @@ public: > inline bool set_contains_variable (); > bool add_value (valtype newval, cgraph_edge *cs, > ipcp_value<valtype> *src_val = NULL, > - int src_idx = 0, HOST_WIDE_INT offset = -1); > + int src_idx = 0, HOST_WIDE_INT offset = -1, > + ipcp_value<valtype> **val_pos_p = NULL, > + bool unlimited = false); > void print (FILE * f, bool dump_sources, bool dump_benefits); > }; > > @@ -1579,22 +1581,37 @@ allocate_and_init_ipcp_value > (ipa_polymorphic_call_context source) > /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. > CS, > SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same > meaning. OFFSET -1 means the source is scalar and not a part of an > - aggregate. */ > + aggregate. If non-NULL, VAL_POS_P specifies position in value list, > + after which newly created ipcp_value will be inserted, and it is also > + used to record address of the added ipcp_value before function returns. > + UNLIMITED means whether value count should not exceed the limit given > + by PARAM_IPA_CP_VALUE_LIST_SIZE. */ > > template <typename valtype> > bool > ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs, > ipcp_value<valtype> *src_val, > - int src_idx, HOST_WIDE_INT offset) > + int src_idx, HOST_WIDE_INT offset, > + ipcp_value<valtype> **val_pos_p, > + bool unlimited) > { > ipcp_value<valtype> *val; > > + if (val_pos_p) > + { > + for (val = values; val && val != *val_pos_p; val = val->next); > + gcc_checking_assert (val); > + } > + > if (bottom) > return false; > > for (val = values; val; val = val->next) > if (values_equal_for_ipcp_p (val->value, newval)) > { > + if (val_pos_p) > + *val_pos_p = val; > + > if (ipa_edge_within_scc (cs)) > { > ipcp_value_source<valtype> *s; > @@ -1609,7 +1626,8 @@ ipcp_lattice<valtype>::add_value (valtype newval, > cgraph_edge *cs, > return false; > } > > - if (values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) > + if (!unlimited > + && values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) > { > /* We can only free sources, not the values themselves, because > sources > of other values in this SCC might point to them. */ > @@ -1623,6 +1641,9 @@ ipcp_lattice<valtype>::add_value (valtype newval, > cgraph_edge *cs, > } > } > > + if (val_pos_p) > + *val_pos_p = NULL; > + > values = NULL; > return set_to_bottom (); > } > @@ -1630,8 +1651,54 @@ ipcp_lattice<valtype>::add_value (valtype newval, > cgraph_edge *cs, > values_count++; > val = allocate_and_init_ipcp_value (newval); > val->add_source (cs, src_val, src_idx, offset); > - val->next = values; > - values = val; > + if (val_pos_p) > + { > + val->next = (*val_pos_p)->next; > + (*val_pos_p)->next = val; > + *val_pos_p = val; > + } > + else > + { > + val->next = values; > + values = val; > + } > + > + return true; > +} > + > +/* Return true, if a ipcp_value VAL is orginated from parameter value of > + self-feeding recursive function by applying non-passthrough arithmetic > + transformation. */ > + > +static bool > +self_recursively_generated_p (ipcp_value<tree> *val) > +{ > + class ipa_node_params *info = NULL; > + > + for (ipcp_value_source<tree> *src = val->sources; src; src = src->next) > + { > + cgraph_edge *cs = src->cs; > + > + if (!src->val || cs->caller != cs->callee->function_symbol () > + || src->val == val) > + return false; > + > + if (!info) > + info = IPA_NODE_REF (cs->caller); > + > + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, > + src->index); > + ipcp_lattice<tree> *src_lat = src->offset == -1 ? &plats->itself > + : plats->aggs; Thanks for the patch. This function doesn't handle the by-ref case after rebasing this patch to your previous ipa-cp by-ref of arith patch as below (Also some conflicts need fix): foo (int * p) { ... return foo(*(&p) + 1); } It will cause value explosion. Secondly, the self_recursive doesn't check or break if the value is above than the recursive boundary, which seems would generate redundant nodes? IMHO, It may be helpful if adding more comments before the return and dump log for dump-ipa-cp-all-details when adding new values and new sources like below for debugging and others can understand the code easier? :) newval, src_val, src_val->sources, *src_val->sources, caller, callee 1, (ipcp_value<tree_node*> *) 0x12bc1828, (ipcp_value_source<tree_node*> *) 0x12bae800, {offset = -1, cs = 0x3fffb5602768, val = 0x0, next = 0x0, index = 0}, "main", recur_fn" 2, (ipcp_value<tree_node*> *) 0x12bc1880, (ipcp_value_source<tree_node*> *) 0x12bae830, {offset = -1, cs = 0x3fffb5602630, val = 0x12bc1828, next = 0x0, index = 0},"recur_fn","recur_fn" ... 1, (ipcp_value<tree_node*> *) 0x12bc1828, (ipcp_value_source<tree_node*> *) 0x12baea70, {offset = -1, cs = 0x3fffb5602630, val = 0x12bc1c48, next = 0x12bae800, index = 0} "recur_fn","recur_fn" 2, (ipcp_value<tree_node*> *) 0x12bc1880, (ipcp_value_source<tree_node*> *) 0x12bae830, {offset = -1, cs = 0x3fffb5602630, val = 0x12bc1828, next = 0x0, index = 0} "recur_fn","recur_fn" Xiong Hu Thanks > + ipcp_value<tree> *src_val; > + > + for (src_val = src_lat->values; src_val && src_val != val; > + src_val = src_val->next); > + > + if (!src_val) > + return false; > + } > + > return true; > } > > @@ -1649,20 +1716,72 @@ propagate_vals_across_pass_through (cgraph_edge *cs, > ipa_jump_func *jfunc, > ipcp_value<tree> *src_val; > bool ret = false; > > - /* Do not create new values when propagating within an SCC because if there > - are arithmetic functions with circular dependencies, there is infinite > - number of them and we would just make lattices bottom. If this > condition > - is ever relaxed we have to detect self-feeding recursive calls in > - cgraph_edge_brings_value_p in a smarter way. */ > + /* Due to circular dependencies, propagating within an SCC through > arithmetic > + transformation would create infinite number of values. But for > + self-feeding recursive function, we could allow propagation in a limited > + count, and this can enable a simple kind of recursive function > versioning. > + For other scenario, we would just make lattices bottom. */ > if ((ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) > && ipa_edge_within_scc (cs)) > - ret = dest_lat->set_contains_variable (); > + { > + if (src_lat != dest_lat) > + return dest_lat->set_contains_variable (); > + > + auto_vec<ipcp_value<tree> *, 8> val_seeds; > + > + for (src_val = src_lat->values; src_val; src_val = src_val->next) > + { > + /* Now we do not use self-recursively generated value as propagation > + source, this is absolutely conservative, but could avoid explosion > + of lattice's value space, especially when one recursive function > + calls another recursive. */ > + if (self_recursively_generated_p (src_val)) > + { > + /* If the lattice has already been propagated for the call site, > + no need to do that again. */ > + for (ipcp_value_source<tree> *s = src_val->sources; s; > + s = s->next) > + if (s->cs == cs) > + return dest_lat->set_contains_variable (); > + } > + else > + val_seeds.safe_push (src_val); > + } > + > + int clone_copies = PARAM_VALUE (PARAM_IPA_CP_MAX_RECURSION_COPIES); > + int i; > + > + /* Recursively generate lattice values with a limited count. */ > + FOR_EACH_VEC_ELT (val_seeds, i, src_val) > + { > + for (int j = 1; j < clone_copies; j++) > + { > + tree cstval = ipa_get_jf_pass_through_result (jfunc, > + src_val->value, > + parm_type); > + if (!cstval) > + break; > + > + /* Try to place the new lattice value after its source, which > + can decrease iterations of propagate stage. */ > + ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, > + -1, &src_val, true); > + gcc_checking_assert (src_val); > + } > + } > + ret |= dest_lat->set_contains_variable (); > + } > else > for (src_val = src_lat->values; src_val; src_val = src_val->next) > { > + /* Now we do not use self-recursively generated value as propagation > + source, otherwise it is easy to make value space of normal lattice > + overflow. */ > + if (self_recursively_generated_p (src_val)) > + continue; > + > tree cstval = ipa_get_jf_pass_through_result (jfunc, src_val->value, > parm_type); > - > if (cstval) > ret |= dest_lat->add_value (cstval, cs, src_val, src_idx); > else > @@ -3635,6 +3754,9 @@ get_info_about_necessary_edges (ipcp_value<valtype> > *val, cgraph_node *dest, > hot |= cs->maybe_hot_p (); > if (cs->caller != dest) > non_self_recursive = true; > + else if (src->val) > + gcc_assert (values_equal_for_ipcp_p (src->val->value, > + val->value)); > } > cs = get_next_cgraph_edge_clone (cs); > } > diff --git a/gcc/params.def b/gcc/params.def > index 322c37f8b96..3e242e09f01 100644 > --- a/gcc/params.def > +++ b/gcc/params.def > @@ -1135,6 +1135,11 @@ DEFPARAM (PARAM_IPA_CP_RECURSION_PENALTY, > "are evaluated for cloning.", > 40, 0, 100) > > +DEFPARAM (PARAM_IPA_CP_MAX_RECURSION_COPIES, > + "ipa-cp-max-recursion-copies", > + "Maximum function versioning copies for a self-recursive function.", > + 8, 0, 0) > + > DEFPARAM (PARAM_IPA_CP_SINGLE_CALL_PENALTY, > "ipa-cp-single-call-penalty", > "Percentage penalty functions containing a single call to another " > diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c > b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c > new file mode 100644 > index 00000000000..7c1c01047c6 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c > @@ -0,0 +1,47 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining" } */ > + > +int fn(); > + > +int data[100]; > + > +int recur_fn (int i) > +{ > + int j; > + > + if (i == 6) > + { > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + return 10; > + } > + > + data[i] = i; > + > + for (j = 0; j < 100; j++) > + recur_fn (i + 1); > + > + return i; > +} > + > +int main () > +{ > + int i; > + > + for (i = 0; i < 100; i++) > + recur_fn (1) + recur_fn (-5); > + > + return 1; > +} > + > +/* { dg-final { scan-ipa-dump-times "Creating a specialized node of > recur_fn/\[0-9\]*\\." 12 "cp" } } */ >
From 51e79933f5264ec35276c3058ed2b5aeb2f28128 Mon Sep 17 00:00:00 2001 From: Feng Xue <f...@os.amperecomputing.com> Date: Tue, 24 Sep 2019 11:48:26 +0800 Subject: [PATCH] cost --- gcc/ipa-cp.c | 171 ++++++++++++++++++++++--- gcc/params.def | 5 + gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c | 47 +++++++ 3 files changed, 204 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index b3e928052dc..dff9eadb64b 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -229,7 +229,9 @@ public: inline bool set_contains_variable (); bool add_value (valtype newval, cgraph_edge *cs, ipcp_value<valtype> *src_val = NULL, - int src_idx = 0, HOST_WIDE_INT offset = -1); + int src_idx = 0, HOST_WIDE_INT offset = -1, + ipcp_value<valtype> **val_pos_p = NULL, + bool unlimited = false); void print (FILE * f, bool dump_sources, bool dump_benefits); }; @@ -1733,22 +1735,37 @@ allocate_and_init_ipcp_value (ipa_polymorphic_call_context source) /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS, SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same meaning. OFFSET -1 means the source is scalar and not a part of an - aggregate. */ + aggregate. If non-NULL, VAL_POS_P specifies position in value list, + after which newly created ipcp_value will be inserted, and it is also + used to record address of the added ipcp_value before function returns. + UNLIMITED means whether value count should not exceed the limit given + by PARAM_IPA_CP_VALUE_LIST_SIZE. */ template <typename valtype> bool ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs, ipcp_value<valtype> *src_val, - int src_idx, HOST_WIDE_INT offset) + int src_idx, HOST_WIDE_INT offset, + ipcp_value<valtype> **val_pos_p, + bool unlimited) { ipcp_value<valtype> *val; + if (val_pos_p) + { + for (val = values; val && val != *val_pos_p; val = val->next); + gcc_checking_assert (val); + } + if (bottom) return false; for (val = values; val; val = val->next) if (values_equal_for_ipcp_p (val->value, newval)) { + if (val_pos_p) + *val_pos_p = val; + if (ipa_edge_within_scc (cs)) { ipcp_value_source<valtype> *s; @@ -1763,7 +1780,8 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs, return false; } - if (values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) + if (!unlimited + && values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) { /* We can only free sources, not the values themselves, because sources of other values in this SCC might point to them. */ @@ -1777,6 +1795,9 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs, } } + if (val_pos_p) + *val_pos_p = NULL; + values = NULL; return set_to_bottom (); } @@ -1784,11 +1805,74 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs, values_count++; val = allocate_and_init_ipcp_value (newval); val->add_source (cs, src_val, src_idx, offset); - val->next = values; - values = val; + if (val_pos_p) + { + val->next = (*val_pos_p)->next; + (*val_pos_p)->next = val; + *val_pos_p = val; + } + else + { + val->next = values; + values = val; + } + + return true; +} + +/* Return true, if a ipcp_value VAL is orginated from parameter value of + self-feeding recursive function by applying non-passthrough arithmetic + transformation. */ + +static bool +self_recursively_generated_p (ipcp_value<tree> *val) +{ + class ipa_node_params *info = NULL; + + for (ipcp_value_source<tree> *src = val->sources; src; src = src->next) + { + cgraph_edge *cs = src->cs; + + if (!src->val || cs->caller != cs->callee->function_symbol () + || src->val == val) + return false; + + if (!info) + info = IPA_NODE_REF (cs->caller); + + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, + src->index); + ipcp_lattice<tree> *src_lat = src->offset == -1 ? &plats->itself + : plats->aggs; + ipcp_value<tree> *src_val; + + for (src_val = src_lat->values; src_val && src_val != val; + src_val = src_val->next); + + if (!src_val) + return false; + } + return true; } +static tree +get_val_across_arith_op (enum tree_code opcode, + tree opnd1_type, + tree opnd2, + ipcp_value<tree> *src_val, + tree res_type) +{ + tree opnd1 = src_val->value; + + /* Skip source values that is incompatible with specified type. */ + if (opnd1_type + && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) + return NULL_TREE; + + return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type); +} + /* Propagate values through an arithmetic transformation described by a jump function associated with edge CS, taking values from SRC_LAT and putting them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT. @@ -1812,24 +1896,70 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs, ipcp_value<tree> *src_val; bool ret = false; - /* Do not create new values when propagating within an SCC because if there - are arithmetic functions with circular dependencies, there is infinite - number of them and we would just make lattices bottom. If this condition - is ever relaxed we have to detect self-feeding recursive calls in - cgraph_edge_brings_value_p in a smarter way. */ + /* Due to circular dependencies, propagating within an SCC through arithmetic + transformation would create infinite number of values. But for + self-feeding recursive function, we could allow propagation in a limited + count, and this can enable a simple kind of recursive function versioning. + For other scenario, we would just make lattices bottom. */ if (opcode != NOP_EXPR && ipa_edge_within_scc (cs)) - ret = dest_lat->set_contains_variable (); + { + if (src_lat != dest_lat) + return dest_lat->set_contains_variable (); + + auto_vec<ipcp_value<tree> *, 8> val_seeds; + + for (src_val = src_lat->values; src_val; src_val = src_val->next) + { + /* Now we do not use self-recursively generated value as propagation + source, this is absolutely conservative, but could avoid explosion + of lattice's value space, especially when one recursive function + calls another recursive. */ + if (self_recursively_generated_p (src_val)) + { + /* If the lattice has already been propagated for the call site, + no need to do that again. */ + for (ipcp_value_source<tree> *s = src_val->sources; s; + s = s->next) + if (s->cs == cs) + return dest_lat->set_contains_variable (); + } + else + val_seeds.safe_push (src_val); + } + + int clone_copies = PARAM_VALUE (PARAM_IPA_CP_MAX_RECURSION_COPIES); + int i; + + /* Recursively generate lattice values with a limited count. */ + FOR_EACH_VEC_ELT (val_seeds, i, src_val) + { + for (int j = 1; j < clone_copies; j++) + { + tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, + src_val, res_type); + if (!cstval) + break; + + /* Try to place the new lattice value after its source, which + can decrease iterations of propagate stage. */ + ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, + src_offset, &src_val, true); + gcc_checking_assert (src_val); + } + } + ret |= dest_lat->set_contains_variable (); + } else for (src_val = src_lat->values; src_val; src_val = src_val->next) { - tree opnd1 = src_val->value; - tree cstval = NULL_TREE; - - /* Skip source values that is incompatible with specified type. */ - if (!opnd1_type - || useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) - cstval = ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type); + /* Now we do not use self-recursively generated value as propagation + source, otherwise it is easy to make value space of normal lattice + overflow. */ + if (self_recursively_generated_p (src_val)) + continue; + tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, + src_val, res_type); if (cstval) ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, src_offset); @@ -3851,6 +3981,9 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest, hot |= cs->maybe_hot_p (); if (cs->caller != dest) non_self_recursive = true; + else if (src->val) + gcc_assert (values_equal_for_ipcp_p (src->val->value, + val->value)); } cs = get_next_cgraph_edge_clone (cs); } diff --git a/gcc/params.def b/gcc/params.def index 322c37f8b96..3e242e09f01 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -1135,6 +1135,11 @@ DEFPARAM (PARAM_IPA_CP_RECURSION_PENALTY, "are evaluated for cloning.", 40, 0, 100) +DEFPARAM (PARAM_IPA_CP_MAX_RECURSION_COPIES, + "ipa-cp-max-recursion-copies", + "Maximum function versioning copies for a self-recursive function.", + 8, 0, 0) + DEFPARAM (PARAM_IPA_CP_SINGLE_CALL_PENALTY, "ipa-cp-single-call-penalty", "Percentage penalty functions containing a single call to another " diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c new file mode 100644 index 00000000000..f70fb110c67 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursion-copies=8" } */ + +int fn(); + +int data[100]; + +int recur_fn (int i) +{ + int j; + + if (i == 6) + { + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + return 10; + } + + data[i] = i; + + for (j = 0; j < 100; j++) + recur_fn (i + 1); + + return i; +} + +int main () +{ + int i; + + for (i = 0; i < 100; i++) + recur_fn (1) + recur_fn (-5); + + return 1; +} + +/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */ -- 2.17.1