IPA-CP can not identify a constant by-ref argument to a function, if definition of the argument is not in same basic block where the callsite lies in. This is because IPA-CP only does local search in the callsite basic block.So this patch implemented an enhanced algorithm, which uses dominance information to guide traverse in virtual SSA web, to find out constant definitions in dominating basic block.
Feng --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 526ed45be89..cc076c337af 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2019-06-04 Feng Xue <f...@os.amperecomputing.com> + + PR ipa/90401 + * ipa-prop.c (add_to_agg_contents_list): New function. + (clobber_by_agg_contents_list_p): Likewise. + (extract_mem_content): Likewise. + (strictly_dominated_by_ssa_p): Likewise. + (get_place_in_agg_contents_list): Delete. + (determine_known_aggregate_parts): Renamed from + determine_locally_known_aggregate_parts. + 2019-06-04 Segher Boessenkool <seg...@kernel.crashing.org> * config/rs6000/constraints.md (define_register_constraint "wp"): diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index d86c2f3db55..405cdf7730b 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -1458,7 +1458,7 @@ get_ssa_def_if_simple_copy (tree rhs) return rhs; } -/* Simple linked list, describing known contents of an aggregate beforere +/* Simple linked list, describing known contents of an aggregate before call. */ struct ipa_known_agg_contents_list @@ -1471,41 +1471,64 @@ struct ipa_known_agg_contents_list struct ipa_known_agg_contents_list *next; }; -/* Find the proper place in linked list of ipa_known_agg_contents_list - structures where to put a new one with the given LHS_OFFSET and LHS_SIZE, - unless there is a partial overlap, in which case return NULL, or such - element is already there, in which case set *ALREADY_THERE to true. */ +/* Add a known content item into a linked list of ipa_known_agg_contents_list, + in which all elements are sorted ascendingly by offset. When ALLOW_DUP is + false, insert the item only if there is no duplicate one (with same offset + and size) in the list. And if the item is added, return true. */ -static struct ipa_known_agg_contents_list ** -get_place_in_agg_contents_list (struct ipa_known_agg_contents_list **list, - HOST_WIDE_INT lhs_offset, - HOST_WIDE_INT lhs_size, - bool *already_there) +static inline bool +add_to_agg_contents_list (struct ipa_known_agg_contents_list **plist, + struct ipa_known_agg_contents_list *item, + bool allow_dup = true) { - struct ipa_known_agg_contents_list **p = list; - while (*p && (*p)->offset < lhs_offset) + struct ipa_known_agg_contents_list *list = *plist; + + for (; list; list = list->next) { - if ((*p)->offset + (*p)->size > lhs_offset) - return NULL; - p = &(*p)->next; + if (list->offset > item->offset) + break; + + if (list->offset == item->offset && list->size == item->size + && !allow_dup) + return false; + + plist = &list->next; } - if (*p && (*p)->offset < lhs_offset + lhs_size) + item->next = list; + *plist = item; + return true; +} + +/* Check whether a given known content is clobbered by certain element in + a linked list of ipa_known_agg_contents_list. A special case is that + we can ignore those constant items completely same as the given item, + that is they have same offset/size/value. */ + +static inline bool +clobber_by_agg_contents_list_p (struct ipa_known_agg_contents_list *list, + struct ipa_known_agg_contents_list *item) +{ + for (; list; list = list->next) { - if ((*p)->offset == lhs_offset && (*p)->size == lhs_size) - /* We already know this value is subsequently overwritten with - something else. */ - *already_there = true; - else - /* Otherwise this is a partial overlap which we cannot - represent. */ - return NULL; + if (list->offset > item->offset) + return list->offset < item->offset + item->size; + + /* For the constant item, we can skip comparison with identical items in + the list, because its content remains unchanged after clobbering. */ + if (list->offset == item->offset && list->size == item->size + && list->constant && item->constant + && operand_equal_p (list->constant, item->constant, 0)) + continue; + + if (list->offset + list->size > item->offset) + return true; } - return p; + return false; } /* Build aggregate jump function from LIST, assuming there are exactly - CONST_COUNT constant entries there and that th offset of the passed argument + CONST_COUNT constant entries there and that offset of the passed argument is ARG_OFFSET and store it into JFUNC. */ static void @@ -1528,6 +1551,66 @@ build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list, } } +/* If STMT is a memory store to the object whose address is BASE, extract + information (offset, size, and value) into CONTENT, and return true, + otherwise we conservatively assume the whole object is modified with + unknown content, and return false. CHECK_REF means that access to object + is expected to be in form of MEM_REF expression. */ + +static bool +extract_mem_content (gimple *stmt, tree base, bool check_ref, + struct ipa_known_agg_contents_list *content) +{ + HOST_WIDE_INT lhs_offset, lhs_size; + tree lhs, rhs, lhs_base; + bool reverse; + + if (!gimple_assign_single_p (stmt)) + return false; + + lhs = gimple_assign_lhs (stmt); + rhs = gimple_assign_rhs1 (stmt); + + if (!is_gimple_reg_type (TREE_TYPE (rhs)) + || TREE_CODE (lhs) == BIT_FIELD_REF + || contains_bitfld_component_ref_p (lhs)) + return false; + + lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset, + &lhs_size, &reverse); + if (!lhs_base) + return false; + + if (check_ref) + { + if (TREE_CODE (lhs_base) != MEM_REF + || TREE_OPERAND (lhs_base, 0) != base + || !integer_zerop (TREE_OPERAND (lhs_base, 1))) + return false; + } + else if (lhs_base != base) + return false; + + rhs = get_ssa_def_if_simple_copy (rhs); + + content->size = lhs_size; + content->offset = lhs_offset; + content->constant = is_gimple_ip_invariant (rhs) ? rhs : NULL_TREE; + content->next = NULL; + + return true; +} + +/* Return true if defintion of SSA name strictly dominates basic block BB. */ + +static inline bool +strictly_dominated_by_ssa_p (basic_block bb, tree ssa) +{ + basic_block ssa_bb = gimple_bb (SSA_NAME_DEF_STMT (ssa)); + + return bb != ssa_bb && dominated_by_p (CDI_DOMINATORS, bb, ssa_bb); +} + /* Traverse statements from CALL backwards, scanning whether an aggregate given in ARG is filled in with constant values. ARG can either be an aggregate expression or a pointer to an aggregate. ARG_TYPE is the type of the @@ -1535,19 +1618,22 @@ build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list, subsequently stored. */ static void -determine_locally_known_aggregate_parts (gcall *call, tree arg, - tree arg_type, - struct ipa_jump_func *jfunc) +determine_known_aggregate_parts (gcall *call, tree arg, + tree arg_type, + struct ipa_jump_func *jfunc) { - struct ipa_known_agg_contents_list *list = NULL; + struct ipa_known_agg_contents_list *list = NULL, *all_list = NULL; + basic_block call_bb = gimple_bb (call); + hash_set <tree> visited; + auto_vec <tree> worklist; int item_count = 0, const_count = 0; + int ipa_max_agg_items = PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS); HOST_WIDE_INT arg_offset, arg_size; - gimple_stmt_iterator gsi; tree arg_base; bool check_ref, by_ref; ao_ref r; - if (PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0) + if (ipa_max_agg_items == 0) return; /* The function operates in three stages. First, we prepare check_ref, r, @@ -1606,82 +1692,139 @@ determine_locally_known_aggregate_parts (gcall *call, tree arg, ao_ref_init (&r, arg); } - /* Second stage walks back the BB, looks at individual statements and as long - as it is confident of how the statements affect contents of the - aggregates, it builds a sorted linked list of ipa_agg_jf_list structures - describing it. */ - gsi = gsi_for_stmt (call); - gsi_prev (&gsi); - for (; !gsi_end_p (gsi); gsi_prev (&gsi)) - { - struct ipa_known_agg_contents_list *n, **p; - gimple *stmt = gsi_stmt (gsi); - HOST_WIDE_INT lhs_offset, lhs_size; - tree lhs, rhs, lhs_base; - bool reverse; - - if (!stmt_may_clobber_ref_p_1 (stmt, &r)) - continue; - if (!gimple_assign_single_p (stmt)) - break; - - lhs = gimple_assign_lhs (stmt); - rhs = gimple_assign_rhs1 (stmt); - if (!is_gimple_reg_type (TREE_TYPE (rhs)) - || TREE_CODE (lhs) == BIT_FIELD_REF - || contains_bitfld_component_ref_p (lhs)) - break; - - lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset, - &lhs_size, &reverse); - if (!lhs_base) - break; - - if (check_ref) - { - if (TREE_CODE (lhs_base) != MEM_REF - || TREE_OPERAND (lhs_base, 0) != arg_base - || !integer_zerop (TREE_OPERAND (lhs_base, 1))) - break; - } - else if (lhs_base != arg_base) - { - if (DECL_P (lhs_base)) - continue; - else - break; - } - - bool already_there = false; - p = get_place_in_agg_contents_list (&list, lhs_offset, lhs_size, - &already_there); - if (!p) - break; - if (already_there) - continue; - - rhs = get_ssa_def_if_simple_copy (rhs); - n = XALLOCA (struct ipa_known_agg_contents_list); - n->size = lhs_size; - n->offset = lhs_offset; - if (is_gimple_ip_invariant (rhs)) - { - n->constant = rhs; - const_count++; - } - else - n->constant = NULL_TREE; - n->next = *p; - *p = n; - - item_count++; - if (const_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) - || item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) - break; - } + /* Second stage walks back from the call statement, looks at individual + virtual operand and as long as it is confident of how definition + statement of the virtual operand affects content of the aggregate, it + builds a sorted linked list of ipa_agg_jf_list structures describing it. + + Actually, the stage involves kind of global reachability analysis on + in-memory variables definitions. A complete resolution requires iterative + dataflow equation computation, which is somewhat complex and seems to be + overkill to this collecting-constant task. + + So here we use a simpler algorithm that we believe can archive nearly + same result in most situations. In the algorithm, we traverse virtual SSA + web backwards starting from the call, by stepping from one dominating + virtual operand (its definition dominates the call) to another dominating + one. Before a dom virtual operand is processed, we will ensure all those + non-dom virtual operands, which are backwards reachable from previous dom + virtual operand, have been processed. But only dom virtual operand is + regarded as possible constant value provider to the call argument. And + non-dom virtual operand is just checked to see whether it kills + definitions of some dom virtual operands. */ + + for (tree dom_vuse = gimple_vuse (call); dom_vuse; ) + { + gimple *stmt = SSA_NAME_DEF_STMT (dom_vuse); + bool exist = visited.add (dom_vuse); + + gcc_assert (!exist); + + if (gimple_code (stmt) != GIMPLE_PHI) + { + if (stmt_may_clobber_ref_p_1 (stmt, &r)) + { + struct ipa_known_agg_contents_list *content = + XALLOCA (struct ipa_known_agg_contents_list); + + if (!extract_mem_content (stmt, arg_base, check_ref, content)) + break; + + /* Now we get a dom virtual operand, and need to check whether + its value can finally reach the call. And put it into the + content list only if it is unique. */ + if (!clobber_by_agg_contents_list_p (all_list, content) + && add_to_agg_contents_list (&list, content, false)) + { + struct ipa_known_agg_contents_list *copy = + XALLOCA (struct ipa_known_agg_contents_list); + + *copy = *content; + content = copy; + + item_count +=1; + const_count += !!(content->constant); + + if (item_count == 2 * ipa_max_agg_items + || const_count == ipa_max_agg_items) + break; + } + add_to_agg_contents_list (&all_list, content); + } + dom_vuse = gimple_vuse (stmt); + continue; + } + + /* We get to a merge point in virtual SSA web, then go through all + non-dom virtual operands before handling next dom virtual operand. */ + worklist.safe_push (dom_vuse); + dom_vuse = NULL_TREE; + + do + { + tree vuse = worklist.pop (); + gimple *stmt = SSA_NAME_DEF_STMT (vuse); + auto_vec<tree, 6> append; + + /* Non-dom virtual operand should not be the DEFAULT definition. */ + gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (vuse)); + + if (gimple_code (stmt) != GIMPLE_PHI) + { + if (stmt_may_clobber_ref_p_1 (stmt, &r)) + { + struct ipa_known_agg_contents_list *content = + XALLOCA (struct ipa_known_agg_contents_list); + + if (!extract_mem_content (stmt, arg_base, check_ref, content)) + goto finish; + + add_to_agg_contents_list (&all_list, content); + } + append.safe_push (gimple_vuse (stmt)); + } + else + { + for (unsigned i = 0; i < gimple_phi_num_args (stmt); ++i) + { + tree phi_arg = gimple_phi_arg_def (stmt, i); + + if (SSA_NAME_IS_DEFAULT_DEF (phi_arg)) + goto finish; + + append.safe_push (phi_arg); + } + } + + for (unsigned i = 0; i < append.length (); ++i) + { + if (visited.add (vuse = append[i])) + continue; + + if (SSA_NAME_IS_DEFAULT_DEF (vuse) + || strictly_dominated_by_ssa_p (call_bb, vuse)) + { + /* Found a new dom virtual operand, stop going further until + all pending non-dom virtual operands are processed. */ + gcc_assert (!dom_vuse); + dom_vuse = vuse; + } + else + worklist.safe_push (vuse); + } + } while (!worklist.is_empty ()); + + gcc_assert (dom_vuse); + + /* It is asserted that unprocessed dom virtual operand should be in + non-visited state. */ + visited.remove (dom_vuse); + } + +finish: /* Third stage just goes over the list and creates an appropriate vector of - ipa_agg_jf_item structures out of it, of sourse only if there are + ipa_agg_jf_item structures out of it, of course only if there are any known constants to begin with. */ if (const_count) @@ -1691,6 +1834,7 @@ determine_locally_known_aggregate_parts (gcall *call, tree arg, } } + /* Return the Ith param type of callee associated with call graph edge E. */ @@ -1797,7 +1941,7 @@ ipa_set_jfunc_vr (ipa_jump_func *jf, enum value_range_kind type, jf->m_vr = ipa_get_value_range (type, min, max); } -/* Assign to JF a pointer to a value_range just liek TMP but either fetch a +/* Assign to JF a pointer to a value_range just like TMP but either fetch a copy from ipa_vr_hash_table or allocate a new on in GC memory. */ static void @@ -1978,7 +2122,7 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi, || !ipa_get_jf_ancestor_agg_preserved (jfunc)) && (AGGREGATE_TYPE_P (TREE_TYPE (arg)) || POINTER_TYPE_P (param_type))) - determine_locally_known_aggregate_parts (call, arg, param_type, jfunc); + determine_known_aggregate_parts (call, arg, param_type, jfunc); } if (!useful_context) vec_free (args->polymorphic_call_contexts); diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c new file mode 100644 index 00000000000..131c4e265bb --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c @@ -0,0 +1,78 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-inline" } */ + +int data1; + +int callee1(int *v) +{ + if (*v < 2) + return 0; + else + { + int t = data1; + + data1 = *v; + *v = t; + + return 1; + } +} + +int __attribute((pure)) callee2(int *v) +{ + if (*v < 2) + return 0; + else + { + data1 = v[0] + v[2]; + + return 1; + } +} + +int caller1(int c, int *r) +{ + int a = 1; + + if (c) + return callee1(&a); + else + { + *r = 2; + return callee1(r); + } +} + +int data2[200]; +int data3; + +int __attribute((const)) gen_cond(int); + +int caller2(void) +{ + int i, j; + int sum = 0; + int a[8]; + + a[0] = 3; + for (i = 0; i < 100; i++) + { + if (gen_cond (i)) + continue; + + a[2] = 4; + for (j = 0; j < 100; j++) + { + data2[i + j] = (i ^ j) + data3; + + sum += callee2(a); + } + } + + return sum; +} + +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 1" 1 "cp" } } */ +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 2" 1 "cp" } } */ +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 3" 1 "cp" } } */ +/* { dg-final { scan-ipa-dump-times "offset: 64, cst: 4" 1 "cp" } } */