On Thu, Oct 17, 2024 at 4:43 AM Andrew Pinski <quic_apin...@quicinc.com> wrote: > > After fixing loop-im to do the correct overflow rewriting > for pointer types too. We end up with code like: > ``` > _9 = (unsigned long) &g; > _84 = _9 + 18446744073709551615; > _11 = _42 + _84; > _44 = (signed char *) _11; > ... > *_44 = 10; > g ={v} {CLOBBER(eos)}; > ... > n[0] = &f; > *_44 = 8; > g ={v} {CLOBBER(eos)}; > ``` > Which was not being recongized by the scope conflicts code. > This was because it only handled one level walk backs rather than multiple > ones. > This fixes it by using a work_list to avoid huge recursion and a visited > bitmap to avoid > going into an infinite loops when dealing with loops. > Adds a cache for the addr_expr's that are associated with each ssa name. I > found 2 element > cache was the decent trade off for size and speed. Most ssa names will have > only > one address associated with it but there are times (phis) where 2 or more will > be there. But 2 is the common case for most if statements.
Hmm. I was hoping that, since we walk BBs in RPO order in add_scope_conflicts, we can simply populate the cache in the first forward walk of a BBs stmts without needing to care for cycles or using of a worklist. We even get the set of ADDR_EXPRs in the stmt we visit for free, we just have to associate that set with the SSA def(s). I'd have used a SSA name -> bitmap mapping for simplicity. For backedges the iteration should get things corrected, so we should be able to use the "cache" as conservative answer even for those. Richard. > gcc/ChangeLog: > > PR middle-end/111422 > * cfgexpand.cc: Define INCLUDE_STRING if ADDR_WALKER_STATS > is defined. > (class addr_ssa_walker): New class. > (add_scope_conflicts_2): Rename to ... > (addr_ssa_walker::operator()): This and rewrite to be a full walk > of all operands and their uses and use a cache. > (add_scope_conflicts_1): Add walker new argument for the addr cache. > Just walk the phi result since that will include all addr_exprs. > Change call to add_scope_conflicts_2 to walker. > (add_scope_conflicts): Add walker variable and update call to > add_scope_conflicts_1. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > gcc/cfgexpand.cc | 207 ++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 176 insertions(+), 31 deletions(-) > > diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc > index 6c1096363af..74f4cfc0f22 100644 > --- a/gcc/cfgexpand.cc > +++ b/gcc/cfgexpand.cc > @@ -17,6 +17,9 @@ You should have received a copy of the GNU General Public > License > along with GCC; see the file COPYING3. If not see > <http://www.gnu.org/licenses/>. */ > > +#ifdef ADDR_WALKER_STATS > +#define INCLUDE_STRING > +#endif > #include "config.h" > #include "system.h" > #include "coretypes.h" > @@ -571,35 +574,175 @@ visit_conflict (gimple *, tree op, tree, void *data) > return false; > } > > -/* Helper function for add_scope_conflicts_1. For USE on > - a stmt, if it is a SSA_NAME and in its SSA_NAME_DEF_STMT is known to be > - based on some ADDR_EXPR, invoke VISIT on that ADDR_EXPR. */ > +namespace { > > -static inline void > -add_scope_conflicts_2 (tree use, bitmap work, > - walk_stmt_load_store_addr_fn visit) > +class addr_ssa_walker > +{ > +private: > + struct addr_cache > + { > + private: > + unsigned elems = 0; > + static constexpr unsigned maxelements = 2; > + bool visited = false; > + tree cached[maxelements] = {}; > + public: > + /* Returns true if the cache is valid. */ > + operator bool () > + { > + return visited && elems <= maxelements; > + } > + /* Mark as visited. The cache might be invalidated > + by adding too many elements though. */ > + void visit () { visited = true; } > + /* Iterator over the cached values. */ > + tree *begin () { return &cached[0]; } > + tree *end () > + { > + /* If there was too many elements, then there are > + nothing to vist in the cache. */ > + if (elems > maxelements) > + return &cached[0]; > + return &cached[elems]; > + } > + /* Add ADDR_EXPR to the cache if it is not there already. */ > + void add (tree addr) > + { > + if (elems > maxelements) > + { > + statistics_counter_event (cfun, "addr_walker already overflow", 1); > + return; > + } > + /* Skip if the cache already contains the addr_expr. */ > + for(tree other : *this) > + if (operand_equal_p (other, addr)) > + return; > + elems++; > + /* Record that the cache overflowed. */ > + if (elems > maxelements) > + { > + statistics_counter_event (cfun, "addr_walker overflow", 1); > + return; > + } > + cached[elems - 1] = addr; > + } > + }; > +public: > + addr_ssa_walker () : cache (new addr_cache[num_ssa_names]{}) { } > + ~addr_ssa_walker (){ delete[] cache; } > + > + /* Walk the name and its defining statement, > + call func with for addr_expr's. */ > + template<typename T> > + void operator ()(tree name, T func); > + > +private: > + > + /* Cannot create a copy. */ > + addr_ssa_walker (const addr_ssa_walker &) = delete; > + addr_ssa_walker (addr_ssa_walker &&) = delete; > + /* Return the cache entries for a SSA NAME. */ > + addr_cache &operator[] (tree name) > + { > + return cache[SSA_NAME_VERSION (name)]; > + } > + > + addr_cache *cache; > +}; > + > +/* Walk backwards on the defining statements of NAME > + and call FUNC on the addr_expr. Use the cache for > + the SSA name if possible. */ > + > +template<typename T> > +void > +addr_ssa_walker::operator() (tree name, T func) > { > - if (TREE_CODE (use) == SSA_NAME > - && (POINTER_TYPE_P (TREE_TYPE (use)) > - || INTEGRAL_TYPE_P (TREE_TYPE (use)))) > + gcc_assert (TREE_CODE (name) == SSA_NAME); > + auto_vec<std::pair<tree,tree>, 4> work_list; > + auto_bitmap visited_ssa_names; > + work_list.safe_push (std::make_pair (name, name)); > + > +#ifdef ADDR_WALKER_STATS > + unsigned process_list = 0; > +#endif > + > + while (!work_list.is_empty()) > { > + auto work = work_list.pop(); > + tree use = work.first; > + tree old_name = work.second; > +#ifdef ADDR_WALKER_STATS > + process_list++; > +#endif > + > + if (!use) > + continue; > + /* For addr_expr's call the function and update the cache. */ > + if (TREE_CODE (use) == ADDR_EXPR) > + { > + func (use); > + (*this)[old_name].add (use); > + continue; > + } > + /* Ignore all non SSA names. */ > + if (TREE_CODE (use) != SSA_NAME) > + continue; > + > + /* Only Pointers and integral types are used to track addresses. */ > + if (!POINTER_TYPE_P (TREE_TYPE (use)) > + && !INTEGRAL_TYPE_P (TREE_TYPE (use))) > + continue; > + > + /* Check the cache, if there is a hit use it. */ > + if ((*this)[use]) > + { > + statistics_counter_event (cfun, "addr_walker cache hit", 1); > + /* Call the function and update the cache. */ > + for (tree naddr : (*this)[use]) > + { > + (*this)[old_name].add (naddr); > + func (naddr); > + } > + continue; > + } > gimple *g = SSA_NAME_DEF_STMT (use); > + /* Mark the use as being visited so even if the cache is empty, > + there is no reason to walk the names backwards. */ > + (*this)[use].visit(); > + /* Skip the name if already visted this time. */ > + if (!bitmap_set_bit (visited_ssa_names, SSA_NAME_VERSION (use))) > + continue; > + /* Need to update the old_name afterwards add it to the work list, > + this will either hit the cache or the check bitmap will skip it > + if there was too many names associated with the cache. */ > + work_list.safe_push (work); > + > + /* For assign statements, add each operand to the work list. > + Note operand 0 is the same as the use here so there is nothing > + to be done. */ > if (gassign *a = dyn_cast <gassign *> (g)) > { > - if (tree op = gimple_assign_rhs1 (a)) > - if (TREE_CODE (op) == ADDR_EXPR) > - visit (a, TREE_OPERAND (op, 0), op, work); > + for (unsigned i = 1; i < gimple_num_ops (g); i++) > + work_list.safe_push (std::make_pair (gimple_op (a, i), use)); > } > + /* PHI nodes, add each argument to the work list. */ > else if (gphi *p = dyn_cast <gphi *> (g)) > for (unsigned i = 0; i < gimple_phi_num_args (p); ++i) > - if (TREE_CODE (use = gimple_phi_arg_def (p, i)) == SSA_NAME) > - if (gassign *a = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (use))) > - { > - if (tree op = gimple_assign_rhs1 (a)) > - if (TREE_CODE (op) == ADDR_EXPR) > - visit (a, TREE_OPERAND (op, 0), op, work); > - } > - } > + work_list.safe_push (std::make_pair (gimple_phi_arg_def (p, i), > use)); > + /* All other kind of statements assume they can't contribute to an > address > + being alive. */ > + } > + /* This stat is here to see how long is the longest walk, > + it is not useful stat except when tuning > + addr_ssa_walker::addr_cache::maxelements. */ > +#ifdef ADDR_WALKER_STATS > + statistics_counter_event (cfun, > + ("addr_walker process " + std::to_string > (process_list)).c_str (), > + 1); > +#endif > +} > + > } > > /* Helper routine for add_scope_conflicts, calculating the active partitions > @@ -608,7 +751,8 @@ add_scope_conflicts_2 (tree use, bitmap work, > liveness. */ > > static void > -add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict) > +add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict, > + addr_ssa_walker &walker) > { > edge e; > edge_iterator ei; > @@ -623,14 +767,14 @@ add_scope_conflicts_1 (basic_block bb, bitmap work, > bool for_conflict) > > visit = visit_op; > > + auto addrvisitor = [&visit,&work](tree addr) { > + gcc_assert (TREE_CODE (addr) == ADDR_EXPR); > + visit (nullptr, TREE_OPERAND (addr, 0), addr, work); > + }; > + > + /* Walk over the phi for the incoming addresses to be alive. */ > for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > - { > - gimple *stmt = gsi_stmt (gsi); > - gphi *phi = as_a <gphi *> (stmt); > - walk_stmt_load_store_addr_ops (stmt, work, NULL, NULL, visit); > - FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE) > - add_scope_conflicts_2 (USE_FROM_PTR (use_p), work, visit); > - } > + walker (gimple_phi_result (gsi_stmt (gsi)), addrvisitor); > for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > { > gimple *stmt = gsi_stmt (gsi); > @@ -676,7 +820,7 @@ add_scope_conflicts_1 (basic_block bb, bitmap work, bool > for_conflict) > } > walk_stmt_load_store_addr_ops (stmt, work, visit, visit, visit); > FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) > - add_scope_conflicts_2 (USE_FROM_PTR (use_p), work, visit); > + walker (USE_FROM_PTR (use_p), addrvisitor); > } > } > > @@ -707,6 +851,7 @@ add_scope_conflicts (void) > bitmap work = BITMAP_ALLOC (NULL); > int *rpo; > int n_bbs; > + addr_ssa_walker walker; > > /* We approximate the live range of a stack variable by taking the first > mention of its name as starting point(s), and by the end-of-scope > @@ -734,14 +879,14 @@ add_scope_conflicts (void) > bitmap active; > bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); > active = (bitmap)bb->aux; > - add_scope_conflicts_1 (bb, work, false); > + add_scope_conflicts_1 (bb, work, false, walker); > if (bitmap_ior_into (active, work)) > changed = true; > } > } > > FOR_EACH_BB_FN (bb, cfun) > - add_scope_conflicts_1 (bb, work, true); > + add_scope_conflicts_1 (bb, work, true, walker); > > free (rpo); > BITMAP_FREE (work); > -- > 2.34.1 >