On Thu, Oct 17, 2024 at 4:43 AM Andrew Pinski <quic_apin...@quicinc.com> wrote:
>
> After fixing loop-im to do the correct overflow rewriting
> for pointer types too. We end up with code like:
> ```
>   _9 = (unsigned long) &g;
>   _84 = _9 + 18446744073709551615;
>   _11 = _42 + _84;
>   _44 = (signed char *) _11;
> ...
>   *_44 = 10;
>   g ={v} {CLOBBER(eos)};
> ...
>   n[0] = &f;
>   *_44 = 8;
>   g ={v} {CLOBBER(eos)};
> ```
> Which was not being recongized by the scope conflicts code.
> This was because it only handled one level walk backs rather than multiple 
> ones.
> This fixes it by using a work_list to avoid huge recursion and a visited 
> bitmap to avoid
> going into an infinite loops when dealing with loops.
> Adds a cache for the addr_expr's that are associated with each ssa name. I 
> found 2 element
> cache was the decent trade off for size and speed.  Most ssa names will have 
> only
> one address associated with it but there are times (phis) where 2 or more will
> be there. But 2 is the common case for most if statements.

Hmm.  I was hoping that, since we walk BBs in RPO order in add_scope_conflicts,
we can simply populate the cache in the first forward walk of a BBs
stmts without
needing to care for cycles or using of a worklist.  We even get the
set of ADDR_EXPRs
in the stmt we visit for free, we just have to associate that set with
the SSA def(s).
I'd have used a SSA name -> bitmap mapping for simplicity.  For
backedges the iteration
should get things corrected, so we should be able to use the "cache"
as conservative
answer even for those.

Richard.

> gcc/ChangeLog:
>
>         PR middle-end/111422
>         * cfgexpand.cc: Define INCLUDE_STRING if ADDR_WALKER_STATS
>         is defined.
>         (class addr_ssa_walker): New class.
>         (add_scope_conflicts_2): Rename to ...
>         (addr_ssa_walker::operator()): This and rewrite to be a full walk
>         of all operands and their uses and use a cache.
>         (add_scope_conflicts_1): Add walker new argument for the addr cache.
>         Just walk the phi result since that will include all addr_exprs.
>         Change call to add_scope_conflicts_2 to walker.
>         (add_scope_conflicts): Add walker variable and update call to
>         add_scope_conflicts_1.
>
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> ---
>  gcc/cfgexpand.cc | 207 ++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 176 insertions(+), 31 deletions(-)
>
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index 6c1096363af..74f4cfc0f22 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -17,6 +17,9 @@ You should have received a copy of the GNU General Public 
> License
>  along with GCC; see the file COPYING3.  If not see
>  <http://www.gnu.org/licenses/>.  */
>
> +#ifdef ADDR_WALKER_STATS
> +#define INCLUDE_STRING
> +#endif
>  #include "config.h"
>  #include "system.h"
>  #include "coretypes.h"
> @@ -571,35 +574,175 @@ visit_conflict (gimple *, tree op, tree, void *data)
>    return false;
>  }
>
> -/* Helper function for add_scope_conflicts_1.  For USE on
> -   a stmt, if it is a SSA_NAME and in its SSA_NAME_DEF_STMT is known to be
> -   based on some ADDR_EXPR, invoke VISIT on that ADDR_EXPR.  */
> +namespace {
>
> -static inline void
> -add_scope_conflicts_2 (tree use, bitmap work,
> -                      walk_stmt_load_store_addr_fn visit)
> +class addr_ssa_walker
> +{
> +private:
> +  struct addr_cache
> +  {
> +  private:
> +    unsigned elems = 0;
> +    static constexpr unsigned maxelements = 2;
> +    bool visited = false;
> +    tree cached[maxelements] = {};
> +  public:
> +    /* Returns true if the cache is valid. */
> +    operator bool ()
> +    {
> +      return visited && elems <= maxelements;
> +    }
> +    /* Mark as visited. The cache might be invalidated
> +       by adding too many elements though. */
> +    void visit () { visited = true; }
> +    /* Iterator over the cached values. */
> +    tree *begin () { return &cached[0]; }
> +    tree *end ()
> +    {
> +      /* If there was too many elements, then there are
> +        nothing to vist in the cache. */
> +      if (elems > maxelements)
> +       return &cached[0];
> +      return &cached[elems];
> +    }
> +    /* Add ADDR_EXPR to the cache if it is not there already. */
> +    void add (tree addr)
> +    {
> +      if (elems > maxelements)
> +       {
> +         statistics_counter_event (cfun, "addr_walker already overflow", 1);
> +         return;
> +       }
> +      /* Skip if the cache already contains the addr_expr. */
> +      for(tree other : *this)
> +       if (operand_equal_p (other, addr))
> +         return;
> +      elems++;
> +      /* Record that the cache overflowed. */
> +      if (elems > maxelements)
> +       {
> +         statistics_counter_event (cfun, "addr_walker overflow", 1);
> +         return;
> +       }
> +      cached[elems - 1] = addr;
> +    }
> +  };
> +public:
> +  addr_ssa_walker () : cache (new addr_cache[num_ssa_names]{}) { }
> +  ~addr_ssa_walker (){ delete[] cache; }
> +
> +  /* Walk the name and its defining statement,
> +     call func with for addr_expr's. */
> +  template<typename T>
> +  void operator ()(tree name, T func);
> +
> +private:
> +
> +  /* Cannot create a copy. */
> +  addr_ssa_walker (const addr_ssa_walker &) = delete;
> +  addr_ssa_walker (addr_ssa_walker &&) = delete;
> +  /* Return the cache entries for a SSA NAME. */
> +  addr_cache &operator[] (tree name)
> +  {
> +    return cache[SSA_NAME_VERSION (name)];
> +  }
> +
> +  addr_cache *cache;
> +};
> +
> +/* Walk backwards on the defining statements of NAME
> +   and call FUNC on the addr_expr. Use the cache for
> +   the SSA name if possible.  */
> +
> +template<typename T>
> +void
> +addr_ssa_walker::operator() (tree name, T func)
>  {
> -  if (TREE_CODE (use) == SSA_NAME
> -      && (POINTER_TYPE_P (TREE_TYPE (use))
> -         || INTEGRAL_TYPE_P (TREE_TYPE (use))))
> +  gcc_assert (TREE_CODE (name) == SSA_NAME);
> +  auto_vec<std::pair<tree,tree>, 4> work_list;
> +  auto_bitmap visited_ssa_names;
> +  work_list.safe_push (std::make_pair (name, name));
> +
> +#ifdef ADDR_WALKER_STATS
> +  unsigned process_list = 0;
> +#endif
> +
> +  while (!work_list.is_empty())
>      {
> +      auto work = work_list.pop();
> +      tree use = work.first;
> +      tree old_name = work.second;
> +#ifdef ADDR_WALKER_STATS
> +      process_list++;
> +#endif
> +
> +      if (!use)
> +       continue;
> +      /* For addr_expr's call the function and update the cache. */
> +      if (TREE_CODE (use) == ADDR_EXPR)
> +       {
> +         func (use);
> +         (*this)[old_name].add (use);
> +         continue;
> +       }
> +      /* Ignore all non SSA names. */
> +      if (TREE_CODE (use) != SSA_NAME)
> +       continue;
> +
> +      /* Only Pointers and integral types are used to track addresses.  */
> +      if (!POINTER_TYPE_P (TREE_TYPE (use))
> +         && !INTEGRAL_TYPE_P (TREE_TYPE (use)))
> +       continue;
> +
> +      /* Check the cache, if there is a hit use it.  */
> +      if ((*this)[use])
> +       {
> +         statistics_counter_event (cfun, "addr_walker cache hit", 1);
> +         /* Call the function and update the cache.  */
> +         for (tree naddr : (*this)[use])
> +           {
> +             (*this)[old_name].add (naddr);
> +             func (naddr);
> +           }
> +         continue;
> +       }
>        gimple *g = SSA_NAME_DEF_STMT (use);
> +      /* Mark the use as being visited so even if the cache is empty,
> +        there is no reason to walk the names backwards. */
> +      (*this)[use].visit();
> +      /* Skip the name if already visted this time.  */
> +      if (!bitmap_set_bit (visited_ssa_names, SSA_NAME_VERSION (use)))
> +       continue;
> +      /* Need to update the old_name afterwards add it to the work list,
> +        this will either hit the cache or the check bitmap will skip it
> +        if there was too many names associated with the cache. */
> +      work_list.safe_push (work);
> +
> +      /* For assign statements, add each operand to the work list.
> +        Note operand 0 is the same as the use here so there is nothing
> +        to be done.  */
>        if (gassign *a = dyn_cast <gassign *> (g))
>         {
> -         if (tree op = gimple_assign_rhs1 (a))
> -           if (TREE_CODE (op) == ADDR_EXPR)
> -             visit (a, TREE_OPERAND (op, 0), op, work);
> +         for (unsigned i = 1; i < gimple_num_ops (g); i++)
> +           work_list.safe_push (std::make_pair (gimple_op (a, i), use));
>         }
> +      /* PHI nodes, add each argument to the work list.  */
>        else if (gphi *p = dyn_cast <gphi *> (g))
>         for (unsigned i = 0; i < gimple_phi_num_args (p); ++i)
> -         if (TREE_CODE (use = gimple_phi_arg_def (p, i)) == SSA_NAME)
> -           if (gassign *a = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (use)))
> -             {
> -               if (tree op = gimple_assign_rhs1 (a))
> -                 if (TREE_CODE (op) == ADDR_EXPR)
> -                   visit (a, TREE_OPERAND (op, 0), op, work);
> -             }
> -    }
> +         work_list.safe_push (std::make_pair (gimple_phi_arg_def (p, i), 
> use));
> +      /* All other kind of statements assume they can't contribute to an 
> address
> +        being alive.  */
> +    }
> +  /* This stat is here to see how long is the longest walk,
> +     it is not useful stat except when tuning
> +     addr_ssa_walker::addr_cache::maxelements.  */
> +#ifdef ADDR_WALKER_STATS
> +  statistics_counter_event (cfun,
> +                           ("addr_walker process " + std::to_string 
> (process_list)).c_str (),
> +                           1);
> +#endif
> +}
> +
>  }
>
>  /* Helper routine for add_scope_conflicts, calculating the active partitions
> @@ -608,7 +751,8 @@ add_scope_conflicts_2 (tree use, bitmap work,
>     liveness.  */
>
>  static void
> -add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict)
> +add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict,
> +                      addr_ssa_walker &walker)
>  {
>    edge e;
>    edge_iterator ei;
> @@ -623,14 +767,14 @@ add_scope_conflicts_1 (basic_block bb, bitmap work, 
> bool for_conflict)
>
>    visit = visit_op;
>
> +  auto addrvisitor = [&visit,&work](tree addr) {
> +                  gcc_assert (TREE_CODE (addr) == ADDR_EXPR);
> +                  visit (nullptr, TREE_OPERAND (addr, 0), addr, work);
> +                };
> +
> +  /* Walk over the phi for the incoming addresses to be alive.  */
>    for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> -    {
> -      gimple *stmt = gsi_stmt (gsi);
> -      gphi *phi = as_a <gphi *> (stmt);
> -      walk_stmt_load_store_addr_ops (stmt, work, NULL, NULL, visit);
> -      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
> -       add_scope_conflicts_2 (USE_FROM_PTR (use_p), work, visit);
> -    }
> +    walker (gimple_phi_result (gsi_stmt (gsi)), addrvisitor);
>    for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>      {
>        gimple *stmt = gsi_stmt (gsi);
> @@ -676,7 +820,7 @@ add_scope_conflicts_1 (basic_block bb, bitmap work, bool 
> for_conflict)
>             }
>           walk_stmt_load_store_addr_ops (stmt, work, visit, visit, visit);
>           FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
> -           add_scope_conflicts_2 (USE_FROM_PTR (use_p), work, visit);
> +           walker (USE_FROM_PTR (use_p), addrvisitor);
>         }
>      }
>
> @@ -707,6 +851,7 @@ add_scope_conflicts (void)
>    bitmap work = BITMAP_ALLOC (NULL);
>    int *rpo;
>    int n_bbs;
> +  addr_ssa_walker walker;
>
>    /* We approximate the live range of a stack variable by taking the first
>       mention of its name as starting point(s), and by the end-of-scope
> @@ -734,14 +879,14 @@ add_scope_conflicts (void)
>           bitmap active;
>           bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
>           active = (bitmap)bb->aux;
> -         add_scope_conflicts_1 (bb, work, false);
> +         add_scope_conflicts_1 (bb, work, false, walker);
>           if (bitmap_ior_into (active, work))
>             changed = true;
>         }
>      }
>
>    FOR_EACH_BB_FN (bb, cfun)
> -    add_scope_conflicts_1 (bb, work, true);
> +    add_scope_conflicts_1 (bb, work, true, walker);
>
>    free (rpo);
>    BITMAP_FREE (work);
> --
> 2.34.1
>

Reply via email to