On Tue, 22 May 2018, Richard Biener wrote: > On May 22, 2018 6:53:57 PM GMT+02:00, Joseph Myers <jos...@codesourcery.com> > wrote: > >On Tue, 22 May 2018, Richard Biener wrote: > > > >> + if (*sptr & (1 << (CHAR_BIT * sizeof (T) - 1))) > >> + gcc_unreachable (); > >> + m_flag = 1 << ((CHAR_BIT * sizeof (T)) - clz_hwi (*sptr)); > > > >I don't see how the use of clz_hwi works with a type T that may be > >narrower than HOST_WIDE_INT. Surely this logic requires a count of > >leading zeros in something of type T, not a possibly larger number of > >leading zeros after conversion to HOST_WIDE_INT? Also, if T is wider > >than > >int, shifting plain 1 won't work here. > > I messed up the conversion to a template. The bitnum should be subtracted > from HOST_BITS_PER_WIDE_INT and yes, 1 in unsigned hwi should be shifted.
So this is the final patch, I've changed the flags compute to use ffs which is better suited to find a "random" unset bit. For types smaller than HOST_WIDE_INT we'll find bits outside of the range but the truncated mask will be zero. I guess the ffsl ((unsigned long)~intvar) cannot be easily pattern-matched to ffs so a way to do that unsigned conversion would be nice (or mass-change all signed flag ints to unsigned...). I took the opportunity to change dfs_enumerate_from to use an auto_bb_flag rather than a static sbitmap. That should be profitable given we currently have the cacheline with BB flags loaded anyways because we access BB index. So using a BB flag will avoid pulling in the sbitmap cachelines. But it trades possibly less memory stores for it in case the sbitmap modifications were adjacent. OTOH applying a mask should be cheaper than variable shifts involved in sbitmap bit access. It's definitely less code ;) Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. OK for trunk? Thanks, Richard. >From 0091e95a133454da62973ad570c97e7b61bfd0ec Mon Sep 17 00:00:00 2001 From: Richard Guenther <rguent...@suse.de> Date: Fri, 18 May 2018 13:01:36 +0200 Subject: [PATCH] add dynamic cfg flag allocation * cfg.h (struct control_flow_graph): Add edge_flags_allocated and bb_flags_allocated members. (auto_flag): New RAII class for allocating flags. (auto_edge_flag): New RAII class for allocating edge flags. (auto_bb_flag): New RAII class for allocating bb flags. * cfgloop.c (verify_loop_structure): Allocate temporary edge flag dynamically. * cfganal.c (dfs_enumerate_from): Remove use of visited sbitmap in favor of temporarily allocated BB flag. * hsa-brig.c: Re-order includes. * hsa-dump.c: Likewise. * hsa-regalloc.c: Likewise. * print-rtl.c: Likewise. * profile-count.c: Likewise. diff --git a/gcc/cfg.c b/gcc/cfg.c index 11026e7209a..f8b217d39ca 100644 --- a/gcc/cfg.c +++ b/gcc/cfg.c @@ -79,6 +79,8 @@ init_flow (struct function *the_fun) = EXIT_BLOCK_PTR_FOR_FN (the_fun); EXIT_BLOCK_PTR_FOR_FN (the_fun)->prev_bb = ENTRY_BLOCK_PTR_FOR_FN (the_fun); + the_fun->cfg->edge_flags_allocated = EDGE_ALL_FLAGS; + the_fun->cfg->bb_flags_allocated = BB_ALL_FLAGS; } /* Helper function for remove_edge and clear_edges. Frees edge structure diff --git a/gcc/cfg.h b/gcc/cfg.h index 0953456782b..9fff135d11f 100644 --- a/gcc/cfg.h +++ b/gcc/cfg.h @@ -74,6 +74,10 @@ struct GTY(()) control_flow_graph { /* Maximal count of BB in function. */ profile_count count_max; + + /* Dynamically allocated edge/bb flags. */ + int edge_flags_allocated; + int bb_flags_allocated; }; @@ -121,4 +125,60 @@ extern basic_block get_bb_copy (basic_block); void set_loop_copy (struct loop *, struct loop *); struct loop *get_loop_copy (struct loop *); +/* Generic RAII class to allocate a bit from storage of integer type T. + The allocated bit is accessible as mask with the single bit set + via the conversion operator to T. */ + +template <class T> +class auto_flag +{ +public: + /* static assert T is integer type of max HOST_WIDE_INT precision. */ + auto_flag (T *sptr) + { + m_sptr = sptr; + int free_bit = ffs_hwi (~*sptr); + /* If there are no unset bits... */ + if (free_bit == 0) + gcc_unreachable (); + m_flag = HOST_WIDE_INT_1U << (free_bit - 1); + /* ...or if T is signed and thus the complement is sign-extended, + check if we ran out of bits. We could spare us this bit + if we could use C++11 std::make_unsigned<T>::type to pass + ~*sptr to ffs_hwi. */ + if (m_flag == 0) + gcc_unreachable (); + gcc_checking_assert ((*sptr & m_flag) == 0); + *sptr |= m_flag; + } + ~auto_flag () + { + gcc_checking_assert ((*m_sptr & m_flag) == m_flag); + *m_sptr &= ~m_flag; + } + operator T () const { return m_flag; } +private: + T *m_sptr; + T m_flag; +}; + +/* RAII class to allocate an edge flag for temporary use. You have + to clear the flag from all edges when you are finished using it. */ + +class auto_edge_flag : public auto_flag<int> +{ +public: + auto_edge_flag (function *fun) + : auto_flag (&fun->cfg->edge_flags_allocated) {} +}; + +/* RAII class to allocate a bb flag for temporary use. You have + to clear the flag from all edges when you are finished using it. */ +class auto_bb_flag : public auto_flag<int> +{ +public: + auto_bb_flag (function *fun) + : auto_flag (&fun->cfg->bb_flags_allocated) {} +}; + #endif /* GCC_CFG_H */ diff --git a/gcc/cfganal.c b/gcc/cfganal.c index a901b3f3f2c..b9944c6ef98 100644 --- a/gcc/cfganal.c +++ b/gcc/cfganal.c @@ -1145,41 +1145,12 @@ dfs_enumerate_from (basic_block bb, int reverse, { basic_block *st, lbb; int sp = 0, tv = 0; - unsigned size; - /* A bitmap to keep track of visited blocks. Allocating it each time - this function is called is not possible, since dfs_enumerate_from - is often used on small (almost) disjoint parts of cfg (bodies of - loops), and allocating a large sbitmap would lead to quadratic - behavior. */ - static sbitmap visited; - static unsigned v_size; + auto_bb_flag visited (cfun); -#define MARK_VISITED(BB) (bitmap_set_bit (visited, (BB)->index)) -#define UNMARK_VISITED(BB) (bitmap_clear_bit (visited, (BB)->index)) -#define VISITED_P(BB) (bitmap_bit_p (visited, (BB)->index)) - - /* Resize the VISITED sbitmap if necessary. */ - size = last_basic_block_for_fn (cfun); - if (size < 10) - size = 10; - - if (!visited) - { - - visited = sbitmap_alloc (size); - bitmap_clear (visited); - v_size = size; - } - else if (v_size < size) - { - /* Ensure that we increase the size of the sbitmap exponentially. */ - if (2 * v_size > size) - size = 2 * v_size; - - visited = sbitmap_resize (visited, size, 0); - v_size = size; - } +#define MARK_VISITED(BB) ((BB)->flags |= visited) +#define UNMARK_VISITED(BB) ((BB)->flags &= ~visited) +#define VISITED_P(BB) (((BB)->flags & visited) != 0) st = XNEWVEC (basic_block, rslt_max); rslt[tv++] = st[sp++] = bb; diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c index 8af793c6015..fb5ebad1dfd 100644 --- a/gcc/cfgloop.c +++ b/gcc/cfgloop.c @@ -1539,6 +1539,7 @@ verify_loop_structure (void) /* Check irreducible loops. */ if (loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) { + auto_edge_flag saved_irr_mask (cfun); /* Record old info. */ auto_sbitmap irreds (last_basic_block_for_fn (cfun)); FOR_EACH_BB_FN (bb, cfun) @@ -1550,7 +1551,7 @@ verify_loop_structure (void) bitmap_clear_bit (irreds, bb->index); FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_IRREDUCIBLE_LOOP) - e->flags |= EDGE_ALL_FLAGS + 1; + e->flags |= saved_irr_mask; } /* Recount it. */ @@ -1576,20 +1577,20 @@ verify_loop_structure (void) FOR_EACH_EDGE (e, ei, bb->succs) { if ((e->flags & EDGE_IRREDUCIBLE_LOOP) - && !(e->flags & (EDGE_ALL_FLAGS + 1))) + && !(e->flags & saved_irr_mask)) { error ("edge from %d to %d should be marked irreducible", e->src->index, e->dest->index); err = 1; } else if (!(e->flags & EDGE_IRREDUCIBLE_LOOP) - && (e->flags & (EDGE_ALL_FLAGS + 1))) + && (e->flags & saved_irr_mask)) { error ("edge from %d to %d should not be marked irreducible", e->src->index, e->dest->index); err = 1; } - e->flags &= ~(EDGE_ALL_FLAGS + 1); + e->flags &= ~saved_irr_mask; } } } diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c index d3efff40453..ca066118ebd 100644 --- a/gcc/hsa-brig.c +++ b/gcc/hsa-brig.c @@ -35,8 +35,8 @@ along with GCC; see the file COPYING3. If not see #include "stor-layout.h" #include "output.h" #include "basic-block.h" -#include "cfg.h" #include "function.h" +#include "cfg.h" #include "fold-const.h" #include "stringpool.h" #include "gimple-pretty-print.h" diff --git a/gcc/hsa-dump.c b/gcc/hsa-dump.c index 4ee53c81277..77fef5ee5d8 100644 --- a/gcc/hsa-dump.c +++ b/gcc/hsa-dump.c @@ -27,8 +27,8 @@ along with GCC; see the file COPYING3. If not see #include "vec.h" #include "tree.h" #include "basic-block.h" -#include "cfg.h" #include "function.h" +#include "cfg.h" #include "dumpfile.h" #include "gimple-pretty-print.h" #include "cgraph.h" diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c index f402587408d..819f680d1bc 100644 --- a/gcc/hsa-regalloc.c +++ b/gcc/hsa-regalloc.c @@ -27,9 +27,9 @@ along with GCC; see the file COPYING3. If not see #include "tree.h" #include "dominance.h" #include "basic-block.h" -#include "cfg.h" -#include "cfganal.h" #include "function.h" +#include "cfganal.h" +#include "cfg.h" #include "bitmap.h" #include "dumpfile.h" #include "cgraph.h" diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c index 37c0d53fae2..ba7aa260194 100644 --- a/gcc/print-rtl.c +++ b/gcc/print-rtl.c @@ -36,11 +36,11 @@ along with GCC; see the file COPYING3. If not see #include "alias.h" #include "tree.h" #include "basic-block.h" -#include "cfg.h" #include "print-tree.h" #include "flags.h" #include "predict.h" #include "function.h" +#include "cfg.h" #include "basic-block.h" #include "diagnostic.h" #include "tree-pretty-print.h" diff --git a/gcc/profile-count.c b/gcc/profile-count.c index 3d411cfbfb3..3745ae073c8 100644 --- a/gcc/profile-count.c +++ b/gcc/profile-count.c @@ -25,8 +25,8 @@ along with GCC; see the file COPYING3. If not see #include "options.h" #include "tree.h" #include "basic-block.h" -#include "cfg.h" #include "function.h" +#include "cfg.h" #include "gimple.h" #include "data-streamer.h" #include "cgraph.h" -- 2.12.3