The testcase in the PR shows that it's worth splitting the processing
of the initial workset, which is def_blocks from the main iteration.
This reduces SSA incremental update time from 44.7s to 32.9s.  Further
changing the workset bitmap of the main iteration to a vector
speeds up things further to 23.5s for an overall nearly halving of
the SSA incremental update compile-time and an overall 12% compile-time
saving at -O1.

Using bitmap_ior in the first loop or avoiding (immediate) re-processing
of blocks in def_blocks does not make a measurable difference for the
testcase so I left this as-is.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

        PR tree-optimization/114480
        * cfganal.cc (compute_idf): Split processing of the initial
        workset from the main iteration.  Use a vector for the
        workset of the main iteration.
---
 gcc/cfganal.cc | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/gcc/cfganal.cc b/gcc/cfganal.cc
index 79035750771..3537e792791 100644
--- a/gcc/cfganal.cc
+++ b/gcc/cfganal.cc
@@ -1679,30 +1679,17 @@ compute_dominance_frontiers (bitmap_head *frontiers)
 bitmap
 compute_idf (bitmap def_blocks, bitmap_head *dfs)
 {
-  bitmap_iterator bi;
+  bitmap_iterator bi, bi2;
   unsigned bb_index, i;
   bitmap phi_insertion_points;
 
   phi_insertion_points = BITMAP_ALLOC (NULL);
 
-  /* Seed the work set with all the blocks in DEF_BLOCKS.  */
-  auto_bitmap work_set;
-  bitmap_copy (work_set, def_blocks);
-  bitmap_tree_view (work_set);
-
-  /* Pop a block off the workset, add every block that appears in
-     the original block's DF that we have not already processed to
-     the workset.  Iterate until the workset is empty.   Blocks
-     which are added to the workset are potential sites for
-     PHI nodes.  */
-  while (!bitmap_empty_p (work_set))
+  /* The initial workset is the DEF_BLOCKs, process that first,
+     seeding phi_insertion_points as the start of the worklist
+     for the iteration which then also serves as a visited set.  */
+  EXECUTE_IF_SET_IN_BITMAP (def_blocks, 0, bb_index, bi2)
     {
-      /* The dominance frontier of a block is blocks after it so iterating
-         on earlier blocks first is better.
-        ???  Basic blocks are by no means guaranteed to be ordered in
-        optimal order for this iteration.  */
-      bb_index = bitmap_clear_first_set_bit (work_set);
-
       /* Since the registration of NEW -> OLD name mappings is done
         separately from the call to update_ssa, when updating the SSA
         form, the basic blocks where new and/or old names are defined
@@ -1716,9 +1703,28 @@ compute_idf (bitmap def_blocks, bitmap_head *dfs)
         the IDF and of work_set which is at most that of the IDF
         as well.  That makes iterating over the DFS bitmap preferential
         to whole bitmap operations involving also phi_insertion_points.  */
+      EXECUTE_IF_SET_IN_BITMAP (&dfs[bb_index], 0, i, bi)
+       bitmap_set_bit (phi_insertion_points, i);
+    }
+
+  /* Seed the work set with the initial phi_insertion_points.  */
+  auto_vec<int, 64> work_set (n_basic_blocks_for_fn (cfun));
+  EXECUTE_IF_SET_IN_BITMAP (phi_insertion_points, 0, i, bi)
+    work_set.quick_push (i);
+
+  /* Pop a block off the workset, add every block that appears in
+     the original block's DF that we have not already processed to
+     the workset.  Iterate until the workset is empty.   Blocks
+     which are added to the workset are potential sites for
+     PHI nodes.  */
+  while (!work_set.is_empty ())
+    {
+      bb_index = work_set.pop ();
+      gcc_checking_assert (bb_index
+                          < (unsigned) last_basic_block_for_fn (cfun));
       EXECUTE_IF_SET_IN_BITMAP (&dfs[bb_index], 0, i, bi)
        if (bitmap_set_bit (phi_insertion_points, i))
-         bitmap_set_bit (work_set, i);
+         work_set.quick_push (i);
     }
 
   return phi_insertion_points;
-- 
2.43.0

Reply via email to