The second testcase in the above PR runs into our O(N) bitmap element
search limitation and spends 8s (60%) of the compile-time in the SSA propagator
engine (when optimizing).  The patch improves that to 0.9s (15%).  For the 
first testcase it isn't that bad but still the patch improves CCP from 36% to 
14%.

The "solution" is to use sbitmap instead of a bitmap to avoid
the linearity when doing add_ssa_edge.  We pay for that (but not
actually with the testcases) with a linear-time bitmap_first_set_bit
in process_ssa_edge_worklist.  I do not (yet...) have a testcase
that overall gets slower with this approach.  I suppose using
std::set<int> would "solve" the complexity issue but we'd pay
back with horribly inefficient memory use.  Similarly with
our sparse bitmap implementation which lacks an ordered
first_set_bit (it only can get any set bit fast, breaking optimal
iteration order).

If we'd only had a O(log n) search sparse bitmap implementation ...
(Steven posted patches to switch bitmap from/to such one but IIRC
that at least lacked bitmap_first_set_bit).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK for trunk?

Thanks,
Richard.

2018-09-19  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/63155
        * tree-ssa-propagate.h
        (ssa_propagation_engine::process_ssa_edge_worklist): Change
        prototype.
        * tree-ssa-propagate.c (ssa_edge_worklist): Turn into an sbitmap.
        (add_ssa_edge): Adjust.
        (ssa_propagation_engine::process_ssa_edge_worklist): If there
        was nothing to do return false.
        (ssa_prop_init): Allocate and clear ssa_edge_worklist.
        (ssa_prop_fini): Adjust.
        (ssa_propagation_engine::ssa_propagate): Elide the check
        for an empty ssa_edge_worklist by using the
        process_ssa_edge_worklist return value.

Index: gcc/tree-ssa-propagate.h
===================================================================
--- gcc/tree-ssa-propagate.h    (revision 264418)
+++ gcc/tree-ssa-propagate.h    (working copy)
@@ -94,7 +94,7 @@ class ssa_propagation_engine
  private:
   /* Internal implementation details.  */
   void simulate_stmt (gimple *stmt);
-  void process_ssa_edge_worklist (void);
+  bool process_ssa_edge_worklist (void);
   void simulate_block (basic_block);
 
 };
Index: gcc/tree-ssa-propagate.c
===================================================================
--- gcc/tree-ssa-propagate.c    (revision 264418)
+++ gcc/tree-ssa-propagate.c    (working copy)
@@ -119,7 +119,7 @@ static int *cfg_order_to_bb;
    definition has changed.  SSA edges are def-use edges in the SSA
    web.  For each D-U edge, we store the target statement or PHI node
    UID in a bitmap.  UIDs order stmts in execution order.   */
-static bitmap ssa_edge_worklist;
+static sbitmap ssa_edge_worklist;
 static vec<gimple *> uid_to_stmt;
 
 /* Return true if the block worklist empty.  */
@@ -175,8 +175,9 @@ add_ssa_edge (tree var)
        continue;
 
       if (prop_simulate_again_p (use_stmt)
-         && bitmap_set_bit (ssa_edge_worklist, gimple_uid (use_stmt)))
+         && !bitmap_bit_p (ssa_edge_worklist, gimple_uid (use_stmt)))
        {
+         bitmap_set_bit (ssa_edge_worklist, gimple_uid (use_stmt));
          uid_to_stmt[gimple_uid (use_stmt)] = use_stmt;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
@@ -317,11 +318,14 @@ ssa_propagation_engine::simulate_stmt (g
    when an SSA edge is added to it in simulate_stmt.  Return true if a stmt
    was simulated.  */
 
-void
+bool
 ssa_propagation_engine::process_ssa_edge_worklist (void)
 {
   /* Process the next entry from the worklist.  */
-  unsigned stmt_uid = bitmap_first_set_bit (ssa_edge_worklist);
+  int stmt_uid = bitmap_first_set_bit (ssa_edge_worklist);
+  if (stmt_uid == -1)
+    return false;
+
   bitmap_clear_bit (ssa_edge_worklist, stmt_uid);
   gimple *stmt = uid_to_stmt[stmt_uid];
 
@@ -335,6 +339,7 @@ ssa_propagation_engine::process_ssa_edge
     }
 
   simulate_stmt (stmt);
+  return true;
 }
 
 
@@ -412,9 +417,6 @@ ssa_prop_init (void)
   edge_iterator ei;
   basic_block bb;
 
-  /* Worklists of SSA edges.  */
-  ssa_edge_worklist = BITMAP_ALLOC (NULL);
-
   /* Worklist of basic-blocks.  */
   bb_to_cfg_order = XNEWVEC (int, last_basic_block_for_fn (cfun) + 1);
   cfg_order_to_bb = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
@@ -455,6 +457,10 @@ ssa_prop_init (void)
     }
   uid_to_stmt.safe_grow (gimple_stmt_max_uid (cfun));
 
+  /* Worklists of SSA edges.  */
+  ssa_edge_worklist = sbitmap_alloc (gimple_stmt_max_uid (cfun));
+  bitmap_clear (ssa_edge_worklist);
+
   /* Seed the algorithm by adding the successors of the entry block to the
      edge worklist.  */
   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
@@ -473,7 +479,8 @@ ssa_prop_fini (void)
   BITMAP_FREE (cfg_blocks);
   free (bb_to_cfg_order);
   free (cfg_order_to_bb);
-  BITMAP_FREE (ssa_edge_worklist);
+  sbitmap_free (ssa_edge_worklist);
+  ssa_edge_worklist = NULL;
   uid_to_stmt.release ();
 }
 
@@ -789,8 +796,7 @@ ssa_propagation_engine::ssa_propagate (v
   ssa_prop_init ();
 
   /* Iterate until the worklists are empty.  */
-  while (! cfg_blocks_empty_p ()
-        || ! bitmap_empty_p (ssa_edge_worklist))
+  while (1)
     {
       /* First simulate whole blocks.  */
       if (! cfg_blocks_empty_p ())
@@ -802,7 +808,10 @@ ssa_propagation_engine::ssa_propagate (v
        }
 
       /* Then simulate from the SSA edge worklist.  */
-      process_ssa_edge_worklist ();
+      if (process_ssa_edge_worklist ())
+       continue;
+
+      break;
     }
 
   ssa_prop_fini ();

Reply via email to