The second testcase in the above PR runs into our O(N) bitmap element search limitation and spends 8s (60%) of the compile-time in the SSA propagator engine (when optimizing). The patch improves that to 0.9s (15%). For the first testcase it isn't that bad but still the patch improves CCP from 36% to 14%.
The "solution" is to use sbitmap instead of a bitmap to avoid the linearity when doing add_ssa_edge. We pay for that (but not actually with the testcases) with a linear-time bitmap_first_set_bit in process_ssa_edge_worklist. I do not (yet...) have a testcase that overall gets slower with this approach. I suppose using std::set<int> would "solve" the complexity issue but we'd pay back with horribly inefficient memory use. Similarly with our sparse bitmap implementation which lacks an ordered first_set_bit (it only can get any set bit fast, breaking optimal iteration order). If we'd only had a O(log n) search sparse bitmap implementation ... (Steven posted patches to switch bitmap from/to such one but IIRC that at least lacked bitmap_first_set_bit). Bootstrapped and tested on x86_64-unknown-linux-gnu. OK for trunk? Thanks, Richard. 2018-09-19 Richard Biener <rguent...@suse.de> PR tree-optimization/63155 * tree-ssa-propagate.h (ssa_propagation_engine::process_ssa_edge_worklist): Change prototype. * tree-ssa-propagate.c (ssa_edge_worklist): Turn into an sbitmap. (add_ssa_edge): Adjust. (ssa_propagation_engine::process_ssa_edge_worklist): If there was nothing to do return false. (ssa_prop_init): Allocate and clear ssa_edge_worklist. (ssa_prop_fini): Adjust. (ssa_propagation_engine::ssa_propagate): Elide the check for an empty ssa_edge_worklist by using the process_ssa_edge_worklist return value. Index: gcc/tree-ssa-propagate.h =================================================================== --- gcc/tree-ssa-propagate.h (revision 264418) +++ gcc/tree-ssa-propagate.h (working copy) @@ -94,7 +94,7 @@ class ssa_propagation_engine private: /* Internal implementation details. */ void simulate_stmt (gimple *stmt); - void process_ssa_edge_worklist (void); + bool process_ssa_edge_worklist (void); void simulate_block (basic_block); }; Index: gcc/tree-ssa-propagate.c =================================================================== --- gcc/tree-ssa-propagate.c (revision 264418) +++ gcc/tree-ssa-propagate.c (working copy) @@ -119,7 +119,7 @@ static int *cfg_order_to_bb; definition has changed. SSA edges are def-use edges in the SSA web. For each D-U edge, we store the target statement or PHI node UID in a bitmap. UIDs order stmts in execution order. */ -static bitmap ssa_edge_worklist; +static sbitmap ssa_edge_worklist; static vec<gimple *> uid_to_stmt; /* Return true if the block worklist empty. */ @@ -175,8 +175,9 @@ add_ssa_edge (tree var) continue; if (prop_simulate_again_p (use_stmt) - && bitmap_set_bit (ssa_edge_worklist, gimple_uid (use_stmt))) + && !bitmap_bit_p (ssa_edge_worklist, gimple_uid (use_stmt))) { + bitmap_set_bit (ssa_edge_worklist, gimple_uid (use_stmt)); uid_to_stmt[gimple_uid (use_stmt)] = use_stmt; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -317,11 +318,14 @@ ssa_propagation_engine::simulate_stmt (g when an SSA edge is added to it in simulate_stmt. Return true if a stmt was simulated. */ -void +bool ssa_propagation_engine::process_ssa_edge_worklist (void) { /* Process the next entry from the worklist. */ - unsigned stmt_uid = bitmap_first_set_bit (ssa_edge_worklist); + int stmt_uid = bitmap_first_set_bit (ssa_edge_worklist); + if (stmt_uid == -1) + return false; + bitmap_clear_bit (ssa_edge_worklist, stmt_uid); gimple *stmt = uid_to_stmt[stmt_uid]; @@ -335,6 +339,7 @@ ssa_propagation_engine::process_ssa_edge } simulate_stmt (stmt); + return true; } @@ -412,9 +417,6 @@ ssa_prop_init (void) edge_iterator ei; basic_block bb; - /* Worklists of SSA edges. */ - ssa_edge_worklist = BITMAP_ALLOC (NULL); - /* Worklist of basic-blocks. */ bb_to_cfg_order = XNEWVEC (int, last_basic_block_for_fn (cfun) + 1); cfg_order_to_bb = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); @@ -455,6 +457,10 @@ ssa_prop_init (void) } uid_to_stmt.safe_grow (gimple_stmt_max_uid (cfun)); + /* Worklists of SSA edges. */ + ssa_edge_worklist = sbitmap_alloc (gimple_stmt_max_uid (cfun)); + bitmap_clear (ssa_edge_worklist); + /* Seed the algorithm by adding the successors of the entry block to the edge worklist. */ FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) @@ -473,7 +479,8 @@ ssa_prop_fini (void) BITMAP_FREE (cfg_blocks); free (bb_to_cfg_order); free (cfg_order_to_bb); - BITMAP_FREE (ssa_edge_worklist); + sbitmap_free (ssa_edge_worklist); + ssa_edge_worklist = NULL; uid_to_stmt.release (); } @@ -789,8 +796,7 @@ ssa_propagation_engine::ssa_propagate (v ssa_prop_init (); /* Iterate until the worklists are empty. */ - while (! cfg_blocks_empty_p () - || ! bitmap_empty_p (ssa_edge_worklist)) + while (1) { /* First simulate whole blocks. */ if (! cfg_blocks_empty_p ()) @@ -802,7 +808,10 @@ ssa_propagation_engine::ssa_propagate (v } /* Then simulate from the SSA edge worklist. */ - process_ssa_edge_worklist (); + if (process_ssa_edge_worklist ()) + continue; + + break; } ssa_prop_fini ();