This improves compile time of the testcase in PR39326 at -O3 from loop invariant motion : 24.41 (32%) usr 0.03 ( 4%) sys 24.43 (32%) wall 148 kB ( 0%) ggc loop unswitching : 15.16 (20%) usr 0.02 ( 3%) sys 15.30 (20%) wall 0 kB ( 0%) ggc TOTAL : 76.51 0.72 77.23 456534 kB
to loop invariant motion : 0.18 ( 0%) usr 0.00 ( 0%) sys 0.16 ( 0%) wall 148 kB ( 0%) ggc loop unswitching : 0.01 ( 0%) usr 0.00 ( 0%) sys 0.01 ( 0%) wall 0 kB ( 0%) ggc TOTAL : 37.14 0.67 37.80 456534 kB The problem with DF sub-CFG analysis (df_set_blocks) is that while the actual analysis works on the sub-CFG the function first computes inverted postorder and postorder of the whole function and then prunes it, which takes 99% of the compile time for the testcase with a lot of very simple loops. There are two users of df_set_blocks, RTL loop invariant motion, doing a df_analyze on each loop, and RTL loop IV analysis (done on each loop by unswitching). The following patch fixes the slowness when the sub-CFG is the body of a single loop as it's reasonably easy to implement (inverted) postorder computation for a SEME region. Now the question is what the desired interface of this should be (the patch is a kludge here) and if the generic df_set_blocks interface should be retained (it doesn't prohibit you to analyze unconnected parts of a CFG for example) - it also retains its slowness for simple analysis cases. I can split off a df_analyze_1 and have a df_analyze () doing what it does now and a df_analyze_loop () combining df_set_blocks () and df_analyze () for example. Any preference? Or do we want a df_set_loop ()-like interface? df_analyze seems to be the only public interface caring about blocks (its comment even suggests that the sub-CFG bitmap was even part of its interface at some point). I'll probably move the CFG order computes to cfgloop.c and eventually make it use flow_bb_inside_loop_p instead of the bitmap with all loop blocks (even though that will be a little slower in the end). Thanks, Richard. Index: gcc/df.h =================================================================== *** gcc/df.h (revision 206624) --- gcc/df.h (working copy) *************** extern void df_set_blocks (bitmap); *** 900,906 **** extern void df_remove_problem (struct dataflow *); extern void df_finish_pass (bool); extern void df_analyze_problem (struct dataflow *, bitmap, int *, int); ! extern void df_analyze (void); extern int df_get_n_blocks (enum df_flow_dir); extern int *df_get_postorder (enum df_flow_dir); extern void df_simple_dataflow (enum df_flow_dir, df_init_function, --- 900,906 ---- extern void df_remove_problem (struct dataflow *); extern void df_finish_pass (bool); extern void df_analyze_problem (struct dataflow *, bitmap, int *, int); ! extern void df_analyze (struct loop *loop = NULL); extern int df_get_n_blocks (enum df_flow_dir); extern int *df_get_postorder (enum df_flow_dir); extern void df_simple_dataflow (enum df_flow_dir, df_init_function, Index: gcc/df-core.c =================================================================== *** gcc/df-core.c (revision 206624) --- gcc/df-core.c (working copy) *************** are write-only operations. *** 393,398 **** --- 393,399 ---- #include "df.h" #include "tree-pass.h" #include "params.h" + #include "cfgloop.h" static void *df_get_bb_info (struct dataflow *, unsigned int); static void df_set_bb_info (struct dataflow *, unsigned int, void *); *************** df_analyze_problem (struct dataflow *dfl *** 1225,1235 **** } /* Analyze dataflow info for the basic blocks specified by the bitmap BLOCKS, or for the whole CFG if BLOCKS is zero. */ void ! df_analyze (void) { bitmap current_all_blocks = BITMAP_ALLOC (&df_bitmap_obstack); bool everything; --- 1226,1383 ---- } + static int + loop_post_order_compute (int *post_order, struct loop *loop, + bitmap loop_blocks) + { + edge_iterator *stack; + int sp; + int post_order_num = 0; + bitmap visited; + + /* Allocate stack for back-tracking up CFG. */ + stack = XNEWVEC (edge_iterator, loop->num_nodes + 1); + sp = 0; + + /* Allocate bitmap to track nodes that have been visited. */ + visited = BITMAP_ALLOC (NULL); + + /* Push the first edge on to the stack. */ + stack[sp++] = ei_start (loop_preheader_edge (loop)->src->succs); + + while (sp) + { + edge_iterator ei; + basic_block src; + basic_block dest; + + /* Look at the edge on the top of the stack. */ + ei = stack[sp - 1]; + src = ei_edge (ei)->src; + dest = ei_edge (ei)->dest; + + /* Check if the edge destination has been visited yet and mark it + if not so. */ + if (bitmap_bit_p (loop_blocks, dest->index) + && bitmap_set_bit (visited, dest->index)) + { + if (EDGE_COUNT (dest->succs) > 0) + /* Since the DEST node has been visited for the first + time, check its successors. */ + stack[sp++] = ei_start (dest->succs); + else + post_order[post_order_num++] = dest->index; + } + else + { + if (ei_one_before_end_p (ei) + && src != loop_preheader_edge (loop)->src) + post_order[post_order_num++] = src->index; + + if (!ei_one_before_end_p (ei)) + ei_next (&stack[sp - 1]); + else + sp--; + } + } + + free (stack); + BITMAP_FREE (visited); + + return post_order_num; + } + + int + loop_inverted_post_order_compute (int *post_order, struct loop *loop, + bitmap loop_blocks) + { + basic_block bb; + edge_iterator *stack; + int sp; + int post_order_num = 0; + bitmap visited; + unsigned i; + + vec<edge> exits = get_loop_exit_edges (loop); + + /* Allocate stack for back-tracking up CFG. */ + stack = XNEWVEC (edge_iterator, loop->num_nodes + exits.length () + 1); + sp = 0; + + /* Allocate bitmap to track nodes that have been visited. */ + visited = BITMAP_ALLOC (NULL); + + /* Put all loop exit blocks into the initial work list or the latches, + if there is no exit. */ + if (!exits.is_empty ()) + { + edge e; + for (i = 0; exits.iterate (i, &e); ++i) + stack[sp++] = ei_start (e->dest->preds); + } + else + { + stack[sp++] = ei_start (loop->header->preds); + bitmap_set_bit (visited, loop->header->index); + } + exits.release (); + + /* The inverted traversal loop. */ + while (sp) + { + edge_iterator ei; + basic_block pred; + + /* Look at the edge on the top of the stack. */ + ei = stack[sp - 1]; + bb = ei_edge (ei)->dest; + pred = ei_edge (ei)->src; + + /* Check if the predecessor has been visited yet and mark it + if not so. */ + if (bitmap_bit_p (loop_blocks, pred->index) + && bitmap_set_bit (visited, pred->index)) + { + if (EDGE_COUNT (pred->preds) > 0) + /* Since the predecessor node has been visited for the first + time, check its predecessors. */ + stack[sp++] = ei_start (pred->preds); + else + post_order[post_order_num++] = pred->index; + } + else + { + if (bitmap_bit_p (loop_blocks, bb->index) + && ei_one_before_end_p (ei)) + post_order[post_order_num++] = bb->index; + + if (!ei_one_before_end_p (ei)) + ei_next (&stack[sp - 1]); + else + sp--; + } + } + + /* Detect any infinite loop and activate the kludge. + Note that this doesn't check EXIT_BLOCK itself + since EXIT_BLOCK is always added after the outer do-while loop. */ + /* ??? Assert this doesn't happen, thus that we visited all + loop_blocks. */ + bitmap_compl_and_into (visited, loop_blocks); + gcc_assert (bitmap_empty_p (visited)); + + free (stack); + BITMAP_FREE (visited); + return post_order_num; + } + + + /* Analyze dataflow info for the basic blocks specified by the bitmap BLOCKS, or for the whole CFG if BLOCKS is zero. */ void ! df_analyze (struct loop *loop) { bitmap current_all_blocks = BITMAP_ALLOC (&df_bitmap_obstack); bool everything; *************** df_analyze (void) *** 1237,1246 **** free (df->postorder); free (df->postorder_inverted); ! df->postorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); ! df->postorder_inverted = XNEWVEC (int, last_basic_block_for_fn (cfun)); ! df->n_blocks = post_order_compute (df->postorder, true, true); ! df->n_blocks_inverted = inverted_post_order_compute (df->postorder_inverted); /* These should be the same. */ gcc_assert (df->n_blocks == df->n_blocks_inverted); --- 1385,1412 ---- free (df->postorder); free (df->postorder_inverted); ! if (loop) ! { ! gcc_assert (df->analyze_subset); ! gcc_checking_assert (bitmap_count_bits (df->blocks_to_analyze) ! == loop->num_nodes); ! df->postorder = XNEWVEC (int, loop->num_nodes); ! df->postorder_inverted = XNEWVEC (int, loop->num_nodes); ! df->n_blocks = loop_post_order_compute (df->postorder, loop, ! df->blocks_to_analyze); ! df->n_blocks_inverted ! = loop_inverted_post_order_compute (df->postorder_inverted, loop, ! df->blocks_to_analyze); ! gcc_assert ((unsigned) df->n_blocks == loop->num_nodes); ! gcc_assert ((unsigned) df->n_blocks_inverted == loop->num_nodes); ! } ! else ! { ! df->postorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); ! df->postorder_inverted = XNEWVEC (int, last_basic_block_for_fn (cfun)); ! df->n_blocks = post_order_compute (df->postorder, true, true); ! df->n_blocks_inverted = inverted_post_order_compute (df->postorder_inverted); ! } /* These should be the same. */ gcc_assert (df->n_blocks == df->n_blocks_inverted); *************** df_analyze (void) *** 1273,1284 **** if (df->analyze_subset) { everything = false; ! bitmap_and_into (df->blocks_to_analyze, current_all_blocks); ! df->n_blocks = df_prune_to_subcfg (df->postorder, ! df->n_blocks, df->blocks_to_analyze); ! df->n_blocks_inverted = df_prune_to_subcfg (df->postorder_inverted, ! df->n_blocks_inverted, ! df->blocks_to_analyze); BITMAP_FREE (current_all_blocks); } else --- 1439,1456 ---- if (df->analyze_subset) { everything = false; ! if (!loop) ! { ! bitmap_and_into (df->blocks_to_analyze, current_all_blocks); ! df->n_blocks = df_prune_to_subcfg (df->postorder, ! df->n_blocks, df->blocks_to_analyze); ! df->n_blocks_inverted = df_prune_to_subcfg (df->postorder_inverted, ! df->n_blocks_inverted, ! df->blocks_to_analyze); ! } ! else ! { ! } BITMAP_FREE (current_all_blocks); } else Index: gcc/loop-invariant.c =================================================================== *** gcc/loop-invariant.c (revision 206624) --- gcc/loop-invariant.c (working copy) *************** find_defs (struct loop *loop, basic_bloc *** 672,678 **** df_chain_add_problem (DF_UD_CHAIN); df_set_blocks (blocks); df_set_flags (DF_RD_PRUNE_DEAD_DEFS); ! df_analyze (); check_invariant_table_size (); if (dump_file) --- 672,678 ---- df_chain_add_problem (DF_UD_CHAIN); df_set_blocks (blocks); df_set_flags (DF_RD_PRUNE_DEAD_DEFS); ! df_analyze (loop); check_invariant_table_size (); if (dump_file) Index: gcc/loop-iv.c =================================================================== *** gcc/loop-iv.c (revision 206624) --- gcc/loop-iv.c (working copy) *************** iv_analysis_loop_init (struct loop *loop *** 307,313 **** df_chain_add_problem (DF_UD_CHAIN); df_note_add_problem (); df_set_blocks (blocks); ! df_analyze (); if (dump_file) df_dump_region (dump_file); --- 307,313 ---- df_chain_add_problem (DF_UD_CHAIN); df_note_add_problem (); df_set_blocks (blocks); ! df_analyze (loop); if (dump_file) df_dump_region (dump_file);