> On Sat, 25 Jun 2011, Jan Hubicka wrote:
>
> > Hi,
> > just for those who are interested, this is a quick & dirty patch adding
> > another round of local optimization passes at WPA time. I've added the
> > early inliner and IPA-SRA because I was curious how many optimization
> > opportunities we are missing by limiting those to the early pass.
>
> At WPA time? I thought we don't have function bodies around.
I meant LTRANS time, indeed.
Anyway, the tests were made with -flto-partition=none.
Honza
>
> Richard.
>
> > With early inlining it seems to be very little. We inline one extra call
> > when building Mozilla in LTO mode.
> >
> > IPA SRA is a different story. While we do 579 IPA SRA clones in the early
> > pass, the late pass produces 13014 clones (22 times more ;) suggesting that
> > the pass might be interesting at IPA level after all.
> >
> > There are 78686 functions after inlining in Mozilla, so one out of 7
> > functions is touched.
> >
> > The size difference of libxul is not great, about a 100Kb reduction. I will
> > try benchmarking it eventually, too.
> >
> > Honza
> >
> >
> > Index: cgraph.c
> > ===================================================================
> > *** cgraph.c (revision 175350)
> > --- cgraph.c (working copy)
> > *************** cgraph_release_function_body (struct cgr
> > *** 1389,1396 ****
> > }
> > if (cfun->cfg)
> > {
> > ! gcc_assert (dom_computed[0] == DOM_NONE);
> > ! gcc_assert (dom_computed[1] == DOM_NONE);
> > clear_edges ();
> > }
> > if (cfun->value_histograms)
> > --- 1393,1403 ----
> > }
> > if (cfun->cfg)
> > {
> > ! /*gcc_assert (dom_computed[0] == DOM_NONE);
> > ! gcc_assert (dom_computed[1] == DOM_NONE);*/
> > ! free_dominance_info (CDI_DOMINATORS);
> > ! free_dominance_info (CDI_POST_DOMINATORS);
> > !
> > clear_edges ();
> > }
> > if (cfun->value_histograms)
> > Index: tree-pass.h
> > ===================================================================
> > *** tree-pass.h (revision 175350)
> > --- tree-pass.h (working copy)
> > *************** extern struct simple_ipa_opt_pass pass_i
> > *** 452,458 ****
> > extern struct simple_ipa_opt_pass
> > pass_ipa_function_and_variable_visibility;
> > extern struct simple_ipa_opt_pass pass_ipa_tree_profile;
> >
> > ! extern struct simple_ipa_opt_pass pass_early_local_passes;
> >
> > extern struct ipa_opt_pass_d pass_ipa_whole_program_visibility;
> > extern struct ipa_opt_pass_d pass_ipa_lto_gimple_out;
> > --- 452,458 ----
> > extern struct simple_ipa_opt_pass
> > pass_ipa_function_and_variable_visibility;
> > extern struct simple_ipa_opt_pass pass_ipa_tree_profile;
> >
> > ! extern struct simple_ipa_opt_pass pass_early_local_passes,
> > pass_late_local_passes, pass_late_local_passes2;
> >
> > extern struct ipa_opt_pass_d pass_ipa_whole_program_visibility;
> > extern struct ipa_opt_pass_d pass_ipa_lto_gimple_out;
> > Index: ipa-inline-analysis.c
> > ===================================================================
> > *** ipa-inline-analysis.c (revision 175350)
> > --- ipa-inline-analysis.c (working copy)
> > *************** estimate_function_body_sizes (struct cgr
> > *** 1535,1542 ****
> > edge->call_stmt_cannot_inline_p = true;
> > gimple_call_set_cannot_inline (stmt, true);
> > }
> > ! else
> > ! gcc_assert (!gimple_call_cannot_inline_p (stmt));
> > }
> >
> > /* TODO: When conditional jump or swithc is known to be constant, but
> > --- 1535,1542 ----
> > edge->call_stmt_cannot_inline_p = true;
> > gimple_call_set_cannot_inline (stmt, true);
> > }
> > ! /*else
> > ! gcc_assert (!gimple_call_cannot_inline_p (stmt));*/
> > }
> >
> > /* TODO: When conditional jump or swithc is known to be constant, but
> > Index: tree-inline.c
> > ===================================================================
> > *** tree-inline.c (revision 175350)
> > --- tree-inline.c (working copy)
> > *************** expand_call_inline (basic_block bb, gimp
> > *** 3891,3897 ****
> > id->src_cfun = DECL_STRUCT_FUNCTION (fn);
> > id->gimple_call = stmt;
> >
> > ! gcc_assert (!id->src_cfun->after_inlining);
> >
> > id->entry_bb = bb;
> > if (lookup_attribute ("cold", DECL_ATTRIBUTES (fn)))
> > --- 3891,3897 ----
> > id->src_cfun = DECL_STRUCT_FUNCTION (fn);
> > id->gimple_call = stmt;
> >
> > ! /*gcc_assert (!id->src_cfun->after_inlining);*/
> >
> > id->entry_bb = bb;
> > if (lookup_attribute ("cold", DECL_ATTRIBUTES (fn)))
> > Index: tree-optimize.c
> > ===================================================================
> > *** tree-optimize.c (revision 175350)
> > --- tree-optimize.c (working copy)
> > *************** struct simple_ipa_opt_pass pass_early_lo
> > *** 123,128 ****
> > --- 123,189 ----
> > /* Gate: execute, or not, all of the non-trivial optimizations. */
> >
> > static bool
> > + gate_all_late_local_passes (void)
> > + {
> > + /* Don't bother doing anything if the program has errors. */
> > + return (!seen_error () && optimize);
> > + }
> > +
> > + static unsigned int
> > + execute_all_late_local_passes (void)
> > + {
> > + /* Once this pass (and its sub-passes) are complete, all functions
> > + will be in SSA form. Technically this state change is happening
> > + a tad late, since the sub-passes have not yet run, but since
> > + none of the sub-passes are IPA passes and do not create new
> > + functions, this is ok. We're setting this value for the benefit
> > + of IPA passes that follow. */
> > + if (cgraph_state < CGRAPH_STATE_IPA_SSA)
> > + cgraph_state = CGRAPH_STATE_IPA_SSA;
> > + return 0;
> > + }
> > +
> > + struct simple_ipa_opt_pass pass_late_local_passes =
> > + {
> > + {
> > + SIMPLE_IPA_PASS,
> > + "late_local_cleanups", /* name */
> > + gate_all_late_local_passes, /* gate */
> > + execute_all_late_local_passes, /* execute */
> > + NULL, /* sub */
> > + NULL, /* next */
> > + 0, /* static_pass_number */
> > + TV_EARLY_LOCAL, /* tv_id */
> > + 0, /* properties_required */
> > + 0, /* properties_provided */
> > + 0, /* properties_destroyed */
> > + 0, /* todo_flags_start */
> > + TODO_remove_functions /* todo_flags_finish */
> > + }
> > + };
> > +
> > + struct simple_ipa_opt_pass pass_late_local_passes2 =
> > + {
> > + {
> > + SIMPLE_IPA_PASS,
> > + "late_local_cleanups2", /* name */
> > + gate_all_late_local_passes, /* gate */
> > + execute_all_late_local_passes, /* execute */
> > + NULL, /* sub */
> > + NULL, /* next */
> > + 0, /* static_pass_number */
> > + TV_EARLY_LOCAL, /* tv_id */
> > + 0, /* properties_required */
> > + 0, /* properties_provided */
> > + 0, /* properties_destroyed */
> > + 0, /* todo_flags_start */
> > + TODO_remove_functions /* todo_flags_finish */
> > + }
> > + };
> > +
> > + /* Gate: execute, or not, all of the non-trivial optimizations. */
> > +
> > + static bool
> > gate_all_early_optimizations (void)
> > {
> > return (optimize >= 1
> > Index: passes.c
> > ===================================================================
> > *** passes.c (revision 175350)
> > --- passes.c (working copy)
> > *************** init_optimization_passes (void)
> > *** 1263,1268 ****
> > --- 1263,1288 ----
> > passes are executed after partitioning and thus see just parts of the
> > compiled unit. */
> > p = &all_late_ipa_passes;
> > + NEXT_PASS (pass_late_local_passes);
> > + {
> > + struct opt_pass **p = &pass_late_local_passes.pass.sub;
> > + NEXT_PASS (pass_inline_parameters);
> > + NEXT_PASS (pass_release_ssa_names);
> > + }
> > + NEXT_PASS (pass_late_local_passes2);
> > + {
> > + struct opt_pass **p = &pass_late_local_passes2.pass.sub;
> > + NEXT_PASS (pass_early_inline);
> > + NEXT_PASS (pass_remove_cgraph_callee_edges);
> > + NEXT_PASS (pass_ccp);
> > + NEXT_PASS (pass_forwprop);
> > + NEXT_PASS (pass_fre);
> > + NEXT_PASS (pass_cd_dce);
> > + NEXT_PASS (pass_early_ipa_sra);
> > + NEXT_PASS (pass_release_ssa_names);
> > + NEXT_PASS (pass_rebuild_cgraph_edges);
> > + NEXT_PASS (pass_inline_parameters);
> > + }
> > NEXT_PASS (pass_ipa_pta);
> > *p = NULL;
> > /* These passes are run after IPA passes on every function that is being
> > Index: statistics.c
> > ===================================================================
> > *** statistics.c (revision 175350)
> > --- statistics.c (working copy)
> > *************** statistics_fini_pass_3 (void **slot, voi
> > *** 171,176 ****
> > --- 171,178 ----
> > void
> > statistics_fini_pass (void)
> > {
> > + if (!current_pass)
> > + return;
> > if (current_pass->static_pass_number == -1)
> > return;
> >
> >
> >
>
> --
> Richard Guenther <[email protected]>
> Novell / SUSE Labs
> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer