On Sat, Jun 22, 2024 at 8:59 PM Andi Kleen <a...@linux.intel.com> wrote: > > Enable the tailcall optimization for non optimizing builds, > but in this case only checks calls that have the musttail attribute set. > This makes musttail work without optimization. > > This is done with a new late musttail pass that is only active when > not optimizing. The new pass relies on tree-cfg to discover musttails. > This avoids a ~0.8% compiler run time penalty at -O0. > > gcc/ChangeLog: > > * function.h (struct function): Add has_musttail. > * lto-streamer-in.cc (input_struct_function_base): Stream > has_musttail. > * lto-streamer-out.cc (output_struct_function_base): Dito. > * passes.def (pass_musttail): Add. > * tree-cfg.cc (notice_special_calls): Record has_musttail. > (clear_special_calls): Clear has_musttail. > * tree-pass.h (make_pass_musttail): Add. > * tree-tailcall.cc (find_tail_calls): Handle only_musttail > argument. > (tree_optimize_tail_calls_1): Pass on only_musttail. > (execute_tail_calls): Pass only_musttail as false. > (class pass_musttail): Add. > (make_pass_musttail): Add. > --- > gcc/function.h | 3 ++ > gcc/lto-streamer-in.cc | 1 + > gcc/lto-streamer-out.cc | 1 + > gcc/passes.def | 1 + > gcc/tree-cfg.cc | 3 ++ > gcc/tree-pass.h | 1 + > gcc/tree-tailcall.cc | 66 +++++++++++++++++++++++++++++++++++------ > 7 files changed, 67 insertions(+), 9 deletions(-) > > diff --git a/gcc/function.h b/gcc/function.h > index c0ba6cc1531a..fbeadeaf4104 100644 > --- a/gcc/function.h > +++ b/gcc/function.h > @@ -430,6 +430,9 @@ struct GTY(()) function { > /* Nonzero when the tail call has been identified. */ > unsigned int tail_call_marked : 1; > > + /* Has musttail marked calls. */ > + unsigned int has_musttail : 1; > + > /* Nonzero if the current function contains a #pragma GCC unroll. */ > unsigned int has_unroll : 1; > > diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc > index ad0ca24007a0..2e592be80823 100644 > --- a/gcc/lto-streamer-in.cc > +++ b/gcc/lto-streamer-in.cc > @@ -1325,6 +1325,7 @@ input_struct_function_base (struct function *fn, class > data_in *data_in, > fn->calls_eh_return = bp_unpack_value (&bp, 1); > fn->has_force_vectorize_loops = bp_unpack_value (&bp, 1); > fn->has_simduid_loops = bp_unpack_value (&bp, 1); > + fn->has_musttail = bp_unpack_value (&bp, 1); > fn->assume_function = bp_unpack_value (&bp, 1); > fn->va_list_fpr_size = bp_unpack_value (&bp, 8); > fn->va_list_gpr_size = bp_unpack_value (&bp, 8); > diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc > index d4f728094ed5..0be381abbd96 100644 > --- a/gcc/lto-streamer-out.cc > +++ b/gcc/lto-streamer-out.cc > @@ -2290,6 +2290,7 @@ output_struct_function_base (struct output_block *ob, > struct function *fn) > bp_pack_value (&bp, fn->calls_eh_return, 1); > bp_pack_value (&bp, fn->has_force_vectorize_loops, 1); > bp_pack_value (&bp, fn->has_simduid_loops, 1); > + bp_pack_value (&bp, fn->has_musttail, 1); > bp_pack_value (&bp, fn->assume_function, 1); > bp_pack_value (&bp, fn->va_list_fpr_size, 8); > bp_pack_value (&bp, fn->va_list_gpr_size, 8); > diff --git a/gcc/passes.def b/gcc/passes.def > index 041229e47a68..5b5390e6ac0b 100644 > --- a/gcc/passes.def > +++ b/gcc/passes.def > @@ -444,6 +444,7 @@ along with GCC; see the file COPYING3. If not see > NEXT_PASS (pass_tsan_O0); > NEXT_PASS (pass_sanopt); > NEXT_PASS (pass_cleanup_eh); > + NEXT_PASS (pass_musttail); > NEXT_PASS (pass_lower_resx); > NEXT_PASS (pass_nrv); > NEXT_PASS (pass_gimple_isel); > diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc > index 7fb7b92966be..e6fd1294b958 100644 > --- a/gcc/tree-cfg.cc > +++ b/gcc/tree-cfg.cc > @@ -2290,6 +2290,8 @@ notice_special_calls (gcall *call) > cfun->calls_alloca = true; > if (flags & ECF_RETURNS_TWICE) > cfun->calls_setjmp = true; > + if (gimple_call_must_tail_p (call)) > + cfun->has_musttail = true; > } > > > @@ -2301,6 +2303,7 @@ clear_special_calls (void) > { > cfun->calls_alloca = false; > cfun->calls_setjmp = false; > + cfun->has_musttail = false; > } > > /* Remove PHI nodes associated with basic block BB and all edges out of BB. > */ > diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h > index edebb2be245d..59e53558034f 100644 > --- a/gcc/tree-pass.h > +++ b/gcc/tree-pass.h > @@ -368,6 +368,7 @@ extern gimple_opt_pass *make_pass_sra (gcc::context > *ctxt); > extern gimple_opt_pass *make_pass_sra_early (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_tail_recursion (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_tail_calls (gcc::context *ctxt); > +extern gimple_opt_pass *make_pass_musttail (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_fix_loops (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_tree_loop (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt); > diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc > index e9f7f8a12b3a..0c6df10e64f7 100644 > --- a/gcc/tree-tailcall.cc > +++ b/gcc/tree-tailcall.cc > @@ -408,10 +408,10 @@ static live_vars_map *live_vars; > static vec<bitmap_head> live_vars_vec; > > /* Finds tailcalls falling into basic block BB. The list of found tailcalls > is > - added to the start of RET. */ > + added to the start of RET. When ONLY_MUSTTAIL is set only handle > musttail. */ > > static void > -find_tail_calls (basic_block bb, struct tailcall **ret) > +find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail) > { > tree ass_var = NULL_TREE, ret_var, func, param; > gimple *stmt; > @@ -445,6 +445,9 @@ find_tail_calls (basic_block bb, struct tailcall **ret) > if (is_gimple_call (stmt)) > { > call = as_a <gcall *> (stmt); > + /* Handle only musttail calls when not optimizing. */ > + if (only_musttail && !gimple_call_must_tail_p (call)) > + return; > ass_var = gimple_call_lhs (call); > break; > } > @@ -467,7 +470,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret) > edge_iterator ei; > /* Recurse to the predecessors. */ > FOR_EACH_EDGE (e, ei, bb->preds) > - find_tail_calls (e->src, ret); > + find_tail_calls (e->src, ret, only_musttail); > > return; > } > @@ -528,7 +531,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret) > func = gimple_call_fndecl (call); > if (func > && !fndecl_built_in_p (func) > - && recursive_call_p (current_function_decl, func)) > + && recursive_call_p (current_function_decl, func) > + && !only_musttail) > { > tree arg; > > @@ -1094,10 +1098,11 @@ create_tailcall_accumulator (const char *label, > basic_block bb, tree init) > } > > /* Optimizes tail calls in the function, turning the tail recursion > - into iteration. */ > + into iteration. When ONLY_MUSTCALL is true only optimize mustcall > + marked calls. */ > > static unsigned int > -tree_optimize_tail_calls_1 (bool opt_tailcalls) > +tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_mustcall) > { > edge e; > bool phis_constructed = false; > @@ -1117,7 +1122,7 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls) > /* Only traverse the normal exits, i.e. those that end with return > statement. */ > if (safe_is_a <greturn *> (*gsi_last_bb (e->src))) > - find_tail_calls (e->src, &tailcalls); > + find_tail_calls (e->src, &tailcalls, only_mustcall); > } > > if (live_vars) > @@ -1228,7 +1233,7 @@ gate_tail_calls (void) > static unsigned int > execute_tail_calls (void) > { > - return tree_optimize_tail_calls_1 (true); > + return tree_optimize_tail_calls_1 (true, false); > } > > namespace { > @@ -1261,7 +1266,7 @@ public: > bool gate (function *) final override { return gate_tail_calls (); } > unsigned int execute (function *) final override > { > - return tree_optimize_tail_calls_1 (false); > + return tree_optimize_tail_calls_1 (false, false); > } > > }; // class pass_tail_recursion > @@ -1312,3 +1317,46 @@ make_pass_tail_calls (gcc::context *ctxt) > { > return new pass_tail_calls (ctxt); > } > + > +namespace { > + > +const pass_data pass_data_musttail = > +{ > + GIMPLE_PASS, /* type */ > + "musttail", /* name */ > + OPTGROUP_NONE, /* optinfo_flags */ > + TV_NONE, /* tv_id */ > + ( PROP_cfg | PROP_ssa ), /* properties_required */ > + 0, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, /* todo_flags_start */ > + 0, /* todo_flags_finish */ > +}; > + > +class pass_musttail : public gimple_opt_pass > +{ > +public: > + pass_musttail (gcc::context *ctxt) > + : gimple_opt_pass (pass_data_musttail, ctxt) > + {} > + > + /* opt_pass methods: */ > + /* This pass is only used when not optimizing to make [[musttail]] still > + work. */ > + bool gate (function *) final override { return > !flag_optimize_sibling_calls; }
Shouldn't this check f->has_musttail only? That is, I would expect -fno-optimize-sibling-calls to still tail-call [[musttail]]? The comment says the pass only runs when not optimizing - so maybe you wanted to do return optimize == 0;? Otherwise this patch looks OK. > + unsigned int execute (function *f) final override > + { > + if (!f->has_musttail) > + return 0; > + return tree_optimize_tail_calls_1 (true, true); > + } > + > +}; // class pass_musttail > + > +} // anon namespace > + > +gimple_opt_pass * > +make_pass_musttail (gcc::context *ctxt) > +{ > + return new pass_musttail (ctxt); > +} > -- > 2.45.2 >