On Sat, Jun 22, 2024 at 8:59 PM Andi Kleen <a...@linux.intel.com> wrote:
>
> Enable the tailcall optimization for non optimizing builds,
> but in this case only checks calls that have the musttail attribute set.
> This makes musttail work without optimization.
>
> This is done with a new late musttail pass that is only active when
> not optimizing. The new pass relies on tree-cfg to discover musttails.
> This avoids a ~0.8% compiler run time penalty at -O0.
>
> gcc/ChangeLog:
>
>         * function.h (struct function): Add has_musttail.
>         * lto-streamer-in.cc (input_struct_function_base): Stream
>         has_musttail.
>         * lto-streamer-out.cc (output_struct_function_base): Dito.
>         * passes.def (pass_musttail): Add.
>         * tree-cfg.cc (notice_special_calls): Record has_musttail.
>         (clear_special_calls): Clear has_musttail.
>         * tree-pass.h (make_pass_musttail): Add.
>         * tree-tailcall.cc (find_tail_calls): Handle only_musttail
>           argument.
>         (tree_optimize_tail_calls_1): Pass on only_musttail.
>         (execute_tail_calls): Pass only_musttail as false.
>         (class pass_musttail): Add.
>         (make_pass_musttail): Add.
> ---
>  gcc/function.h          |  3 ++
>  gcc/lto-streamer-in.cc  |  1 +
>  gcc/lto-streamer-out.cc |  1 +
>  gcc/passes.def          |  1 +
>  gcc/tree-cfg.cc         |  3 ++
>  gcc/tree-pass.h         |  1 +
>  gcc/tree-tailcall.cc    | 66 +++++++++++++++++++++++++++++++++++------
>  7 files changed, 67 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/function.h b/gcc/function.h
> index c0ba6cc1531a..fbeadeaf4104 100644
> --- a/gcc/function.h
> +++ b/gcc/function.h
> @@ -430,6 +430,9 @@ struct GTY(()) function {
>    /* Nonzero when the tail call has been identified.  */
>    unsigned int tail_call_marked : 1;
>
> +  /* Has musttail marked calls.  */
> +  unsigned int has_musttail : 1;
> +
>    /* Nonzero if the current function contains a #pragma GCC unroll.  */
>    unsigned int has_unroll : 1;
>
> diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
> index ad0ca24007a0..2e592be80823 100644
> --- a/gcc/lto-streamer-in.cc
> +++ b/gcc/lto-streamer-in.cc
> @@ -1325,6 +1325,7 @@ input_struct_function_base (struct function *fn, class 
> data_in *data_in,
>    fn->calls_eh_return = bp_unpack_value (&bp, 1);
>    fn->has_force_vectorize_loops = bp_unpack_value (&bp, 1);
>    fn->has_simduid_loops = bp_unpack_value (&bp, 1);
> +  fn->has_musttail = bp_unpack_value (&bp, 1);
>    fn->assume_function = bp_unpack_value (&bp, 1);
>    fn->va_list_fpr_size = bp_unpack_value (&bp, 8);
>    fn->va_list_gpr_size = bp_unpack_value (&bp, 8);
> diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc
> index d4f728094ed5..0be381abbd96 100644
> --- a/gcc/lto-streamer-out.cc
> +++ b/gcc/lto-streamer-out.cc
> @@ -2290,6 +2290,7 @@ output_struct_function_base (struct output_block *ob, 
> struct function *fn)
>    bp_pack_value (&bp, fn->calls_eh_return, 1);
>    bp_pack_value (&bp, fn->has_force_vectorize_loops, 1);
>    bp_pack_value (&bp, fn->has_simduid_loops, 1);
> +  bp_pack_value (&bp, fn->has_musttail, 1);
>    bp_pack_value (&bp, fn->assume_function, 1);
>    bp_pack_value (&bp, fn->va_list_fpr_size, 8);
>    bp_pack_value (&bp, fn->va_list_gpr_size, 8);
> diff --git a/gcc/passes.def b/gcc/passes.def
> index 041229e47a68..5b5390e6ac0b 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -444,6 +444,7 @@ along with GCC; see the file COPYING3.  If not see
>    NEXT_PASS (pass_tsan_O0);
>    NEXT_PASS (pass_sanopt);
>    NEXT_PASS (pass_cleanup_eh);
> +  NEXT_PASS (pass_musttail);
>    NEXT_PASS (pass_lower_resx);
>    NEXT_PASS (pass_nrv);
>    NEXT_PASS (pass_gimple_isel);
> diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
> index 7fb7b92966be..e6fd1294b958 100644
> --- a/gcc/tree-cfg.cc
> +++ b/gcc/tree-cfg.cc
> @@ -2290,6 +2290,8 @@ notice_special_calls (gcall *call)
>      cfun->calls_alloca = true;
>    if (flags & ECF_RETURNS_TWICE)
>      cfun->calls_setjmp = true;
> +  if (gimple_call_must_tail_p (call))
> +    cfun->has_musttail = true;
>  }
>
>
> @@ -2301,6 +2303,7 @@ clear_special_calls (void)
>  {
>    cfun->calls_alloca = false;
>    cfun->calls_setjmp = false;
> +  cfun->has_musttail = false;
>  }
>
>  /* Remove PHI nodes associated with basic block BB and all edges out of BB.  
> */
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index edebb2be245d..59e53558034f 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -368,6 +368,7 @@ extern gimple_opt_pass *make_pass_sra (gcc::context 
> *ctxt);
>  extern gimple_opt_pass *make_pass_sra_early (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_tail_recursion (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_tail_calls (gcc::context *ctxt);
> +extern gimple_opt_pass *make_pass_musttail (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_fix_loops (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_tree_loop (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt);
> diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc
> index e9f7f8a12b3a..0c6df10e64f7 100644
> --- a/gcc/tree-tailcall.cc
> +++ b/gcc/tree-tailcall.cc
> @@ -408,10 +408,10 @@ static live_vars_map *live_vars;
>  static vec<bitmap_head> live_vars_vec;
>
>  /* Finds tailcalls falling into basic block BB. The list of found tailcalls 
> is
> -   added to the start of RET.  */
> +   added to the start of RET. When ONLY_MUSTTAIL is set only handle 
> musttail.  */
>
>  static void
> -find_tail_calls (basic_block bb, struct tailcall **ret)
> +find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail)
>  {
>    tree ass_var = NULL_TREE, ret_var, func, param;
>    gimple *stmt;
> @@ -445,6 +445,9 @@ find_tail_calls (basic_block bb, struct tailcall **ret)
>        if (is_gimple_call (stmt))
>         {
>           call = as_a <gcall *> (stmt);
> +         /* Handle only musttail calls when not optimizing.  */
> +         if (only_musttail && !gimple_call_must_tail_p (call))
> +           return;
>           ass_var = gimple_call_lhs (call);
>           break;
>         }
> @@ -467,7 +470,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret)
>        edge_iterator ei;
>        /* Recurse to the predecessors.  */
>        FOR_EACH_EDGE (e, ei, bb->preds)
> -       find_tail_calls (e->src, ret);
> +       find_tail_calls (e->src, ret, only_musttail);
>
>        return;
>      }
> @@ -528,7 +531,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret)
>    func = gimple_call_fndecl (call);
>    if (func
>        && !fndecl_built_in_p (func)
> -      && recursive_call_p (current_function_decl, func))
> +      && recursive_call_p (current_function_decl, func)
> +      && !only_musttail)
>      {
>        tree arg;
>
> @@ -1094,10 +1098,11 @@ create_tailcall_accumulator (const char *label, 
> basic_block bb, tree init)
>  }
>
>  /* Optimizes tail calls in the function, turning the tail recursion
> -   into iteration.  */
> +   into iteration. When ONLY_MUSTCALL is true only optimize mustcall
> +   marked calls.  */
>
>  static unsigned int
> -tree_optimize_tail_calls_1 (bool opt_tailcalls)
> +tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_mustcall)
>  {
>    edge e;
>    bool phis_constructed = false;
> @@ -1117,7 +1122,7 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls)
>        /* Only traverse the normal exits, i.e. those that end with return
>          statement.  */
>        if (safe_is_a <greturn *> (*gsi_last_bb (e->src)))
> -       find_tail_calls (e->src, &tailcalls);
> +       find_tail_calls (e->src, &tailcalls, only_mustcall);
>      }
>
>    if (live_vars)
> @@ -1228,7 +1233,7 @@ gate_tail_calls (void)
>  static unsigned int
>  execute_tail_calls (void)
>  {
> -  return tree_optimize_tail_calls_1 (true);
> +  return tree_optimize_tail_calls_1 (true, false);
>  }
>
>  namespace {
> @@ -1261,7 +1266,7 @@ public:
>    bool gate (function *) final override { return gate_tail_calls (); }
>    unsigned int execute (function *) final override
>      {
> -      return tree_optimize_tail_calls_1 (false);
> +      return tree_optimize_tail_calls_1 (false, false);
>      }
>
>  }; // class pass_tail_recursion
> @@ -1312,3 +1317,46 @@ make_pass_tail_calls (gcc::context *ctxt)
>  {
>    return new pass_tail_calls (ctxt);
>  }
> +
> +namespace {
> +
> +const pass_data pass_data_musttail =
> +{
> +  GIMPLE_PASS, /* type */
> +  "musttail", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_NONE, /* tv_id */
> +  ( PROP_cfg | PROP_ssa ), /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  0, /* todo_flags_finish */
> +};
> +
> +class pass_musttail : public gimple_opt_pass
> +{
> +public:
> +  pass_musttail (gcc::context *ctxt)
> +    : gimple_opt_pass (pass_data_musttail, ctxt)
> +  {}
> +
> +  /* opt_pass methods: */
> +  /* This pass is only used when not optimizing to make [[musttail]] still
> +     work.  */
> +  bool gate (function *) final override { return 
> !flag_optimize_sibling_calls; }

Shouldn't this check f->has_musttail only?  That is, I would expect
-fno-optimize-sibling-calls to still tail-call [[musttail]]?  The comment says
the pass only runs when not optimizing - so maybe you wanted to do
return optimize == 0;?

Otherwise this patch looks OK.

> +  unsigned int execute (function *f) final override
> +  {
> +    if (!f->has_musttail)
> +      return 0;
> +    return tree_optimize_tail_calls_1 (true, true);
> +  }
> +
> +}; // class pass_musttail
> +
> +} // anon namespace
> +
> +gimple_opt_pass *
> +make_pass_musttail (gcc::context *ctxt)
> +{
> +  return new pass_musttail (ctxt);
> +}
> --
> 2.45.2
>

Reply via email to