On Sat, 4 Apr 2020 at 11:47, Jan Hubicka <hubi...@ucw.cz> wrote:
>
> Hi,
> this patch fixes wrong code on a testcase where inline predicts
> builtin_constant_p to be true but we fail to optimize its parameter to 
> constant
> becuase FRE is not run and the value is passed by an aggregate.
>
> This patch makes the inline predicates to disable aggregate tracking
> when FRE is not going to be run and similarly value range when VRP is not
> going to be run.
>
> This is just partial fix.  Even with it we can arrange FRE/VRP to fail and
> produce wrong code, unforutnately.
>
> I think for GCC11 I will need to implement transformation in ipa-inline
> but this is bit hard to do: predicates only tracks that value will be constant
> and do not track what constant to be.
>
> Optimizing builtin_constant_p in a conditional is not going to do good job
> when the value is used later in a place that expects it to be constant.
> This is pre-existing problem that is not limited to inline tracking. For 
> example,
> FRE may do the transofrm at one place but not in another due to alias oracle
> walking limits.
>
> So I am not sure what full fix would be :(
>
> gcc/ChangeLog:
>
> 2020-04-04  Jan Hubicka  <hubi...@ucw.cz>
>
>         PR ipa/93940
>         * ipa-fnsummary.c (vrp_will_run_p): New function.
>         (fre_will_run_p): New function.
>         (evaluate_properties_for_edge): Use it.
>         * ipa-inline.c (can_inline_edge_by_limits_p): Do not inline
>         !optimize_debug to optimize_debug.
>
> gcc/testsuite/ChangeLog:
>
> 2020-04-04  Jan Hubicka  <hubi...@ucw.cz>
>
>         * g++.dg/tree-ssa/pr93940.C: New test.
>
> diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
> index b411bc4d660..279ac8f7cc9 100644
> --- a/gcc/ipa-fnsummary.c
> +++ b/gcc/ipa-fnsummary.c
> @@ -503,6 +503,32 @@ evaluate_conditions_for_known_args (struct cgraph_node 
> *node,
>      *ret_nonspec_clause = nonspec_clause;
>  }
>
> +/* Return true if VRP will be exectued on the function.
> +   We do not want to anticipate optimizations that will not happen.
> +
> +   FIXME: This can be confused with -fdisable and debug counters and thus
> +   it should not be used for correctness (only to make heuristics work).
> +   This means that inliner should do its own optimizations of expressions
> +   that it predicts to be constant so wrong code can not be triggered by
> +   builtin_constant_p.  */
> +
> +static bool
> +vrp_will_run_p (struct cgraph_node *node)
> +{
> +  return (opt_for_fn (node->decl, optimize)
> +         && !opt_for_fn (node->decl, optimize_debug)
> +         && opt_for_fn (node->decl, flag_tree_vrp));
> +}
> +
> +/* Similarly about FRE.  */
> +
> +static bool
> +fre_will_run_p (struct cgraph_node *node)
> +{
> +  return (opt_for_fn (node->decl, optimize)
> +         && !opt_for_fn (node->decl, optimize_debug)
> +         && opt_for_fn (node->decl, flag_tree_fre));
> +}
>
>  /* Work out what conditions might be true at invocation of E.
>     Compute costs for inlined edge if INLINE_P is true.
> @@ -594,6 +620,7 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool 
> inline_p,
>
>                 /* If we failed to get simple constant, try value range.  */
>                 if ((!cst || TREE_CODE (cst) != INTEGER_CST)
> +                   && vrp_will_run_p (caller)
>                     && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
>                   {
>                     value_range vr
> @@ -609,14 +636,17 @@ evaluate_properties_for_edge (struct cgraph_edge *e, 
> bool inline_p,
>                   }
>
>                 /* Determine known aggregate values.  */
> -               ipa_agg_value_set agg
> -                   = ipa_agg_value_set_from_jfunc (caller_parms_info,
> -                                                   caller, &jf->agg);
> -               if (agg.items.length ())
> +               if (vrp_will_run_p (caller))
>                   {
> -                   if (!known_aggs_ptr->length ())
> -                     vec_safe_grow_cleared (known_aggs_ptr, count);
> -                   (*known_aggs_ptr)[i] = agg;
> +                   ipa_agg_value_set agg
> +                       = ipa_agg_value_set_from_jfunc (caller_parms_info,
> +                                                       caller, &jf->agg);
> +                   if (agg.items.length ())
> +                     {
> +                       if (!known_aggs_ptr->length ())
> +                         vec_safe_grow_cleared (known_aggs_ptr, count);
> +                       (*known_aggs_ptr)[i] = agg;
> +                     }
>                   }
>               }
>
> diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
> index 302ce16a646..f71443feff7 100644
> --- a/gcc/ipa-inline.c
> +++ b/gcc/ipa-inline.c
> @@ -485,6 +485,7 @@ can_inline_edge_by_limits_p (struct cgraph_edge *e, bool 
> report,
>       else if (check_match (flag_wrapv)
>               || check_match (flag_trapv)
>               || check_match (flag_pcc_struct_return)
> +             || check_maybe_down (optimize_debug)
>               /* When caller or callee does FP math, be sure FP codegen flags
>                  compatible.  */
>               || ((caller_info->fp_expressions && callee_info->fp_expressions)
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr93940.C 
> b/gcc/testsuite/g++.dg/tree-ssa/pr93940.C
> new file mode 100644
> index 00000000000..b656aad3ef8
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/pr93940.C
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Og --coverage -pthread -fdump-tree-optimized -std=c++17" } 
> */

Hi,

Does the test require -pthread?
It fails on bare-metal targets (eg arm-eabi, aarch64-elf) because:
FAIL: g++.dg/tree-ssa/pr93940.C   (test for excess errors)
Excess errors:
xg++: error: unrecognized command-line option '-pthread'

Christophe

> +using uint16_t = unsigned short;
> +
> +struct a {
> +    uint16_t b = 0;
> +};
> +struct c {
> +    short d;
> +};
> +class e {
> +public:
> +    void f();
> +    void init_session(c);
> +};
> +
> +auto htons = [](uint16_t s) {
> +    if (__builtin_constant_p(s)) {
> +        return uint16_t(uint16_t(s >> 8) | uint16_t(s << 8));
> +    }
> +    return uint16_t(uint16_t(s >> 8) | uint16_t(s << 8));
> +};
> +
> +struct g {
> +    e h;
> +    void i(a k) {
> +        h.f();
> +        auto j = c();
> +        j.d = htons(k.b);
> +        h.init_session(j);
> +    }
> +};
> +
> +void test() {
> +    g().i({});
> +}
> +
> +/* { dg-final { scan-tree-dump-not "builtin_unreachable" "optimized"} } */

Reply via email to