On Sat, 4 Apr 2020 at 11:47, Jan Hubicka <hubi...@ucw.cz> wrote: > > Hi, > this patch fixes wrong code on a testcase where inline predicts > builtin_constant_p to be true but we fail to optimize its parameter to > constant > becuase FRE is not run and the value is passed by an aggregate. > > This patch makes the inline predicates to disable aggregate tracking > when FRE is not going to be run and similarly value range when VRP is not > going to be run. > > This is just partial fix. Even with it we can arrange FRE/VRP to fail and > produce wrong code, unforutnately. > > I think for GCC11 I will need to implement transformation in ipa-inline > but this is bit hard to do: predicates only tracks that value will be constant > and do not track what constant to be. > > Optimizing builtin_constant_p in a conditional is not going to do good job > when the value is used later in a place that expects it to be constant. > This is pre-existing problem that is not limited to inline tracking. For > example, > FRE may do the transofrm at one place but not in another due to alias oracle > walking limits. > > So I am not sure what full fix would be :( > > gcc/ChangeLog: > > 2020-04-04 Jan Hubicka <hubi...@ucw.cz> > > PR ipa/93940 > * ipa-fnsummary.c (vrp_will_run_p): New function. > (fre_will_run_p): New function. > (evaluate_properties_for_edge): Use it. > * ipa-inline.c (can_inline_edge_by_limits_p): Do not inline > !optimize_debug to optimize_debug. > > gcc/testsuite/ChangeLog: > > 2020-04-04 Jan Hubicka <hubi...@ucw.cz> > > * g++.dg/tree-ssa/pr93940.C: New test. > > diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c > index b411bc4d660..279ac8f7cc9 100644 > --- a/gcc/ipa-fnsummary.c > +++ b/gcc/ipa-fnsummary.c > @@ -503,6 +503,32 @@ evaluate_conditions_for_known_args (struct cgraph_node > *node, > *ret_nonspec_clause = nonspec_clause; > } > > +/* Return true if VRP will be exectued on the function. > + We do not want to anticipate optimizations that will not happen. > + > + FIXME: This can be confused with -fdisable and debug counters and thus > + it should not be used for correctness (only to make heuristics work). > + This means that inliner should do its own optimizations of expressions > + that it predicts to be constant so wrong code can not be triggered by > + builtin_constant_p. */ > + > +static bool > +vrp_will_run_p (struct cgraph_node *node) > +{ > + return (opt_for_fn (node->decl, optimize) > + && !opt_for_fn (node->decl, optimize_debug) > + && opt_for_fn (node->decl, flag_tree_vrp)); > +} > + > +/* Similarly about FRE. */ > + > +static bool > +fre_will_run_p (struct cgraph_node *node) > +{ > + return (opt_for_fn (node->decl, optimize) > + && !opt_for_fn (node->decl, optimize_debug) > + && opt_for_fn (node->decl, flag_tree_fre)); > +} > > /* Work out what conditions might be true at invocation of E. > Compute costs for inlined edge if INLINE_P is true. > @@ -594,6 +620,7 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool > inline_p, > > /* If we failed to get simple constant, try value range. */ > if ((!cst || TREE_CODE (cst) != INTEGER_CST) > + && vrp_will_run_p (caller) > && ipa_is_param_used_by_ipa_predicates (callee_pi, i)) > { > value_range vr > @@ -609,14 +636,17 @@ evaluate_properties_for_edge (struct cgraph_edge *e, > bool inline_p, > } > > /* Determine known aggregate values. */ > - ipa_agg_value_set agg > - = ipa_agg_value_set_from_jfunc (caller_parms_info, > - caller, &jf->agg); > - if (agg.items.length ()) > + if (vrp_will_run_p (caller)) > { > - if (!known_aggs_ptr->length ()) > - vec_safe_grow_cleared (known_aggs_ptr, count); > - (*known_aggs_ptr)[i] = agg; > + ipa_agg_value_set agg > + = ipa_agg_value_set_from_jfunc (caller_parms_info, > + caller, &jf->agg); > + if (agg.items.length ()) > + { > + if (!known_aggs_ptr->length ()) > + vec_safe_grow_cleared (known_aggs_ptr, count); > + (*known_aggs_ptr)[i] = agg; > + } > } > } > > diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c > index 302ce16a646..f71443feff7 100644 > --- a/gcc/ipa-inline.c > +++ b/gcc/ipa-inline.c > @@ -485,6 +485,7 @@ can_inline_edge_by_limits_p (struct cgraph_edge *e, bool > report, > else if (check_match (flag_wrapv) > || check_match (flag_trapv) > || check_match (flag_pcc_struct_return) > + || check_maybe_down (optimize_debug) > /* When caller or callee does FP math, be sure FP codegen flags > compatible. */ > || ((caller_info->fp_expressions && callee_info->fp_expressions) > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr93940.C > b/gcc/testsuite/g++.dg/tree-ssa/pr93940.C > new file mode 100644 > index 00000000000..b656aad3ef8 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr93940.C > @@ -0,0 +1,38 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Og --coverage -pthread -fdump-tree-optimized -std=c++17" } > */
Hi, Does the test require -pthread? It fails on bare-metal targets (eg arm-eabi, aarch64-elf) because: FAIL: g++.dg/tree-ssa/pr93940.C (test for excess errors) Excess errors: xg++: error: unrecognized command-line option '-pthread' Christophe > +using uint16_t = unsigned short; > + > +struct a { > + uint16_t b = 0; > +}; > +struct c { > + short d; > +}; > +class e { > +public: > + void f(); > + void init_session(c); > +}; > + > +auto htons = [](uint16_t s) { > + if (__builtin_constant_p(s)) { > + return uint16_t(uint16_t(s >> 8) | uint16_t(s << 8)); > + } > + return uint16_t(uint16_t(s >> 8) | uint16_t(s << 8)); > +}; > + > +struct g { > + e h; > + void i(a k) { > + h.f(); > + auto j = c(); > + j.d = htons(k.b); > + h.init_session(j); > + } > +}; > + > +void test() { > + g().i({}); > +} > + > +/* { dg-final { scan-tree-dump-not "builtin_unreachable" "optimized"} } */