On Thu, 27 Apr 2017, Martin Jambor wrote: > Hi, > > PR 78687 is a reported SRA missed-optimization issue. The problem, in > its simplest form, happens when we have two big local non-addressable > structures, we initialize only small portions of the first one and > then assign all of it it to the second one, where we still only work > with the initialized small portion. Such an assignment will make > current SRA think that all of the second aggregate contains useful > unknown data, (i.e. the grp_unscalarized_data flag will be set), and > SRA will not remove this assignment and/or any other assignments with > the aggregate on the right side, even though it could. > > grp_unscalarized_data is set during access analysis when (at the level > described by the access structure instance) there are unscalarized > accesses and there is a write to the access, as signalled by grp_write > flag. > > This patch deals with this issue by using assignment links to set > grp_write lazily when it can. I.e. we do not set the write flag when > scanning the function for assignments between SRA candidates but > postpone that when processing links and only set grp_write of the > access representing the RHS when the access representing LHS has it. > > In order for this to work, grp_writes need to be propagated downwards > in the access tree before processing links, and the propagation needs > to be done transitively, and now it is also necessary for correctness > that all links of all ancestors of the LHS are processed, so we need a > parent link. And when a candidate is disqualified, we need to set the > grp_write too, of course. > > All in all, it is a lot of tinkering, very imperfect because none of > this is flow-sensitive, but it does generate much better code for the > testcase, which might actually be typical for a lot of "modern" C++ > input. Because any substantial rewrite of SRA is unlikely to happen > in time for GCC 8, I'd like t commit it to trunk. Needless to say, it > passes bootstrap and testing on x86_64-linux. > > What do you think?
Sounds great. I think it's reasonable to do this (and I suspected for the full scalarization case sth similar would help eventually not scalarizing using the "element" types). Thus ok. Thanks, Richard. > Martin > > > > 2017-03-15 Martin Jambor <mjam...@suse.cz> > > PR tree-optimization/78687 > * tree-sra.c (access): New field parent. > (process_subtree_disqualification): New function. > (disqualify_candidate): Call it. > (build_accesses_from_assign): Reset write flag if creating an > assighnment link. > (build_access_subtree): Fill in parent field and also prpagate > down grp_write flag. > (create_artificial_child_access): New parameter set_grp_write, set > grp_write to its value. > (propagate_subaccesses_across_link): Also propagate grp_write flag > values. > (propagate_all_subaccesses): Push the closest parent back to work > queue if add_access_to_work_queue returned true. > > testsuite/ > * g++.dg/tree-ssa/pr78687.C: New test. > --- > gcc/testsuite/g++.dg/tree-ssa/pr78687.C | 483 > ++++++++++++++++++++++++++++++++ > gcc/tree-sra.c | 91 +++++- > 2 files changed, 561 insertions(+), 13 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr78687.C > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr78687.C > b/gcc/testsuite/g++.dg/tree-ssa/pr78687.C > new file mode 100644 > index 00000000000..698458f0e9a > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr78687.C > @@ -0,0 +1,483 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -std=gnu++14 -fdump-tree-sra" } */ > + > +#include <utility> > + > +#define EGGS_CXX11_CONSTEXPR constexpr > +#define EGGS_CXX11_STATIC_CONSTEXPR static constexpr > +#define EGGS_CXX14_CONSTEXPR constexpr > +#define EGGS_CXX11_NOEXCEPT noexcept > + > +namespace eggs { namespace variants { namespace detail > +{ > + struct empty > + { > + EGGS_CXX11_CONSTEXPR bool operator==(empty) const { return true; } > + EGGS_CXX11_CONSTEXPR bool operator<(empty) const { return false; } > + }; > + > + template <typename T> > + struct identity > + { > + using type = T; > + }; > + > + template <std::size_t I> > + struct index > + { > + EGGS_CXX11_STATIC_CONSTEXPR std::size_t value = I; > + }; > + > + template <typename ...Ts> > + struct pack > + { > + using type = pack; > + EGGS_CXX11_STATIC_CONSTEXPR std::size_t size = sizeof...(Ts); > + }; > + > + template <typename T, T ...Vs> > + struct pack_c > + { > + using type = pack_c; > + EGGS_CXX11_STATIC_CONSTEXPR std::size_t size = sizeof...(Vs); > + }; > + > + > /////////////////////////////////////////////////////////////////////////// > + template <typename Is, bool Odd> > + struct _make_index_pack_twice; > + > + template <std::size_t ...Is> > + struct _make_index_pack_twice< > + pack_c<std::size_t, Is...> > + , false > + > : pack_c<std::size_t, Is..., (sizeof...(Is) + Is)...> > + {}; > + > + template <std::size_t ...Is> > + struct _make_index_pack_twice< > + pack_c<std::size_t, Is...> > + , true > + > : pack_c<std::size_t, Is..., (sizeof...(Is) + Is)..., sizeof...(Is) * > 2> > + {}; > + > + template <std::size_t N> > + struct _make_index_pack > + : _make_index_pack_twice< > + typename _make_index_pack<N / 2>::type > + , N % 2 != 0 > + > > + {}; > + > + template <> > + struct _make_index_pack<1> > + : pack_c<std::size_t, 0> > + {}; > + > + template <> > + struct _make_index_pack<0> > + : pack_c<std::size_t> > + {}; > + > + template <std::size_t N> > + using make_index_pack = typename _make_index_pack<N>::type; > + > + template <typename Ts> > + struct _index_pack; > + > + template <typename ...Ts> > + struct _index_pack<pack<Ts...>> > + : _make_index_pack<sizeof...(Ts)> > + {}; > + > + template <typename Ts> > + using index_pack = typename _index_pack<Ts>::type; > + > + > /////////////////////////////////////////////////////////////////////////// > + template <typename Vs> > + struct all_of; > + > + template <bool ...Vs> > + struct all_of<pack_c<bool, Vs...>> > + : std::integral_constant< > + bool > + , std::is_same< > + pack_c<bool, Vs...> > + , pack_c<bool, (Vs || true)...> // true... > + >::value > + > > + {}; > + > + template <typename ...Ts> > + struct all_of<pack<Ts...>> > + : all_of<pack_c<bool, (Ts::value)...>> > + {}; > + > + template <typename ...Vs> > + struct any_of; > + > + template <bool ...Vs> > + struct any_of<pack_c<bool, Vs...>> > + : std::integral_constant< > + bool > + , !all_of<pack_c<bool, !Vs...>>::value > + > > + {}; > + > + template <typename ...Ts> > + struct any_of<pack<Ts...>> > + : any_of<pack_c<bool, (Ts::value)...>> > + {}; > + > + > /////////////////////////////////////////////////////////////////////////// > + template <std::size_t I, typename T> > + struct _indexed {}; > + > + template <typename Ts, typename Is = index_pack<Ts>> > + struct _indexer; > + > + template <typename ...Ts, std::size_t ...Is> > + struct _indexer<pack<Ts...>, pack_c<std::size_t, Is...>> > + : _indexed<Is, Ts>... > + {}; > + > + empty _at_index(...); > + > + template <std::size_t I, typename T> > + identity<T> _at_index(_indexed<I, T> const&); > + > + template <std::size_t I, typename Ts> > + struct at_index > + : decltype(_at_index<I>(_indexer<Ts>{})) > + {}; > + > + empty _index_of(...); > + > + template <typename T, std::size_t I> > + index<I> _index_of(_indexed<I, T> const&); > + > + template <typename T, typename Ts> > + struct index_of > + : decltype(_index_of<T>(_indexer<Ts>{})) > + {}; > +}}} > + > +namespace eggs { namespace variants { namespace detail > +{ > + template <typename Ts, bool IsTriviallyDestructible> > + struct _union; > + > + > /////////////////////////////////////////////////////////////////////////// > + template <bool IsTriviallyDestructible> > + struct _union<pack<>, IsTriviallyDestructible> > + {}; > + > + template <typename T, typename ...Ts> > + struct _union<pack<T, Ts...>, true> > + { > + EGGS_CXX11_STATIC_CONSTEXPR std::size_t size = 1 + sizeof...(Ts); > + > + template <typename ...Args> > + EGGS_CXX11_CONSTEXPR _union(index<0>, Args&&... args) > + : _head(std::forward<Args>(args)...) > + {} > + > + template <std::size_t I, typename ...Args> > + EGGS_CXX11_CONSTEXPR _union(index<I>, Args&&... args) > + : _tail(index<I - 1>{}, std::forward<Args>(args)...) > + {} > + > + EGGS_CXX14_CONSTEXPR void* target() EGGS_CXX11_NOEXCEPT > + { > + return &_target; > + } > + > + EGGS_CXX11_CONSTEXPR void const* target() const EGGS_CXX11_NOEXCEPT > + { > + return &_target; > + } > + > + EGGS_CXX14_CONSTEXPR T& get(index<0>) EGGS_CXX11_NOEXCEPT > + { > + return this->_head; > + } > + > + EGGS_CXX11_CONSTEXPR T const& get(index<0>) const EGGS_CXX11_NOEXCEPT > + { > + return this->_head; > + } > + > + template < > + std::size_t I > + , typename U = typename at_index<I, pack<T, Ts...>>::type > + > > + EGGS_CXX14_CONSTEXPR U& get(index<I>) EGGS_CXX11_NOEXCEPT > + { > + return this->_tail.get(index<I - 1>{}); > + } > + > + template < > + std::size_t I > + , typename U = typename at_index<I, pack<T, Ts...>>::type > + > > + EGGS_CXX11_CONSTEXPR U const& get(index<I>) const EGGS_CXX11_NOEXCEPT > + { > + return this->_tail.get(index<I - 1>{}); > + } > + > + private: > + union > + { > + char _target; > + T _head; > + _union<pack<Ts...>, true> _tail; > + }; > + }; > + > + > /////////////////////////////////////////////////////////////////////////// > + template <typename Ts, bool TriviallyCopyable, bool > TriviallyDestructible> > + struct _storage; > + > + template <typename ...Ts> > + struct _storage<pack<Ts...>, true, true> > + : _union< > + pack<Ts...> > + , all_of<pack<std::is_trivially_destructible<Ts>...>>::value > + > > + { > + using base_type = _union< > + pack<Ts...> > + , all_of<pack<std::is_trivially_destructible<Ts>...>>::value > + >; > + > + EGGS_CXX11_CONSTEXPR _storage() EGGS_CXX11_NOEXCEPT > + : base_type{index<0>{}} > + , _which{0} > + {} > + > + _storage(_storage const& rhs) = default; > + _storage(_storage&& rhs) = default; > + > + template <std::size_t I, typename ...Args> > + EGGS_CXX11_CONSTEXPR _storage(index<I> which, Args&&... args) > + : base_type{which, std::forward<Args>(args)...} > + , _which{I} > + {} > + > + _storage& operator=(_storage const& rhs) = default; > + _storage& operator=(_storage&& rhs) = default; > + > + EGGS_CXX11_CONSTEXPR std::size_t which() const > + { > + return _which; > + } > + > + using base_type::target; > + using base_type::get; > + > + protected: > + std::size_t _which; > + }; > + > + template <typename ...Ts> > + using storage = _storage< > + pack<empty, Ts...> > + , all_of<pack<std::is_trivially_copyable<Ts>...>>::value > + , all_of<pack<std::is_trivially_destructible<Ts>...>>::value > + >; > +}}} > + > +namespace eggs { namespace variants > +{ > + template <typename ...Ts> > + class variant; > + > + namespace detail > + { > + > /////////////////////////////////////////////////////////////////////// > + namespace _best_match > + { > + template <typename Ts, std::size_t I = 0> > + struct overloads > + {}; > + > + template <typename T, typename ...Ts, std::size_t I> > + struct overloads<pack<T, Ts...>, I> > + : overloads<pack<Ts...>, I + 1> > + { > + using fun_ptr = index<I>(*)(T); > + operator fun_ptr(); > + }; > + > + template <typename F, typename T> > + auto _invoke(F&&, T&&) > + -> decltype(std::declval<F>()(std::declval<T>())); > + > + struct _fallback {}; > + > + _fallback _invoke(...); > + > + template < > + typename T, typename U > + , typename R = decltype(_best_match::_invoke( > + std::declval<T>(), std::declval<U>())) > + > > + struct result_of : R > + {}; > + > + template <typename T, typename U> > + struct result_of<T, U, _fallback> > + {}; > + } > + > + template <typename U, typename ...Ts> > + struct index_of_best_match > + : _best_match::result_of<_best_match::overloads<Ts...>, U> > + {}; > + > + } > + > + template <typename ...Ts> > + class variant > + { > + > + public: > + EGGS_CXX11_CONSTEXPR variant() EGGS_CXX11_NOEXCEPT = delete; > + > + variant(variant const& rhs) = default; > + > + variant(variant&& rhs) = default; > + > + template < > + typename U > + , typename Enable = typename std::enable_if<!std::is_same< > + typename std::decay<U>::type, variant > + >::value>::type > + , std::size_t I = detail::index_of_best_match< > + U&&, detail::pack<Ts...>>::value > + , typename T = typename detail::at_index< > + I, detail::pack<Ts...>>::type > + > > + EGGS_CXX11_CONSTEXPR variant(U&& v) > + noexcept( > + std::is_nothrow_constructible<T, U&&>::value) > + : _storage{detail::index<I + 1>{}, std::forward<U>(v)} > + {} > + > + ~variant() = default; > + variant& operator=(variant const& rhs) = delete; > + > + private: > + detail::storage<Ts...> _storage; > + }; > +}} > + > +template <class T, class Base> > +struct ref_proxy : Base > +{ > + using Base::Base; > + > + ref_proxy() > + : Base() > + { > + } > + > + ref_proxy(Base ptr) > + : Base(std::move(ptr)) > + { > + } > +}; > + > +template <class T> > +struct inplace_ref > +{ > + explicit inplace_ref(T inner) > + : inner_(inner) > + { > + } > + > + T inner_; > +}; > + > +template <class ...Variants> > +struct variant_ref > +{ > + variant_ref() = delete; > + > + explicit variant_ref(eggs::variants::variant<Variants...> t) > + : inner_storage_(t) > + { > + } > + > + template <class Source> > + variant_ref(ref_proxy<Source, variant_ref> ptr) > + : inner_storage_(ptr.inner_storage_) > + {} > + > +private: > + eggs::variants::variant<Variants...> inner_storage_; > +}; > + > +struct option_1 > +{ > + void *a, *b, *c, *d, *e; > +}; > + > +struct option_2 > +{ > +}; > + > +using option_ref = variant_ref<option_1, option_2>; > + > + > +struct qual_option > +{ > + qual_option(ref_proxy<void, option_ref > type, int quals) > + : type_(type) > + , quals_(quals) > + { > + } > + > + explicit qual_option(ref_proxy<void, option_ref > type) > + : qual_option(type, 0) > + { > + } > + > + ref_proxy<void, option_ref > type_; > + int quals_; > +}; > + > +inline ref_proxy<option_2, option_ref > make_object_1() > +{ > + return ref_proxy<option_2, option_ref >(option_2()); > +} > + > +inline ref_proxy<option_2, option_ref > make_object_2() > +{ > + return make_object_1(); > +} > + > +inline inplace_ref<qual_option> make_object_3(ref_proxy<option_2, > option_ref>&& a0) > +{ > + return inplace_ref<qual_option>(qual_option(a0)); > +} > + > +inline ref_proxy<qual_option, inplace_ref<qual_option> > > make_object_4(ref_proxy<option_2, option_ref>&& a0) > +{ > + return make_object_3(std::move(a0)); > +} > + > + > +ref_proxy<qual_option, inplace_ref<qual_option> > f() > __attribute__((noinline)); > + > +ref_proxy<qual_option, inplace_ref<qual_option> > f() > +{ > + return make_object_4(make_object_2()); > +} > + > +int main(int argc, char* argv[]) > +{ > + for (;;) > + f(); > +} > + > +/* { dg-final { scan-tree-dump "Removing load:.*ptr;" "sra" } } */ > diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c > index 31834ed7af7..b05707e69c2 100644 > --- a/gcc/tree-sra.c > +++ b/gcc/tree-sra.c > @@ -158,6 +158,10 @@ struct access > the representative. */ > struct access *group_representative; > > + /* After access tree has been constructed, this points to the parent of the > + current access, if there is one. NULL for roots. */ > + struct access *parent; > + > /* If this access has any children (in terms of the definition above), this > points to the first one. */ > struct access *first_child; > @@ -690,6 +694,19 @@ static bool constant_decl_p (tree decl) > return VAR_P (decl) && DECL_IN_CONSTANT_POOL (decl); > } > > + > +/* Mark LHS of assign links out of ACCESS and its children as written to. */ > + > +static void > +process_subtree_disqualification (struct access *access) > +{ > + struct access *child; > + for (struct assign_link *link = access->first_link; link; link = > link->next) > + link->lacc->grp_write = true; > + for (child = access->first_child; child; child = child->next_sibling) > + process_subtree_disqualification (child); > +} > + > /* Remove DECL from candidates for SRA and write REASON to the dump file if > there is one. */ > static void > @@ -706,6 +723,13 @@ disqualify_candidate (tree decl, const char *reason) > print_generic_expr (dump_file, decl, 0); > fprintf (dump_file, " - %s\n", reason); > } > + > + struct access *access = get_first_repr_for_decl (decl); > + while (access) > + { > + process_subtree_disqualification (access); > + access = access->next_grp; > + } > } > > /* Return true iff the type contains a field or an element which does not > allow > @@ -1330,8 +1354,10 @@ build_accesses_from_assign (gimple *stmt) > > link->lacc = lacc; > link->racc = racc; > - > add_link_to_rhs (racc, link); > + /* Let's delay marking the areas as written until propagation of > accesses > + across link. */ > + lacc->write = false; > } > > return lacc || racc; > @@ -2244,6 +2270,8 @@ build_access_subtree (struct access **access) > else > last_child->next_sibling = *access; > last_child = *access; > + (*access)->parent = root; > + (*access)->grp_write |= root->grp_write; > > if (!build_access_subtree (access)) > return false; > @@ -2487,13 +2515,15 @@ child_would_conflict_in_lacc (struct access *lacc, > HOST_WIDE_INT norm_offset, > > /* Create a new child access of PARENT, with all properties just like MODEL > except for its offset and with its grp_write false and grp_read true. > - Return the new access or NULL if it cannot be created. Note that this > access > - is created long after all splicing and sorting, it's not located in any > - access vector and is automatically a representative of its group. */ > + Return the new access or NULL if it cannot be created. Note that this > + access is created long after all splicing and sorting, it's not located in > + any access vector and is automatically a representative of its group. Set > + the gpr_write flag of the new accesss if SET_GRP_WRITE is true. */ > > static struct access * > create_artificial_child_access (struct access *parent, struct access *model, > - HOST_WIDE_INT new_offset) > + HOST_WIDE_INT new_offset, > + bool set_grp_write) > { > struct access **child; > tree expr = parent->base; > @@ -2515,7 +2545,7 @@ create_artificial_child_access (struct access *parent, > struct access *model, > access->offset = new_offset; > access->size = model->size; > access->type = model->type; > - access->grp_write = true; > + access->grp_write = set_grp_write; > access->grp_read = false; > access->reverse = model->reverse; > > @@ -2541,10 +2571,23 @@ propagate_subaccesses_across_link (struct access > *lacc, struct access *racc) > HOST_WIDE_INT norm_delta = lacc->offset - racc->offset; > bool ret = false; > > + /* IF the LHS is still not marked as being written to, we only need to do > so > + if the RHS at this level actually was. */ > + if (!lacc->grp_write && > + (racc->grp_write || TREE_CODE (racc->base) == PARM_DECL)) > + { > + lacc->grp_write = true; > + ret = true; > + } > + > if (is_gimple_reg_type (lacc->type) > || lacc->grp_unscalarizable_region > || racc->grp_unscalarizable_region) > - return false; > + { > + ret |= !lacc->grp_write; > + lacc->grp_write = true; > + return ret; > + } > > if (is_gimple_reg_type (racc->type)) > { > @@ -2564,7 +2607,7 @@ propagate_subaccesses_across_link (struct access *lacc, > struct access *racc) > lacc->grp_no_warning = true; > } > } > - return false; > + return ret; > } > > for (rchild = racc->first_child; rchild; rchild = rchild->next_sibling) > @@ -2573,23 +2616,37 @@ propagate_subaccesses_across_link (struct access > *lacc, struct access *racc) > HOST_WIDE_INT norm_offset = rchild->offset + norm_delta; > > if (rchild->grp_unscalarizable_region) > - continue; > + { > + lacc->grp_write = true; > + continue; > + } > > if (child_would_conflict_in_lacc (lacc, norm_offset, rchild->size, > &new_acc)) > { > if (new_acc) > { > + if (!new_acc->grp_write > + && (lacc->grp_write || rchild->grp_write)) > + { > + new_acc ->grp_write = true; > + ret = true; > + } > + > rchild->grp_hint = 1; > new_acc->grp_hint |= new_acc->grp_read; > if (rchild->first_child) > ret |= propagate_subaccesses_across_link (new_acc, rchild); > } > + else > + lacc->grp_write = true; > continue; > } > > rchild->grp_hint = 1; > - new_acc = create_artificial_child_access (lacc, rchild, norm_offset); > + new_acc = create_artificial_child_access (lacc, rchild, norm_offset, > + lacc->grp_write > + || rchild->grp_write); > if (new_acc) > { > ret = true; > @@ -2620,9 +2677,17 @@ propagate_all_subaccesses (void) > if (!bitmap_bit_p (candidate_bitmap, DECL_UID (lacc->base))) > continue; > lacc = lacc->group_representative; > - if (propagate_subaccesses_across_link (lacc, racc) > - && lacc->first_link) > - add_access_to_work_queue (lacc); > + if (propagate_subaccesses_across_link (lacc, racc)) > + do > + { > + if (lacc->first_link) > + { > + add_access_to_work_queue (lacc); > + break; > + } > + lacc = lacc->parent; > + } > + while (lacc); > } > } > } > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)