PRE implicitly keeps virtual operands at the block's incoming version, but the explicit updating point during PHI translation fails to trigger when there are no PHIs at all in a block. Later lazy updating then fails because of a too loose block check. A similar issue plagues reference invalidation when checking the ANTIC_OUT to ANTIC_IN translation. The following fixes both and makes the lazy updating work.
The diagnostic testcase unfortunately requires boost so the testcase is the one I reduced for a missed optimization in PRE. The testcase fails with -m32 on x86_64 because we optimize too much before PRE which causes PRE to not trigger so we fail to eliminate a full redundancy. I'm going to open a separate bug for this. Hopefully the !lp64 selector is good enough. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. PR tree-optimization/106922 * tree-ssa-pre.cc (translate_vuse_through_block): Only keep the VUSE if its def dominates PHIBLOCK. (prune_clobbered_mems): Rewrite logic so we check whether a value dies in a block when the VUSE def doesn't dominate it. * g++.dg/tree-ssa/pr106922.C: New testcase. --- gcc/testsuite/g++.dg/tree-ssa/pr106922.C | 91 ++++++++++++++++++++++++ gcc/tree-ssa-pre.cc | 18 +++-- 2 files changed, 103 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr106922.C diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr106922.C b/gcc/testsuite/g++.dg/tree-ssa/pr106922.C new file mode 100644 index 00000000000..faf379b0361 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr106922.C @@ -0,0 +1,91 @@ +// { dg-require-effective-target c++20 } +// { dg-options "-O2 -fdump-tree-pre-details -fdump-tree-cddce3" } + +template <typename> struct __new_allocator { + void deallocate(int *, int) { operator delete(0); } +}; +template <typename _Tp> using __allocator_base = __new_allocator<_Tp>; +template <typename> struct allocator : __allocator_base<int> { + [[__gnu__::__always_inline__]] void deallocate(int *__p, int __n) { + __allocator_base<int>::deallocate(__p, __n); + } +}; +template <typename> struct allocator_traits; +template <typename _Tp> struct allocator_traits<allocator<_Tp>> { + using allocator_type = allocator<_Tp>; + using pointer = _Tp *; + using size_type = int; + template <typename _Up> using rebind_alloc = allocator<_Up>; + static void deallocate(allocator_type &__a, pointer __p, size_type __n) { + 
__a.deallocate(__p, __n); + } +}; +template <typename _Alloc> struct __alloc_traits : allocator_traits<_Alloc> { + typedef allocator_traits<_Alloc> _Base_type; + template <typename _Tp> struct rebind { + typedef _Base_type::template rebind_alloc<_Tp> other; + }; +}; +long _M_deallocate___n; +struct _Vector_base { + typedef __alloc_traits<allocator<int>>::rebind<int>::other _Tp_alloc_type; + typedef __alloc_traits<_Tp_alloc_type>::pointer pointer; + struct _Vector_impl_data { + pointer _M_start; + }; + struct _Vector_impl : _Tp_alloc_type, _Vector_impl_data {}; + ~_Vector_base() { _M_deallocate(_M_impl._M_start); } + _Vector_impl _M_impl; + void _M_deallocate(pointer __p) { + if (__p) + __alloc_traits<_Tp_alloc_type>::deallocate(_M_impl, __p, + _M_deallocate___n); + } +}; +struct vector : _Vector_base {}; +struct aligned_storage { + int dummy_; + int *ptr_ref0; + vector &ref() { + vector *__trans_tmp_2; + void *__trans_tmp_1 = &dummy_; + union { + void *ap_pvoid; + vector *as_ptype; + } caster{__trans_tmp_1}; + __trans_tmp_2 = caster.as_ptype; + return *__trans_tmp_2; + } +}; +struct optional_base { + optional_base operator=(optional_base &) { + bool __trans_tmp_3 = m_initialized; + if (__trans_tmp_3) + m_initialized = false; + return *this; + } + ~optional_base() { + if (m_initialized) + m_storage.ref().~vector(); + } + bool m_initialized; + aligned_storage m_storage; +}; +struct optional : optional_base { + optional() : optional_base() {} +}; +template <class> using Optional = optional; +struct Trans_NS___cxx11_basic_stringstream {}; +void operator<<(Trans_NS___cxx11_basic_stringstream, int); +int testfunctionfoo_myStructs[10]; +void testfunctionfoo() { + Optional<char> external, internal; + for (auto myStruct : testfunctionfoo_myStructs) { + Trans_NS___cxx11_basic_stringstream address_stream; + address_stream << myStruct; + external = internal; + } +} + +// { dg-final { scan-tree-dump-times "Found fully redundant value" 4 "pre" { xfail { ! 
lp64 } } } } +// { dg-final { scan-tree-dump-not "m_initialized" "cddce3" { xfail { ! lp64 } } } } diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc index e029bd36da3..2afc74fc57c 100644 --- a/gcc/tree-ssa-pre.cc +++ b/gcc/tree-ssa-pre.cc @@ -1236,7 +1236,11 @@ translate_vuse_through_block (vec<vn_reference_op_s> operands, if (same_valid) *same_valid = true; - if (gimple_bb (phi) != phiblock) + /* If value-numbering provided a memory state for this + that dominates PHIBLOCK we can just use that. */ + if (gimple_nop_p (phi) + || (gimple_bb (phi) != phiblock + && dominated_by_p (CDI_DOMINATORS, phiblock, gimple_bb (phi)))) return vuse; /* We have pruned expressions that are killed in PHIBLOCK via @@ -2031,11 +2035,13 @@ prune_clobbered_mems (bitmap_set_t set, basic_block block) { gimple *def_stmt = SSA_NAME_DEF_STMT (ref->vuse); if (!gimple_nop_p (def_stmt) - && ((gimple_bb (def_stmt) != block - && !dominated_by_p (CDI_DOMINATORS, - block, gimple_bb (def_stmt))) - || (gimple_bb (def_stmt) == block - && value_dies_in_block_x (expr, block)))) + /* If value-numbering provided a memory state for this + that dominates BLOCK we're done, otherwise we have + to check if the value dies in BLOCK. */ + && !(gimple_bb (def_stmt) != block + && dominated_by_p (CDI_DOMINATORS, + block, gimple_bb (def_stmt))) + && value_dies_in_block_x (expr, block)) to_remove = i; } /* If the REFERENCE may trap make sure the block does not contain -- 2.35.3