[gcc r15-5735] optimize basic_string
https://gcc.gnu.org/g:1046c32de4956c3d706a2ff8683582fd21b8f360 commit r15-5735-g1046c32de4956c3d706a2ff8683582fd21b8f360 Author: Jan Hubicka Date: Wed Nov 27 23:52:37 2024 +0100 optimize basic_string Add __builtin_unreachable conditionls to declare value ranges of basic_string::length(). FIx max_size() to return actual max size using logic similar to std::vector. Aviod use of size() in empty() to save some compile time overhead. As disucced, max_size() change is technically ABI breaking, but hopefully this does not really matter in practice. Change of length() breaks empty-loop testcase where we now optimize the loop only after inlining, so template is updated to check cddce3 instead of cddce2. This is PR117764. With these chages we now optimize out unused strings as tested in string-1.C libstdc++-v3/ChangeLog: * include/bits/basic_string.h (basic_string::size(), basic_string::length(), basic_string::capacity()): Add __builtin_unreachable to declare value ranges. (basic_string::empty()): Implement directly (basic_string::max_size()): Account correctly the terminating 0 and limits implied by ptrdiff_t. gcc/testsuite/ChangeLog: * g++.dg/tree-ssa/empty-loop.C: xfail optimization at cddce2 and check it happens at cddce3. * g++.dg/tree-ssa/string-1.C: New test. Diff: --- gcc/testsuite/g++.dg/tree-ssa/empty-loop.C | 5 - gcc/testsuite/g++.dg/tree-ssa/string-1.C | 9 + libstdc++-v3/include/bits/basic_string.h | 25 +++-- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C index ed4a603bf5b3..b7e7e27cc042 100644 --- a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C +++ b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C @@ -30,5 +30,8 @@ int foo (vector &v, list &l, set &s, map &m return 0; } -/* { dg-final { scan-tree-dump-not "if" "cddce2"} } */ +/* Adding __builtin_unreachable to std::string::size() prevents cddce2 from + eliminating the loop early, see PR117764. */ +/* { dg-final { scan-tree-dump-not "if" "cddce2" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-not "if" "cddce3"} } */ diff --git a/gcc/testsuite/g++.dg/tree-ssa/string-1.C b/gcc/testsuite/g++.dg/tree-ssa/string-1.C new file mode 100644 index ..d38c23a7628b --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/string-1.C @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -std=c++20 -fdump-tree-optimized" } */ +#include +std::string +test (std::string &a) +{ + return a; +} +/* { dg-final { scan-tree-dump-not "throw" "optimized" } } */ diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h index f5b320099b17..17b973c8b45c 100644 --- a/libstdc++-v3/include/bits/basic_string.h +++ b/libstdc++-v3/include/bits/basic_string.h @@ -1079,20 +1079,30 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR size_type size() const _GLIBCXX_NOEXCEPT - { return _M_string_length; } + { + size_type __sz = _M_string_length; + if (__sz > max_size ()) + __builtin_unreachable (); + return __sz; + } /// Returns the number of characters in the string, not including any /// null-termination. _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR size_type length() const _GLIBCXX_NOEXCEPT - { return _M_string_length; } + { return size(); } /// Returns the size() of the largest possible %string. _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR size_type max_size() const _GLIBCXX_NOEXCEPT - { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; } + { + const size_t __diffmax + = __gnu_cxx::__numeric_traits::__max / sizeof(_CharT); + const size_t __allocmax = _Alloc_traits::max_size(_M_get_allocator()); + return (std::min)(__diffmax, __allocmax) - 1; + } /** * @brief Resizes the %string to the specified number of characters. @@ -1184,8 +1194,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 size_type capacity() const _GLIBCXX_NOEXCEPT { - return _M_is_local() ? size_type(_S_local_capacity) -: _M_allocated_capacity; + size_t __sz = _M_is_local() ? size_type(_S_local_capacity) +: _M_allocated_capacity; + if (__sz < _S_local_capacity || __sz > max_size ()) + __builtin_unreachable (); + return __sz; } /** @@ -1234,7 +1247,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR bool empty() const _GLIBCXX_NOEXCEPT - { return this->size() == 0; } + { return _M_string_length == 0; }
[gcc r15-5361] Add __builtion_unreachable to vector::size(), vector::capacity()
https://gcc.gnu.org/g:aac5c57ee167230cea466064951daf06e42197b9 commit r15-5361-gaac5c57ee167230cea466064951daf06e42197b9 Author: Jan Hubicka Date: Sun Nov 17 01:21:04 2024 +0100 Add __builtion_unreachable to vector::size(), vector::capacity() This patch makes it clear that vector sizes and capacities are not negative. With recent change to ipa-fnsummary this should not affect inlining and improves codegen of some vector manipulation functions. I tested clang build. Looking for throw_bad calls there are only 3 called considerably often (bad_allloc, bad_array_new_length and function_callv). The patch seems to reduce bad_alloc and bad_array_new_length calls considerably: bad_alloc 380->147 bad_array_new_length 832->128 libstdc++-v3/ChangeLog: PR tree-optimization/109442 * include/bits/stl_vector.h: (vector::size(), vector::capacity()): Add __builtin_unreachable call to announce that size and capacity are non-negative. gcc/testsuite/ChangeLog: PR tree-optimization/109442 * g++.dg/tree-ssa/pr109442.C: New test. Diff: --- gcc/testsuite/g++.dg/tree-ssa/pr109442.C | 2 +- libstdc++-v3/include/bits/stl_vector.h | 14 +++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr109442.C b/gcc/testsuite/g++.dg/tree-ssa/pr109442.C index ec40c470c8dd..ea5800aa1340 100644 --- a/gcc/testsuite/g++.dg/tree-ssa/pr109442.C +++ b/gcc/testsuite/g++.dg/tree-ssa/pr109442.C @@ -1,5 +1,5 @@ // { dg-do compile { target c++11 } } -// { dg-options "-O1 -fdump-tree-optimized" } +// { dg-options "-O2 -fdump-tree-optimized" } #include #define T int T vat1(std::vector v1) { diff --git a/libstdc++-v3/include/bits/stl_vector.h b/libstdc++-v3/include/bits/stl_vector.h index df48ba3377fa..3ece16b2facd 100644 --- a/libstdc++-v3/include/bits/stl_vector.h +++ b/libstdc++-v3/include/bits/stl_vector.h @@ -1114,7 +1114,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR size_type size() const _GLIBCXX_NOEXCEPT - { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } + { + ptrdiff_t __dif = this->_M_impl._M_finish - this->_M_impl._M_start; + if (__dif < 0) + __builtin_unreachable (); + return size_type(__dif); + } /** Returns the size() of the largest possible %vector. */ _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR @@ -1201,8 +1206,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER size_type capacity() const _GLIBCXX_NOEXCEPT { - return size_type(this->_M_impl._M_end_of_storage - - this->_M_impl._M_start); + ptrdiff_t __dif = this->_M_impl._M_end_of_storage + - this->_M_impl._M_start; + if (__dif < 0) + __builtin_unreachable (); + return size_type(__dif); } /**
[gcc r15-7163] Optimize vector::operator[]
https://gcc.gnu.org/g:2d55c0161562f96d2230cd132b494a5d06352a23 commit r15-7163-g2d55c0161562f96d2230cd132b494a5d06352a23 Author: Jan Hubicka Date: Thu Jan 23 15:50:50 2025 +0100 Optimize vector::operator[] the following testcase: bool f(const std::vector& v, std::size_t x) { return v[x]; } is compiled as: f(std::vector > const&, unsigned long): testq %rsi, %rsi leaq63(%rsi), %rax movq(%rdi), %rdx cmovns %rsi, %rax sarq$6, %rax leaq(%rdx,%rax,8), %rdx movq%rsi, %rax sarq$63, %rax shrq$58, %rax addq%rax, %rsi andl$63, %esi subq%rax, %rsi jns .L2 addq$64, %rsi subq$8, %rdx .L2: movl$1, %eax shlx%rsi, %rax, %rax andq(%rdx), %rax setne %al ret which is quite expensive for simple bit access in a bitmap. The reason is that the bit access is implemented using iterators return begin()[__n]; Which in turn cares about situation where __n is negative yielding the extra conditional. _GLIBCXX20_CONSTEXPR void _M_incr(ptrdiff_t __i) { _M_assume_normalized(); difference_type __n = __i + _M_offset; _M_p += __n / int(_S_word_bit); __n = __n % int(_S_word_bit); if (__n < 0) { __n += int(_S_word_bit); --_M_p; } _M_offset = static_cast(__n); } While we can use __builtin_unreachable to declare that __n is in range 0...max_size () but I think it is better to implement it directly, since resulting code is shorter and much easier to optimize. We now porduce: .LFB1248: .cfi_startproc movq(%rdi), %rax movq%rsi, %rdx shrq$6, %rdx andq(%rax,%rdx,8), %rsi andl$63, %esi setne %al ret Testcase suggests movq(%rdi), %rax movl%esi, %ecx shrq$5, %rsi# does still need to be 64-bit movl(%rax,%rsi,4), %eax btl %ecx, %eax setb%al retq Which is still one instruction shorter. libstdc++-v3/ChangeLog: PR target/80813 * include/bits/stl_bvector.h (vector::operator []): Do not use iterators. gcc/testsuite/ChangeLog: PR target/80813 * g++.dg/tree-ssa/bvector-3.C: New test. Diff: --- gcc/testsuite/g++.dg/tree-ssa/bvector-3.C | 10 ++ libstdc++-v3/include/bits/stl_bvector.h | 8 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C b/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C new file mode 100644 index ..feae791b20dd --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C @@ -0,0 +1,10 @@ +// { dg-do compile } +// { dg-options "-O2 -fdump-tree-optimized" } +// { dg-skip-if "requires hosted libstdc++ for vector" { ! hostedlib } } + +#include +bool f(const std::vector& v, std::size_t x) { + return v[x]; +} +// All references to src should be optimized out, so there should be no name for it +// { dg-final { scan-tree-dump-not "if \\(" optimized } } diff --git a/libstdc++-v3/include/bits/stl_bvector.h b/libstdc++-v3/include/bits/stl_bvector.h index 0e6cc0d31132..961e4a252996 100644 --- a/libstdc++-v3/include/bits/stl_bvector.h +++ b/libstdc++-v3/include/bits/stl_bvector.h @@ -1132,7 +1132,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER operator[](size_type __n) { __glibcxx_requires_subscript(__n); - return begin()[__n]; + return _Bit_reference (this->_M_impl._M_start._M_p + + __n / int(_S_word_bit), + 1UL << __n % int(_S_word_bit)); } _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR @@ -1140,7 +1142,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER operator[](size_type __n) const { __glibcxx_requires_subscript(__n); - return begin()[__n]; + return _Bit_reference (this->_M_impl._M_start._M_p + + __n / int(_S_word_bit), + 1UL << __n % int(_S_word_bit)); } protected: