[gcc r15-5735] optimize basic_string

2024-11-27 Thread Jan Hubicka via Libstdc++-cvs
https://gcc.gnu.org/g:1046c32de4956c3d706a2ff8683582fd21b8f360

commit r15-5735-g1046c32de4956c3d706a2ff8683582fd21b8f360
Author: Jan Hubicka 
Date:   Wed Nov 27 23:52:37 2024 +0100

optimize basic_string

Add __builtin_unreachable conditionls to declare value ranges of
basic_string::length().  FIx max_size() to return actual max size
using logic similar to std::vector. Aviod use of size() in empty()
to save some compile time overhead.

As disucced, max_size() change is technically ABI breaking, but
hopefully this does not really matter in practice.

Change of length() breaks empty-loop testcase where we now optimize the
loop only after inlining, so template is updated to check cddce3 instead
of cddce2. This is PR117764.

With these chages we now optimize out unused strings as tested in
string-1.C

libstdc++-v3/ChangeLog:

* include/bits/basic_string.h (basic_string::size(),
basic_string::length(), basic_string::capacity()): Add
__builtin_unreachable to declare value ranges.
(basic_string::empty()): Implement directly
(basic_string::max_size()): Account correctly the terminating 0
and limits implied by ptrdiff_t.

gcc/testsuite/ChangeLog:

* g++.dg/tree-ssa/empty-loop.C: xfail optimization at cddce2 and 
check
it happens at cddce3.
* g++.dg/tree-ssa/string-1.C: New test.

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/empty-loop.C |  5 -
 gcc/testsuite/g++.dg/tree-ssa/string-1.C   |  9 +
 libstdc++-v3/include/bits/basic_string.h   | 25 +++--
 3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C 
b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
index ed4a603bf5b3..b7e7e27cc042 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
@@ -30,5 +30,8 @@ int foo (vector &v, list &l, set &s, 
map &m
 
   return 0;
 }
-/* { dg-final { scan-tree-dump-not "if" "cddce2"} } */
+/* Adding __builtin_unreachable to std::string::size() prevents cddce2 from
+   eliminating the loop early, see PR117764.  */
+/* { dg-final { scan-tree-dump-not "if" "cddce2" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-not "if" "cddce3"} } */
 
diff --git a/gcc/testsuite/g++.dg/tree-ssa/string-1.C 
b/gcc/testsuite/g++.dg/tree-ssa/string-1.C
new file mode 100644
index ..d38c23a7628b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/string-1.C
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -std=c++20 -fdump-tree-optimized" } */
+#include 
+std::string
+test (std::string &a)
+{
+   return a;
+}
+/* { dg-final { scan-tree-dump-not "throw" "optimized" } } */
diff --git a/libstdc++-v3/include/bits/basic_string.h 
b/libstdc++-v3/include/bits/basic_string.h
index f5b320099b17..17b973c8b45c 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -1079,20 +1079,30 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   size_type
   size() const _GLIBCXX_NOEXCEPT
-  { return _M_string_length; }
+  {
+   size_type __sz = _M_string_length;
+   if (__sz > max_size ())
+ __builtin_unreachable ();
+   return __sz;
+  }
 
   ///  Returns the number of characters in the string, not including any
   ///  null-termination.
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   size_type
   length() const _GLIBCXX_NOEXCEPT
-  { return _M_string_length; }
+  { return size(); }
 
   ///  Returns the size() of the largest possible %string.
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   size_type
   max_size() const _GLIBCXX_NOEXCEPT
-  { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; }
+  {
+   const size_t __diffmax
+ = __gnu_cxx::__numeric_traits::__max / sizeof(_CharT);
+   const size_t __allocmax = _Alloc_traits::max_size(_M_get_allocator());
+   return (std::min)(__diffmax, __allocmax) - 1;
+  }
 
   /**
*  @brief  Resizes the %string to the specified number of characters.
@@ -1184,8 +1194,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   size_type
   capacity() const _GLIBCXX_NOEXCEPT
   {
-   return _M_is_local() ? size_type(_S_local_capacity)
-: _M_allocated_capacity;
+   size_t __sz = _M_is_local() ? size_type(_S_local_capacity)
+: _M_allocated_capacity;
+   if (__sz < _S_local_capacity || __sz > max_size ())
+ __builtin_unreachable ();
+   return __sz;
   }
 
   /**
@@ -1234,7 +1247,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   bool
   empty() const _GLIBCXX_NOEXCEPT
-  { return this->size() == 0; }
+  { return _M_string_length == 0; }
 
  

[gcc r15-5361] Add __builtion_unreachable to vector::size(), vector::capacity()

2024-11-16 Thread Jan Hubicka via Libstdc++-cvs
https://gcc.gnu.org/g:aac5c57ee167230cea466064951daf06e42197b9

commit r15-5361-gaac5c57ee167230cea466064951daf06e42197b9
Author: Jan Hubicka 
Date:   Sun Nov 17 01:21:04 2024 +0100

Add __builtion_unreachable to vector::size(), vector::capacity()

This patch makes it clear that vector sizes and capacities are not
negative.  With recent change to ipa-fnsummary this should not affect
inlining and improves codegen of some vector manipulation functions.

I tested clang build.  Looking for throw_bad calls there are only 3
called considerably often (bad_allloc, bad_array_new_length and
function_callv).
The patch seems to reduce bad_alloc and bad_array_new_length calls
considerably:

bad_alloc 380->147
bad_array_new_length 832->128

libstdc++-v3/ChangeLog:

PR tree-optimization/109442
* include/bits/stl_vector.h: (vector::size(),
vector::capacity()): Add __builtin_unreachable call to announce
that size and capacity are non-negative.

gcc/testsuite/ChangeLog:

PR tree-optimization/109442
* g++.dg/tree-ssa/pr109442.C: New test.

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/pr109442.C |  2 +-
 libstdc++-v3/include/bits/stl_vector.h   | 14 +++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr109442.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr109442.C
index ec40c470c8dd..ea5800aa1340 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/pr109442.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr109442.C
@@ -1,5 +1,5 @@
 // { dg-do compile { target c++11 } }
-// { dg-options "-O1 -fdump-tree-optimized" }
+// { dg-options "-O2 -fdump-tree-optimized" }
 #include 
 #define T int
 T vat1(std::vector v1) {
diff --git a/libstdc++-v3/include/bits/stl_vector.h 
b/libstdc++-v3/include/bits/stl_vector.h
index df48ba3377fa..3ece16b2facd 100644
--- a/libstdc++-v3/include/bits/stl_vector.h
+++ b/libstdc++-v3/include/bits/stl_vector.h
@@ -1114,7 +1114,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
   size_type
   size() const _GLIBCXX_NOEXCEPT
-  { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); }
+  {
+   ptrdiff_t __dif = this->_M_impl._M_finish - this->_M_impl._M_start;
+   if (__dif < 0)
+  __builtin_unreachable ();
+   return size_type(__dif);
+  }
 
   /**  Returns the size() of the largest possible %vector.  */
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
@@ -1201,8 +1206,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   size_type
   capacity() const _GLIBCXX_NOEXCEPT
   {
-   return size_type(this->_M_impl._M_end_of_storage
-  - this->_M_impl._M_start);
+   ptrdiff_t __dif = this->_M_impl._M_end_of_storage
+ - this->_M_impl._M_start;
+   if (__dif < 0)
+  __builtin_unreachable ();
+   return size_type(__dif);
   }
 
   /**


[gcc r15-7163] Optimize vector::operator[]

2025-01-23 Thread Jan Hubicka via Libstdc++-cvs
https://gcc.gnu.org/g:2d55c0161562f96d2230cd132b494a5d06352a23

commit r15-7163-g2d55c0161562f96d2230cd132b494a5d06352a23
Author: Jan Hubicka 
Date:   Thu Jan 23 15:50:50 2025 +0100

Optimize vector::operator[]

the following testcase:

  bool f(const std::vector& v, std::size_t x) {
return v[x];
  }

is compiled as:

f(std::vector > const&, unsigned long):
testq   %rsi, %rsi
leaq63(%rsi), %rax
movq(%rdi), %rdx
cmovns  %rsi, %rax
sarq$6, %rax
leaq(%rdx,%rax,8), %rdx
movq%rsi, %rax
sarq$63, %rax
shrq$58, %rax
addq%rax, %rsi
andl$63, %esi
subq%rax, %rsi
jns .L2
addq$64, %rsi
subq$8, %rdx
.L2:
movl$1, %eax
shlx%rsi, %rax, %rax
andq(%rdx), %rax
setne   %al
ret

which is quite expensive for simple bit access in a bitmap.  The reason is 
that
the bit access is implemented using iterators
return begin()[__n];
Which in turn cares about situation where __n is negative yielding the extra
conditional.

_GLIBCXX20_CONSTEXPR
void
_M_incr(ptrdiff_t __i)
{
  _M_assume_normalized();
  difference_type __n = __i + _M_offset;
  _M_p += __n / int(_S_word_bit);
  __n = __n % int(_S_word_bit);
  if (__n < 0)
{
  __n += int(_S_word_bit);
  --_M_p;
}
  _M_offset = static_cast(__n);
}

While we can use __builtin_unreachable to declare that __n is in range
0...max_size () but I think it is better to implement it directly, since
resulting code is shorter and much easier to optimize.

We now porduce:
.LFB1248:
.cfi_startproc
movq(%rdi), %rax
movq%rsi, %rdx
shrq$6, %rdx
andq(%rax,%rdx,8), %rsi
andl$63, %esi
setne   %al
ret

Testcase suggests
movq(%rdi), %rax
movl%esi, %ecx
shrq$5, %rsi# does still need to be 64-bit
movl(%rax,%rsi,4), %eax
btl %ecx, %eax
setb%al
retq
Which is still one instruction shorter.

libstdc++-v3/ChangeLog:

PR target/80813
* include/bits/stl_bvector.h (vector::operator []): Do
not use iterators.

gcc/testsuite/ChangeLog:

PR target/80813
* g++.dg/tree-ssa/bvector-3.C: New test.

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/bvector-3.C | 10 ++
 libstdc++-v3/include/bits/stl_bvector.h   |  8 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C 
b/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C
new file mode 100644
index ..feae791b20dd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C
@@ -0,0 +1,10 @@
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-optimized"  }
+// { dg-skip-if "requires hosted libstdc++ for vector" { ! hostedlib } }
+
+#include 
+bool f(const std::vector& v, std::size_t x) {
+   return v[x];
+}
+// All references to src should be optimized out, so there should be no name 
for it
+// { dg-final { scan-tree-dump-not "if \\("  optimized } }
diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
b/libstdc++-v3/include/bits/stl_bvector.h
index 0e6cc0d31132..961e4a252996 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -1132,7 +1132,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   operator[](size_type __n)
   {
__glibcxx_requires_subscript(__n);
-   return begin()[__n];
+   return _Bit_reference (this->_M_impl._M_start._M_p
+  + __n / int(_S_word_bit),
+  1UL << __n % int(_S_word_bit));
   }
 
   _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
@@ -1140,7 +1142,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   operator[](size_type __n) const
   {
__glibcxx_requires_subscript(__n);
-   return begin()[__n];
+   return _Bit_reference (this->_M_impl._M_start._M_p
+  + __n / int(_S_word_bit),
+  1UL << __n % int(_S_word_bit));
   }
 
 protected: