[gcc r15-7163] Optimize vector::operator[]

Jan Hubicka via Libstdc++-cvs Thu, 23 Jan 2025 06:51:54 -0800

https://gcc.gnu.org/g:2d55c0161562f96d2230cd132b494a5d06352a23


commit r15-7163-g2d55c0161562f96d2230cd132b494a5d06352a23
Author: Jan Hubicka <[email protected]>
Date:   Thu Jan 23 15:50:50 2025 +0100

    Optimize vector<bool>::operator[]
    
    the following testcase:
    
      bool f(const std::vector<bool>& v, std::size_t x) {
        return v[x];
      }
    
    is compiled as:
    
    f(std::vector<bool, std::allocator<bool> > const&, unsigned long):
            testq   %rsi, %rsi
            leaq    63(%rsi), %rax
            movq    (%rdi), %rdx
            cmovns  %rsi, %rax
            sarq    $6, %rax
            leaq    (%rdx,%rax,8), %rdx
            movq    %rsi, %rax
            sarq    $63, %rax
            shrq    $58, %rax
            addq    %rax, %rsi
            andl    $63, %esi
            subq    %rax, %rsi
            jns     .L2
            addq    $64, %rsi
            subq    $8, %rdx
    .L2:
            movl    $1, %eax
            shlx    %rsi, %rax, %rax
            andq    (%rdx), %rax
            setne   %al
            ret
    
    which is quite expensive for simple bit access in a bitmap.  The reason is 
that
    the bit access is implemented using iterators
            return begin()[__n];
    Which in turn cares about situation where __n is negative yielding the extra
    conditional.
    
        _GLIBCXX20_CONSTEXPR
        void
        _M_incr(ptrdiff_t __i)
        {
          _M_assume_normalized();
          difference_type __n = __i + _M_offset;
          _M_p += __n / int(_S_word_bit);
          __n = __n % int(_S_word_bit);
          if (__n < 0)
            {
              __n += int(_S_word_bit);
              --_M_p;
            }
          _M_offset = static_cast<unsigned int>(__n);
        }
    
    While we can use __builtin_unreachable to declare that __n is in range
    0...max_size () but I think it is better to implement it directly, since
    resulting code is shorter and much easier to optimize.
    
    We now porduce:
    .LFB1248:
            .cfi_startproc
            movq    (%rdi), %rax
            movq    %rsi, %rdx
            shrq    $6, %rdx
            andq    (%rax,%rdx,8), %rsi
            andl    $63, %esi
            setne   %al
            ret
    
    Testcase suggests
            movq    (%rdi), %rax
            movl    %esi, %ecx
            shrq    $5, %rsi        # does still need to be 64-bit
            movl    (%rax,%rsi,4), %eax
            btl     %ecx, %eax
            setb    %al
            retq
    Which is still one instruction shorter.
    
    libstdc++-v3/ChangeLog:
    
            PR target/80813
            * include/bits/stl_bvector.h (vector<bool, _Alloc>::operator []): Do
            not use iterators.
    
    gcc/testsuite/ChangeLog:
    
            PR target/80813
            * g++.dg/tree-ssa/bvector-3.C: New test.

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/bvector-3.C | 10 ++++++++++
 libstdc++-v3/include/bits/stl_bvector.h   |  8 ++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C 
b/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C
new file mode 100644
index 000000000000..feae791b20dd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C
@@ -0,0 +1,10 @@
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-optimized"  }
+// { dg-skip-if "requires hosted libstdc++ for vector" { ! hostedlib } }
+
+#include <vector>
+bool f(const std::vector<bool>& v, std::size_t x) {
+           return v[x];
+}
+// All references to src should be optimized out, so there should be no name 
for it
+// { dg-final { scan-tree-dump-not "if \\("  optimized } }
diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
b/libstdc++-v3/include/bits/stl_bvector.h
index 0e6cc0d31132..961e4a252996 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -1132,7 +1132,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
       operator[](size_type __n)
       {
        __glibcxx_requires_subscript(__n);
-       return begin()[__n];
+       return _Bit_reference (this->_M_impl._M_start._M_p
+                              + __n / int(_S_word_bit),
+                              1UL << __n % int(_S_word_bit));
       }
 
       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
@@ -1140,7 +1142,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
       operator[](size_type __n) const
       {
        __glibcxx_requires_subscript(__n);
-       return begin()[__n];
+       return _Bit_reference (this->_M_impl._M_start._M_p
+                              + __n / int(_S_word_bit),
+                              1UL << __n % int(_S_word_bit));
       }
 
     protected:

[gcc r15-7163] Optimize vector::operator[]

Reply via email to