https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80813

Jan Hubicka <hubicka at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
            Summary|x86:                        |[12/13/14/15 Regression]
                   |std::vector<bool>::operator |x86:
                   |[] could be somewhat faster |std::vector<bool>::operator
                   |using BT instead of SHL     |[] could be somewhat faster
                   |                            |using BT instead of SHL
     Ever confirmed|0                           |1
   Last reconfirmed|                            |2024-12-20

--- Comment #2 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
Trunk currently generates:
f(std::vector<bool, std::allocator<bool> > const&, unsigned long):
        testq   %rsi, %rsi
        leaq    63(%rsi), %rax
        movq    (%rdi), %rdx
        cmovns  %rsi, %rax
        sarq    $6, %rax
        leaq    (%rdx,%rax,8), %rdx
        movq    %rsi, %rax
        sarq    $63, %rax
        shrq    $58, %rax
        addq    %rax, %rsi
        andl    $63, %esi
        subq    %rax, %rsi
        jns     .L2
        addq    $64, %rsi
        subq    $8, %rdx
.L2:
        movl    $1, %eax
        shlx    %rsi, %rax, %rax
        andq    (%rdx), %rax
        setne   %al
        ret

Removing basic block 5
bool f (const struct vector & v, size_t x)
{ 
  difference_type __n;
  _Bit_type * const SR.16;
  _Bit_type * _4;
  long int __n.0_5;
  long unsigned int _12;
  long unsigned int _13;
  long unsigned int _14;
  bool _15;
  long int _16;
  long int _20;
  long unsigned int _21;
  long unsigned int _22;
  _Bit_type * _23;
  _Bit_type * _26;
  unsigned int _42;

  <bb 2> [local count: 1073741824]:
  _4 = v_2(D)->D.25666._M_impl.D.25135._M_start.D.16486._M_p;
  __n.0_5 = (long int) x_3(D);
  _20 = __n.0_5 / 64;
  _21 = (long unsigned int) _20;
  _22 = _21 * 8;
  _23 = _4 + _22;
  __n_24 = __n.0_5 % 64;
  if (__n_24 < 0)
    goto <bb 3>; [41.00%]
  else
    goto <bb 4>; [59.00%]

  <bb 3> [local count: 440234144]:
  __n_25 = __n_24 + 64;
  _26 = _23 + 18446744073709551608;

  <bb 4> [local count: 1073741824]:
  # SR.16_41 = PHI <_26(3), _23(2)>
  # _16 = PHI <__n_25(3), __n_24(2)>
  _42 = (unsigned int) _16;
  _12 = 1 << _42;
  _13 = *SR.16_41;
  _14 = _12 & _13;
  _15 = _14 != 0;
  return _15;

} 

This is a regression relative to GCC 7, which produces more reasonable code:
f(std::vector<bool, std::allocator<bool> > const&, unsigned long):
        movq    (%rdi), %rdx
        movq    %rsi, %rcx
        movq    %rsi, %rax
        movl    $1, %esi
        shrq    $6, %rcx
        shlx    %rax, %rsi, %rsi
        andq    (%rdx,%rcx,8), %rsi
        setne   %al
        ret

clang:
f(std::vector<bool, std::allocator<bool>> const&, unsigned long):
        leaq    63(%rsi), %rax
        testq   %rsi, %rsi
        cmovnsq %rsi, %rax
        sarq    $6, %rax
        shlq    $3, %rax
        addq    (%rdi), %rax
        movabsq $-9223372036854775808, %rcx
        leaq    63(%rcx), %rdx
        andq    %rsi, %rdx
        xorl    %edi, %edi
        cmpq    %rcx, %rdx
        setbe   %dil
        movq    -8(%rax,%rdi,8), %rax
        btq     %rsi, %rax
        setb    %al
        retq

clang with libc++:

f(std::__1::vector<bool, std::__1::allocator<bool>> const&, unsigned long):
        movq    (%rdi), %rax
        movq    %rsi, %rcx
        shrq    $6, %rcx
        movq    (%rax,%rcx,8), %rax
        btq     %rsi, %rax
        setb    %al
        retq

Reply via email to