https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49244
--- Comment #13 from dhowells at redhat dot com <dhowells at redhat dot com> ---
Very nice :-)  There are a couple of under-optimisations left, though.

Firstly:

#define BITS_PER_LONG (sizeof(long) * 8)
#define _BITOPS_LONG_SHIFT 6

static __always_inline bool test_and_change_bit(long bit,
                                                volatile unsigned long *ptr)
{
        unsigned long mask = 1UL << (bit & (BITS_PER_LONG - 1));
        unsigned long old;

        ptr += bit >> _BITOPS_LONG_SHIFT;
        old = __atomic_fetch_xor(ptr, mask, __ATOMIC_SEQ_CST);
        return old & mask;
}

bool change_bit_3(unsigned long *p, long n)
{
        return test_and_change_bit(n, p);
}

is compiled to:

0000000000000048 <change_bit_3>:
  48:   48 89 f0                mov    %rsi,%rax
  4b:   83 e6 3f                and    $0x3f,%esi
  4e:   48 c1 f8 06             sar    $0x6,%rax
  52:   f0 48 0f bb 34 c7       lock btc %rsi,(%rdi,%rax,8)
  58:   0f 92 c0                setb   %al
  5b:   c3                      retq

on x86.  The three instructions at 48-4e are redundant, as the BTC instruction
with a memory operand will do that decomposition of the bit offset for you.
I don't know whether it's more efficient this way or not, though.

Secondly:

static __always_inline bool test_bit(long bit, const unsigned long *ptr)
{
        unsigned long mask = 1UL << (bit & (BITS_PER_LONG - 1));
        unsigned long old;

        ptr += bit >> _BITOPS_LONG_SHIFT;
        old = __atomic_load_n(ptr, __ATOMIC_RELAXED);
        return old & mask;
}

bool read_bit(unsigned long *p)
{
        return test_bit(3, p);
}

is compiled to:

0000000000000000 <read_bit>:
   0:   48 8b 07                mov    (%rdi),%rax
   3:   48 c1 e8 03             shr    $0x3,%rax
   7:   83 e0 01                and    $0x1,%eax
   a:   c3                      retq

but could actually be either a TEST instruction or a BT instruction.

Still, thanks very much for looking at this!
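
For reference, here is a minimal hand-written sketch of the shorter sequence the
first example is hoping for (this is illustrative only and not part of the test
case above; the name test_and_change_bit_btc and the inline asm are mine).  It
relies on BTC's memory-operand form, where the register bit offset selects both
the word and the bit within it, so the explicit shift/mask is unnecessary; the
"memory" clobber is the conservative way of telling GCC that a word other than
*ptr may be touched:

#include <stdbool.h>

static inline bool test_and_change_bit_btc(long bit, volatile unsigned long *ptr)
{
        bool old;

        /* BTC with a memory operand picks out both the word and the bit
         * within it from the register bit offset itself; CF gets the old
         * bit value, which SETC copies out. */
        asm volatile("lock btcq %2, %1\n\t"
                     "setc %0"
                     : "=q" (old), "+m" (*ptr)
                     : "r" (bit)
                     : "cc", "memory");
        return old;
}

And for the read_bit() case, since the bit number is a compile-time constant,
one might hope for something along these lines (hypothetical, not actual
compiler output):

        testb  $0x8,(%rdi)      # mask for bit 3
        setne  %al
        retq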