https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117148

            Bug ID: 117148
           Summary: `-Os` results in much larger code than `-O2` for
                    bitfield accesses
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: lh_mouse at 126 dot com
  Target Milestone: ---

This code is complex but the two `atomic_*` routines can be ignored.
(https://godbolt.org/z/G6zsdPsrj)
```
#include <stdbool.h>
#include <stdint.h>

/* Atomically reads one `intptr_t` from `mem` with acquire ordering and
   stores it into `out`.  Both pointers are accessed as `intptr_t`
   regardless of the type of the object they actually point to.

   NOTE(review): declared plain `inline` (neither `static` nor `extern`).
   Under C99-and-later inline semantics this presumably provides no
   external definition, which would match the out-of-line
   `call atomic_load_iptr` in the `-Os` listing -- confirm.  */
inline
void
atomic_load_iptr(void* out, const void* mem)
  {
    /* The cast drops the `const` qualifier; the builtin only reads.  */
    intptr_t ival = __atomic_load_n((intptr_t*) mem, __ATOMIC_ACQUIRE);
    *(intptr_t*) out = ival;
  }

/* Weak compare-and-exchange on the `intptr_t` at `mem`.

   The expected value is read from `cmp` and the desired value from `val`;
   the exchange uses acquire-release ordering on success and acquire
   ordering on failure.  Returns the result of the builtin, i.e. whether
   the exchange took place.  Because the exchange is weak, the caller has
   to retry on a false return.

   `out` receives the desired value (`ival`) on every call, whether or not
   the exchange succeeded.

   NOTE(review): on failure the builtin writes the value it observed into
   `icmp`, but that value is never propagated; `out` gets `ival` instead.
   This looks like it was meant to be `icmp` -- confirm against the
   reporter's intent before relying on it.  */
inline
bool
atomic_cmpxchg_iptr(void* out, void* mem, const void* cmp, const void* val)
  {
    /* Copy both operands into locals first, so `out` may alias `cmp`.  */
    intptr_t icmp = *(intptr_t*) cmp;
    intptr_t ival = *(intptr_t*) val;
    bool r = __atomic_compare_exchange_n((intptr_t*) mem, &icmp, ival, 
                                         1 /* weak */,
                                         __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
    *(intptr_t*) out = ival;
    return r;
  }

/* Mutex state expressed as bit-fields of `uintptr_t`.

   The three widths add up to `__SIZEOF_POINTER__ * 8` bits
   (1 + 8 + the remainder), i.e. one pointer-sized word in total.
   `mutex_try_lock` hands objects of this type to the `atomic_*` helpers,
   which access them as a single `intptr_t`.

   NOTE(review): bit-field allocation order is implementation-defined; the
   quoted x86-64 listings appear to place `locked` in bit 0 and
   `waiter_count` from bit 9 upwards -- confirm for other targets.  */
struct Mutex
  {
    uintptr_t locked : 1;        /* set to 1 by mutex_try_lock */
    uintptr_t reserved : 8;      /* copied through unchanged by mutex_try_lock */
    uintptr_t waiter_count : __SIZEOF_POINTER__ * 8 - 9;  /* all remaining bits */
  };

/* Reads the mutex word at `m`, then repeatedly builds a replacement word
   and tries to install it with `atomic_cmpxchg_iptr` until that call
   returns true.

   The replacement has `locked` set, `reserved` copied from the previous
   snapshot, and `waiter_count` increased by the previous `locked` bit
   (so it grows by one only when the snapshot was already locked).

   Returns true when the `locked` bit of `old` is 0 after the loop.

   NOTE(review): `&old` is passed as both the `out` and `cmp` arguments, so
   `old` is overwritten on every iteration with whatever the helper stores
   through `out`.  The returned value therefore depends on that helper's
   store rather than directly on the word that was in `*m` -- confirm this
   is intended.  */
bool
mutex_try_lock(struct Mutex* m)
  {
    struct Mutex old, new;

    /* Initial snapshot of the mutex word.  */
    atomic_load_iptr(&old, m);
    do {
      /* Every bit-field of `new` is assigned, so the whole word is defined.  */
      new.locked = 1;
      new.reserved = old.reserved;
      new.waiter_count = old.waiter_count + old.locked;
    }
    while(!atomic_cmpxchg_iptr(&old, m, &old, &new));

    return old.locked == 0;
  }
```

GCC 14 with `-O2` (59 bytes in total):
```
mutex_try_lock:
 mov    rax,QWORD PTR [rdi]
 mov    rdx,rax
 mov    rcx,rdx
 mov    rsi,rdx
 and    edx,0x1fe
 shr    rcx,0x9
 and    esi,0x1
 or     rdx,0x1
 add    rcx,rsi
 shl    rcx,0x9
 or     rdx,rcx
 lock cmpxchg QWORD PTR [rdi],rdx
 jne    36 <mutex_try_lock+0x36>
 mov    eax,edx
 not    eax
 and    eax,0x1
 ret
 mov    rax,rdx
 jmp    6 <mutex_try_lock+0x6>
```

GCC 14 with `-Os` (92 bytes in total, which is ~156% of the `-O2` size):
```
mutex_try_lock:
 push   rbx
 mov    rbx,rdi
 mov    rsi,rbx
 sub    rsp,0x10
 lea    rdi,[rsp+0x8]
 call   15 <mutex_try_lock+0x15>
    R_X86_64_PLT32 atomic_load_iptr-0x4
 mov    rax,QWORD PTR [rsp+0x8]
 mov    cl,BYTE PTR [rsp+0x8]
 mov    rdx,rax
 and    ecx,0x1
 shr    rdx,0x9
 add    rdx,rcx
 mov    ecx,DWORD PTR [rsp+0x8]
 shl    rdx,0x9
 shr    cx,1
 movzx  ecx,cl
 add    rcx,rcx
 or     rcx,0x1
 or     rdx,rcx
 lock cmpxchg QWORD PTR [rbx],rdx
 mov    QWORD PTR [rsp+0x8],rdx
 jne    15 <mutex_try_lock+0x15>
 mov    eax,edx
 add    rsp,0x10
 not    eax
 pop    rbx
 and    eax,0x1
 ret
```

Reply via email to