https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117148
Bug ID: 117148
Summary: `-Os` results in much larger code than `-O2` about
bitfields
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: lh_mouse at 126 dot com
Target Milestone: ---
This code is complex but the two `atomic_*` routines can be ignored.
(https://godbolt.org/z/G6zsdPsrj)
```
#include <stdbool.h>
#include <stdint.h>
inline
void
atomic_load_iptr(void* out, const void* mem)
{
intptr_t ival = __atomic_load_n((intptr_t*) mem, __ATOMIC_ACQUIRE);
*(intptr_t*) out = ival;
}
inline
bool
atomic_cmpxchg_iptr(void* out, void* mem, const void* cmp, const void* val)
{
intptr_t icmp = *(intptr_t*) cmp;
intptr_t ival = *(intptr_t*) val;
bool r = __atomic_compare_exchange_n((intptr_t*) mem, &icmp, ival,
1 /* weak */,
__ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
*(intptr_t*) out = ival;
return r;
}
struct Mutex
{
uintptr_t locked : 1;
uintptr_t reserved : 8;
uintptr_t waiter_count : __SIZEOF_POINTER__ * 8 - 9;
};
bool
mutex_try_lock(struct Mutex* m)
{
struct Mutex old, new;
atomic_load_iptr(&old, m);
do {
new.locked = 1;
new.reserved = old.reserved;
new.waiter_count = old.waiter_count + old.locked;
}
while(!atomic_cmpxchg_iptr(&old, m, &old, &new));
return old.locked == 0;
}
```
GCC 14 with `-O2` (59 bytes in total):
```
mutex_try_lock:
mov rax,QWORD PTR [rdi]
mov rdx,rax
mov rcx,rdx
mov rsi,rdx
and edx,0x1fe
shr rcx,0x9
and esi,0x1
or rdx,0x1
add rcx,rsi
shl rcx,0x9
or rdx,rcx
lock cmpxchg QWORD PTR [rdi],rdx
jne 36 <mutex_try_lock+0x36>
mov eax,edx
not eax
and eax,0x1
ret
mov rax,rdx
jmp 6 <mutex_try_lock+0x6>
```
GCC 14 with `-Os` (92 bytes in total, which is ~156%):
```
mutex_try_lock:
push rbx
mov rbx,rdi
mov rsi,rbx
sub rsp,0x10
lea rdi,[rsp+0x8]
call 15 <mutex_try_lock+0x15>
R_X86_64_PLT32 atomic_load_iptr-0x4
mov rax,QWORD PTR [rsp+0x8]
mov cl,BYTE PTR [rsp+0x8]
mov rdx,rax
and ecx,0x1
shr rdx,0x9
add rdx,rcx
mov ecx,DWORD PTR [rsp+0x8]
shl rdx,0x9
shr cx,1
movzx ecx,cl
add rcx,rcx
or rcx,0x1
or rdx,rcx
lock cmpxchg QWORD PTR [rbx],rdx
mov QWORD PTR [rsp+0x8],rdx
jne 15 <mutex_try_lock+0x15>
mov eax,edx
add rsp,0x10
not eax
pop rbx
and eax,0x1
ret
```