https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117148
Bug ID: 117148
Summary: `-Os` results in much larger code than `-O2` about bitfields
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: lh_mouse at 126 dot com
Target Milestone: ---

This code is complex but the two `atomic_*` routines can be ignored. (https://godbolt.org/z/G6zsdPsrj)

```
#include <stdbool.h>
#include <stdint.h>

inline void atomic_load_iptr(void* out, const void* mem)
{
    intptr_t ival = __atomic_load_n((intptr_t*) mem, __ATOMIC_ACQUIRE);
    *(intptr_t*) out = ival;
}

inline bool atomic_cmpxchg_iptr(void* out, void* mem, const void* cmp, const void* val)
{
    intptr_t icmp = *(intptr_t*) cmp;
    intptr_t ival = *(intptr_t*) val;
    bool r = __atomic_compare_exchange_n((intptr_t*) mem, &icmp, ival, 1 /* weak */, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
    *(intptr_t*) out = ival;
    return r;
}

struct Mutex
{
    uintptr_t locked : 1;
    uintptr_t reserved : 8;
    uintptr_t waiter_count : __SIZEOF_POINTER__ * 8 - 9;
};

bool mutex_try_lock(struct Mutex* m)
{
    struct Mutex old, new;
    atomic_load_iptr(&old, m);
    do {
        new.locked = 1;
        new.reserved = old.reserved;
        new.waiter_count = old.waiter_count + old.locked;
    } while(!atomic_cmpxchg_iptr(&old, m, &old, &new));
    return old.locked == 0;
}
```

GCC 14 with `-O2` (59 bytes in total):

```
mutex_try_lock:
    mov rax,QWORD PTR [rdi]
    mov rdx,rax
    mov rcx,rdx
    mov rsi,rdx
    and edx,0x1fe
    shr rcx,0x9
    and esi,0x1
    or rdx,0x1
    add rcx,rsi
    shl rcx,0x9
    or rdx,rcx
    lock cmpxchg QWORD PTR [rdi],rdx
    jne 36 <mutex_try_lock+0x36>
    mov eax,edx
    not eax
    and eax,0x1
    ret
    mov rax,rdx
    jmp 6 <mutex_try_lock+0x6>
```

GCC 14 with `-Os` (92 bytes in total, which is ~156% of the `-O2` size):

```
mutex_try_lock:
    push rbx
    mov rbx,rdi
    mov rsi,rbx
    sub rsp,0x10
    lea rdi,[rsp+0x8]
    call 15 <mutex_try_lock+0x15>
        R_X86_64_PLT32 atomic_load_iptr-0x4
    mov rax,QWORD PTR [rsp+0x8]
    mov cl,BYTE PTR [rsp+0x8]
    mov rdx,rax
    and ecx,0x1
    shr rdx,0x9
    add rdx,rcx
    mov ecx,DWORD PTR [rsp+0x8]
    shl rdx,0x9
    shr cx,1
    movzx ecx,cl
    add rcx,rcx
    or rcx,0x1
    or rdx,rcx
    lock cmpxchg QWORD PTR [rbx],rdx
    mov QWORD PTR [rsp+0x8],rdx
    jne 15 <mutex_try_lock+0x15>
    mov eax,edx
    add rsp,0x10
    not eax
    pop rbx
    and eax,0x1
    ret
```