------- Comment #2 from adam at consulting dot net dot nz 2010-09-11 11:15 ------- GCC snapshot has regressed compared to gcc-4.5:
#include <assert.h>
#include <stdint.h>

#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)

register uint32_t *Iptr __asm__("rbp");

typedef void (*inst_t)(uint64_t types, uint64_t a, uint64_t b);

__attribute__ ((noinline)) void dec_helper(uint64_t types, uint64_t a, uint64_t b) {
  assert("FIXME"=="");
}

void dec(uint64_t types, uint64_t a, uint64_t b) {
  if (LIKELY((types & 0xFF) == 1)) {
    uint32_t next = Iptr[1];
    --a;
    ++Iptr;
    ((inst_t) (uint64_t) next)(types, a, b);
  } else dec_helper(types, a, b);
}

int main() {
  return 0;
}

$ gcc-4.5 -O3 -std=gnu99 plain-32bit-direct-dispatch.c && objdump -d -m i386:x86-64:intel a.out|less

0000000000400520 <dec>:
  400520: 40 80 ff 01 cmp dil,0x1
  400524: 75 0d jne 400533 <dec+0x13>
  400526: 8b 45 04 mov eax,DWORD PTR [rbp+0x4]
  400529: 48 83 ee 01 sub rsi,0x1
  40052d: 48 83 c5 04 add rbp,0x4
  400531: ff e0 jmp rax
  400533: e9 c8 ff ff ff jmp 400500 <dec_helper>
  400538: eb 06 jmp 400540 <main>
  40053a: 90 nop
  40053b: 90 nop
  40053c: 90 nop
  40053d: 90 nop
  40053e: 90 nop
  40053f: 90 nop

The above code generation is fine. Here is what GCC snapshot {gcc (Debian 20100828-1) 4.6.0 20100828 (experimental) [trunk revision 163616]} generates:

$ gcc-snapshot.sh -O3 -std=gnu99 plain-32bit-direct-dispatch.c && objdump -d -m i386:x86-64:intel a.out|less

0000000000400500 <dec>:
  400500: 48 83 ec 08 sub rsp,0x8
  400504: 40 80 ff 01 cmp dil,0x1
  400508: 75 14 jne 40051e <dec+0x1e>
  40050a: 48 89 e8 mov rax,rbp
  40050d: 48 83 ee 01 sub rsi,0x1
  400511: 48 8d 6d 04 lea rbp,[rbp+0x4]
  400515: 8b 40 04 mov eax,DWORD PTR [rax+0x4]
  400518: 48 83 c4 08 add rsp,0x8
  40051c: ff e0 jmp rax
  40051e: e8 bd ff ff ff call 4004e0 <dec_helper>
  400523: eb 0b jmp 400530 <main>
  400525: 90 nop
  400526: 90 nop
  400527: 90 nop
  400528: 90 nop
  400529: 90 nop
  40052a: 90 nop
  40052b: 90 nop
  40052c: 90 nop
  40052d: 90 nop
  40052e: 90 nop
  40052f: 90 nop

Function size, including alignment padding, has grown from 32 bytes to 48 bytes.
The tail call to dec_helper has been missed (it is now emitted as a `call` rather than a `jmp`), leading to the insertion of stack alignment instructions. The global register variable RBP is copied into RAX for no reason whatsoever, which defeats the intent of loading the next instruction before recomputing the instruction pointer. -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44281