/*
 * Reproducer: report the byte distance between the return addresses of two
 * back-to-back function calls.
 *
 *   a0ra, a1ra  file-scope pointers that receive the return addresses.
 *   a0(), a1()  noinline functions; each stores its own return address
 *               (__builtin_return_address(0)) into a0ra / a1ra respectively.
 *   foo()       calls a0() and then a1(), and returns a1ra - a0ra as an int.
 *   main()      prints foo()'s result as "pd=<value>" and returns 0.
 *
 * Because a0() and a1() are called consecutively, the difference covers
 * whatever the compiler emits between the two return points, i.e. the call
 * to a1() plus any instructions placed in front of it.
 *
 * NOTE(review): a1ra - a0ra subtracts two `void *` values. That relies on the
 * GNU C extension for arithmetic on void pointers, and the result is narrowed
 * to int by the return type -- confirm this is acceptable for the test.
 *
 * NOTE(review): a0, a1 and foo are defined with empty parentheses rather than
 * (void). Presumably GCC treats the calls as calls to unprototyped functions;
 * on x86-64 the System V ABI wants %al set before unprototyped/variadic calls,
 * which would add an instruction before each call -- TODO confirm by
 * rebuilding with (void) parameter lists.
 *
 * NOTE(review): printf is used, but no #include <stdio.h> is visible in this
 * snippet.
 *
 * The text starting at "$ gcc" at the end of the line below is not C; it is
 * the shell command used to build and run the test.
 */
static void *a0ra, *a1ra; void __attribute__((noinline)) a0() { a0ra = __builtin_return_address(0); } void __attribute__((noinline)) a1() { a1ra = __builtin_return_address(0); } int foo() { a0(); a1(); return a1ra - a0ra; } int main() { printf("pd=%d\n", foo()); return 0; } $ gcc -O3 --save-temps call.c && ./a.out
In theory, this code should return sizeof(call/bl [mem] opcode) on the various platforms. It works fine on ix86 (pd=5) and ppc (pd=4), but doesn't work on amd64 (pd=7 !!!). In the assembler dump I see a redundant `xor eax,eax`:

a1:
        movq    (%rsp), %rax
        movq    %rax, a1ra(%rip)
        ret
a0:
        movq    (%rsp), %rax
        movq    %rax, a0ra(%rip)
        ret
foo:
        xorl    %eax, %eax          <== what for?
        call    a0
        xorl    %eax, %eax          <== what for?
        call    a1
        movq    a1ra(%rip), %rax
        subl    a0ra(%rip), %eax
        ret

--
Summary: redundant opcodes before function call.
Product: gcc
Version: 4.1.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: other
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: pluto at agmk dot net
GCC build triplet: x86-64
GCC host triplet: x86-64
GCC target triplet: x86-64

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25345