/* File-local slots holding the return addresses recorded by a0() and a1(). */
static void *a0ra, *a1ra;
/*
 * Stores its own return address (__builtin_return_address(0)) in a0ra.
 * Marked noinline so that the compiler does not inline it into the caller.
 * The empty parameter list "()" declares the function without a prototype.
 */
void __attribute__((noinline)) a0() { a0ra = __builtin_return_address(0); }
/*
 * Stores its own return address (__builtin_return_address(0)) in a1ra.
 * Marked noinline so that the compiler does not inline it into the caller.
 * The empty parameter list "()" declares the function without a prototype.
 */
void __attribute__((noinline)) a1() { a1ra = __builtin_return_address(0); }
/*
 * Calls a0() and then a1(), and returns the difference between the two
 * return addresses they recorded, i.e. the distance between the instruction
 * following the first call and the instruction following the second call.
 * Subtracting two void pointers relies on the GNU C extension that treats
 * sizeof(void) as 1; the result is narrowed to int on return.
 *
 * NOTE(review): a0 and a1 are declared without prototypes. On x86-64 this is
 * presumably why the caller zeroes %eax before each call (the System V AMD64
 * ABI passes the number of vector registers used in %al when the callee may
 * be variadic), which would enlarge the measured distance -- confirm against
 * the ABI document.
 */
int foo() { a0(); a1(); return a1ra - a0ra; }
/*
 * Prints the distance computed by foo() as "pd=<n>" and exits with status 0.
 * NOTE(review): no #include <stdio.h> is visible in this excerpt, so printf
 * appears to be used without a declaration -- confirm against the full file.
 */
int main()
{
    printf("pd=%d\n", foo());
    return 0;
}
$ gcc -O3 --save-temps call.c && ./a.out

In theory, this code should return, on various platforms,
the size of the call instruction (call / bl [mem] opcode).

It works fine on ix86 (pd=5) and ppc (pd=4), but doesn't work on amd64 (pd=7 !!!).
In the assembler dump I see a redundant `xor eax,eax`.

a1:     movq    (%rsp), %rax
        movq    %rax, a1ra(%rip)
        ret
a0:     movq    (%rsp), %rax
        movq    %rax, a0ra(%rip)
        ret
foo:    xorl    %eax, %eax       <== what for?
        call    a0
        xorl    %eax, %eax       <== what for?
        call    a1
        movq    a1ra(%rip), %rax
        subl    a0ra(%rip), %eax
        ret


-- 
           Summary: redundant opcodes before function call.
           Product: gcc
           Version: 4.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: other
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: pluto at agmk dot net
 GCC build triplet: x86-64
  GCC host triplet: x86-64
GCC target triplet: x86-64


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25345

Reply via email to