------- Comment #2 from adam at consulting dot net dot nz  2010-09-11 11:15 
-------
GCC snapshot has regressed compared to gcc-4.5:

#include <assert.h>
#include <stdint.h>

/* Branch hints: !!(x) normalises x to 0 or 1, and __builtin_expect tells
   the compiler which of those values to expect. */
#define LIKELY(x)   __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)

/* Global register variable: Iptr is bound to the rbp register. */
register uint32_t *Iptr __asm__("rbp");

/* Function pointer type that dec() casts the fetched value to before
   calling it. */
typedef void (*inst_t)(uint64_t types, uint64_t a, uint64_t b);

/* Fallback called by dec() when the low byte of types is not 1.
   Marked noinline so it stays a separate function that dec() must call.
   The parameters are unused. */
__attribute__ ((noinline)) void dec_helper(uint64_t types, uint64_t a, uint64_t
b) {
  /* Placeholder: compares the addresses of two different string literals,
     so the assertion is expected to fail unless NDEBUG is defined. */
  assert("FIXME"=="");
}

/* Decrements a and transfers control to the function whose address is
   stored at Iptr[1], passing (types, a, b) on.  This path is taken, and
   hinted as the likely one, when the low byte of types equals 1;
   otherwise the arguments are forwarded unchanged to dec_helper().
   Both calls are the last action in their branch. */
void dec(uint64_t types, uint64_t a, uint64_t b) {
  if (LIKELY((types & 0xFF) == 1)) {
    /* Read the next 32-bit value before Iptr is advanced. */
    uint32_t next = Iptr[1];
    --a;
    ++Iptr;
    /* Widen the 32-bit value to 64 bits and call it as an inst_t. */
    ((inst_t) (uint64_t) next)(types, a, b);
  } else dec_helper(types, a, b);
}

/* Entry point that does nothing and returns 0; it never calls dec(). */
int main() {
  return 0;
}

$ gcc-4.5 -O3 -std=gnu99 plain-32bit-direct-dispatch.c && objdump -d -m
i386:x86-64:intel a.out|less

0000000000400520 <dec>:
  400520:       40 80 ff 01             cmp    dil,0x1
  400524:       75 0d                   jne    400533 <dec+0x13>
  400526:       8b 45 04                mov    eax,DWORD PTR [rbp+0x4]
  400529:       48 83 ee 01             sub    rsi,0x1
  40052d:       48 83 c5 04             add    rbp,0x4
  400531:       ff e0                   jmp    rax
  400533:       e9 c8 ff ff ff          jmp    400500 <dec_helper>
  400538:       eb 06                   jmp    400540 <main>
  40053a:       90                      nop
  40053b:       90                      nop
  40053c:       90                      nop
  40053d:       90                      nop
  40053e:       90                      nop
  40053f:       90                      nop

The above code generation is fine. Here is what GCC snapshot {gcc (Debian
20100828-1) 4.6.0 20100828 (experimental) [trunk revision 163616]} generates:

$ gcc-snapshot.sh -O3 -std=gnu99 plain-32bit-direct-dispatch.c && objdump -d -m
i386:x86-64:intel a.out|less

0000000000400500 <dec>:
  400500:       48 83 ec 08             sub    rsp,0x8
  400504:       40 80 ff 01             cmp    dil,0x1
  400508:       75 14                   jne    40051e <dec+0x1e>
  40050a:       48 89 e8                mov    rax,rbp
  40050d:       48 83 ee 01             sub    rsi,0x1
  400511:       48 8d 6d 04             lea    rbp,[rbp+0x4]
  400515:       8b 40 04                mov    eax,DWORD PTR [rax+0x4]
  400518:       48 83 c4 08             add    rsp,0x8
  40051c:       ff e0                   jmp    rax
  40051e:       e8 bd ff ff ff          call   4004e0 <dec_helper>
  400523:       eb 0b                   jmp    400530 <main>
  400525:       90                      nop
  400526:       90                      nop
  400527:       90                      nop
  400528:       90                      nop
  400529:       90                      nop
  40052a:       90                      nop
  40052b:       90                      nop
  40052c:       90                      nop
  40052d:       90                      nop
  40052e:       90                      nop
  40052f:       90                      nop

The size of dec has jumped from 32 bytes to 48 bytes (both figures rounded up
to the next 16-byte boundary). The tail call to dec_helper has been missed,
leading to the insertion of stack alignment instructions. The global register
variable RBP is copied into RAX for no reason whatsoever, which defeats the
intent of loading the next instruction before recomputing the instruction
pointer.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44281

Reply via email to