http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541
Steven Bosscher <steven at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|                            |FIXED

--- Comment #6 from Steven Bosscher <steven at gcc dot gnu.org> 2011-05-22 15:21:05 UTC ---
(In reply to comment #2)
> Here is a simplified test case that shows a code quality regression:
>
> extern unsigned char first_one[65536];
> int FirstOnet(unsigned long long arg1)
> {
>   if (arg1 >> 48)
>     return (first_one[arg1 >> 48]);
>   return 0;
> }
>
> the code generated by gcc-3.0 -O2 -fomit-frame-pointer is:
>
>         movl    8(%esp), %edx
>         movl    %edx, %eax
>         shrl    $16, %eax
>         xorl    %edx, %edx
>         movl    %eax, %ecx
>         orl     %edx, %ecx
>         je      .L3
>         movzbl  first_one(%eax), %eax
>         ret
>         .p2align 2
> .L3:
>         xorl    %eax, %eax
>         ret
>
> and by mainline (a bit worse):
>
>         pushl   %ebx             <- using a callee saved register
>         xorl    %ecx, %ecx
>         movl    12(%esp), %edx
>         movl    %edx, %eax       <- why not load directly to eax?
>         xorl    %edx, %edx
>         shrl    $16, %eax
>         movl    %edx, %ebx
>         orl     %eax, %ebx
>         je      .L4
>         movzbl  first_one(%eax), %ecx
> .L4:
>         popl    %ebx
>         movl    %ecx, %eax
>         ret
>
>
> Here is what Intel's compiler generates:
>
>         movzwl    10(%esp), %edx                #28.5
>         xorl      %eax, %eax                    #30.3
>         orl       %edx, %eax                    #30.3
>         je        ..B1.3        # Prob 50%      #30.3
>                                 # LOE edx ebx ebp esi edi
> ..B1.2:                         # Preds ..B1.1
>         movzbl    first_one(%edx), %eax         #31.13
>         ret                                     #31.13
>                                 # LOE
> ..B1.3:                         # Preds ..B1.1
>         xorl      %eax, %eax                    #32.10
>         ret                                     #32.10
>
>
>

$ cc1 -quiet -m32 -O2 t.c -fdump-tree-optimized
$ cat t.s
        .file   "t.c"
        .text
        .p2align 4,,15
        .globl  FirstOnet
        .type   FirstOnet, @function
FirstOnet:
.LFB0:
        .cfi_startproc
        movzwl  10(%esp), %edx
        xorl    %eax, %eax
        testl   %edx, %edx
        je      .L2
        movzbl  first_one(%edx), %eax
.L2:
        rep ret
        .cfi_endproc
.LFE0:
        .size   FirstOnet, .-FirstOnet
        .ident  "GCC: (GNU) 4.6.0 20110312 (experimental) [trunk revision 170907]"
        .section        .note.GNU-stack,"",@progbits