http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541

Steven Bosscher <steven at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|                            |FIXED

--- Comment #6 from Steven Bosscher <steven at gcc dot gnu.org> 2011-05-22 15:21:05 UTC ---
(In reply to comment #2)
> Here is a simplified test case that shows a code quality regression:
> 
> extern unsigned char first_one[65536];
> int FirstOnet(unsigned long long arg1)
> {
>   if (arg1 >> 48)
>     return (first_one[arg1 >> 48]);
>   return 0;
> }
> 
> the code generated by gcc-3.0 -O2 -fomit-frame-pointer is:
> 
>         movl    8(%esp), %edx
>         movl    %edx, %eax
>         shrl    $16, %eax
>         xorl    %edx, %edx
>         movl    %eax, %ecx
>         orl     %edx, %ecx
>         je      .L3
>         movzbl  first_one(%eax), %eax
>         ret
>         .p2align 2
> .L3:
>         xorl    %eax, %eax
>         ret
> 
> and by mainline (a bit worse): 
> 
>         pushl   %ebx               <- using a callee saved register
>         xorl    %ecx, %ecx
>         movl    12(%esp), %edx
>         movl    %edx, %eax         <- why not load directly to eax?
>         xorl    %edx, %edx
>         shrl    $16, %eax
>         movl    %edx, %ebx
>         orl     %eax, %ebx
>         je      .L4
>         movzbl  first_one(%eax), %ecx
> .L4:
>         popl    %ebx
>         movl    %ecx, %eax
>         ret
> 
> 
> Here is what Intel's compiler generates:
> 
>         movzwl    10(%esp), %edx                                #28.5
>         xorl      %eax, %eax                                    #30.3
>         orl       %edx, %eax                                    #30.3
>         je        ..B1.3        # Prob 50%                      #30.3
>                                 # LOE edx ebx ebp esi edi
> ..B1.2:                         # Preds ..B1.1
>         movzbl    first_one(%edx), %eax                         #31.13
>         ret                                                     #31.13
>                                 # LOE
> ..B1.3:                         # Preds ..B1.1
>         xorl      %eax, %eax                                    #32.10
>         ret                                                     #32.10
> 
> 
> 

$ cc1 -quiet -m32 -O2 t.c -fdump-tree-optimized
$ cat t.s 
    .file    "t.c"
    .text
    .p2align 4,,15
    .globl    FirstOnet
    .type    FirstOnet, @function
FirstOnet:
.LFB0:
    .cfi_startproc
    movzwl    10(%esp), %edx
    xorl    %eax, %eax
    testl    %edx, %edx
    je    .L2
    movzbl    first_one(%edx), %eax
.L2:
    rep
    ret
    .cfi_endproc
.LFE0:
    .size    FirstOnet, .-FirstOnet
    .ident    "GCC: (GNU) 4.6.0 20110312 (experimental) [trunk revision 170907]"
    .section    .note.GNU-stack,"",@progbits

Reply via email to