http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52323

             Bug #: 52323
           Summary: i386: gcse runs amok with pic-addresses
    Classification: Unclassified
           Product: gcc
           Version: 4.6.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: kaffeemons...@googlemail.com


I am seeing a very bad interaction between gcse and PIC addresses on i386.

The attached testcase (yes, I know, it's not a beauty and could probably be
reduced some more), compiled with:
gcc-4.6.2 -Wall -O1 -fpic -S gcse_amok.c -o gcse_amok.s
creates roughly this code:
to_base32_BMI2:
.LFB1:
    .cfi_startproc
    pushl    %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    pushl    %edi
    .cfi_def_cfa_offset 12
    .cfi_offset 7, -12
    pushl    %esi
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    pushl    %ebx
    .cfi_def_cfa_offset 20
    .cfi_offset 3, -20
    call    __i686.get_pc_thunk.bx
    addl    $_GLOBAL_OFFSET_TABLE_, %ebx
    movl    20(%esp), %eax
    movl    24(%esp), %edx
    movl    28(%esp), %ecx
    cmpl    $4, %ecx
    jbe    .L2
.L4:
    movl    (%edx), %esi
    bswap    %esi
    movl    %esi, %edi
    shrl    $12, %edi
#APP
# 19 "gcse_amok.c" 1
    pdep 64+vals@GOTOFF(%ebx), %edi, %edi
# 0 "" 2
#NO_APP
    movzbl    4(%edx), %ebp
    sall    $8, %esi
    orl    %ebp, %esi
#APP
# 20 "gcse_amok.c" 1
    pdep 64+vals@GOTOFF(%ebx), %esi, %esi
# 0 "" 2
#NO_APP
    bswap    %edi
    bswap    %esi
#APP
# 25 "gcse_amok.c" 1
    movd    %edi, %xmm0
    pinsrd    $1, %esi, %xmm0
    paddb    80+vals@GOTOFF(%ebx), %xmm0
    movdqa    %xmm0, %xmm1
    pcmpgtb    96+vals@GOTOFF(%ebx), %xmm1
    pand    112+vals@GOTOFF(%ebx), %xmm1
    psubb    %xmm1, %xmm0
    movq    %xmm0, (%eax)
# 0 "" 2
#NO_APP
    addl    $8, %eax
    addl    $5, %edx
    subl    $5, %ecx
    cmpl    $4, %ecx
    ja    .L4
...

If -fgcse (like in -O2) gets added to the command line, things get ugly:
gcc-4.6.2 -Wall -O1 -fpic -fgcse -S gcse_amok.c -o gcse_amok.s
results in:
to_base32_BMI2:
.LFB1:
    .cfi_startproc
    pushl    %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    pushl    %edi
    .cfi_def_cfa_offset 12
    .cfi_offset 7, -12
    pushl    %esi
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    pushl    %ebx
    .cfi_def_cfa_offset 20
    .cfi_offset 3, -20
    subl    $36, %esp
    .cfi_def_cfa_offset 56
    call    __i686.get_pc_thunk.bx
    addl    $_GLOBAL_OFFSET_TABLE_, %ebx
    movl    56(%esp), %eax
    movl    60(%esp), %edx
    movl    64(%esp), %ecx
    cmpl    $4, %ecx
    jbe    .L2
    leal    64+vals@GOTOFF, %edi
    leal    80+vals@GOTOFF, %esi
    movl    %esi, 32(%esp)
    leal    96+vals@GOTOFF, %ebp
    movl    %ebp, 28(%esp)
    leal    112+vals@GOTOFF, %esi
    movl    %esi, 24(%esp)
    movl    %eax, 8(%esp)
    movl    %edx, (%esp)
    movl    %ecx, 4(%esp)
    movl    %edi, 12(%esp)
.L3:
    movl    (%esp), %edi
    movl    (%edi), %esi
    bswap    %esi
    movl    %esi, %edi
    shrl    $12, %edi
    movl    12(%esp), %eax
#APP
# 19 "gcse_amok.c" 1
    pdep (%eax,%ebx), %edi, %edi
# 0 "" 2
#NO_APP
    movl    (%esp), %edx
    movzbl    4(%edx), %eax
    sall    $8, %esi
    orl    %eax, %esi
    movl    12(%esp), %ecx
#APP
# 20 "gcse_amok.c" 1
    pdep (%ecx,%ebx), %esi, %esi
# 0 "" 2
#NO_APP
    bswap    %edi
    bswap    %esi
    movl    8(%esp), %eax
    movl    32(%esp), %edx
    movl    28(%esp), %ecx
    movl    24(%esp), %ebp
#APP
# 25 "gcse_amok.c" 1
    movd    %edi, %xmm0
    pinsrd    $1, %esi, %xmm0
    paddb    (%edx,%ebx), %xmm0
    movdqa    %xmm0, %xmm1
    pcmpgtb    (%ecx,%ebx), %xmm1
    pand    0(%ebp,%ebx), %xmm1
    psubb    %xmm1, %xmm0
    movq    %xmm0, (%eax)
# 0 "" 2
#NO_APP
    addl    $8, %eax
    movl    %eax, 8(%esp)
    addl    $5, (%esp)
    subl    $5, 4(%esp)
    cmpl    $4, 4(%esp)
    ja    .L3
...

Later passes (-O2, -O3) only make things worse or cannot recover from this.
For some reason gcse tries to hoist the constant address offsets out of the
loop (see the leal instructions ahead of .L3 in the listing above). This needs
a bunch of registers, which i386 does not have, and so the spilling begins...

GCC 4.5.3 does not even compile it:
$ gcc-4.5.3 -Wall -O1 -fpic -fgcse -S -o gcse_amok.s gcse_amok.c                
gcse_amok.c: In function 'to_base32_BMI2':                                      
gcse_amok.c:25:2: error: can't find a register in class 'GENERAL_REGS' while
reloading 'asm'                           
gcse_amok.c:19:2: error: 'asm' operand has impossible constraints               
gcse_amok.c:20:2: error: 'asm' operand has impossible constraints               
gcse_amok.c:25:2: error: 'asm' operand has impossible constraints

Without gcse, there is no problem:
$ gcc -Wall -O1 -fpic -S -o gcse_amok.s gcse_amok.c
$ echo $?
0

I do not really know which component this is, but I guess it is a target
problem: the low cost of "more complicated addressing modes" on x86 (which is
why the compiler generates them at all) lures the rest of the compiler into a
trap. But that is a wild guess...

Reply via email to