Hello,

I'm using gcc-4.1.1, built into an EABI tool chain with glibc-2.4 and
binutils-2.16.92.
I have a fixed-to-float conversion routine, shown below. It only works correctly
when compiled with -O1; with -O2, -O3 and -Os it produces wrong results.

% cat CTL_Math.c
// Scalar type aliases used by _CTL_Fixed2Float below.
typedef float CTLFLOAT;     // return type of the conversion
typedef int CTLFIXED;       // input type of the conversion; the fixed-point layout is not shown in this excerpt
typedef int CTLI32;         // signed working type (presumably 32-bit on this target)
typedef unsigned int CTLU32; // unsigned type used to assemble the result bit pattern

// Converts the integer-typed value In to a CTLFLOAT by assembling the result's
// bit pattern by hand in an unsigned integer (sign at bit 31, an 8-bit biased
// exponent at bits 23..30, mantissa in the low 23 bits) and then reinterpreting
// that integer's storage as a float.
//
// In:      value to convert; returns 0 immediately when In is 0.
// Returns: the assembled bit pattern read back as CTLFLOAT.
//
// CLZ, CTL_FASTFLOAT_MANTISSA_LEADINGZEROS and
// CTL_FASTFLOAT_IEEE_BITDIF_MANTISSA are not defined in this excerpt;
// presumably they are macros from a project header.
inline CTLFLOAT _CTL_Fixed2Float( CTLFIXED In )
{
    CTLI32 lm = In;   // signed working copy, shifted into position below
    CTLI32 tm = lm;   // becomes the magnitude of In, used only for the CLZ call
    CTLI32 le;        // shift amount first, then reused as the exponent value
    CTLU32 bits;      // result bit pattern under construction
    CTLI32 man;       // magnitude of the shifted value, source of the mantissa bits

    if (0 == In) return 0;

    if (tm < 0)
        tm = -tm;

    // determine shift necessary to move radix to the 22-bit position
    le = CTL_FASTFLOAT_MANTISSA_LEADINGZEROS - (CTLI32)(CLZ(tm));
    // Non-negative le shifts right, negative le shifts left by the same amount.
    if (le >= 0)
        lm >>= le;
    else
        lm <<= -le;

    // le = le + (CTL_FASTFLOAT_MANTISSA_BITS-17);
    le += 5;

    // If shifting left nothing behind, reset the exponent term to 0.
    if (0==lm) le = 0;

    // Biased exponent: add 127, keep the low 8 bits, place them at bits 23..30.
    bits = ((CTLI32)(le + 127) & 0x00FF) << 23;
        man = lm;
        if (lm < 0)
        {
                // Negative input: store the magnitude and set the sign bit.
                man = -man;
                bits |= 0x80000000;
        }
        man = man << CTL_FASTFLOAT_IEEE_BITDIF_MANTISSA;
        // Only the low 23 bits of the shifted magnitude go into the result.
        bits |= man & 0x007FFFFF;

    // NOTE(review): this reads an unsigned int object through a float lvalue,
    // which the C aliasing rules do not permit. GCC enables -fstrict-aliasing
    // at -O2/-O3/-Os but not at -O1, which matches the optimization levels
    // reported as failing, so the stores to 'bits' may legally be reordered
    // past this load. Copying the bytes with memcpy or going through a union
    // would avoid the problem -- TODO confirm this is the cause.
    return *(CTLFLOAT*)&bits;
}

I compiled it with the following commands.
% arm-iwmmxt-linux-gnueabi-gcc -O1 -Wall -mcpu=xscale -mtune=xscale
-mabi=aapcs-linux -c CTL_Math.c -o CTL_Math.o1
% arm-iwmmxt-linux-gnueabi-gcc -O2 -Wall -mcpu=xscale -mtune=xscale
-mabi=aapcs-linux -c CTL_Math.c -o CTL_Math.o2

I disassembled the object files and found that the instruction order differs
between -O1 and -O2. With -O2, it seems GCC moves the 'ldr     r0, [sp, #4]'
load earlier, so it only picks up a stale value for r0 and the return value is
incorrect. The disassembly is shown below.

% arm-iwmmxt-linux-gnueabi-objdump -d CTL_Math.o1 > o1
% arm-iwmmxt-linux-gnueabi-objdump -d CTL_Math.o2 > o2
% vimdiff o1 o2                                                              
00000564 <_CTL_Fixed2Float>:                                  |00000560
<_CTL_Fixed2Float>:                                    
   564:   e92d4010        stmdb   sp!, {r4, lr}               |   560:  
e92d4010        stmdb   sp!, {r4, lr}               
   568:   e24dd008        sub     sp, sp, #8      ; 0x8       |   564:  
e2504000        subs    r4, r0, #0      ; 0x0       
   56c:   e2504000        subs    r4, r0, #0      ; 0x0       |   568:  
e24dd008        sub     sp, sp, #8      ; 0x8       
   570:   03a00000        moveq   r0, #0  ; 0x0               |   56c:  
03a02000        moveq   r2, #0  ; 0x0               
   574:   0a000019        beq     5e0 <_CTL_Fixed2Float+0x7c> |   570:  
1a000002        bne     580 <_CTL_Fixed2Float+0x20> 
   578:   e0240fc4        eor     r0, r4, r4, asr #31         |   574:  
e1a00002        mov     r0, r2                      
   57c:   e0400fc4        sub     r0, r0, r4, asr #31         |   578:  
e28dd008        add     sp, sp, #8      ; 0x8       
   580:   ebfffffe        bl      0 <E3D_CLZ>                 |   57c:  
e8bd8010        ldmia   sp!, {r4, pc}               
   584:   e270200a        rsbs    r2, r0, #10     ; 0xa       |   580:  
e0240fc4        eor     r0, r4, r4, asr #31         
   588:   51a00254        movpl   r0, r4, asr r2              |   584:  
e0400fc4        sub     r0, r0, r4, asr #31         
   58c:   42623000        rsbmi   r3, r2, #0      ; 0x0       |   588:  
ebfffffe        bl      0 <E3D_CLZ>                 
   590:   41a00314        movmi   r0, r4, lsl r3              |   58c:  
e270200a        rsbs    r2, r0, #10     ; 0xa       
   594:   e3500000        cmp     r0, #0  ; 0x0               |   590:  
42623000        rsbmi   r3, r2, #0      ; 0x0       
   598:   03a035fe        moveq   r3, #1065353216 ; 0x3f800000|   594:  
41a00314        movmi   r0, r4, lsl r3              
   59c:   058d3004        streq   r3, [sp, #4]                |   598:  
51a00254        movpl   r0, r4, asr r2              
   5a0:   03a00000        moveq   r0, #0  ; 0x0               |   59c:  
e3500000        cmp     r0, #0  ; 0x0               
   5a4:   0a000006        beq     5c4 <_CTL_Fixed2Float+0x60> |   5a0:  
03a035fe        moveq   r3, #1065353216 ; 0x3f800000
   5a8:   e2823084        add     r3, r2, #132    ; 0x84      |   5a4:  
058d3004        streq   r3, [sp, #4]                
   5ac:   e20330ff        and     r3, r3, #255    ; 0xff      |   5a8:  
0a00000c        beq     5e0 <_CTL_Fixed2Float+0x80> 
   5b0:   e1a03b83        mov     r3, r3, lsl #23             |   5ac:  
e2823084        add     r3, r2, #132    ; 0x84      
   5b4:   e58d3004        str     r3, [sp, #4]                |   5b0:  
e20330ff        and     r3, r3, #255    ; 0xff      
   5b8:   b2600000        rsblt   r0, r0, #0      ; 0x0       |   5b4:  
e1a02b83        mov     r2, r3, lsl #23             
   5bc:   b3833102        orrlt   r3, r3, #-2147483648    ; 0x|   5b8:  
b2603000        rsblt   r3, r0, #0      ; 0x0       
   5c0:   b58d3004        strlt   r3, [sp, #4]                |   5bc:  
a1a03100        movge   r3, r0, lsl #2              
   5c4:   e3a02502        mov     r2, #8388608    ; 0x800000  |   5c0:  
b1a03103        movlt   r3, r3, lsl #2              
   5c8:   e2422001        sub     r2, r2, #1      ; 0x1       |   5c4:  
e58d2004        str     r2, [sp, #4]                
   5cc:   e0022100        and     r2, r2, r0, lsl #2          |   5c8:  
a3c304ff        bicge   r0, r3, #-16777216      ; 0x
   5d0:   e59d3004        ldr     r3, [sp, #4]                |   5cc:  
b3822102        orrlt   r2, r2, #-2147483648    ; 0x
   5d4:   e1833002        orr     r3, r3, r2                  |   5d0:  
b3c304ff        biclt   r0, r3, #-16777216      ; 0x
   5d8:   e58d3004        str     r3, [sp, #4]                |   5d4:  
a3c00502        bicge   r0, r0, #8388608        ; 0x
   5dc:   e59d0004        ldr     r0, [sp, #4]                |   5d8:  
b58d2004        strlt   r2, [sp, #4]                
   5e0:   e28dd008        add     sp, sp, #8      ; 0x8       |   5dc:  
b3c00502        biclt   r0, r0, #8388608        ; 0x
   5e4:   e8bd8010        ldmia   sp!, {r4, pc}               |   5e0:  
e59d3004        ldr     r3, [sp, #4]                
                                                              |   5e4:  
e59d2004        ldr     r2, [sp, #4]                
                                                              |   5e8:  
e1833000        orr     r3, r3, r0                  
                                                              |   5ec:  
e58d3004        str     r3, [sp, #4]                
                                                              |   5f0:  
eaffffdf        b       574 <_CTL_Fixed2Float+0x14> 

The 'ldr     r0, [sp, #4]' at address 5dc (left, -O1) ends up at address 5e4
(right, -O2).

I tried to reduce the test case further, but in the simplified test case -O2
handles this function correctly and fails on other functions instead.


-- 
           Summary: optimization -O2/-O3/-Os generate wrong instruction
                    order
           Product: gcc
           Version: 4.1.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: mingqiao dot wu at gmail dot com
 GCC build triplet: i686-pc-linux-gnu
  GCC host triplet: i686-pc-linux-gnu
GCC target triplet: arm-iwmmxt-linux-gnueabi


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29140

Reply via email to