Hello, I'm using gcc-4.1.1, built into an EABI toolchain with glibc-2.4 and binutils-2.16.92. I have a fixed-point-to-float conversion routine, shown below. It only works correctly when compiled with -O1; with -O2, -O3, or -Os it produces a wrong result.
% cat CTL_Math.c
typedef float CTLFLOAT;
typedef int CTLFIXED;
typedef int CTLI32;
typedef unsigned int CTLU32;

inline CTLFLOAT _CTL_Fixed2Float( CTLFIXED In )
{
    CTLI32 lm = In;
    CTLI32 tm = lm;
    CTLI32 le;
    CTLU32 bits;
    CTLI32 man;

    if (0 == In) return 0;
    if (tm < 0) tm = -tm;

    // determine shift necessary to move radix to the 22-bit position
    le = CTL_FASTFLOAT_MANTISSA_LEADINGZEROS - (CTLI32)(CLZ(tm));
    if (le >= 0) lm >>= le;
    else lm <<= -le;

    // le = le + (CTL_FASTFLOAT_MANTISSA_BITS-17);
    le += 5;
    if (0==lm) le = 0;

    bits = ((CTLI32)(le + 127) & 0x00FF) << 23;
    man = lm;
    if (lm < 0) {
        man = -man;
        bits |= 0x80000000;
    }
    man = man << CTL_FASTFLOAT_IEEE_BITDIF_MANTISSA;
    bits |= man & 0x007FFFFF;
    return *(CTLFLOAT*)&bits;
}

I compiled it with the following commands.

% arm-iwmmxt-linux-gnueabi-gcc -O1 -Wall -mcpu=xscale -mtune=xscale -mabi=aapcs-linux -c CTL_Math.c -o CTL_Math.o1
% arm-iwmmxt-linux-gnueabi-gcc -O2 -Wall -mcpu=xscale -mtune=xscale -mabi=aapcs-linux -c CTL_Math.c -o CTL_Math.o2

I disassembled the object files and found that the instruction order differs between -O1 and -O2. With the -O2 option, it seems GCC moves the 'ldr r0, [sp, #4]' instruction, so that r0 only receives a stale value and the return value is incorrect. The disassembly is shown below.
% arm-iwmmxt-linux-gnueabi-objdump -d CTL_Math.o1 > o1
% arm-iwmmxt-linux-gnueabi-objdump -d CTL_Math.o2 > o2
% vimdiff o1 o2
00000564 <_CTL_Fixed2Float>:                    | 00000560 <_CTL_Fixed2Float>:
564: e92d4010 stmdb sp!, {r4, lr}               | 560: e92d4010 stmdb sp!, {r4, lr}
568: e24dd008 sub sp, sp, #8 ; 0x8              | 564: e2504000 subs r4, r0, #0 ; 0x0
56c: e2504000 subs r4, r0, #0 ; 0x0             | 568: e24dd008 sub sp, sp, #8 ; 0x8
570: 03a00000 moveq r0, #0 ; 0x0                | 56c: 03a02000 moveq r2, #0 ; 0x0
574: 0a000019 beq 5e0 <_CTL_Fixed2Float+0x7c>   | 570: 1a000002 bne 580 <_CTL_Fixed2Float+0x20>
578: e0240fc4 eor r0, r4, r4, asr #31           | 574: e1a00002 mov r0, r2
57c: e0400fc4 sub r0, r0, r4, asr #31           | 578: e28dd008 add sp, sp, #8 ; 0x8
580: ebfffffe bl 0 <E3D_CLZ>                    | 57c: e8bd8010 ldmia sp!, {r4, pc}
584: e270200a rsbs r2, r0, #10 ; 0xa            | 580: e0240fc4 eor r0, r4, r4, asr #31
588: 51a00254 movpl r0, r4, asr r2              | 584: e0400fc4 sub r0, r0, r4, asr #31
58c: 42623000 rsbmi r3, r2, #0 ; 0x0            | 588: ebfffffe bl 0 <E3D_CLZ>
590: 41a00314 movmi r0, r4, lsl r3              | 58c: e270200a rsbs r2, r0, #10 ; 0xa
594: e3500000 cmp r0, #0 ; 0x0                  | 590: 42623000 rsbmi r3, r2, #0 ; 0x0
598: 03a035fe moveq r3, #1065353216 ; 0x3f800000| 594: 41a00314 movmi r0, r4, lsl r3
59c: 058d3004 streq r3, [sp, #4]                | 598: 51a00254 movpl r0, r4, asr r2
5a0: 03a00000 moveq r0, #0 ; 0x0                | 59c: e3500000 cmp r0, #0 ; 0x0
5a4: 0a000006 beq 5c4 <_CTL_Fixed2Float+0x60>   | 5a0: 03a035fe moveq r3, #1065353216 ; 0x3f800000
5a8: e2823084 add r3, r2, #132 ; 0x84           | 5a4: 058d3004 streq r3, [sp, #4]
5ac: e20330ff and r3, r3, #255 ; 0xff           | 5a8: 0a00000c beq 5e0 <_CTL_Fixed2Float+0x80>
5b0: e1a03b83 mov r3, r3, lsl #23               | 5ac: e2823084 add r3, r2, #132 ; 0x84
5b4: e58d3004 str r3, [sp, #4]                  | 5b0: e20330ff and r3, r3, #255 ; 0xff
5b8: b2600000 rsblt r0, r0, #0 ; 0x0            | 5b4: e1a02b83 mov r2, r3, lsl #23
5bc: b3833102 orrlt r3, r3, #-2147483648 ; 0x   | 5b8: b2603000 rsblt r3, r0, #0 ; 0x0
5c0: b58d3004 strlt r3, [sp, #4]                | 5bc: a1a03100 movge r3, r0, lsl #2
5c4: e3a02502 mov r2, #8388608 ; 0x800000       | 5c0: b1a03103 movlt r3, r3, lsl #2
5c8: e2422001 sub r2, r2, #1 ; 0x1              | 5c4: e58d2004 str r2, [sp, #4]
5cc: e0022100 and r2, r2, r0, lsl #2            | 5c8: a3c304ff bicge r0, r3, #-16777216 ; 0x
5d0: e59d3004 ldr r3, [sp, #4]                  | 5cc: b3822102 orrlt r2, r2, #-2147483648 ; 0x
5d4: e1833002 orr r3, r3, r2                    | 5d0: b3c304ff biclt r0, r3, #-16777216 ; 0x
5d8: e58d3004 str r3, [sp, #4]                  | 5d4: a3c00502 bicge r0, r0, #8388608 ; 0x
5dc: e59d0004 ldr r0, [sp, #4]                  | 5d8: b58d2004 strlt r2, [sp, #4]
5e0: e28dd008 add sp, sp, #8 ; 0x8              | 5dc: b3c00502 biclt r0, r0, #8388608 ; 0x
5e4: e8bd8010 ldmia sp!, {r4, pc}               | 5e0: e59d3004 ldr r3, [sp, #4]
                                                | 5e4: e59d2004 ldr r2, [sp, #4]
                                                | 5e8: e1833000 orr r3, r3, r0
                                                | 5ec: e58d3004 str r3, [sp, #4]
                                                | 5f0: eaffffdf b 574 <_CTL_Fixed2Float+0x14>

The 'ldr r0, [sp, #4]' at address 5dc (left, -O1) is placed at address 5e4 (right, -O2). I tried to reduce this to a small test case, but found that in the simplified test case -O2 compiles this function correctly while it still fails for other functions.

--
Summary: optimization -O2/-O3/-Os generate wrong instruction order
Product: gcc
Version: 4.1.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: mingqiao dot wu at gmail dot com
GCC build triplet: i686-pc-linux-gnu
GCC host triplet: i686-pc-linux-gnu
GCC target triplet: arm-iwmmxt-linux-gnueabi

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29140