This is a bug report for GCC 2.95.3. The bug also exists in 2.95.2. I am cross-compiling for arm-linux on an i386 Linux system, using the command line: arm-linux-gcc -static -O -g -c au1.c
The code fragment below is taken from the mpeg2enc package. When compiled with -O, it does not work correctly: the value of s changes between the line marked XXX and the line marked YYY. This is because s is stored in register f5, which is clobbered by __kernel_cos, a glibc function called by cos. I don't know whether f5 should be preserved by the calling function or the callee, but nothing is preserving it at present, and this results in the bug. The code works correctly when compiled without optimisation, because the value of s is stored on the stack. I don't think that this problem is confined to register f5 or the functions involved here, but I don't have any evidence for that. The bug occurs with any level of optimisation, not just -O.

// Code fragment: (note: based on init_fdct() in fdctref.c, in mpeg2enc)
#include <math.h>
#define PI M_PI
void Test ( double c [ 8 ][ 8 ] )
{
    int i, j;
    double s;
    for (i=0; i<8; i++)
    {
        s = (i==0) ? sqrt(0.125) : 0.5;
        printf ( "Begin: s = %1.4f\n" , s ) ; // XXX
        for (j=0; j<8; j++)
        {
            double p = PI / 8.0 ;
            printf ( " p: %1.4f " , p ) ;
            p *= (double) i ;
            printf ( "%1.4f " , p ) ;
            p *= ( (double) j ) + 0.5 ;
            printf ( "%1.4f " , p ) ;
            p = cos ( p ) ;
            printf ( "%1.4f " , p ) ;
            p *= s ;
            printf ( "%1.4f\n" , p ) ;
            c [ i ][ j ] = p ;
        }
        printf ( "Now: s = %1.4f\n" , s ) ; // YYY
    }
}
// End code fragment

Here is a typical output produced by one iteration (i = 1) of the outer for loop, when compiled with -O.

Begin: s = 0.5000
 p: 0.3927 0.3927 0.1963 0.9808 0.0000
 p: 0.3927 0.3927 0.5890 0.8315 0.0000
 p: 0.3927 0.3927 0.9817 0.5556 0.0000
 p: 0.3927 0.3927 1.3744 0.1951 0.0000
 p: 0.3927 0.3927 1.7671 -0.1951 -0.0000
 p: 0.3927 0.3927 2.1598 -0.5556 -0.0000
 p: 0.3927 0.3927 2.5525 -0.8315 -1.7958
 p: 0.3927 0.3927 2.9452 -0.9808 -2.1183
Now: s = 2.1598

As you can see, s has changed.
Here is a disassembly of the Test() function when optimised with -O: // Begin disassembly au1.o: file format elf32-littlearm Disassembly of section .text: 00000000 <Test>: #define PI M_PI void Test ( double c [ 8 ][ 8 ] ) { 0: e1a0c00d mov ip, sp 4: e92dd8f0 stmdb sp!, {r4, r5, r6, r7, fp, ip, lr, pc} 8: ed6d4206 sfm f4, 2, [sp, #-24]! c: e24cb004 sub fp, ip, #4 ; 0x4 10: e1a07000 mov r7, r0 int i, j; double s; for (i=0; i<8; i++) 14: e3a06000 mov r6, #0 ; 0x0 { s = (i==0) ? sqrt(0.125) : 0.5; 18: e3560000 cmp r6, #0 ; 0x0 1c: 1a000011 bne 68 <.text+0x68> 20: ed9f9107 ldfd f1, [pc, #28] 24: ee408181 sqtd f0, f1 28: ee90f110 cmf f0, f0 2c: 0a00000d beq 68 <.text+0x68> 30: ed2d9102 stfd f1, [sp, #-8]! 34: e8bd0003 ldmia sp!, {r0, r1} 38: ebfffffe bl 38 <Test+0x38> 3c: ee00d180 mvfd f5, f0 40: ea000012 b 90 <.text+0x90> 44: 3fc00000 swicc 0x00c00000 48: 00000000 andeq r0, r0, r0 4c: ee00d18e mvfd f5, #0.5 printf ( "Begin: s = %1.4f\n" , s ) ; // XXX 50: e59f00c0 ldr r0, [pc, #192] ; 118 <.text+0x118> 54: ed2dd102 stfd f5, [sp, #-8]! 58: e8bd0006 ldmia sp!, {r1, r2} 5c: ebfffffe bl 5c <.text+0x5c> for (j=0; j<8; j++) 60: e3a04000 mov r4, #0 ; 0x0 64: e59f50b0 ldr r5, [pc, #176] ; 11c <.text+0x11c> { double p = PI / 8.0 ; 68: ed9fc12c ldfd f4, [pc, #176] printf ( " p: %1.4f " , p ) ; 6c: e59f00b4 ldr r0, [pc, #180] ; 128 <.text+0x128> 70: ed2dc102 stfd f4, [sp, #-8]! 74: e8bd0006 ldmia sp!, {r1, r2} 78: ebfffffe bl 78 <.text+0x78> p *= (double) i ; 7c: ee006190 fltd f0, r6 80: ee104184 mufd f4, f0, f4 printf ( "%1.4f " , p ) ; 84: e1a00005 mov r0, r5 88: ed2dc102 stfd f4, [sp, #-8]! 8c: e8bd0006 ldmia sp!, {r1, r2} 90: ebfffffe bl 90 <.text+0x90> p *= ( (double) j ) + 0.5 ; 94: ee004190 fltd f0, r4 98: ee00018e adfd f0, f0, #0.5 9c: ee144180 mufd f4, f4, f0 printf ( "%1.4f " , p ) ; a0: e1a00005 mov r0, r5 a4: ed2dc102 stfd f4, [sp, #-8]! a8: e8bd0006 ldmia sp!, {r1, r2} ac: ebfffffe bl ac <.text+0xac> p = cos ( p ) ; b0: ed2dc102 stfd f4, [sp, #-8]! 
b4: e8bd0003 ldmia sp!, {r0, r1} b8: ebfffffe bl b8 <.text+0xb8> bc: ee00c180 mvfd f4, f0 printf ( "%1.4f " , p ) ; c0: e1a00005 mov r0, r5 c4: ed2dc102 stfd f4, [sp, #-8]! c8: e8bd0006 ldmia sp!, {r1, r2} cc: ebfffffe bl cc <.text+0xcc> p *= s ; d0: ee144185 mufd f4, f4, f5 printf ( "%1.4f\n" , p ) ; d4: e59f0050 ldr r0, [pc, #80] ; 12c <.text+0x12c> d8: ed2dc102 stfd f4, [sp, #-8]! dc: e8bd0006 ldmia sp!, {r1, r2} e0: ebfffffe bl e0 <.text+0xe0> c [ i ][ j ] = p ; e4: e0873306 add r3, r7, r6, lsl #6 e8: e0833184 add r3, r3, r4, lsl #3 ec: ed83c100 stfd f4, [r3] f0: e2844001 add r4, r4, #1 ; 0x1 f4: e3540007 cmp r4, #7 ; 0x7 f8: da000018 ble 160 <.text+0x160> } printf ( "Now: s = %1.4f\n" , s ) ; // YYY fc: e59f002c ldr r0, [pc, #44] ; 130 <.text+0x130> 100: ed2dd102 stfd f5, [sp, #-8]! 104: e8bd0006 ldmia sp!, {r1, r2} 108: ebfffffe bl 108 <.text+0x108> 10c: e2866001 add r6, r6, #1 ; 0x1 110: e3560007 cmp r6, #7 ; 0x7 114: ea00004b b 248 <.text+0x248> 118: 00000000 andeq r0, r0, r0 11c: 00000024 andeq r0, r0, r4, lsr #32 120: 3fd921fb swicc 0x00d921fb 124: 54442d18 strplb r2, [r4], #-3352 128: 00000014 andeq r0, r0, r4, lsl r0 12c: 0000002c andeq r0, r0, ip, lsr #32 130: 00000034 andeq r0, r0, r4, lsr r0 134: da000004 ble 14c <.text+0x14c> } } 138: ed5b420d lfm f4, 2, [fp, #-52] 13c: e91ba8f0 ldmdb fp, {r4, r5, r6, r7, fp, sp, pc} // End disassembly And here is the output of the command: arm-linux-gcc -v -save-temps -static -O -g -c au1.c Reading specs from /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/specs gcc version 2.95.3 20010315 (release) /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/cpp0 -lang-c -v -D__GNUC__=2 -D__GNUC_MINOR__=95 -Dunix -D__arm__ -Dlinux -D__ELF__ -D__unix__ -D__arm__ -D__linux__ -D__ELF__ -D__unix -D__linux -Asystem(unix) -Asystem(posix) -Acpu(arm) -Amachine(arm) -D__CHAR_UNSIGNED__ -D__OPTIMIZE__ -g -D__ARM_ARCH_3__ -D__APCS_32__ au1.c au1.i GNU CPP version 2.95.3 20010315 (release) (ARM GNU/Linux with ELF) 
#include "..." search starts here:
#include <...> search starts here:
 /usr/jack/proj/arm/include
 /usr/jack/proj/arm/include
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/include
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/../../../../arm-linux/include
End of search list.
The following default directories have been omitted from the search path:
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/../../../../include/g++-
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/../../../../arm-linux/sys-include
End of omitted list.
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/cc1 au1.i -quiet -dumpbase au1.c -g -O -version -o au1.s
GNU C version 2.95.3 20010315 (release) (arm-linux) compiled by GNU C version 3.3.4.
 /usr/jack/local/arm_gcc_2.95.3/arm-linux/bin/as -o au1.o au1.s

--
           Summary: floating point registers not preserved during function call
           Product: gcc
           Version: 2.95.3
            Status: UNCONFIRMED
          Severity: normal
          Priority: P2
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: jack at cs dot york dot ac dot uk
                CC: gcc-bugs at gcc dot gnu dot org
 GCC build triplet: i686-unknown-linux
  GCC host triplet: i686-unknown-linux
GCC target triplet: arm-unknown-linux

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19547