------- Additional Comments From schlie at comcast dot net 2004-11-11 20:28 ------- Subject: Re: 3.4.3 ~6x+ performance regression vs 3.3.1, constant trees not being computed.
> From: joseph at codesourcery dot com <[EMAIL PROTECTED]> > ------- Additional Comments From joseph at codesourcery dot com 2004-11-11 > 16:22 ------- > Subject: Re: 3.4.3 ~6x+ performance regression vs > 3.3.1, constant trees not being computed. > > Have you actually tried compiling code identical to that you test but with > 8388608L in place of (1L << 23) before making claims about what is done > with constant expressions? > > Your example may suggest a regression, provided no type sizes changed for > your target between the versions compared, but you really shouldn't report > conjectures about the cause of a bug without clear evidence to > substantiate them, which in this case would involve substituting the value > of the constant expression in the testcase. You were correct: the problem wasn't that 3.4.3 failed to compute the constant expression values, but that it oddly transforms tests against constant values into run-time computed expressions, such that 3.4.3 converted: (a & 0x800000L) => (((long)a >> 23) & 1), which doesn't seem sensible. The following are the results for both 3.4.3 and 3.3.1, where 3.4.3 shows a >100x performance regression and a ~4x size regression relative to 3.3.1: ---- The source: /*Compiling: main.c using (for the sake of argument) avr-gcc -c -mmcu=atmega64 -I. -g -Os -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=main.lst -I/usr/local/avr/include -std=gnu99 -funsafe-math-optimizations -Wp,-M,-MP,-MT,main.o,-MF,.dep/main.o.d main.c -o main.o Linking: main.elf (again for the sake of argumnet) avr-gcc -mmcu=atmega64 -I. 
-g -Os -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=main.o -I/usr/local/avr/include -std=gnu99 -funsafe-math-optimizations -Wp,-M,-MP,-MT,main.o,-MF,.dep/main.elf.d main.o --output main.elf -Wl,-Map=main.map,--cref -lm File: main.c */ int foo0 ( int a ){ if (a & 0x800000L) return 1; else return 2 ; } int foo1 ( int a ){ if (a & (1L << 23)) return 1; else return 2 ; } int foo2 ( long a ){ if (a & 0x800000L) return 1; else return 2 ; } int foo3 ( long a ){ if (a & (1L << 23)) return 1; else return 2 ; } int main( void ){ volatile int a; a = foo0 ( a ); a = foo1 ( a ); a = foo2 ( a ); a = foo3 ( a ); return 0; } ---- Listing for 3.4.3 main.elf: file format elf32-avr Sections: Idx Name Size VMA LMA File off Algn 0 .data 00000000 00800100 000001c8 0000025c 2**0 CONTENTS, ALLOC, LOAD, DATA 1 .text 000001c8 00000000 00000000 00000094 2**0 CONTENTS, ALLOC, LOAD, READONLY, CODE 2 .bss 00000000 00800100 000001c8 0000025c 2**0 ALLOC 3 .noinit 00000000 00800100 00800100 0000025c 2**0 CONTENTS 4 .eeprom 00000000 00810000 00810000 0000025c 2**0 CONTENTS 5 .stab 000005d0 00000000 00000000 0000025c 2**2 CONTENTS, READONLY, DEBUGGING 6 .stabstr 0000046e 00000000 00000000 0000082c 2**0 CONTENTS, READONLY, DEBUGGING Disassembly of section .text: 00000000 <__vectors>: 0: 0c 94 46 00 jmp 0x8c 4: 0c 94 61 00 jmp 0xc2 8: 0c 94 61 00 jmp 0xc2 c: 0c 94 61 00 jmp 0xc2 10: 0c 94 61 00 jmp 0xc2 14: 0c 94 61 00 jmp 0xc2 18: 0c 94 61 00 jmp 0xc2 1c: 0c 94 61 00 jmp 0xc2 20: 0c 94 61 00 jmp 0xc2 24: 0c 94 61 00 jmp 0xc2 28: 0c 94 61 00 jmp 0xc2 2c: 0c 94 61 00 jmp 0xc2 30: 0c 94 61 00 jmp 0xc2 34: 0c 94 61 00 jmp 0xc2 38: 0c 94 61 00 jmp 0xc2 3c: 0c 94 61 00 jmp 0xc2 40: 0c 94 61 00 jmp 0xc2 44: 0c 94 61 00 jmp 0xc2 48: 0c 94 61 00 jmp 0xc2 4c: 0c 94 61 00 jmp 0xc2 50: 0c 94 61 00 jmp 0xc2 54: 0c 94 61 00 jmp 0xc2 58: 0c 94 61 00 jmp 0xc2 5c: 0c 94 61 00 jmp 0xc2 60: 0c 94 61 00 jmp 0xc2 64: 0c 94 61 00 jmp 0xc2 68: 0c 94 61 00 jmp 0xc2 
6c: 0c 94 61 00 jmp 0xc2 70: 0c 94 61 00 jmp 0xc2 74: 0c 94 61 00 jmp 0xc2 78: 0c 94 61 00 jmp 0xc2 7c: 0c 94 61 00 jmp 0xc2 80: 0c 94 61 00 jmp 0xc2 84: 0c 94 61 00 jmp 0xc2 88: 0c 94 61 00 jmp 0xc2 0000008c <__ctors_end>: 8c: 11 24 eor r1, r1 8e: 1f be out 0x3f, r1 ; 63 90: cf ef ldi r28, 0xFF ; 255 92: d0 e1 ldi r29, 0x10 ; 16 94: de bf out 0x3e, r29 ; 62 96: cd bf out 0x3d, r28 ; 61 00000098 <__do_copy_data>: 98: 11 e0 ldi r17, 0x01 ; 1 9a: a0 e0 ldi r26, 0x00 ; 0 9c: b1 e0 ldi r27, 0x01 ; 1 9e: e8 ec ldi r30, 0xC8 ; 200 a0: f1 e0 ldi r31, 0x01 ; 1 a2: 02 c0 rjmp .+4 ; 0xa8 000000a4 <.do_copy_data_loop>: a4: 05 90 lpm r0, Z+ a6: 0d 92 st X+, r0 000000a8 <.do_copy_data_start>: a8: a0 30 cpi r26, 0x00 ; 0 aa: b1 07 cpc r27, r17 ac: d9 f7 brne .-10 ; 0xa4 000000ae <__do_clear_bss>: ae: 11 e0 ldi r17, 0x01 ; 1 b0: a0 e0 ldi r26, 0x00 ; 0 b2: b1 e0 ldi r27, 0x01 ; 1 b4: 01 c0 rjmp .+2 ; 0xb8 000000b6 <.do_clear_bss_loop>: b6: 1d 92 st X+, r1 000000b8 <.do_clear_bss_start>: b8: a0 30 cpi r26, 0x00 ; 0 ba: b1 07 cpc r27, r17 bc: e1 f7 brne .-8 ; 0xb6 be: 0c 94 b7 00 jmp 0x16e 000000c2 <__bad_interrupt>: c2: 0c 94 00 00 jmp 0x0 000000c6 <foo0>: */ int foo0 ( int a ){ if (a & 0x800000L) c6: aa 27 eor r26, r26 c8: 97 fd sbrc r25, 7 ca: a0 95 com r26 cc: ba 2f mov r27, r26 ce: 27 e1 ldi r18, 0x17 ; 23 d0: b6 95 lsr r27 d2: a7 95 ror r26 d4: 97 95 ror r25 d6: 87 95 ror r24 d8: 2a 95 dec r18 da: d1 f7 brne .-12 ; 0xd0 dc: 81 70 andi r24, 0x01 ; 1 de: 90 70 andi r25, 0x00 ; 0 e0: 89 2b or r24, r25 e2: 19 f0 breq .+6 ; 0xea return 1; e4: 81 e0 ldi r24, 0x01 ; 1 e6: 90 e0 ldi r25, 0x00 ; 0 e8: 08 95 ret else return 2 ; ea: 82 e0 ldi r24, 0x02 ; 2 ec: 90 e0 ldi r25, 0x00 ; 0 } ee: 08 95 ret f0: 08 95 ret 000000f2 <foo1>: int foo1 ( int a ){ if (a & (1L << 23)) f2: aa 27 eor r26, r26 f4: 97 fd sbrc r25, 7 f6: a0 95 com r26 f8: ba 2f mov r27, r26 fa: 37 e1 ldi r19, 0x17 ; 23 fc: b6 95 lsr r27 fe: a7 95 ror r26 100: 97 95 ror r25 102: 87 95 ror r24 104: 3a 95 dec r19 106: d1 f7 
brne .-12 ; 0xfc 108: 81 70 andi r24, 0x01 ; 1 10a: 90 70 andi r25, 0x00 ; 0 10c: 89 2b or r24, r25 10e: 19 f0 breq .+6 ; 0x116 return 1; 110: 81 e0 ldi r24, 0x01 ; 1 112: 90 e0 ldi r25, 0x00 ; 0 114: 08 95 ret else return 2 ; 116: 82 e0 ldi r24, 0x02 ; 2 118: 90 e0 ldi r25, 0x00 ; 0 } 11a: 08 95 ret 11c: 08 95 ret 0000011e <foo2>: int foo2 ( long a ){ 11e: dc 01 movw r26, r24 120: cb 01 movw r24, r22 if (a & 0x800000L) 122: 47 e1 ldi r20, 0x17 ; 23 124: b6 95 lsr r27 126: a7 95 ror r26 128: 97 95 ror r25 12a: 87 95 ror r24 12c: 4a 95 dec r20 12e: d1 f7 brne .-12 ; 0x124 130: 81 70 andi r24, 0x01 ; 1 132: 90 70 andi r25, 0x00 ; 0 134: 89 2b or r24, r25 136: 19 f0 breq .+6 ; 0x13e return 1; 138: 81 e0 ldi r24, 0x01 ; 1 13a: 90 e0 ldi r25, 0x00 ; 0 13c: 08 95 ret else return 2 ; 13e: 82 e0 ldi r24, 0x02 ; 2 140: 90 e0 ldi r25, 0x00 ; 0 } 142: 08 95 ret 144: 08 95 ret 00000146 <foo3>: int foo3 ( long a ){ 146: dc 01 movw r26, r24 148: cb 01 movw r24, r22 if (a & (1L << 23)) 14a: 57 e1 ldi r21, 0x17 ; 23 14c: b6 95 lsr r27 14e: a7 95 ror r26 150: 97 95 ror r25 152: 87 95 ror r24 154: 5a 95 dec r21 156: d1 f7 brne .-12 ; 0x14c 158: 81 70 andi r24, 0x01 ; 1 15a: 90 70 andi r25, 0x00 ; 0 15c: 89 2b or r24, r25 15e: 19 f0 breq .+6 ; 0x166 return 1; 160: 81 e0 ldi r24, 0x01 ; 1 162: 90 e0 ldi r25, 0x00 ; 0 164: 08 95 ret else return 2 ; 166: 82 e0 ldi r24, 0x02 ; 2 168: 90 e0 ldi r25, 0x00 ; 0 } 16a: 08 95 ret 16c: 08 95 ret 0000016e <main>: int main( void ){ 16e: cd ef ldi r28, 0xFD ; 253 170: d0 e1 ldi r29, 0x10 ; 16 172: de bf out 0x3e, r29 ; 62 174: cd bf out 0x3d, r28 ; 61 volatile int a; a = foo0 ( a ); 176: 89 81 ldd r24, Y+1 ; 0x01 178: 9a 81 ldd r25, Y+2 ; 0x02 17a: 0e 94 63 00 call 0xc6 17e: 89 83 std Y+1, r24 ; 0x01 180: 9a 83 std Y+2, r25 ; 0x02 a = foo1 ( a ); 182: 89 81 ldd r24, Y+1 ; 0x01 184: 9a 81 ldd r25, Y+2 ; 0x02 186: 0e 94 79 00 call 0xf2 18a: 89 83 std Y+1, r24 ; 0x01 18c: 9a 83 std Y+2, r25 ; 0x02 a = foo2 ( a ); 18e: 89 81 ldd r24, Y+1 ; 0x01 190: 
9a 81 ldd r25, Y+2 ; 0x02 192: aa 27 eor r26, r26 194: 97 fd sbrc r25, 7 196: a0 95 com r26 198: ba 2f mov r27, r26 19a: bc 01 movw r22, r24 19c: cd 01 movw r24, r26 19e: 0e 94 8f 00 call 0x11e 1a2: 89 83 std Y+1, r24 ; 0x01 1a4: 9a 83 std Y+2, r25 ; 0x02 a = foo3 ( a ); 1a6: 89 81 ldd r24, Y+1 ; 0x01 1a8: 9a 81 ldd r25, Y+2 ; 0x02 1aa: aa 27 eor r26, r26 1ac: 97 fd sbrc r25, 7 1ae: a0 95 com r26 1b0: ba 2f mov r27, r26 1b2: bc 01 movw r22, r24 1b4: cd 01 movw r24, r26 1b6: 0e 94 a3 00 call 0x146 1ba: 89 83 std Y+1, r24 ; 0x01 1bc: 9a 83 std Y+2, r25 ; 0x02 return 0; } 1be: 80 e0 ldi r24, 0x00 ; 0 1c0: 90 e0 ldi r25, 0x00 ; 0 1c2: 0c 94 e3 00 jmp 0x1c6 000001c6 <_exit>: 1c6: ff cf rjmp .-2 ; 0x1c6 --------- The listing with avr-gcc (GCC) 3.3.1: 1 .file "main.c" 2 .arch atmega64 3 __SREG__ = 0x3f 4 __SP_H__ = 0x3e 5 __SP_L__ = 0x3d 6 __tmp_reg__ = 0 7 __zero_reg__ = 1 8 .global __do_copy_data 9 .global __do_clear_bss 12 .text 13 .Ltext0: 38 .global foo0 40 foo0: 1:main.c **** /*Compiling: main.c using (for the sake of argument) 2:main.c **** 3:main.c **** avr-gcc -c -mmcu=atmega64 -I. -g -Os -funsigned-char -funsigned-bitfields 4:main.c **** -fpack-struct 5:main.c **** -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=main.lst 6:main.c **** -I/usr/local/avr/include 7:main.c **** -std=gnu99 -funsafe-math-optimizations 8:main.c **** -Wp,-M,-MP,-MT,main.o,-MF,.dep/main.o.d main.c 9:main.c **** -o main.o 10:main.c **** 11:main.c **** Linking: main.elf (again for the sake of argumnet) 12:main.c **** avr-gcc -mmcu=atmega64 -I. 
-g -Os -funsigned-char -funsigned-bitfields 13:main.c **** -fpack-struct 14:main.c **** -fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=main.o 15:main.c **** -I/usr/local/avr/include 16:main.c **** -std=gnu99 -funsafe-math-optimizations 17:main.c **** -Wp,-M,-MP,-MT,main.o,-MF,.dep/main.elf.d main.o 18:main.c **** --output main.elf -Wl,-Map=main.map,--cref -lm 19:main.c **** 20:main.c **** File: main.c 21:main.c **** 22:main.c **** */ 23:main.c **** 24:main.c **** int foo0 ( int a ){ 42 .LM1: 43 /* prologue: frame size=0 */ 44 /* prologue end (size=0) */ 25:main.c **** 26:main.c **** if (a & 0x800000L) 46 .LM2: 47 0000 AA27 clr r26 48 0002 97FD sbrc r25,7 49 0004 A095 com r26 50 0006 BA2F mov r27,r26 51 0008 A7FF sbrs r26,7 52 000a 03C0 rjmp .L2 27:main.c **** return 1; 54 .LM3: 55 000c 81E0 ldi r24,lo8(1) 56 000e 90E0 ldi r25,hi8(1) 28:main.c **** else 29:main.c **** return 2 ; 30:main.c **** 31:main.c **** } 58 .LM4: 59 0010 0895 ret 60 .L2: 62 .LM5: 63 0012 82E0 ldi r24,lo8(2) 64 0014 90E0 ldi r25,hi8(2) 66 .LM6: 67 0016 0895 ret 68 /* epilogue: frame size=0 */ 69 0018 0895 ret 70 /* epilogue end (size=1) */ 71 /* function foo0 size 13 (12) */ 73 .Lscope0: 77 .global foo1 79 foo1: 32:main.c **** 33:main.c **** int foo1 ( int a ){ 81 .LM7: 82 /* prologue: frame size=0 */ 83 /* prologue end (size=0) */ 34:main.c **** 35:main.c **** if (a & (1L << 23)) 85 .LM8: 86 001a AA27 clr r26 87 001c 97FD sbrc r25,7 88 001e A095 com r26 89 0020 BA2F mov r27,r26 90 0022 A7FF sbrs r26,7 91 0024 03C0 rjmp .L5 36:main.c **** return 1; 93 .LM9: 94 0026 81E0 ldi r24,lo8(1) 95 0028 90E0 ldi r25,hi8(1) 37:main.c **** else 38:main.c **** return 2 ; 39:main.c **** 40:main.c **** } 97 .LM10: 98 002a 0895 ret 99 .L5: 101 .LM11: 102 002c 82E0 ldi r24,lo8(2) 103 002e 90E0 ldi r25,hi8(2) 105 .LM12: 106 0030 0895 ret 107 /* epilogue: frame size=0 */ 108 0032 0895 ret 109 /* epilogue end (size=1) */ 110 /* function foo1 size 13 (12) */ 112 .Lscope1: 116 .global foo2 118 foo2: 41:main.c 
**** 42:main.c **** int foo2 ( long a ){ 120 .LM13: 121 /* prologue: frame size=0 */ 122 /* prologue end (size=0) */ 123 0034 DC01 movw r26,r24 124 0036 CB01 movw r24,r22 43:main.c **** 44:main.c **** if (a & 0x800000L) 126 .LM14: 127 0038 A7FF sbrs r26,7 128 003a 03C0 rjmp .L8 45:main.c **** return 1; 130 .LM15: 131 003c 81E0 ldi r24,lo8(1) 132 003e 90E0 ldi r25,hi8(1) 46:main.c **** else 47:main.c **** return 2 ; 48:main.c **** 49:main.c **** } 134 .LM16: 135 0040 0895 ret 136 .L8: 138 .LM17: 139 0042 82E0 ldi r24,lo8(2) 140 0044 90E0 ldi r25,hi8(2) 142 .LM18: 143 0046 0895 ret 144 /* epilogue: frame size=0 */ 145 0048 0895 ret 146 /* epilogue end (size=1) */ 147 /* function foo2 size 11 (10) */ 149 .Lscope2: 153 .global foo3 155 foo3: 50:main.c **** 51:main.c **** int foo3 ( long a ){ 157 .LM19: 158 /* prologue: frame size=0 */ 159 /* prologue end (size=0) */ 160 004a DC01 movw r26,r24 161 004c CB01 movw r24,r22 52:main.c **** 53:main.c **** if (a & (1L << 23)) 163 .LM20: 164 004e A7FF sbrs r26,7 165 0050 03C0 rjmp .L11 54:main.c **** return 1; 167 .LM21: 168 0052 81E0 ldi r24,lo8(1) 169 0054 90E0 ldi r25,hi8(1) 55:main.c **** else 56:main.c **** return 2 ; 57:main.c **** 58:main.c **** } 171 .LM22: 172 0056 0895 ret 173 .L11: 175 .LM23: 176 0058 82E0 ldi r24,lo8(2) 177 005a 90E0 ldi r25,hi8(2) 179 .LM24: 180 005c 0895 ret 181 /* epilogue: frame size=0 */ 182 005e 0895 ret 183 /* epilogue end (size=1) */ 184 /* function foo3 size 11 (10) */ 186 .Lscope3: 189 .global main 191 main: 59:main.c **** 60:main.c **** int main( void ){ 193 .LM25: 194 /* prologue: frame size=2 */ 195 0060 C0E0 ldi r28,lo8(__stack - 2) 196 0062 D0E0 ldi r29,hi8(__stack - 2) 197 0064 DEBF out __SP_H__,r29 198 0066 CDBF out __SP_L__,r28 199 /* prologue end (size=4) */ 61:main.c **** 62:main.c **** volatile int a; 63:main.c **** 64:main.c **** a = foo0 ( a ); 201 .LM26: 202 .LBB2: 203 0068 8981 ldd r24,Y+1 204 006a 9A81 ldd r25,Y+2 205 006c 0E94 0000 call foo0 206 0070 8983 std Y+1,r24 207 
0072 9A83 std Y+2,r25 65:main.c **** a = foo1 ( a ); 209 .LM27: 210 0074 8981 ldd r24,Y+1 211 0076 9A81 ldd r25,Y+2 212 0078 0E94 0000 call foo1 213 007c 8983 std Y+1,r24 214 007e 9A83 std Y+2,r25 66:main.c **** a = foo2 ( a ); 216 .LM28: 217 0080 8981 ldd r24,Y+1 218 0082 9A81 ldd r25,Y+2 219 0084 AA27 clr r26 220 0086 97FD sbrc r25,7 221 0088 A095 com r26 222 008a BA2F mov r27,r26 223 008c BC01 movw r22,r24 224 008e CD01 movw r24,r26 225 0090 0E94 0000 call foo2 226 0094 8983 std Y+1,r24 227 0096 9A83 std Y+2,r25 67:main.c **** a = foo3 ( a ); 229 .LM29: 230 0098 8981 ldd r24,Y+1 231 009a 9A81 ldd r25,Y+2 232 009c AA27 clr r26 233 009e 97FD sbrc r25,7 234 00a0 A095 com r26 235 00a2 BA2F mov r27,r26 236 00a4 BC01 movw r22,r24 237 00a6 CD01 movw r24,r26 238 00a8 0E94 0000 call foo3 239 00ac 8983 std Y+1,r24 240 00ae 9A83 std Y+2,r25 68:main.c **** 69:main.c **** return 0; 70:main.c **** } 242 .LM30: 243 .LBE2: 244 00b0 80E0 ldi r24,lo8(0) 245 00b2 90E0 ldi r25,hi8(0) 246 /* epilogue: frame size=2 */ 247 00b4 0C94 0000 jmp exit 248 /* epilogue end (size=2) */ 249 /* function main size 44 (38) */ 254 .Lscope4: 256 .text 258 Letext: 259 /* File "main.c": code 92 = 0x005c ( 82), prologues 4, epilogues 6 */ DEFINED SYMBOLS *ABS*:00000000 main.c *ABS*:0000003f __SREG__ *ABS*:0000003e __SP_H__ *ABS*:0000003d __SP_L__ *ABS*:00000000 __tmp_reg__ *ABS*:00000001 __zero_reg__ /tmp/ccS6dcXA.s:40 .text:00000000 foo0 /tmp/ccS6dcXA.s:79 .text:0000001a foo1 /tmp/ccS6dcXA.s:118 .text:00000034 foo2 /tmp/ccS6dcXA.s:155 .text:0000004a foo3 /tmp/ccS6dcXA.s:191 .text:00000060 main /tmp/ccS6dcXA.s:258 .text:000000b8 Letext UNDEFINED SYMBOLS __do_copy_data __do_clear_bss __stack exit -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18424