http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58405
Bug ID: 58405
Summary: Unoptimal code generated for computed goto
Product: gcc
Version: 4.7.2
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c
Assignee: unassigned at gcc dot gnu.org
Reporter: semicontinuity at yandex dot ru
The following code
//
-----------------------------------------------------------------------------
/*
 * Reproducer: a resumable routine built on GNU computed goto.
 * Each call jumps to the label stored in `address`, and each return path
 * first stores the label at which the next call should resume.
 */
static void test(void) {
/* Set to 1 on every call, before the indirect jump. */
char flag = 1;
/* Static, so the resume target persists between calls; the first call enters at L0. */
static void* address = &&L0;
/* Jumps straight into the loop body below, bypassing the for(;;) header. */
goto *address;
for(;;) {
L0:
asm volatile("nop\t\n");
flag = 0;
/* The (void)&&L1 expression takes the label's address and discards it. */
L1: (void)&&L1;
/*
 * Reached by fall-through from L0, flag is 0: record L1 and return.
 * Reached by the indirect jump, flag is still 1: continue with the code below.
 */
if (flag==0) { address = &&L1; return; }
asm volatile("nop\t\n");
flag = 0;
L2: (void)&&L2;
/* Same pattern as at L1; when entered by the indirect jump the loop wraps around to L0. */
if (flag==0) { address = &&L2; return; }
}
}
/* Driver for the reproducer: calls test() in an endless loop. */
int main(void) {
for(;;) {
test();
}
/* Never reached: the loop above has no exit. */
return 0;
}
//
-----------------------------------------------------------------------------
Compiles to:
//
-----------------------------------------------------------------------------
00000052 <test>:
static void test(void) {
52: cf 93 push r28
54: df 93 push r29
56: 00 d0 rcall .+0 ; 0x58 <test+0x6>
58: cd b7 in r28, 0x3d ; 61
5a: de b7 in r29, 0x3e ; 62
5c: 80 91 60 00 lds r24, 0x0060
60: 90 91 61 00 lds r25, 0x0061
64: 8f 93 push r24
66: 9f 93 push r25
68: 08 95 ret
goto *address;
for(;;) {
L0:
asm volatile("nop\t\n");
6a: 00 00 nop
flag = 0;
L1: (void)&&L1;
if (flag==0) { address = &&L1; return; }
6c: 81 e4 ldi r24, 0x41 ; 65
6e: 90 e0 ldi r25, 0x00 ; 0
70: 90 93 61 00 sts 0x0061, r25
74: 80 93 60 00 sts 0x0060, r24
flag = 0;
L2: (void)&&L2;
if (flag==0) { address = &&L2; return; }
}
}
78: 0f 90 pop r0
7a: 0f 90 pop r0
7c: df 91 pop r29
7e: cf 91 pop r28
80: 08 95 ret
...
//
-----------------------------------------------------------------------------
This code works but is suboptimal:
- It copies the stack pointer into r28:r29, but r28:r29 is never used afterwards.
- Because it uses r28:r29, it has to save r28:r29 on the stack.
- It executes rcall .+0 and later removes 2 bytes from the stack (two pop r0) - this
is not necessary; both the rcall .+0 and the pop r0 instructions can be removed.
- Computed goto is implemented with 5 instructions, 7 words, with stack usage:
5c: 80 91 60 00 lds r24, 0x0060
60: 90 91 61 00 lds r25, 0x0061
64: 8f 93 push r24
66: 9f 93 push r25
68: 08 95 ret
Instead, it could be implemented with
lds r30, 0x0060
lds r31, 0x0061
ijmp
which takes 3 instructions, 5 words, and no additional memory access; it clobbers
r30:r31 instead of r24:r25, which is OK.
// ------------------------------------
Compiling C: main.c
avr-gcc -c -mmcu=atmega8535 -I. -gdwarf-2 -I../../bsp -I../../../.. -O3
-save-temps -v -funsigned-char -funsigned-bitfields -fpack-struct -fshort-enums
-Wall -Wstrict-prototypes -Wundef -Wa,-adhlns=.obj/main.lst -std=gnu99 -Wundef
-MD -MP -MF .dep/main.o.d main.c -o .obj/main.o
Using built-in specs.
COLLECT_GCC=avr-gcc
Target: avr
Configured with: ../../gcc.gnu.org/gcc-4_7-branch/configure --target=avr
--prefix=/local/gnu/install/gcc-4.7-mingw32 --host=i386-mingw32
--build=i686-linux-gnu --enable-languages=c,c++ --disable-nls --disable-shared
--with-dwarf2 --with-avrlibc=yes
Thread model: single
gcc version 4.7.2 (GCC)