https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108778
Bug ID: 108778 Summary: Missing optimization with direct register access instead of structure mapping Product: gcc Version: 12.2.1 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c++ Assignee: unassigned at gcc dot gnu.org Reporter: klaus.doldinger64 at googlemail dot com Target Milestone: --- The following example uses two functionally identical ways to save the SREG of an AVR µC, in this case an avr128da32. Two different ways are used to access the SREG: one directly via the SREG macro and one via a structure mapping (the Cpu_t structure is not actually included in the avr headers, so a definition of the Cpu_t structure is given here). If the SREG macro is used, the optimization of the variable g is missed: its load / store is not hoisted out of the loop (see the assembler code below). With the structure mapping, the load / store of g is correctly moved out of the loop; in the SREG macro case, unfortunately, it is not. #include <stdint.h> #include <util/atomic.h> #include <avr/interrupt.h> #include <avr/cpufunc.h> #define ACCESS_ONCE(x) (*(volatile typeof(x)*)&(x)) typedef struct Cpu { // this is missing in avr headers volatile uint8_t r0; volatile uint8_t r1; volatile uint8_t r2; volatile uint8_t r3; volatile uint8_t ccp; volatile uint8_t r5; volatile uint8_t r6; volatile uint8_t r7; volatile uint8_t r8; volatile uint8_t r9; volatile uint8_t ra; volatile uint8_t rampz; volatile uint8_t rc; volatile uint16_t sp; volatile uint8_t sreg; } Cpu_t; #define CPU (*(Cpu_t *) 0x0030) static uint8_t flag; static uint16_t counter; static uint16_t g; static inline uint16_t count() { const uint8_t save = CPU.sreg; // const uint8_t save = SREG; // suppresses optimization asm volatile("cli" : : :); const uint16_t t = ACCESS_ONCE(counter); // SREG = save; // suppresses optimization CPU.sreg = save; return t; } static void func(void) { for(uint8_t i = 0; i < 20; i++) { g += count(); if (ACCESS_ONCE(flag)) { ACCESS_ONCE(flag) = 1; } } } ISR(USART0_RXC_vect) { _MemoryBarrier(); counter += 1; if (counter >= 100) { flag 
= 1; } } int main() { func(); } the generated assembly should be: main: lds r24,g ; g_lsm.16, g lds r25,g+1 ; g_lsm.16, g ldi r18,lo8(20) ; ivtmp_7, ldi r19,lo8(1) ; tmp56, .L5: in r22,__SREG__ ; save, MEM[(struct Cpu_t *)48B].sreg cli lds r20,counter ; t, MEM[(volatile uint16_t *)&counter] lds r21,counter+1 ; t, MEM[(volatile uint16_t *)&counter] out __SREG__,r22 ; MEM[(struct Cpu_t *)48B].sreg, save add r24,r20 ; g_lsm.16, t adc r25,r21 ; g_lsm.16, t lds r20,flag ; _6, MEM[(volatile uint8_t *)&flag] cpse r20,__zero_reg__ ; _6 sts flag,r19 ; MEM[(volatile uint8_t *)&flag], tmp56 .L4: subi r18,lo8(-(-1)) ; ivtmp_7, cpse r18,__zero_reg__ ; ivtmp_7, rjmp .L5 ; sts g,r24 ; g, g_lsm.16 sts g+1,r25 ; g, g_lsm.16 ldi r24,0 ; ldi r25,0 ; ret but using SREG it gets: main: ldi r24,lo8(20) ; ivtmp_12, ldi r25,lo8(1) ; tmp59, .L5: in r18,__SREG__ ; save, MEM[(volatile uint8_t *)63B] cli lds r20,counter ; t, MEM[(volatile uint16_t *)&counter] lds r21,counter+1 ; t, MEM[(volatile uint16_t *)&counter] out __SREG__,r18 ; MEM[(struct Cpu_t *)48B].sreg, save lds r18,g ; g, g lds r19,g+1 ; g, g add r18,r20 ; tmp53, t adc r19,r21 ; , t sts g,r18 ; g, tmp53 sts g+1,r19 ; g, tmp53 lds r18,flag ; _6, MEM[(volatile uint8_t *)&flag] cpse r18,__zero_reg__ ; _6 sts flag,r25 ; MEM[(volatile uint8_t *)&flag], tmp59 .L4: subi r24,lo8(-(-1)) ; ivtmp_12, cpse r24,__zero_reg__ ; ivtmp_12, rjmp .L5 ; ldi r24,0 ; ldi r25,0 ; ret