https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108778

            Bug ID: 108778
           Summary: Missing optimization with direct register access
                    instead of structure mapping
           Product: gcc
           Version: 12.2.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: klaus.doldinger64 at googlemail dot com
  Target Milestone: ---

The following example uses two functionally identical ways to save the SREG of
an AVR µC, in this case an avr128da32.

Two different ways are used to access the SREG: one directly via the SREG macro
and one via a structure mapping (the Cpu_t structure is not actually included
in the avr headers, so a definition of the Cpu_t structure is given here).

If the SREG macro is used, the optimization of the variable g (moving its
load / store out of the loop) is missed (see assembler code below).

Using the structure mapping the load / store of g is correctly optimized out of
the loop.
In the SREG macro case unfortunately not!

#include <stdint.h>
#include <util/atomic.h>
#include <avr/interrupt.h>
#include <avr/cpufunc.h>

// Access x through a volatile-qualified lvalue of x's own type (via typeof),
// so each use is a volatile read or write of x. Usable on either side of '='.
#define ACCESS_ONCE(x) (*(volatile typeof(x)*)&(x))

// Register layout that the CPU macro overlays on address 0x0030.
// Only ccp, rampz, sp and sreg carry meaningful names; the rN members are
// placeholders that keep the named members at their offsets.
// NOTE(review): sreg only lands on the SREG address (63 in the listings below)
// if the target ABI inserts no padding before the 16-bit sp member - confirm.
typedef struct Cpu { // this is missing in avr headers
    volatile uint8_t r0;
    volatile uint8_t r1;
    volatile uint8_t r2;
    volatile uint8_t r3;
    volatile uint8_t ccp;
    volatile uint8_t r5;
    volatile uint8_t r6;
    volatile uint8_t r7;
    volatile uint8_t r8;
    volatile uint8_t r9;
    volatile uint8_t ra;
    volatile uint8_t rampz;
    volatile uint8_t rc;
    volatile uint16_t sp;
    volatile uint8_t sreg; // status register; saved and restored by count()
} Cpu_t;

// Lvalue of type Cpu_t located at the fixed address 0x0030; all members are
// volatile, so every CPU.<member> access is a volatile access.
#define CPU (*(Cpu_t *) 0x0030) 

// Set to 1 by the ISR once counter >= 100; accessed in func() via ACCESS_ONCE.
static uint8_t  flag;
// Incremented by the ISR; read in count() via ACCESS_ONCE.
static uint16_t counter;
// Accumulator updated in func(). Not volatile and always accessed directly,
// so its load / store is what the optimizer may move out of the loop.
static uint16_t g; 

// Returns a snapshot of counter read inside a save / cli / restore sequence:
//   1. save the status register,
//   2. execute "cli" (AVR: clear the global interrupt enable bit),
//   3. read counter through a volatile access,
//   4. write the saved status register back.
// The two commented-out lines are the alternative SREG-macro accesses that the
// report compares against the CPU.sreg structure mapping.
static inline uint16_t count() {
    const uint8_t save = CPU.sreg;
//    const uint8_t save = SREG; // suppresses optimization
    asm volatile("cli" : : :);
    const uint16_t t = ACCESS_ONCE(counter);
//    SREG = save; // suppresses optimization
    CPU.sreg = save;
    return t;
}
// Runs 20 iterations. Each iteration adds count() to g, then does a volatile
// read of flag and, if it is nonzero, a volatile write of 1 to flag.
// g itself is accessed non-volatile; keeping it in registers across the loop
// is the optimization this report is about.
static void func(void) {
    for(uint8_t i = 0; i < 20; i++) {
        g += count();
        if (ACCESS_ONCE(flag)) {
            ACCESS_ONCE(flag) = 1;
        }
    }
}

// Interrupt handler defined for USART0_RXC_vect: increments counter and sets
// flag to 1 once counter has reached 100.
// NOTE(review): _MemoryBarrier() presumably comes from <avr/cpufunc.h> and acts
// as a compiler-level memory barrier - confirm against avr-libc.
ISR(USART0_RXC_vect) {
    _MemoryBarrier();
    counter += 1;
    if (counter >= 100) {
        flag = 1;
    }
}

// Entry point: calls func() once. func() is static and has no other caller in
// this example; both listings below show its loop directly under "main:".
int main() {
    func();
}

The generated assembly should be (this is what the structure mapping produces):

main:
lds r24,g        ;  g_lsm.16, g
lds r25,g+1      ;  g_lsm.16, g
ldi r18,lo8(20)  ;  ivtmp_7,
ldi r19,lo8(1)   ;  tmp56,
.L5:
in r22,__SREG__  ;  save, MEM[(struct Cpu_t *)48B].sreg
cli
lds r20,counter  ;  t, MEM[(volatile uint16_t *)&counter]
lds r21,counter+1        ;  t, MEM[(volatile uint16_t *)&counter]
out __SREG__,r22         ;  MEM[(struct Cpu_t *)48B].sreg, save
add r24,r20      ;  g_lsm.16, t
adc r25,r21      ;  g_lsm.16, t
lds r20,flag     ;  _6, MEM[(volatile uint8_t *)&flag]
cpse r20,__zero_reg__    ;  _6
sts flag,r19     ;  MEM[(volatile uint8_t *)&flag], tmp56
.L4:
subi r18,lo8(-(-1))      ;  ivtmp_7,
cpse r18,__zero_reg__    ;  ivtmp_7,
rjmp .L5         ;
sts g,r24        ;  g, g_lsm.16
sts g+1,r25      ;  g, g_lsm.16
ldi r24,0                ;
ldi r25,0                ;
ret

But using the SREG macro, the load / store of g stays inside the loop:

main:
ldi r24,lo8(20)  ;  ivtmp_12,
ldi r25,lo8(1)   ;  tmp59,
.L5:
in r18,__SREG__  ;  save, MEM[(volatile uint8_t *)63B]
cli
lds r20,counter  ;  t, MEM[(volatile uint16_t *)&counter]
lds r21,counter+1        ;  t, MEM[(volatile uint16_t *)&counter]
out __SREG__,r18         ;  MEM[(struct Cpu_t *)48B].sreg, save
lds r18,g        ;  g, g
lds r19,g+1      ;  g, g
add r18,r20      ;  tmp53, t
adc r19,r21      ; , t
sts g,r18        ;  g, tmp53
sts g+1,r19      ;  g, tmp53
lds r18,flag     ;  _6, MEM[(volatile uint8_t *)&flag]
cpse r18,__zero_reg__    ;  _6
sts flag,r25     ;  MEM[(volatile uint8_t *)&flag], tmp59
.L4:
subi r24,lo8(-(-1))      ;  ivtmp_12,
cpse r24,__zero_reg__    ;  ivtmp_12,
rjmp .L5         ;
ldi r24,0                ;
ldi r25,0                ;
ret
  • [Bug c++/108778] New:... klaus.doldinger64 at googlemail dot com via Gcc-bugs

Reply via email to