https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118591

--- Comment #3 from Georg-Johann Lay <gjl at gcc dot gnu.org> ---
Created attachment 60238
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=60238&action=edit
C99 test case that fails on ordinary AVRs (not avrtiny)

This test case fails on ordinary AVRs like -mmcu=atmega128.  It passes more
arguments and clobbers some regs so that b lives in a stack slot:

/* Checker for the test case: aborts unless B equals the number of
   previous calls, i.e. the calls must see b = 0, 1, 2, ... in order.
   A1 and A2 are never read in the body; per the report, the extra
   arguments are part of what makes the bug show up.  */
__attribute__((noipa))
void func2 (long long a1, long long a2, long b)
{
  /* Call counter; post-incremented on every call.  */
  static unsigned char count = 0;
  if (b != count++)
    __builtin_abort ();
}

/* Calls func2 with b = 0 .. 4; func2 aborts when it receives a value
   other than the expected one.  Returns 0 if all five calls pass.  */
int main (void)
{
  for (long b = 0; b < 5; ++b)
    {
      /* Empty asm that only declares r5 and r9 as clobbered.  According
         to the report, clobbering these regs is what makes b live in a
         stack slot.  */
      __asm ("" ::: "r5", "r9");
      func2 (0, 0, b);
    }

  return 0;
}

$ avr-gcc -mmcu=atmega128 -dumpbase "" -save-temps -dp -Os -mlra ...

The argument preparation for b reads:

.L4:
        ldd r24,Y+4      ;  63  [c=4 l=1]  movqi_insn/3
        push r24         ;  9   [c=4 l=1]  pushqi1/0
        ldd r24,Y+4      ;  64  [c=4 l=1]  movqi_insn/3
        push r24         ;  11  [c=4 l=1]  pushqi1/0
        ldd r24,Y+4      ;  65  [c=4 l=1]  movqi_insn/3
        push r24         ;  13  [c=4 l=1]  pushqi1/0
        ldd r24,Y+4      ;  66  [c=4 l=1]  movqi_insn/3
        push r24         ;  15  [c=4 l=1]  pushqi1/0

What's also strange is that the code has a frame size of 4 and consequently, b
lives in Y[1]:SI.  However, after the call of func2, the code accesses Y[5]:SI
that was never initialized:

/* prologue: function */
/* frame size = 4 */
/* stack size = 4 */
.L__stack_usage = 4
# long b = 0;
        std Y+1,__zero_reg__     ;  115 [c=4 l=1]  movqi_insn/2
        std Y+2,__zero_reg__     ;  116 [c=4 l=1]  movqi_insn/2
        std Y+3,__zero_reg__     ;  117 [c=4 l=1]  movqi_insn/2
        std Y+4,__zero_reg__     ;  118 [c=4 l=1]  movqi_insn/2
.L4:
# Crippled passing of b.  Seems like LRA thinks that PUSH changes Y.
        ldd r24,Y+4      ;  63  [c=4 l=1]  movqi_insn/3
        push r24         ;  9   [c=4 l=1]  pushqi1/0
        ldd r24,Y+4      ;  64  [c=4 l=1]  movqi_insn/3
        push r24         ;  11  [c=4 l=1]  pushqi1/0
        ldd r24,Y+4      ;  65  [c=4 l=1]  movqi_insn/3
        push r24         ;  13  [c=4 l=1]  pushqi1/0
        ldd r24,Y+4      ;  66  [c=4 l=1]  movqi_insn/3
        push r24         ;  15  [c=4 l=1]  pushqi1/0
# Prepare long long args a1 and a2.
        ...
        rcall func2      ;  32  [c=0 l=1]  call_insn/1
# Following access is wrong.  As it seems, regalloc
# thinks that the PUSHes above changed the frame pointer?
        ldd r24,Y+5      ;  102 [c=4 l=1]  movqi_insn/3
        ldd r25,Y+6      ;  103 [c=4 l=1]  movqi_insn/3
        ldd r26,Y+7      ;  104 [c=4 l=1]  movqi_insn/3
        ldd r27,Y+8      ;  105 [c=4 l=1]  movqi_insn/3
        adiw r24,1       ;  84  [c=16 l=3]  *addsi3/1
        adc r26,__zero_reg__
        adc r27,__zero_reg__
        std Y+5,r24      ;  106 [c=4 l=1]  movqi_insn/2
        std Y+6,r25      ;  107 [c=4 l=1]  movqi_insn/2
        std Y+7,r26      ;  108 [c=4 l=1]  movqi_insn/2
        std Y+8,r27      ;  109 [c=4 l=1]  movqi_insn/2
         ; SP += 4       ;  35  [c=8 l=4]  *addhi3_sp
        pop __tmp_reg__
        pop __tmp_reg__
        pop __tmp_reg__
        pop __tmp_reg__
# After popping b, loading b is from the correct offset:
        ldd r24,Y+1      ;  110 [c=4 l=1]  movqi_insn/3
        ldd r25,Y+2      ;  111 [c=4 l=1]  movqi_insn/3
        ldd r26,Y+3      ;  112 [c=4 l=1]  movqi_insn/3
        ldd r27,Y+4      ;  113 [c=4 l=1]  movqi_insn/3
        sbiw r26,0       ;  87  [c=28 l=3]  *cmpsi/2
        sbci r25,hi8(5)
        sbci r24,lo8(5)
        brne .L4         ;  88  [c=4 l=1]  branch
...

Reply via email to