Denis Chertykov wrote:
> 2011/7/6 Georg-Johann Lay <a...@gjlay.de>:
>> For loading a 32-bit constant in a register, there is room for
>> improvement:
>>
>> * SF can be handled the same way as SI and therefore the patch
>>  adds a peep2 to produce a *reload_insf analogue of *reload_insi.
>>
>> * If the destination register overlaps NO_LD_REGS, values already
>>  loaded into some other byte can be reused by a simple MOV.
>>  This is helpful when moving values like, e.g. -2, -100 etc. because
>>  all high bytes are 0xff.
>>
>> * 0.0f can be directly moved to memory.
>>
>> * The mov insns contain "!d" constraint. I see no reason to make "d"
>>  expensive and discourage use of d-regs.  A "*d" to hide is better
>>  because it neither puts additional pressure on "d" nor
>>  discourages "d".
>>
> 
> I would like to have a real code examples.
> 
> Denis.

Hi Denis.

Attached you find a small C file and the asm that is generated by new
and old versions (-Os -mmcu=atmega88 -S -dp).

I took away some regs as potential clobbers (or -fno-peephole2) to
show the effect of high register pressure.  But even if a clobber was
available you can see that the new version is smarter in reusing
values, e.g. note the loading of -1L to r22-r25.

Johann
/* Bind _x, _y and _z to registers 26, 28 and 30 as global register
   variables.  This takes those registers away from the compiler so that
   the functions below are compiled under high register pressure.  */
register int _x asm ("26");
register int _y asm ("28");
register int _z asm ("30");

/* External callees, not defined in this file.  Only their argument lists
   matter: they determine where the constants below have to be loaded.  */
void ibar (long, long, long, long);
void fbar (long, long, float, float);

/* Test case: 32-bit integer constants with repeated bytes (-1 is
   0xffffffff, -2 is 0xfffffffe) plus 0xff008000.  X arrives as the first
   argument and is forwarded as the second argument of ibar.  */
void foo1 (long x)
{
    ibar (-1, x, -2, 0xff008000);
}

/* Test case: X is passed as both the first and the second argument, so
   only the third (65537 = 0x00010001) and the fourth (0xffff0408)
   argument are constants that must be loaded.  */
void foo2 (long x)
{
    ibar (x, x, 65537L, 0xffff0408);
}

/* Test case: same shape as foo2, but the two constants are floats
   (-3.0f and 2.0f), exercising the SFmode constant load.  */
void foo3 (long x)
{
    fbar (x, x, -3.0f, 2.0f);
}

        ; ---- Listing 1: output of the OLD compiler for oint.c ----
        .file   "oint.c"
; Symbolic names emitted by the compiler.  Of these, only __tmp_reg__
; (register number 0) is referenced by the code below, where it
; temporarily holds r31.
; NOTE(review): __SREG__/__SP_H__/__SP_L__ are presumably the I/O addresses
; of the status register and stack pointer, and __zero_reg__ the register
; that holds zero -- none of them is used in this listing.
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
        .global __do_copy_data
        .global __do_clear_bss
        .text
; foo1 (old code generation): ibar (-1, x, -2, 0xff008000).
; The 3rd and 4th arguments go to r14..r17 and r10..r13.  Each of them is
; built byte by byte through r31, which is parked in __tmp_reg__ meanwhile:
; 10 instructions per constant.
.global foo1
        .type   foo1, @function
foo1:
        ; Save r10..r17: they are overwritten with arguments below and
        ; restored in the epilogue.
        push r10         ;  16  *pushqi/1       [length = 1]
        push r11         ;  17  *pushqi/1       [length = 1]
        push r12         ;  18  *pushqi/1       [length = 1]
        push r13         ;  19  *pushqi/1       [length = 1]
        push r14         ;  20  *pushqi/1       [length = 1]
        push r15         ;  21  *pushqi/1       [length = 1]
        push r16         ;  22  *pushqi/1       [length = 1]
        push r17         ;  23  *pushqi/1       [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
        ; Move incoming x from r22..r25 to r18..r21 (2nd argument of ibar).
        movw r18,r22     ;  2   *movsi/1        [length = 2]
        movw r20,r24
        ; 1st argument = -1 in r22..r25: one ldi per byte (4 instructions).
        ldi r22,lo8(-1)  ;  7   *movsi/5        [length = 4]
        ldi r23,hi8(-1)
        ldi r24,hlo8(-1)
        ldi r25,hhi8(-1)
        ; 3rd argument = -2 in r14..r17: save r31, load every byte into r31
        ; and copy it to its destination, then restore r31.
        mov __tmp_reg__,r31      ;  9   *movsi/6        [length = 10]
        ldi r31,lo8(-2)
        mov r14,r31
        ldi r31,hi8(-2)
        mov r15,r31
        ldi r31,hlo8(-2)
        mov r16,r31
        ldi r31,hhi8(-2)
        mov r17,r31
        mov r31,__tmp_reg__
        ; 4th argument = -16744448 (0xff008000) in r10..r13, same scheme.
        mov __tmp_reg__,r31      ;  10  *movsi/6        [length = 10]
        ldi r31,lo8(-16744448)
        mov r10,r31
        ldi r31,hi8(-16744448)
        mov r11,r31
        ldi r31,hlo8(-16744448)
        mov r12,r31
        ldi r31,hhi8(-16744448)
        mov r13,r31
        mov r31,__tmp_reg__
        rcall ibar       ;  11  call_insn/3     [length = 1]
/* epilogue start */
        ; Restore r17..r10 in reverse push order.
        pop r17  ;  26  popqi   [length = 1]
        pop r16  ;  27  popqi   [length = 1]
        pop r15  ;  28  popqi   [length = 1]
        pop r14  ;  29  popqi   [length = 1]
        pop r13  ;  30  popqi   [length = 1]
        pop r12  ;  31  popqi   [length = 1]
        pop r11  ;  32  popqi   [length = 1]
        pop r10  ;  33  popqi   [length = 1]
        ret      ;  34  return_from_epilogue    [length = 1]
        .size   foo1, .-foo1
; foo2 (old code generation): ibar (x, x, 65537, 0xffff0408).
; x stays in r22..r25 as 1st argument and is copied to r18..r21 as 2nd.
; Both constants are again built byte by byte through r31 (10 instructions
; each).
.global foo2
        .type   foo2, @function
foo2:
        ; Save r10..r17: they are overwritten with arguments below and
        ; restored in the epilogue.
        push r10         ;  16  *pushqi/1       [length = 1]
        push r11         ;  17  *pushqi/1       [length = 1]
        push r12         ;  18  *pushqi/1       [length = 1]
        push r13         ;  19  *pushqi/1       [length = 1]
        push r14         ;  20  *pushqi/1       [length = 1]
        push r15         ;  21  *pushqi/1       [length = 1]
        push r16         ;  22  *pushqi/1       [length = 1]
        push r17         ;  23  *pushqi/1       [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
        ; Copy x from r22..r25 to r18..r21 (2nd argument of ibar).
        movw r18,r22     ;  2   *movsi/1        [length = 2]
        movw r20,r24
        ; 3rd argument = 65537 (0x00010001) in r14..r17: save r31, load
        ; every byte into r31 and copy it out, then restore r31.
        mov __tmp_reg__,r31      ;  9   *movsi/6        [length = 10]
        ldi r31,lo8(65537)
        mov r14,r31
        ldi r31,hi8(65537)
        mov r15,r31
        ldi r31,hlo8(65537)
        mov r16,r31
        ldi r31,hhi8(65537)
        mov r17,r31
        mov r31,__tmp_reg__
        ; 4th argument = -64504 (0xffff0408) in r10..r13, same scheme.
        mov __tmp_reg__,r31      ;  10  *movsi/6        [length = 10]
        ldi r31,lo8(-64504)
        mov r10,r31
        ldi r31,hi8(-64504)
        mov r11,r31
        ldi r31,hlo8(-64504)
        mov r12,r31
        ldi r31,hhi8(-64504)
        mov r13,r31
        mov r31,__tmp_reg__
        rcall ibar       ;  11  call_insn/3     [length = 1]
/* epilogue start */
        ; Restore r17..r10 in reverse push order.
        pop r17  ;  26  popqi   [length = 1]
        pop r16  ;  27  popqi   [length = 1]
        pop r15  ;  28  popqi   [length = 1]
        pop r14  ;  29  popqi   [length = 1]
        pop r13  ;  30  popqi   [length = 1]
        pop r12  ;  31  popqi   [length = 1]
        pop r11  ;  32  popqi   [length = 1]
        pop r10  ;  33  popqi   [length = 1]
        ret      ;  34  return_from_epilogue    [length = 1]
        .size   foo2, .-foo2
; foo3 (old code generation): fbar (x, x, -3.0f, 2.0f).
; The float constants appear as their 32-bit images 0xc0400000 and
; 0x40000000 and are built byte by byte through r31, exactly like the
; integer constants in foo1/foo2 (10 instructions each).
.global foo3
        .type   foo3, @function
foo3:
        ; Save r10..r17: they are overwritten with arguments below and
        ; restored in the epilogue.
        push r10         ;  16  *pushqi/1       [length = 1]
        push r11         ;  17  *pushqi/1       [length = 1]
        push r12         ;  18  *pushqi/1       [length = 1]
        push r13         ;  19  *pushqi/1       [length = 1]
        push r14         ;  20  *pushqi/1       [length = 1]
        push r15         ;  21  *pushqi/1       [length = 1]
        push r16         ;  22  *pushqi/1       [length = 1]
        push r17         ;  23  *pushqi/1       [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
        ; Copy x from r22..r25 to r18..r21 (2nd argument of fbar).
        movw r18,r22     ;  2   *movsi/1        [length = 2]
        movw r20,r24
        ; 3rd argument = -3.0f (0xc0400000) in r14..r17: save r31, load
        ; every byte into r31 and copy it out, then restore r31.
        mov __tmp_reg__,r31      ;  9   *movsf/6        [length = 10]
        ldi r31,lo8(0xc0400000)
        mov r14,r31
        ldi r31,hi8(0xc0400000)
        mov r15,r31
        ldi r31,hlo8(0xc0400000)
        mov r16,r31
        ldi r31,hhi8(0xc0400000)
        mov r17,r31
        mov r31,__tmp_reg__
        ; 4th argument = 2.0f (0x40000000) in r10..r13, same scheme.
        mov __tmp_reg__,r31      ;  10  *movsf/6        [length = 10]
        ldi r31,lo8(0x40000000)
        mov r10,r31
        ldi r31,hi8(0x40000000)
        mov r11,r31
        ldi r31,hlo8(0x40000000)
        mov r12,r31
        ldi r31,hhi8(0x40000000)
        mov r13,r31
        mov r31,__tmp_reg__
        rcall fbar       ;  11  call_insn/3     [length = 1]
/* epilogue start */
        ; Restore r17..r10 in reverse push order.
        pop r17  ;  26  popqi   [length = 1]
        pop r16  ;  27  popqi   [length = 1]
        pop r15  ;  28  popqi   [length = 1]
        pop r14  ;  29  popqi   [length = 1]
        pop r13  ;  30  popqi   [length = 1]
        pop r12  ;  31  popqi   [length = 1]
        pop r11  ;  32  popqi   [length = 1]
        pop r10  ;  33  popqi   [length = 1]
        ret      ;  34  return_from_epilogue    [length = 1]
        .size   foo3, .-foo3
        ; ---- Listing 2: output of the NEW (patched) compiler for oint.c ----
        .file   "oint.c"
; Symbolic names emitted by the compiler; none of them is referenced by
; the code in this listing.
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
        .text
; foo1 (new code generation): ibar (-1, x, -2, 0xff008000).
; Constants are now loaded in 3 + 6 + 7 instructions (per the length
; annotations) without touching r31 or __tmp_reg__: bytes already loaded
; are reused, and 0x00/0xff/single-bit bytes are made with clr, dec and bld.
.global foo1
        .type   foo1, @function
foo1:
        ; Save r10..r17: they are overwritten with arguments below and
        ; restored in the epilogue.
        push r10         ;  16  *pushqi/1       [length = 1]
        push r11         ;  17  *pushqi/1       [length = 1]
        push r12         ;  18  *pushqi/1       [length = 1]
        push r13         ;  19  *pushqi/1       [length = 1]
        push r14         ;  20  *pushqi/1       [length = 1]
        push r15         ;  21  *pushqi/1       [length = 1]
        push r16         ;  22  *pushqi/1       [length = 1]
        push r17         ;  23  *pushqi/1       [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
        ; Move incoming x from r22..r25 to r18..r21 (2nd argument of ibar).
        movw r18,r22     ;  2   *movsi/1        [length = 2]
        movw r20,r24
        ; 1st argument = -1 in r22..r25: load 0xff into r22 and r23, then
        ; duplicate that pair into r24/r25 with a single movw.
        ldi r22,lo8(-1)  ;  7   *movsi/5        [length = 3]
        ldi r23,lo8(-1)
        movw r24,r22
        ; 3rd argument = -2 (0xfffffffe) in r14..r17: r17 first serves as
        ; ldi scratch for the low byte 0xfe of r14; r15 becomes 0xff via
        ; clr/dec; r16 and r17 are then loaded with 0xff directly.
        ldi r17,lo8(-2)  ;  9   *movsi/6        [length = 6]
        mov r14,r17
        clr r15
        dec r15
        ldi r16,lo8(-1)
        ldi r17,lo8(-1)
        ; 4th argument = 0xff008000 in r10..r13, without any ldi:
        ; r10 = 0x00; T flag set, then bld puts bit 7 into the cleared r11
        ; (0x80); r12 = 0x00; r13 = 0xff via clr/dec.
        clr r10  ;  10  *movsi/6        [length = 7]
        set
        clr r11
        bld r11,7
        clr r12
        clr r13
        dec r13
        rcall ibar       ;  11  *call_insn/2    [length = 1]
/* epilogue start */
        ; Restore r17..r10 in reverse push order.
        pop r17  ;  26  popqi   [length = 1]
        pop r16  ;  27  popqi   [length = 1]
        pop r15  ;  28  popqi   [length = 1]
        pop r14  ;  29  popqi   [length = 1]
        pop r13  ;  30  popqi   [length = 1]
        pop r12  ;  31  popqi   [length = 1]
        pop r11  ;  32  popqi   [length = 1]
        pop r10  ;  33  popqi   [length = 1]
        ret      ;  34  return_from_epilogue    [length = 1]
        .size   foo1, .-foo1
; foo2 (new code generation): ibar (x, x, 65537, 0xffff0408).
; The two constants now take 4 + 8 instructions (per the length
; annotations) and neither r31 nor __tmp_reg__ is used.
.global foo2
        .type   foo2, @function
foo2:
        ; Save r10..r17: they are overwritten with arguments below and
        ; restored in the epilogue.
        push r10         ;  16  *pushqi/1       [length = 1]
        push r11         ;  17  *pushqi/1       [length = 1]
        push r12         ;  18  *pushqi/1       [length = 1]
        push r13         ;  19  *pushqi/1       [length = 1]
        push r14         ;  20  *pushqi/1       [length = 1]
        push r15         ;  21  *pushqi/1       [length = 1]
        push r16         ;  22  *pushqi/1       [length = 1]
        push r17         ;  23  *pushqi/1       [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
        ; Copy x from r22..r25 to r18..r21 (2nd argument of ibar).
        movw r18,r22     ;  2   *movsi/1        [length = 2]
        movw r20,r24
        ; 3rd argument = 65537 (0x00010001) in r14..r17: build 0x0001 in
        ; r15:r14 (r17 is the ldi scratch for the 0x01 byte), then copy the
        ; pair to r17:r16 with one movw.
        ldi r17,lo8(1)   ;  9   *movsi/6        [length = 4]
        mov r14,r17
        clr r15
        movw r16,r14
        ; 4th argument = 0xffff0408 in r10..r13: the T flag is set once;
        ; bld puts bit 3 into the cleared r10 (0x08) and bit 2 into the
        ; cleared r11 (0x04); r12 becomes 0xff via clr/dec and is copied
        ; to r13.
        set      ;  10  *movsi/6        [length = 8]
        clr r10
        bld r10,3
        clr r11
        bld r11,2
        clr r12
        dec r12
        mov r13,r12
        rcall ibar       ;  11  *call_insn/2    [length = 1]
/* epilogue start */
        ; Restore r17..r10 in reverse push order.
        pop r17  ;  26  popqi   [length = 1]
        pop r16  ;  27  popqi   [length = 1]
        pop r15  ;  28  popqi   [length = 1]
        pop r14  ;  29  popqi   [length = 1]
        pop r13  ;  30  popqi   [length = 1]
        pop r12  ;  31  popqi   [length = 1]
        pop r11  ;  32  popqi   [length = 1]
        pop r10  ;  33  popqi   [length = 1]
        ret      ;  34  return_from_epilogue    [length = 1]
        .size   foo2, .-foo2
; foo3 (new code generation): fbar (x, x, -3.0f, 2.0f).
; The float constants are loaded like integer constants, in 4 + 6
; instructions (per the length annotations), without r31 or __tmp_reg__.
.global foo3
        .type   foo3, @function
foo3:
        ; Save r10..r17: they are overwritten with arguments below and
        ; restored in the epilogue.
        push r10         ;  16  *pushqi/1       [length = 1]
        push r11         ;  17  *pushqi/1       [length = 1]
        push r12         ;  18  *pushqi/1       [length = 1]
        push r13         ;  19  *pushqi/1       [length = 1]
        push r14         ;  20  *pushqi/1       [length = 1]
        push r15         ;  21  *pushqi/1       [length = 1]
        push r16         ;  22  *pushqi/1       [length = 1]
        push r17         ;  23  *pushqi/1       [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
        ; Copy x from r22..r25 to r18..r21 (2nd argument of fbar).
        movw r18,r22     ;  2   *movsi/1        [length = 2]
        movw r20,r24
        ; 3rd argument = -3.0f (0xc0400000) in r14..r17: the two low bytes
        ; are cleared; r16 = 64 (0x40) and r17 = -64 (0xc0) are loaded
        ; directly with ldi.
        clr r14  ;  9   *movsf/6        [length = 4]
        clr r15
        ldi r16,lo8(64)
        ldi r17,lo8(-64)
        ; 4th argument = 2.0f (0x40000000) in r10..r13: all four bytes are
        ; cleared, then bit 6 of r13 is set from the T flag (0x40).
        clr r10  ;  10  *movsf/6        [length = 6]
        clr r11
        clr r12
        set
        clr r13
        bld r13,6
        rcall fbar       ;  11  *call_insn/2    [length = 1]
/* epilogue start */
        ; Restore r17..r10 in reverse push order.
        pop r17  ;  26  popqi   [length = 1]
        pop r16  ;  27  popqi   [length = 1]
        pop r15  ;  28  popqi   [length = 1]
        pop r14  ;  29  popqi   [length = 1]
        pop r13  ;  30  popqi   [length = 1]
        pop r12  ;  31  popqi   [length = 1]
        pop r11  ;  32  popqi   [length = 1]
        pop r10  ;  33  popqi   [length = 1]
        ret      ;  34  return_from_epilogue    [length = 1]
        .size   foo3, .-foo3
        ; Identification string of the compiler that produced this listing.
        .ident  "GCC: (GNU) 4.7.0 20110704 (experimental)"

Reply via email to