Denis Chertykov wrote: > 2011/7/6 Georg-Johann Lay <a...@gjlay.de>: >> For loading a 32-bit constant in a register, there is room for >> improvement: >> >> * SF can be handled the same way as SI and therefore the patch >> adds a peep2 to produce a *reload_insf analogue to *reload_insi. >> >> * If the destination register overlaps NO_LD_REGS, values already >> loaded into some other byte can be reused by a simple MOV. >> This is helpful when moving values like, e.g. -2, -100 etc. because >> all high bytes are 0xff. >> >> * 0.0f can be directly moved to memory. >> >> * The mov insns contain "!d" constraint. I see no reason to make "d" >> expensive and discourage use of d-regs. A "*d" to hide is better >> because it neither puts additional pressure on "d" nor >> discourages "d". >> > > I would like to have some real code examples. > > Denis.
Hi Denis. Attached you will find a small C file and the asm that is generated by the new and old versions (-Os -mmcu=atmega88 -S -dp). I took away some regs as potential clobbers (or -fno-peephole2) to show the effect of high register pressure. But even if a clobber was available, you can see that the new version is smarter in reusing values, e.g. note the loading of -1L to r22-r25. Johann
/* Test case for loading 32-bit constants (long and float) into registers.
   Each fooN passes two compile-time constants as the 3rd and 4th argument
   of an external function, so the compiler has to materialize them.  */

/* Global register variables bound to the registers named "26", "28" and
   "30".  Per the accompanying message these take registers away as
   potential clobbers, to show code generation under high register
   pressure.  */
register int _x asm ("26");
register int _y asm ("28");
register int _z asm ("30");

/* External callees; only the prototypes are visible here.  */
void ibar (long, long, long, long);
void fbar (long, long, float, float);

/* Integer constants -1, -2 and 0xff008000.  */
void foo1 (long x)
{
  ibar (-1, x, -2, 0xff008000);
}

/* Integer constants 65537L and 0xffff0408; x is passed twice.  */
void foo2 (long x)
{
  ibar (x, x, 65537L, 0xffff0408);
}

/* Float constants -3.0f and 2.0f; x is passed twice.  */
void foo3 (long x)
{
  fbar (x, x, -3.0f, 2.0f);
}
; Compiler output for oint.c, first of the two listings in this message.
; NOTE(review): presumably the output of the *old* compiler version -- the
; 32-bit constants for r10..r17 are built one byte at a time through r31
; ([length = 10] per constant); confirm against the message text.
; The trailing "; N pattern [length = L]" comments are the compiler's own
; annotations and are kept verbatim.
	.file "oint.c"
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
	.global __do_copy_data
	.global __do_clear_bss
	.text

; foo1: C source is  ibar (-1, x, -2, 0xff008000);
; Saves r10..r17 on entry and restores them in reverse order before ret.
	.global foo1
	.type foo1, @function
foo1:
	push r10 ; 16 *pushqi/1 [length = 1]
	push r11 ; 17 *pushqi/1 [length = 1]
	push r12 ; 18 *pushqi/1 [length = 1]
	push r13 ; 19 *pushqi/1 [length = 1]
	push r14 ; 20 *pushqi/1 [length = 1]
	push r15 ; 21 *pushqi/1 [length = 1]
	push r16 ; 22 *pushqi/1 [length = 1]
	push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Copy the 32-bit value in r22..r25 to r18..r21.
	; NOTE(review): presumably the incoming argument x -- confirm against the avr-gcc ABI.
	movw r18,r22 ; 2 *movsi/1 [length = 2]
	movw r20,r24
	; r22..r25 = -1, one ldi per byte.
	ldi r22,lo8(-1) ; 7 *movsi/5 [length = 4]
	ldi r23,hi8(-1)
	ldi r24,hlo8(-1)
	ldi r25,hhi8(-1)
	; r14..r17 = -2: r31 is parked in __tmp_reg__, used as the ldi target
	; for every byte, then restored.
	mov __tmp_reg__,r31 ; 9 *movsi/6 [length = 10]
	ldi r31,lo8(-2)
	mov r14,r31
	ldi r31,hi8(-2)
	mov r15,r31
	ldi r31,hlo8(-2)
	mov r16,r31
	ldi r31,hhi8(-2)
	mov r17,r31
	mov r31,__tmp_reg__
	; r10..r13 = -16744448 (the source's 0xff008000 as a signed 32-bit
	; value), same r31 / __tmp_reg__ scheme.
	mov __tmp_reg__,r31 ; 10 *movsi/6 [length = 10]
	ldi r31,lo8(-16744448)
	mov r10,r31
	ldi r31,hi8(-16744448)
	mov r11,r31
	ldi r31,hlo8(-16744448)
	mov r12,r31
	ldi r31,hhi8(-16744448)
	mov r13,r31
	mov r31,__tmp_reg__
	rcall ibar ; 11 call_insn/3 [length = 1]
/* epilogue start */
	pop r17 ; 26 popqi [length = 1]
	pop r16 ; 27 popqi [length = 1]
	pop r15 ; 28 popqi [length = 1]
	pop r14 ; 29 popqi [length = 1]
	pop r13 ; 30 popqi [length = 1]
	pop r12 ; 31 popqi [length = 1]
	pop r11 ; 32 popqi [length = 1]
	pop r10 ; 33 popqi [length = 1]
	ret ; 34 return_from_epilogue [length = 1]
	.size foo1, .-foo1

; foo2: C source is  ibar (x, x, 65537L, 0xffff0408);
; Same save/restore of r10..r17 as foo1.
	.global foo2
	.type foo2, @function
foo2:
	push r10 ; 16 *pushqi/1 [length = 1]
	push r11 ; 17 *pushqi/1 [length = 1]
	push r12 ; 18 *pushqi/1 [length = 1]
	push r13 ; 19 *pushqi/1 [length = 1]
	push r14 ; 20 *pushqi/1 [length = 1]
	push r15 ; 21 *pushqi/1 [length = 1]
	push r16 ; 22 *pushqi/1 [length = 1]
	push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Copy r22..r25 to r18..r21; r22..r25 are left untouched here.
	movw r18,r22 ; 2 *movsi/1 [length = 2]
	movw r20,r24
	; r14..r17 = 65537, byte by byte through r31.
	mov __tmp_reg__,r31 ; 9 *movsi/6 [length = 10]
	ldi r31,lo8(65537)
	mov r14,r31
	ldi r31,hi8(65537)
	mov r15,r31
	ldi r31,hlo8(65537)
	mov r16,r31
	ldi r31,hhi8(65537)
	mov r17,r31
	mov r31,__tmp_reg__
	; r10..r13 = -64504 (the source's 0xffff0408 as a signed 32-bit value).
	mov __tmp_reg__,r31 ; 10 *movsi/6 [length = 10]
	ldi r31,lo8(-64504)
	mov r10,r31
	ldi r31,hi8(-64504)
	mov r11,r31
	ldi r31,hlo8(-64504)
	mov r12,r31
	ldi r31,hhi8(-64504)
	mov r13,r31
	mov r31,__tmp_reg__
	rcall ibar ; 11 call_insn/3 [length = 1]
/* epilogue start */
	pop r17 ; 26 popqi [length = 1]
	pop r16 ; 27 popqi [length = 1]
	pop r15 ; 28 popqi [length = 1]
	pop r14 ; 29 popqi [length = 1]
	pop r13 ; 30 popqi [length = 1]
	pop r12 ; 31 popqi [length = 1]
	pop r11 ; 32 popqi [length = 1]
	pop r10 ; 33 popqi [length = 1]
	ret ; 34 return_from_epilogue [length = 1]
	.size foo2, .-foo2

; foo3: C source is  fbar (x, x, -3.0f, 2.0f);
; The float constants appear as their 32-bit patterns 0xc0400000 and
; 0x40000000 and are loaded by the *movsf/6 pattern.
	.global foo3
	.type foo3, @function
foo3:
	push r10 ; 16 *pushqi/1 [length = 1]
	push r11 ; 17 *pushqi/1 [length = 1]
	push r12 ; 18 *pushqi/1 [length = 1]
	push r13 ; 19 *pushqi/1 [length = 1]
	push r14 ; 20 *pushqi/1 [length = 1]
	push r15 ; 21 *pushqi/1 [length = 1]
	push r16 ; 22 *pushqi/1 [length = 1]
	push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Copy r22..r25 to r18..r21; r22..r25 are left untouched here.
	movw r18,r22 ; 2 *movsi/1 [length = 2]
	movw r20,r24
	; r14..r17 = 0xc0400000, byte by byte through r31.
	mov __tmp_reg__,r31 ; 9 *movsf/6 [length = 10]
	ldi r31,lo8(0xc0400000)
	mov r14,r31
	ldi r31,hi8(0xc0400000)
	mov r15,r31
	ldi r31,hlo8(0xc0400000)
	mov r16,r31
	ldi r31,hhi8(0xc0400000)
	mov r17,r31
	mov r31,__tmp_reg__
	; r10..r13 = 0x40000000, same scheme.
	mov __tmp_reg__,r31 ; 10 *movsf/6 [length = 10]
	ldi r31,lo8(0x40000000)
	mov r10,r31
	ldi r31,hi8(0x40000000)
	mov r11,r31
	ldi r31,hlo8(0x40000000)
	mov r12,r31
	ldi r31,hhi8(0x40000000)
	mov r13,r31
	mov r31,__tmp_reg__
	rcall fbar ; 11 call_insn/3 [length = 1]
/* epilogue start */
	pop r17 ; 26 popqi [length = 1]
	pop r16 ; 27 popqi [length = 1]
	pop r15 ; 28 popqi [length = 1]
	pop r14 ; 29 popqi [length = 1]
	pop r13 ; 30 popqi [length = 1]
	pop r12 ; 31 popqi [length = 1]
	pop r11 ; 32 popqi [length = 1]
	pop r10 ; 33 popqi [length = 1]
	ret ; 34 return_from_epilogue [length = 1]
	.size foo3, .-foo3
; Compiler output for oint.c, second of the two listings in this message.
; NOTE(review): presumably the output of the *new* (patched) compiler -- the
; constant loads reuse bytes that are already in a register and no longer
; go through r31 / __tmp_reg__; confirm against the message text.
; The trailing "; N pattern [length = L]" comments are the compiler's own
; annotations and are kept verbatim.
	.file "oint.c"
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
	.text

; foo1: C source is  ibar (-1, x, -2, 0xff008000);
; Saves r10..r17 on entry and restores them in reverse order before ret.
	.global foo1
	.type foo1, @function
foo1:
	push r10 ; 16 *pushqi/1 [length = 1]
	push r11 ; 17 *pushqi/1 [length = 1]
	push r12 ; 18 *pushqi/1 [length = 1]
	push r13 ; 19 *pushqi/1 [length = 1]
	push r14 ; 20 *pushqi/1 [length = 1]
	push r15 ; 21 *pushqi/1 [length = 1]
	push r16 ; 22 *pushqi/1 [length = 1]
	push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Copy the 32-bit value in r22..r25 to r18..r21.
	; NOTE(review): presumably the incoming argument x -- confirm against the avr-gcc ABI.
	movw r18,r22 ; 2 *movsi/1 [length = 2]
	movw r20,r24
	; r22..r25 = -1: load the low word, then copy it to the high word.
	ldi r22,lo8(-1) ; 7 *movsi/5 [length = 3]
	ldi r23,lo8(-1)
	movw r24,r22
	; r14..r17 = -2: r17 briefly holds 0xfe to feed r14, clr+dec makes
	; 0xff in r15, and r16/r17 are loaded with ldi directly.
	ldi r17,lo8(-2) ; 9 *movsi/6 [length = 6]
	mov r14,r17
	clr r15
	dec r15
	ldi r16,lo8(-1)
	ldi r17,lo8(-1)
	; r10..r13 = 0xff008000: set raises the T flag, bld copies it into
	; bit 7 of r11 (0x80); clr+dec makes 0xff in r13.
	clr r10 ; 10 *movsi/6 [length = 7]
	set
	clr r11
	bld r11,7
	clr r12
	clr r13
	dec r13
	rcall ibar ; 11 *call_insn/2 [length = 1]
/* epilogue start */
	pop r17 ; 26 popqi [length = 1]
	pop r16 ; 27 popqi [length = 1]
	pop r15 ; 28 popqi [length = 1]
	pop r14 ; 29 popqi [length = 1]
	pop r13 ; 30 popqi [length = 1]
	pop r12 ; 31 popqi [length = 1]
	pop r11 ; 32 popqi [length = 1]
	pop r10 ; 33 popqi [length = 1]
	ret ; 34 return_from_epilogue [length = 1]
	.size foo1, .-foo1

; foo2: C source is  ibar (x, x, 65537L, 0xffff0408);
; Same save/restore of r10..r17 as foo1.
	.global foo2
	.type foo2, @function
foo2:
	push r10 ; 16 *pushqi/1 [length = 1]
	push r11 ; 17 *pushqi/1 [length = 1]
	push r12 ; 18 *pushqi/1 [length = 1]
	push r13 ; 19 *pushqi/1 [length = 1]
	push r14 ; 20 *pushqi/1 [length = 1]
	push r15 ; 21 *pushqi/1 [length = 1]
	push r16 ; 22 *pushqi/1 [length = 1]
	push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Copy r22..r25 to r18..r21; r22..r25 are left untouched here.
	movw r18,r22 ; 2 *movsi/1 [length = 2]
	movw r20,r24
	; r14..r17 = 65537 (0x00010001): build 0x0001 in r15:r14, then copy
	; that word to r17:r16 with movw.
	ldi r17,lo8(1) ; 9 *movsi/6 [length = 4]
	mov r14,r17
	clr r15
	movw r16,r14
	; r10..r13 = 0xffff0408: T flag set once, then bld puts bit 3 into
	; r10 (0x08) and bit 2 into r11 (0x04); 0xff is made in r12 with
	; clr+dec and copied to r13.
	set ; 10 *movsi/6 [length = 8]
	clr r10
	bld r10,3
	clr r11
	bld r11,2
	clr r12
	dec r12
	mov r13,r12
	rcall ibar ; 11 *call_insn/2 [length = 1]
/* epilogue start */
	pop r17 ; 26 popqi [length = 1]
	pop r16 ; 27 popqi [length = 1]
	pop r15 ; 28 popqi [length = 1]
	pop r14 ; 29 popqi [length = 1]
	pop r13 ; 30 popqi [length = 1]
	pop r12 ; 31 popqi [length = 1]
	pop r11 ; 32 popqi [length = 1]
	pop r10 ; 33 popqi [length = 1]
	ret ; 34 return_from_epilogue [length = 1]
	.size foo2, .-foo2

; foo3: C source is  fbar (x, x, -3.0f, 2.0f);
; Same save/restore of r10..r17 as foo1.
	.global foo3
	.type foo3, @function
foo3:
	push r10 ; 16 *pushqi/1 [length = 1]
	push r11 ; 17 *pushqi/1 [length = 1]
	push r12 ; 18 *pushqi/1 [length = 1]
	push r13 ; 19 *pushqi/1 [length = 1]
	push r14 ; 20 *pushqi/1 [length = 1]
	push r15 ; 21 *pushqi/1 [length = 1]
	push r16 ; 22 *pushqi/1 [length = 1]
	push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
	; Copy r22..r25 to r18..r21; r22..r25 are left untouched here.
	movw r18,r22 ; 2 *movsi/1 [length = 2]
	movw r20,r24
	; r14..r17 = 0xc0400000: two zero bytes via clr, 0x40 and 0xc0 via ldi.
	clr r14 ; 9 *movsf/6 [length = 4]
	clr r15
	ldi r16,lo8(64)
	ldi r17,lo8(-64)
	; r10..r13 = 0x40000000: three zero bytes, then T flag -> bit 6 of r13.
	clr r10 ; 10 *movsf/6 [length = 6]
	clr r11
	clr r12
	set
	clr r13
	bld r13,6
	rcall fbar ; 11 *call_insn/2 [length = 1]
/* epilogue start */
	pop r17 ; 26 popqi [length = 1]
	pop r16 ; 27 popqi [length = 1]
	pop r15 ; 28 popqi [length = 1]
	pop r14 ; 29 popqi [length = 1]
	pop r13 ; 30 popqi [length = 1]
	pop r12 ; 31 popqi [length = 1]
	pop r11 ; 32 popqi [length = 1]
	pop r10 ; 33 popqi [length = 1]
	ret ; 34 return_from_epilogue [length = 1]
	.size foo3, .-foo3
	.ident "GCC: (GNU) 4.7.0 20110704 (experimental)"