Georg-Johann Lay wrote:
> Richard Henderson wrote:
>> On 08/01/2011 11:42 AM, Georg-Johann Lay wrote:
>>> Is there a specific reason not to define
>>> ACCUMULATE_OUTGOING_ARGS on AVR?
>> Yes. So that you can use PUSH. But as I said in PR49881,
>> you probably want to provide -maccumulate-outgoing-args.
>>
>> I have a follow-up patch to the last one in that PR...
>>
>>
>> r~
>
> PUSH is fine but what about POP?
>
> It's very expensive to pop several bytes, i.e. disabling IRQs, loading and
> storing SP and the like.
> Using store+displacement does not have this drawback and, as I wrote, some code
> degradations you explained
> in PR49881 are artifacts of PR46278, i.e. fake X addressing.
>
> Johann
>
Tried this test case:
#include <stdio.h>
/* Test case: three consecutive variadic calls, each passing the same
   format string and three int constants.  */
void foo ()
{
printf ("%d %d %d", 1, 2, 3);
printf ("%d %d %d", 3, 4, 5);
printf ("%d %d %d", 1, 4, 5);
}
Attached the output: The compiler happily pushes onto the stack
but pops only at the end of the function. So in a function with
many such calls that would eat up a great deal of RAM. Is that
what we want?
RETURN_POPS_ARGS cannot help here.
Johann
.file "printf.c"
; I/O addresses of the status register and the two stack-pointer halves;
; foo accesses them with in/out when it resets SP after the calls.
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
; Register numbers used symbolically in foo: __tmp_reg__ holds the saved
; SREG during the SP update, __zero_reg__ is pushed as the upper byte of
; each int argument (assumed to hold 0 per avr-gcc convention).
__tmp_reg__ = 0
__zero_reg__ = 1
; GNU C (GCC) version 4.7.0 20110803 (experimental) (avr)
; compiled by GNU C version 4.3.2 [gcc-4_3-branch revision 141291], GMP version 5.0.1, MPFR version 3.0.0-p8, MPC version 0.8.2
; GGC heuristics: --param ggc-min-expand=30 --param ggc-min-heapsize=4096
; options passed: printf.c -Os -fverbose-asm
; options enabled: -fauto-inc-dec -fbranch-count-reg -fcaller-saves
; -fcombine-stack-adjustments -fcommon -fcompare-elim -fcprop-registers
; -fcrossjumping -fcse-follow-jumps -fdebug-types-section -fdefer-pop
; -fdevirtualize -fearly-inlining -feliminate-unused-debug-types
; -fexpensive-optimizations -fforward-propagate -ffunction-cse -fgcse
; -fgcse-lm -fguess-branch-probability -fident -fif-conversion
; -fif-conversion2 -findirect-inlining -finline -finline-functions
; -finline-functions-called-once -finline-small-functions -fipa-cp
; -fipa-profile -fipa-pure-const -fipa-reference -fipa-sra
; -fira-share-save-slots -fira-share-spill-slots -fivopts
; -fkeep-static-consts -fleading-underscore -fmath-errno
; -fmerge-constants -fmerge-debug-strings -fmove-loop-invariants
; -fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls
; -fpartial-inlining -fpeephole -fpeephole2 -fprefetch-loop-arrays
; -freg-struct-return -fregmove -freorder-blocks -freorder-functions
; -frerun-cse-after-loop -fsched-critical-path-heuristic
; -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
; -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
; -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fshow-column
; -fsigned-zeros -fsplit-ivs-in-unroller -fsplit-wide-types
; -fstrict-aliasing -fstrict-overflow -fstrict-volatile-bitfields
; -fthread-jumps -ftoplevel-reorder -ftrapping-math -ftree-bit-ccp
; -ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copy-prop
; -ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse
; -ftree-forwprop -ftree-fre -ftree-loop-if-convert -ftree-loop-im
; -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
; -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc -ftree-scev-cprop
; -ftree-sink -ftree-slp-vectorize -ftree-sra -ftree-switch-conversion
; -ftree-ter -ftree-vect-loop-version -ftree-vrp -funit-at-a-time
; -fverbose-asm -fzero-initialized-in-bss
; Format string shared by all three printf calls in foo, placed in a
; mergeable string section (flags "aMS", entity size 1).
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "%d %d %d"
.text
; Export foo and mark it as a function symbol.
.global foo
.type foo, @function
; ---------------------------------------------------------------------
; void foo ()
;
; Compiler output for three consecutive printf (.LC0, a, b, c) calls.
; Every argument is passed on the stack with PUSH: each int is pushed as
; __zero_reg__ (upper byte) followed by its low byte, then the address
; of .LC0 is pushed as r29 (hi8) / r28 (lo8).  That is 8 bytes per call.
;
; No stack cleanup happens between the calls: the 3 * 8 = 24 argument
; bytes accumulate and are released by a single SP adjustment just
; before the epilogue.
;
; Saved/restored registers: r14, r15, r16, r17, r28, r29 (6 bytes).
; Values kept live across the calls:
;   r14 = 3, r15 = 5, r16 = 4, r17 = 1, r29:r28 = address of .LC0
; ---------------------------------------------------------------------
foo:
push r14 ; ; 66 pushqi1/1 [length = 1]
push r15 ; ; 67 pushqi1/1 [length = 1]
push r16 ; ; 68 pushqi1/1 [length = 1]
push r17 ; ; 69 pushqi1/1 [length = 1]
push r28 ; ; 70 pushqi1/1 [length = 1]
push r29 ; ; 71 pushqi1/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 6 */
.L__stack_usage = 6
; --- call 1: printf (.LC0, 1, 2, 3); arguments pushed last-to-first ---
push __zero_reg__ ; 5 pushqi1/2 [length = 1]
; The constant 3 is loaded into r24 and then copied to r14
; (NOTE(review): presumably because LDI only accepts r16..r31 -- confirm).
ldi r24,lo8(3) ; , ; 82 *reload_inqi [length = 2]
mov r14,r24 ; tmp42,
push r14 ; tmp42 ; 7 pushqi1/1 [length = 1]
push __zero_reg__ ; 8 pushqi1/2 [length = 1]
ldi r24,lo8(2) ; tmp43, ; 9 *movqi/2 [length = 1]
push r24 ; tmp43 ; 10 pushqi1/1 [length = 1]
push __zero_reg__ ; 11 pushqi1/2 [length = 1]
ldi r17,lo8(1) ; tmp44, ; 12 *movqi/2 [length = 1]
push r17 ; tmp44 ; 13 pushqi1/1 [length = 1]
; r29:r28 = address of the format string; reused by calls 2 and 3.
ldi r28,lo8(.LC0) ; tmp45, ; 14 *movhi/4 [length = 2]
ldi r29,hi8(.LC0) ; tmp45,
push r29 ; tmp24 ; 16 pushqi1/1 [length = 1]
push r28 ; tmp25 ; 19 pushqi1/1 [length = 1]
rcall printf ; ; 20 *call_value_insn/2 [length = 1]
; --- call 2: printf (.LC0, 3, 4, 5); call 1's 8 bytes are still on the stack ---
push __zero_reg__ ; 21 pushqi1/2 [length = 1]
ldi r25,lo8(5) ; , ; 83 *reload_inqi [length = 2]
mov r15,r25 ; tmp49,
push r15 ; tmp49 ; 23 pushqi1/1 [length = 1]
push __zero_reg__ ; 24 pushqi1/2 [length = 1]
ldi r16,lo8(4) ; tmp50, ; 25 *movqi/2 [length = 1]
push r16 ; tmp50 ; 26 pushqi1/1 [length = 1]
push __zero_reg__ ; 27 pushqi1/2 [length = 1]
push r14 ; tmp42 ; 29 pushqi1/1 [length = 1]
push r29 ; tmp24 ; 32 pushqi1/1 [length = 1]
push r28 ; tmp25 ; 35 pushqi1/1 [length = 1]
rcall printf ; ; 36 *call_value_insn/2 [length = 1]
; --- call 3: printf (.LC0, 1, 4, 5); 16 argument bytes already pending ---
push __zero_reg__ ; 37 pushqi1/2 [length = 1]
push r15 ; tmp49 ; 39 pushqi1/1 [length = 1]
push __zero_reg__ ; 40 pushqi1/2 [length = 1]
push r16 ; tmp50 ; 42 pushqi1/1 [length = 1]
push __zero_reg__ ; 43 pushqi1/2 [length = 1]
push r17 ; tmp44 ; 45 pushqi1/1 [length = 1]
push r29 ; tmp24 ; 48 pushqi1/1 [length = 1]
push r28 ; tmp25 ; 51 pushqi1/1 [length = 1]
rcall printf ; ; 52 *call_value_insn/2 [length = 1]
; --- deferred cleanup: SP += 24 drops the arguments of all three calls ---
in r24,__SP_L__ ; ; 64 *movhi_sp/2 [length = 2]
in r25,__SP_H__ ;
adiw r24,24 ; , ; 53 *addhi3/2 [length = 1]
; SP is written one byte at a time: SREG is saved, interrupts are
; disabled for the SP_H write, and SREG is restored immediately before
; the SP_L write.
; NOTE(review): this ordering presumably relies on the instruction after
; the SREG restore executing before any pending interrupt -- confirm
; against the AVR instruction set manual.
in __tmp_reg__,__SREG__ ; 65 *movhi_sp/1 [length = 5]
cli
out __SP_H__,r25 ;
out __SREG__,__tmp_reg__
out __SP_L__,r24 ;
/* epilogue start */
pop r29 ; ; 74 popqi [length = 1]
pop r28 ; ; 75 popqi [length = 1]
pop r17 ; ; 76 popqi [length = 1]
pop r16 ; ; 77 popqi [length = 1]
pop r15 ; ; 78 popqi [length = 1]
pop r14 ; ; 79 popqi [length = 1]
ret ; 80 return_from_epilogue [length = 1]
; Size of foo = distance from its label to this point.
.size foo, .-foo
.ident "GCC: (GNU) 4.7.0 20110803 (experimental)"
; NOTE(review): reference to __do_copy_data -- presumably pulls in the
; startup routine that copies initialized data to RAM; confirm against
; the avr-gcc / libgcc documentation.
.global __do_copy_data