------- Comment #36 from guillaume dot melquiond at ens-lyon dot fr 2006-04-06
10:59 -------
The generated code is getting both better and worse. I just tested with GCC
4.1, and there is now a byte-by-byte (!) copy instead of memcpy. So not only
does GCC use superfluous copies, but it generates code such that these copies
are the slowest possible. On the other hand, there is only one copy left. So
this is better than GCC 4.0, but still worse than GCC 3.4.
# Compiler-generated i386 code (AT&T syntax): calls f with the address of a
# frame-local buffer, copies 4000 bytes from that buffer to the top of the
# stack one byte at a time, then calls g.
#
# Standard frame setup; %ebx is saved because it is used below.
pushl %ebp
movl %esp, %ebp
pushl %ebx
# Reserve 8004 bytes of frame space.
subl $8004, %esp
# %ebx = address of the buffer at -4004(%ebp).
leal -4004(%ebp), %ebx
# Pass the buffer address to f in the first stack slot.
# NOTE(review): presumably the hidden struct-return pointer -- confirm
# against the C test case in the bug report.
movl %ebx, (%esp)
call f
# %edx = 0: byte index for the copy loop.
xorl %edx, %edx
# NOTE(review): presumably re-reserves the 4-byte slot that f popped on
# return (callee-pops convention for a hidden return pointer) -- confirm.
subl $4, %esp
# Loop head: keep copying while %edx < 4000 (unsigned compare).
.L3:
cmpl $4000, %edx
jb .L2
# Copy finished; the 4000 bytes now sit at (%esp).
# NOTE(review): presumably g's by-value argument -- confirm.
call g
# Restore the saved %ebx, tear down the frame and return.
movl -4(%ebp), %ebx
leave
ret
# Align the loop body to 16 bytes, skipping the alignment if it would need
# more than 7 bytes of padding.
.p2align 4,,7
# Loop body: copy one byte from buffer[%edx] to (%esp + %edx), then advance.
# This is the byte-by-byte copy used in place of a memcpy call.
.L2:
movzbl (%ebx,%edx), %eax
movb %al, (%esp,%edx)
incl %edx
jmp .L3
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23372