------- Additional Comments From steven at gcc dot gnu dot org 2005-06-25 10:15 -------
Re. comment #25, as far as I can tell there are registers available in
that loop. To quote the loop from comment #12:
.L4:
movb (%esi), %al
movb %al, (%edx)
leal (%ecx,%edi), %eax
andl $15, %eax
incl %ecx
addb (%esi), %al
incl %edx
addl $17, %eax
cmpl %ecx, 12(%ebp)
movb %al, (%esi)
jne .L4
Checking off used registers in this loop:
%esi x
%edi x
%eax x
%ebx
%ecx x
%edx x
So %ebx at least is free (and, if I understand correctly, with
-fomit-frame-pointer %ebp is also free, right?). Maybe the allocator
thinks %ebx can't be used because it is the PIC register.
Here is what mainline today ("GCC: (GNU) 4.1.0 20050625 (experimental)")
gives me (x86-64 compiler with "-m32 -march=i686 -O3 -fPIC"):
.L4:
movzbl (%esi), %eax
movb %al, (%ecx)
incl %ecx
movzbl -13(%ebp), %eax
movzbl (%esi), %edx
incb -13(%ebp)
andb $15, %al
addb $17, %dl
addb %dl, %al
cmpl %edi, %ecx
movb %al, (%esi)
jne .L4
The .optimized tree dump looks like this:
<bb 0>:
len.23 = len - 1;
if (len.23 != 4294967295) goto <L6>; else goto <L2>;
<L6>:;
ivtmp.19 = (unsigned char) (signed char) (int) (ptr + 1B);
ptr.27 = ptr;
<L0>:;
MEM[base: ptr.27] = cleanse_ctr;
ptr.27 = ptr.27 + 1B;
cleanse_ctr = (unsigned char) (((signed char) ivtmp.19 & 15)
+ (signed char) cleanse_ctr + 17);
ivtmp.19 = ivtmp.19 + 1;
if (ptr.27 != (unsigned char *) (ptr + (void *) len.23 + 1B)) goto <L0>;
else goto <L2>;
<L2>:;
cleanse_ctr = (unsigned char) ((signed char) cleanse_ctr + 63);
return;
Note how the loop test is against ptr. Also, as far as I can tell, the
right-hand side of the test (i.e. "(ptr + (void *) len.23 + 1B)") is
loop-invariant and should have been moved out of the loop. The first two
statements of the dump are also just weird; it is probably cheaper on
almost any machine to do
len.23 = len;
if (len.23 != 0) goto <L6>; else goto <L2>;
<L6>:
len.23 = len.23 - 1;
(etc...)
In summary, we just produce crap code here ;-)
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923