https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55342
Jeffrey A. Law <law at redhat dot com> changed:
What |Removed |Added
----------------------------------------------------------------------------
Summary|[4.8/4.9/5 Regression] |[4.8/4.9 Regression]
|[LRA,x86] Non-optimal code |[LRA,x86] Non-optimal code
|for simple loop with LRA |for simple loop with LRA
--- Comment #15 from Jeffrey A. Law <law at redhat dot com> ---
I've examined the various testcases and the complaints about the poor register
allocation in this BZ with a trunk compiler.
I'm happy to report that I'm seeing none of the issues raised in this BZ.
For c#0 (store-back part of the loop):
.L5:
movl %edi, %ecx
addl $4, %esi
subl %ecx, %eax
subl %ecx, %edx
movzbl 3(%esp), %ecx
movb %al, -3(%esi)
movl %edi, %eax
movb %dl, -4(%esi)
subl %eax, %ecx
movb %cl, -2(%esi)
cmpl %ebp, %ebx
movb %al, -1(%esi)
je .L1
In c#2, the negation sequence is pointed out. We now get:
.L9:
movzbl (%ebx), %edx
movzbl 1(%ebx), %eax
addl $3, %ebx
movzbl -1(%ebx), %ecx
notl %edx
notl %eax
notl %ecx
cmpb %al, %dl
movb %cl, 3(%esp)
jb .L13
cmpb 3(%esp), %al
movzbl %al, %edi
jbe .L5
movzbl 3(%esp), %edi
jmp .L5
For the 1st modified testcase, compiled with -O2 -mcpu=atom -m32:
.L11:
movzbl %al, %edi
cmpb %al, %cl
cmovbe %ecx, %edi
.L4:
movl %edi, %eax
leal 4(%esi), %esi
subl %eax, %edx
subl %eax, %ecx
movb %dl, -3(%esi)
movb %cl, -4(%esi)
movzbl 3(%esp), %edx
subl %eax, %edx
movl %edi, %eax
movb %dl, -2(%esi)
cmpl %ebx, %ebp
movb %al, -1(%esi)
je .L1
.L7:
movzbl (%ebx), %ecx
leal 3(%ebx), %ebx
movzbl -2(%ebx), %edx
notl %ecx
movzbl -1(%ebx), %eax
notl %edx
notl %eax
cmpb %dl, %cl
movb %al, 3(%esp)
jb .L11
movzbl 3(%esp), %eax
movzbl %al, %edi
cmpb %al, %dl
cmovbe %edx, %edi
jmp .L4
Then in c#10 (t1 testcase):
.L11:
movzbl %al, %edi
cmpb %al, %cl
cmovbe %ecx, %edi
.L4:
movl %edi, %eax
leal 4(%esi), %esi
subl %eax, %edx
subl %eax, %ecx
movb %dl, -3(%esi)
movb %cl, -4(%esi)
movzbl 3(%esp), %edx
subl %eax, %edx
movl %edi, %eax
movb %dl, -2(%esi)
cmpl %ebp, %ebx
movb %al, -1(%esi)
je .L1
.L7:
movzbl (%ebx), %ecx
leal 3(%ebx), %ebx
movzbl -2(%ebx), %edx
notl %ecx
movzbl -1(%ebx), %eax
notl %edx
notl %eax
cmpb %dl, %cl
movb %al, 3(%esp)
jb .L11
movzbl 3(%esp), %eax
movzbl %al, %edi
cmpb %al, %dl
cmovbe %edx, %edi
jmp .L4
Across the board we're not seeing objects spilled into the stack. The code
looks quite tight to me.
Clearing the regression marker for GCC 5. I didn't do any bisection work to
identify what changes fixed things.