[Bug rtl-optimization/55342] [4.8/4.9 Regression] [LRA,x86] Non-optimal code for simple loop with LRA

law at redhat dot com Wed, 11 Feb 2015 23:20:12 -0800

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55342


Jeffrey A. Law <law at redhat dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|[4.8/4.9/5 Regression]      |[4.8/4.9 Regression]
                   |[LRA,x86] Non-optimal code  |[LRA,x86] Non-optimal code
                   |for simple loop with LRA    |for simple loop with LRA

--- Comment #15 from Jeffrey A. Law <law at redhat dot com> ---
I've examined the various testcases and the complaints about the poor register
allocation in this BZ with a trunk compiler.

I'm happy to report that I'm seeing none of the issues raised in this BZ.  

For c#0 (store-back part of the loop):
.L5:
        movl    %edi, %ecx
        addl    $4, %esi
        subl    %ecx, %eax
        subl    %ecx, %edx
        movzbl  3(%esp), %ecx
        movb    %al, -3(%esi)
        movl    %edi, %eax
        movb    %dl, -4(%esi)
        subl    %eax, %ecx
        movb    %cl, -2(%esi)
        cmpl    %ebp, %ebx
        movb    %al, -1(%esi)
        je      .L1

In c#2, the negation sequence is pointed out.  We now get:

.L9:
        movzbl  (%ebx), %edx
        movzbl  1(%ebx), %eax
        addl    $3, %ebx
        movzbl  -1(%ebx), %ecx
        notl    %edx
        notl    %eax
        notl    %ecx
        cmpb    %al, %dl
        movb    %cl, 3(%esp)
        jb      .L13
        cmpb    3(%esp), %al
        movzbl  %al, %edi
        jbe     .L5
        movzbl  3(%esp), %edi
        jmp     .L5

For the 1st modified testcase -O2 -mcpu=atom -m32:

.L11:
        movzbl  %al, %edi
        cmpb    %al, %cl
        cmovbe  %ecx, %edi
.L4:
        movl    %edi, %eax
        leal    4(%esi), %esi
        subl    %eax, %edx
        subl    %eax, %ecx
        movb    %dl, -3(%esi)
        movb    %cl, -4(%esi)
        movzbl  3(%esp), %edx
        subl    %eax, %edx
        movl    %edi, %eax
        movb    %dl, -2(%esi)
        cmpl    %ebx, %ebp
        movb    %al, -1(%esi)
        je      .L1
.L7:
        movzbl  (%ebx), %ecx
        leal    3(%ebx), %ebx
        movzbl  -2(%ebx), %edx
        notl    %ecx
        movzbl  -1(%ebx), %eax
        notl    %edx
        notl    %eax
        cmpb    %dl, %cl
        movb    %al, 3(%esp)
        jb      .L11
        movzbl  3(%esp), %eax
        movzbl  %al, %edi
        cmpb    %al, %dl
        cmovbe  %edx, %edi
        jmp     .L4

Then in c#10 (t1 testcase):

.L11:
        movzbl  %al, %edi
        cmpb    %al, %cl
        cmovbe  %ecx, %edi
.L4:
        movl    %edi, %eax
        leal    4(%esi), %esi
        subl    %eax, %edx
        subl    %eax, %ecx
        movb    %dl, -3(%esi)
        movb    %cl, -4(%esi)
        movzbl  3(%esp), %edx
        subl    %eax, %edx
        movl    %edi, %eax
        movb    %dl, -2(%esi)
        cmpl    %ebp, %ebx
        movb    %al, -1(%esi)
        je      .L1
.L7:
        movzbl  (%ebx), %ecx
        leal    3(%ebx), %ebx
        movzbl  -2(%ebx), %edx
        notl    %ecx
        movzbl  -1(%ebx), %eax
        notl    %edx
        notl    %eax
        cmpb    %dl, %cl
        movb    %al, 3(%esp)
        jb      .L11
        movzbl  3(%esp), %eax
        movzbl  %al, %edi
        cmpb    %al, %dl
        cmovbe  %edx, %edi
        jmp     .L4


Across the board we're not seeing objects spilled into the stack.  The code
looks quite tight to me.

Clearing the regression marker for GCC 5.  I didn't do any bisection work to
identify what changes fixed things.

[Bug rtl-optimization/55342] [4.8/4.9 Regression] [LRA,x86] Non-optimal code for simple loop with LRA

Reply via email to