https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117081

--- Comment #19 from Hongtao Liu <liuhongt at gcc dot gnu.org> ---
(In reply to H.J. Lu from comment #18)
> (In reply to Haochen Jiang from comment #17)
> >
> > As for reproducing it: the fix patch showed a regression not only on ADL
> > but also on all of Cascade Lake/Ice Lake/Sapphire Rapids, of ~2-4% for
> > 511.povray_r with o2_generic_v3.
> 
> Can you extract some testcases to show more PUSH and POP?

The original case was a bit more complicated, so I tried to mimic it by writing
a similar one.

/* Reduced testcase for GCC PR 117081 (extra PUSH/POP after r15-7400).
   `bool' is a keyword in C23; earlier standards need <stdbool.h>.  */
extern int bar (double* a, double* b, double* c, double* d, double* e);
extern bool foo2 (double* a, double b);

/* Call bar(); if it returns nonzero, then for each of d1 and d2 that lies
   strictly between 0.0 and 100.0, store a[i] + d * b[i] (i = 0..2) into c
   and pass c and d to foo2().  Returns 1 if any foo2() call returned true,
   otherwise 0.  d1 and d2 are presumably filled in by bar() through the
   pointers passed to it -- bar() is not visible here, TODO confirm.  */
int
foo (double* a, double* b, double *c)
{
    int rr = 0;
    double d1;
    double d2;
    /* The annotation below was originally written as bare text followed by
       ';', which is not valid C and would have detached the braced body
       from the condition.  It is a comment: the branch is rarely taken.  */
    if (bar (a, b, c, &d1, &d2)) /* mostly false */
    {
        if (d1 > 0.0 && d1 < 100.0)
        {
            c[0] = a[0] + d1 * b[0];
            c[1] = a[1] + d1 * b[1];
            c[2] = a[2] + d1 * b[2];
            if (foo2 (c, d1))
              rr = 1;
        }
        if (d2 > 0.0 && d2 < 100.0)
        {
            c[0] = a[0] + d2 * b[0];
            c[1] = a[1] + d2 * b[1];
            c[2] = a[2] + d2 * b[2];
            if (foo2 (c, d2))
              rr = 1;
        }
    }
    return rr;
}

Before r15-7400

foo:
.LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rdi, %rbp
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        movq    %rdx, %rbx
        subq    $40, %rsp
        .cfi_def_cfa_offset 64
        leaq    16(%rsp), %rcx
        leaq    24(%rsp), %r8
        movq    %rsi, 8(%rsp)
        call    bar
        movl    %eax, %edx
        testl   %eax, %eax
        je      .L1
        vmovsd  16(%rsp), %xmm0
        vxorpd  %xmm1, %xmm1, %xmm1
        movq    8(%rsp), %rsi
        vcomisd %xmm1, %xmm0
        jbe     .L18
        vmovsd  .LC1(%rip), %xmm1
        vcomisd %xmm0, %xmm1
        ja      .L21
.L18:
        xorl    %edx, %edx
.L3:
        vmovsd  24(%rsp), %xmm0
        vxorpd  %xmm1, %xmm1, %xmm1
        vcomisd %xmm1, %xmm0
        jbe     .L1
        vmovsd  .LC1(%rip), %xmm1
        vcomisd %xmm0, %xmm1
        ja      .L22
.L1:
        addq    $40, %rsp
        .cfi_remember_state
        .cfi_def_cfa_offset 24
        movl    %edx, %eax
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
        .p2align 4,,10
        .p2align 3



After r15-7400
foo:
.LFB0:
        .cfi_startproc
        pushq   %r13
        .cfi_def_cfa_offset 16
        .cfi_offset 13, -16
        movq    %rsi, %r13
        pushq   %r12
        .cfi_def_cfa_offset 24
        .cfi_offset 12, -24
        movq    %rdi, %r12
        pushq   %rbp
        .cfi_def_cfa_offset 32
        .cfi_offset 6, -32
        movq    %rdx, %rbp
        pushq   %rbx
        .cfi_def_cfa_offset 40
        .cfi_offset 3, -40
        subq    $24, %rsp
        .cfi_def_cfa_offset 64
        movq    %rsp, %rcx
        leaq    8(%rsp), %r8
        call    bar
        movl    %eax, %ebx
        testl   %eax, %eax
        je      .L1
        vmovsd  (%rsp), %xmm0
        vxorpd  %xmm1, %xmm1, %xmm1
        vcomisd %xmm1, %xmm0
        jbe     .L18
        vmovsd  .LC1(%rip), %xmm1
        vcomisd %xmm0, %xmm1
        ja      .L21
.L18:
        xorl    %ebx, %ebx
.L3:
        vmovsd  8(%rsp), %xmm0
        vxorpd  %xmm1, %xmm1, %xmm1
        vcomisd %xmm1, %xmm0
        jbe     .L1
        vmovsd  .LC1(%rip), %xmm1
        vcomisd %xmm0, %xmm1
        ja      .L22
.L1:
        addq    $24, %rsp
        .cfi_remember_state
        .cfi_def_cfa_offset 40
        movl    %ebx, %eax
        popq    %rbx
        .cfi_def_cfa_offset 32
        popq    %rbp
        .cfi_def_cfa_offset 24
        popq    %r12
        .cfi_def_cfa_offset 16
        popq    %r13
        .cfi_def_cfa_offset 8
        ret


With more usage of callee-saved registers, the callee needs to save them on
entry and restore them before exit, which is not needed if more caller-saved
registers are used.

Reply via email to