https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96252

--- Comment #1 from Will Wray <wjwray at gmail dot com> ---
Here's the code, compiler invocation and codegen output.

The longer codegen (for cmp_y) expands memcpy inline to copy the std::array by-value arguments.

Compiling with -fno-inline shows the compiler calling the first function from the
second; when it does so, it has to copy the arguments, as both functions take them by value.

/** compare_differing_codegen.cpp ******************/

#include <algorithm>
#include <array>

using cmp = std::array<int,64>;

// Lexicographic "less than" over two arrays of 64 ints.
// Both arrays are taken by value; begin/end are called unqualified on the
// std::array parameters. Returns the result of std::lexicographical_compare
// over [begin(l), end(l)) and [begin(r), end(r)).
bool cmp_x(cmp l, cmp r) noexcept {
    return std::lexicographical_compare(begin(l),end(l)
                                       ,begin(r),end(r));
}
// Same signature and same body as cmp_x; only the function name differs.
// Takes both arrays by value and returns the result of
// std::lexicographical_compare over [begin(l), end(l)) and [begin(r), end(r)).
bool cmp_y(cmp l, cmp r) noexcept {
    return std::lexicographical_compare(begin(l),end(l)
                                       ,begin(r),end(r));
}

/** compiler invocation **************************/

> g++ --version
  g++ (GCC) 10.1.1 20200507 (Red Hat 10.1.1-1)
> g++ -std=c++11 -O2 compare_differing_codegen.cpp -S
> cat compare_differing_codegen.s

        .file   "compare_differing_codegen.cpp"
        .text
        # ---------------------------------------------------------------------
        # bool cmp_x(std::array<int,64> l, std::array<int,64> r) noexcept
        # Compiler output (g++ -std=c++11 -O2), AT&T syntax.
        # Both 256-byte arguments are read directly from the stack at entry:
        #   l = 8(%rsp) .. 263(%rsp),  r = 264(%rsp) .. 519(%rsp)
        # Result is returned in %al (1 = l compares less than r, 0 otherwise).
        # Registers written: %rax, %rcx, %rdx, %esi, flags. %rsp is not adjusted.
        # ---------------------------------------------------------------------
        .p2align 4
        .globl  _Z5cmp_xSt5arrayIiLm64EES0_
        .type   _Z5cmp_xSt5arrayIiLm64EES0_, @function
_Z5cmp_xSt5arrayIiLm64EES0_:
.LFB890:
        .cfi_startproc
        # %rcx = one past the last element of l (same address as the first element of r).
        leaq    264(%rsp), %rcx
        # %rax = cursor into l, starting at its first element.
        leaq    8(%rsp), %rax
        # %rdx = cursor into r, starting at its first element.
        movq    %rcx, %rdx
        .p2align 4,,10
        .p2align 3
.L4:
        # Signed 32-bit compare of the current l element against the current r element.
        movl    (%rdx), %esi
        cmpl    %esi, (%rax)
        # l element is smaller: return 1.
        jl      .L12
        # l element is larger: return 0.
        jg      .L7
        # Elements are equal: step both cursors by one int and repeat until
        # the l cursor reaches the end of l.
        addq    $4, %rax
        addq    $4, %rdx
        cmpq    %rcx, %rax
        jne     .L4
        # Every element of l matched: the result is (r cursor != end of r).
        # The r cursor started at 264(%rsp) and advanced 64 * 4 bytes, so it is
        # at 520(%rsp) here and this path produces 0.
        leaq    520(%rsp), %rax
        cmpq    %rax, %rdx
        setne   %al
        ret
        .p2align 4,,10
        .p2align 3
.L12:
        # Early exit: a differing element of l was smaller.
        movl    $1, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L7:
        # Early exit: a differing element of l was larger.
        xorl    %eax, %eax
        ret
        .cfi_endproc
.LFE890:
        .size   _Z5cmp_xSt5arrayIiLm64EES0_, .-_Z5cmp_xSt5arrayIiLm64EES0_
        # ---------------------------------------------------------------------
        # bool cmp_y(std::array<int,64> l, std::array<int,64> r) noexcept
        # Compiler output (g++ -std=c++11 -O2), AT&T syntax.
        # Unlike the cmp_x codegen, this version first copies both 256-byte
        # arguments into local storage, 16 bytes at a time through %xmm0-%xmm7,
        # and only then runs the compare loop on the copies.
        #
        # Layout after `subq $400, %rsp`:
        #   incoming l : 408(%rsp) .. 663(%rsp)
        #   incoming r : 664(%rsp) .. 919(%rsp)
        #   copy of l  : -120(%rsp) .. 135(%rsp)   (first 120 bytes are below %rsp)
        #   copy of r  :  136(%rsp) .. 391(%rsp)
        # NOTE(review): the stores below %rsp presumably rely on the SysV AMD64
        # red zone; no call instruction appears in this function.
        #
        # Result is returned in %eax (1 = l compares less than r, 0 otherwise).
        # Registers written: %rax, %rcx, %rdx, %rsi, %xmm0-%xmm7, flags.
        # ---------------------------------------------------------------------
        .p2align 4
        .globl  _Z5cmp_ySt5arrayIiLm64EES0_
        .type   _Z5cmp_ySt5arrayIiLm64EES0_, @function
_Z5cmp_ySt5arrayIiLm64EES0_:
.LFB909:
        .cfi_startproc
        # Reserve 400 bytes of local storage; the CFA is now 408 bytes above %rsp.
        subq    $400, %rsp
        .cfi_def_cfa_offset 408
        # --- Copy l: sixteen 16-byte loads from 408..648(%rsp), stored to
        # --- -120..120(%rsp). Loads and stores are interleaved by the scheduler.
        # %rdx (set below) = cursor into the copy of l; %rax = cursor into the copy of r.
        movdqu  408(%rsp), %xmm0
        leaq    -120(%rsp), %rdx
        movdqu  424(%rsp), %xmm1
        leaq    136(%rsp), %rax
        movdqu  440(%rsp), %xmm2
        movdqu  456(%rsp), %xmm3
        movdqu  472(%rsp), %xmm4
        movups  %xmm0, -120(%rsp)
        movdqu  488(%rsp), %xmm5
        movdqu  504(%rsp), %xmm6
        movups  %xmm1, -104(%rsp)
        movdqu  520(%rsp), %xmm7
        movdqu  536(%rsp), %xmm0
        movups  %xmm2, -88(%rsp)
        movdqu  552(%rsp), %xmm1
        movdqu  568(%rsp), %xmm2
        movups  %xmm3, -72(%rsp)
        movdqu  584(%rsp), %xmm3
        movups  %xmm4, -56(%rsp)
        movdqu  600(%rsp), %xmm4
        movups  %xmm5, -40(%rsp)
        movdqu  616(%rsp), %xmm5
        movups  %xmm6, -24(%rsp)
        movdqu  632(%rsp), %xmm6
        movups  %xmm7, -8(%rsp)
        movdqu  648(%rsp), %xmm7
        movups  %xmm0, 8(%rsp)
        movups  %xmm1, 24(%rsp)
        movups  %xmm2, 40(%rsp)
        movups  %xmm3, 56(%rsp)
        movups  %xmm4, 72(%rsp)
        movups  %xmm5, 88(%rsp)
        movups  %xmm6, 104(%rsp)
        movups  %xmm7, 120(%rsp)
        # --- Copy r: sixteen 16-byte loads from 664..904(%rsp), stored to
        # --- 136..376(%rsp).
        movdqu  664(%rsp), %xmm0
        movdqu  680(%rsp), %xmm1
        movdqu  696(%rsp), %xmm2
        movdqu  712(%rsp), %xmm3
        movdqu  728(%rsp), %xmm4
        movdqu  744(%rsp), %xmm5
        movups  %xmm0, 136(%rsp)
        movdqu  760(%rsp), %xmm6
        movups  %xmm1, 152(%rsp)
        movdqu  776(%rsp), %xmm7
        movdqu  792(%rsp), %xmm0
        movups  %xmm2, 168(%rsp)
        movdqu  808(%rsp), %xmm1
        movdqu  824(%rsp), %xmm2
        movups  %xmm3, 184(%rsp)
        movdqu  840(%rsp), %xmm3
        movups  %xmm4, 200(%rsp)
        movdqu  856(%rsp), %xmm4
        movups  %xmm5, 216(%rsp)
        movdqu  872(%rsp), %xmm5
        movups  %xmm6, 232(%rsp)
        movdqu  888(%rsp), %xmm6
        movups  %xmm7, 248(%rsp)
        movdqu  904(%rsp), %xmm7
        movups  %xmm0, 264(%rsp)
        movups  %xmm1, 280(%rsp)
        movups  %xmm2, 296(%rsp)
        movups  %xmm3, 312(%rsp)
        movups  %xmm4, 328(%rsp)
        movups  %xmm5, 344(%rsp)
        movups  %xmm6, 360(%rsp)
        movups  %xmm7, 376(%rsp)
        .p2align 4,,10
        .p2align 3
.L15:
        # Signed 32-bit compare of the current l-copy element against the
        # current r-copy element.
        movl    (%rax), %ecx
        cmpl    %ecx, (%rdx)
        # l element is smaller: return 1.
        jl      .L16
        # l element is larger: return 0.
        jg      .L17
        # Elements are equal: step both cursors by one int. The loop ends when
        # the r cursor reaches 392(%rsp), one past the copy of r; that bound is
        # recomputed into %rsi on every iteration.
        addq    $4, %rax
        leaq    392(%rsp), %rsi
        addq    $4, %rdx
        cmpq    %rsi, %rax
        jne     .L15
.L17:
        # Return 0: either an l element was larger, or all 64 pairs were equal.
        xorl    %eax, %eax
        addq    $400, %rsp
        # Save the unwind state (CFA offset 408) for the .L16 path below,
        # then describe the frame after the stack has been released.
        .cfi_remember_state
        .cfi_def_cfa_offset 8
        ret
        .p2align 4,,10
        .p2align 3
.L16:
        # Reached from inside the loop with the 400-byte frame still allocated,
        # so restore the remembered unwind state first.
        .cfi_restore_state
        # Return 1: a differing element of l was smaller.
        movl    $1, %eax
        addq    $400, %rsp
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE909:
        .size   _Z5cmp_ySt5arrayIiLm64EES0_, .-_Z5cmp_ySt5arrayIiLm64EES0_
        .ident  "GCC: (GNU) 10.1.1 20200507 (Red Hat 10.1.1-1)"
        .section        .note.GNU-stack,"",@progbits

Reply via email to