https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96252
--- Comment #1 from Will Wray <wjwray at gmail dot com> --- Here are the code, the compiler invocation and the codegen output. The longer codegen contains an expanded memcpy that copies the by-value std::array arguments. Compiling with -fno-inline shows that the compiler calls the first function from the second; to make that call it has to copy the arguments, because both functions take them by value. /** compare_differing_codegen.cpp ******************/ #include <algorithm> #include <array> using cmp = std::array<int,64>; bool cmp_x(cmp l, cmp r) noexcept { return std::lexicographical_compare(begin(l),end(l) ,begin(r),end(r)); } bool cmp_y(cmp l, cmp r) noexcept { return std::lexicographical_compare(begin(l),end(l) ,begin(r),end(r)); } /** compiler invocation **************************/ > g++ --version g++ (GCC) 10.1.1 20200507 (Red Hat 10.1.1-1) > g++ -std=c++11 -O2 compare_differing_codegen.cpp -S > cat compare_differing_codegen.s .file "compare_differing_codegen.cpp" .text .p2align 4 .globl _Z5cmp_xSt5arrayIiLm64EES0_ .type _Z5cmp_xSt5arrayIiLm64EES0_, @function _Z5cmp_xSt5arrayIiLm64EES0_: .LFB890: .cfi_startproc leaq 264(%rsp), %rcx leaq 8(%rsp), %rax movq %rcx, %rdx .p2align 4,,10 .p2align 3 .L4: movl (%rdx), %esi cmpl %esi, (%rax) jl .L12 jg .L7 addq $4, %rax addq $4, %rdx cmpq %rcx, %rax jne .L4 leaq 520(%rsp), %rax cmpq %rax, %rdx setne %al ret .p2align 4,,10 .p2align 3 .L12: movl $1, %eax ret .p2align 4,,10 .p2align 3 .L7: xorl %eax, %eax ret .cfi_endproc .LFE890: .size _Z5cmp_xSt5arrayIiLm64EES0_, .-_Z5cmp_xSt5arrayIiLm64EES0_ .p2align 4 .globl _Z5cmp_ySt5arrayIiLm64EES0_ .type _Z5cmp_ySt5arrayIiLm64EES0_, @function _Z5cmp_ySt5arrayIiLm64EES0_: .LFB909: .cfi_startproc subq $400, %rsp .cfi_def_cfa_offset 408 movdqu 408(%rsp), %xmm0 leaq -120(%rsp), %rdx movdqu 424(%rsp), %xmm1 leaq 136(%rsp), %rax movdqu 440(%rsp), %xmm2 movdqu 456(%rsp), %xmm3 movdqu 472(%rsp), %xmm4 movups %xmm0, -120(%rsp) movdqu 488(%rsp), %xmm5 movdqu 504(%rsp), %xmm6 movups %xmm1, -104(%rsp) movdqu 520(%rsp), %xmm7 movdqu 536(%rsp), 
%xmm0 movups %xmm2, -88(%rsp) movdqu 552(%rsp), %xmm1 movdqu 568(%rsp), %xmm2 movups %xmm3, -72(%rsp) movdqu 584(%rsp), %xmm3 movups %xmm4, -56(%rsp) movdqu 600(%rsp), %xmm4 movups %xmm5, -40(%rsp) movdqu 616(%rsp), %xmm5 movups %xmm6, -24(%rsp) movdqu 632(%rsp), %xmm6 movups %xmm7, -8(%rsp) movdqu 648(%rsp), %xmm7 movups %xmm0, 8(%rsp) movups %xmm1, 24(%rsp) movups %xmm2, 40(%rsp) movups %xmm3, 56(%rsp) movups %xmm4, 72(%rsp) movups %xmm5, 88(%rsp) movups %xmm6, 104(%rsp) movups %xmm7, 120(%rsp) movdqu 664(%rsp), %xmm0 movdqu 680(%rsp), %xmm1 movdqu 696(%rsp), %xmm2 movdqu 712(%rsp), %xmm3 movdqu 728(%rsp), %xmm4 movdqu 744(%rsp), %xmm5 movups %xmm0, 136(%rsp) movdqu 760(%rsp), %xmm6 movups %xmm1, 152(%rsp) movdqu 776(%rsp), %xmm7 movdqu 792(%rsp), %xmm0 movups %xmm2, 168(%rsp) movdqu 808(%rsp), %xmm1 movdqu 824(%rsp), %xmm2 movups %xmm3, 184(%rsp) movdqu 840(%rsp), %xmm3 movups %xmm4, 200(%rsp) movdqu 856(%rsp), %xmm4 movups %xmm5, 216(%rsp) movdqu 872(%rsp), %xmm5 movups %xmm6, 232(%rsp) movdqu 888(%rsp), %xmm6 movups %xmm7, 248(%rsp) movdqu 904(%rsp), %xmm7 movups %xmm0, 264(%rsp) movups %xmm1, 280(%rsp) movups %xmm2, 296(%rsp) movups %xmm3, 312(%rsp) movups %xmm4, 328(%rsp) movups %xmm5, 344(%rsp) movups %xmm6, 360(%rsp) movups %xmm7, 376(%rsp) .p2align 4,,10 .p2align 3 .L15: movl (%rax), %ecx cmpl %ecx, (%rdx) jl .L16 jg .L17 addq $4, %rax leaq 392(%rsp), %rsi addq $4, %rdx cmpq %rsi, %rax jne .L15 .L17: xorl %eax, %eax addq $400, %rsp .cfi_remember_state .cfi_def_cfa_offset 8 ret .p2align 4,,10 .p2align 3 .L16: .cfi_restore_state movl $1, %eax addq $400, %rsp .cfi_def_cfa_offset 8 ret .cfi_endproc .LFE909: .size _Z5cmp_ySt5arrayIiLm64EES0_, .-_Z5cmp_ySt5arrayIiLm64EES0_ .ident "GCC: (GNU) 10.1.1 20200507 (Red Hat 10.1.1-1)" .section .note.GNU-stack,"",@progbits