https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78180
--- Comment #3 from Markus Trippelsdorf <trippels at gcc dot gnu.org> ---

void BM_Rolling<long [128]>:     | void BM_Rolling<std::array<long, 128ul> >:
.LFB1712:                        | .LFB1713:
.cfi_startproc                     .cfi_startproc
pushq %r13                       <
.cfi_def_cfa_offset 16           <
.cfi_offset 13, -16              <
pushq %r12                       <
.cfi_def_cfa_offset 24           <
.cfi_offset 12, -24              <
movl $128, %ecx                  <
pushq %rbp                         pushq %rbp
.cfi_def_cfa_offset 32           | .cfi_def_cfa_offset 16
.cfi_offset 6, -32               | .cfi_offset 6, -16
pushq %rbx                         pushq %rbx
.cfi_def_cfa_offset 40           | .cfi_def_cfa_offset 24
.cfi_offset 3, -40               | .cfi_offset 3, -24
movq %rdi, %rbp                  <
xorl %eax, %eax                    xorl %eax, %eax
xorl %ebx, %ebx                  | movq %rdi, %rbx
xorl %r12d, %r12d                | movl $128, %ecx
                                 > xorl %ebp, %ebp
subq $1048, %rsp                   subq $1048, %rsp
.cfi_def_cfa_offset 1088         | .cfi_def_cfa_offset 1072
xorl %r13d, %r13d                <
movq %rsp, %rdi                    movq %rsp, %rdi
rep stosq                          rep stosq
                                 > movabsq $429496729600, %rax
                                 > movq %rax, 1024(%rsp)
.p2align 4,,10                     .p2align 4,,10
.p2align 3                         .p2align 3
.L28:                            | .L7:
cmpb $0, 0(%rbp)                 | cmpb $0, (%rbx)
je .L39                          | je .L20
.L23:                            | .L2:
movq 8(%rbp), %rax               | movq 8(%rbx), %rax
cmpq 72(%rbp), %rax              | cmpq 72(%rbx), %rax
leaq 1(%rax), %rdx                 leaq 1(%rax), %rdx
movq %rdx, 8(%rbp)               | movq %rdx, 8(%rbx)
jnb .L40                         | jnb .L21
movq 24(%rbp), %r8               | movq 24(%rbx), %r8
movq 16(%rbp), %rdi              | movq 16(%rbx), %rsi
movq %r8, %rax                     movq %r8, %rax
subq %rdi, %rax                  | subq %rsi, %rax
sarq $2, %rax                      sarq $2, %rax
testq %rax, %rax                   testq %rax, %rax
je .L25                          | je .L4
movl (%rdi), %edx                | movl (%rsi), %eax
xorl %eax, %eax                  | xorl %edx, %edx
xorl %ecx, %ecx                    xorl %ecx, %ecx
movl %ebx, %esi                  | testl %eax, %eax
testl %edx, %edx                 | jg .L5
jg .L26                          | jmp .L7
jmp .L28                         <
.p2align 4,,10                     .p2align 4,,10
.p2align 3                         .p2align 3
.L31:                            | .L11:
addq $1, %rax                    | addq $1, %rdx
cmpl (%rdi), %ecx                | cmpl (%rsi), %ecx
jge .L28                         | jge .L7
.L26:                            | .L5:
addl $1, %ebx                    | movl 1024(%rsp), %edi
movq %rax, (%rsp,%rsi,8)         | movq %rdi, %rax
movq %rax, %rdx                  | movq %rdx, (%rsp,%rdi,8)
cmpl $100, %ebx                  | addl $1, %eax
cmove %r13d, %ebx                | cmpl 1028(%rsp), %eax
movl %ebx, %esi                  | movl %eax, 1024(%rsp)
subq (%rsp,%rsi,8), %rdx         | jne .L9
cmpq $999999, %rdx               | movl $0, 1024(%rsp)
jg .L30                          | xorl %eax, %eax
addq $1, %r12                    | .L9:
movq 16(%rbp), %rdi              | movq %rdx, %rdi
movq 24(%rbp), %r8               | subq (%rsp,%rax,8), %rdi
.L30:                            | cmpq $999999, %rdi
movq %r8, %rdx                   | jg .L10
                                 > addq $1, %rbp
                                 > movq 16(%rbx), %rsi
                                 > movq 24(%rbx), %r8
                                 > .L10:
                                 > movq %r8, %rax
addl $1, %ecx                      addl $1, %ecx
subq %rdi, %rdx                  | subq %rsi, %rax
sarq $2, %rdx                    | sarq $2, %rax
testq %rdx, %rdx                 | testq %rax, %rax
jne .L31                         | jne .L11