https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120837
--- Comment #16 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
For e.g. restrict-2.c, the assembly difference is:

-       movslq  %ecx, %rcx
-       movss   200(%rsi,%rcx,4), %xmm0
-       addss   -200(%rsi,%rcx,4), %xmm0
+       addl    $50, %ecx
+       movslq  %ecx, %rax
+       movss   -400(%rsi,%rax,4), %xmm0
+       addss   (%rsi,%rax,4), %xmm0

so worse code. The gimple dump difference is:

-  _1 = (sizetype) j;
-  _2 = _1 + 50;
+  _1 = j + 50;
+  _2 = (long unsigned int) _1;
   _3 = _2 * 4;
   _4 = b + _3;
   _5 = *_4;
-  _6 = (long unsigned int) j;
-  _7 = _6 * 4;
-  _8 = _7 + 18446744073709551416;
+  _6 = j + -50;
+  _7 = (long unsigned int) _6;
+  _8 = _7 * 4;

and there are 12 "Moving statement" messages in lim2 instead of 11.

For copy-headers-5.c, the gimple difference is:

-  _5 = (sizetype) i;
-  _6 = _5 + 1;
+  _5 = i + 1;
+  _6 = (long unsigned int) _5;

and the loop-header copying doesn't happen; the assembly is shorter though:

-       cmpl    $1, %esi
-       jle     .L4
-       leal    -2(%rsi), %ecx
-       movl    (%rdi), %edx
-       leaq    4(%rdi), %rax
-       leaq    8(%rdi,%rcx,4), %rsi
-       jmp     .L3
-       .p2align 5
-       .p2align 4,,10
-       .p2align 3
-.L8:
-       addq    $4, %rax
-       cmpq    %rsi, %rax
-       je      .L4
-.L3:
-       movl    %edx, %ecx
-       movl    (%rax), %edx
-       cmpl    %ecx, %edx
-       jge     .L8
+       subl    $1, %esi
        xorl    %eax, %eax
-       ret
+       jmp     .L2
+       .p2align 4
        .p2align 4,,10
        .p2align 3
 .L4:
+       movl    (%rdi,%rax,4), %edx
+       addq    $1, %rax
+       cmpl    (%rdi,%rax,4), %edx
+       jg      .L5
+.L2:
+       cmpl    %eax, %esi
+       jg      .L4
        movl    $1, %eax
        ret
+       .p2align 4,,10
+       .p2align 3
+.L5:
+       xorl    %eax, %eax
+       ret