https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88569
Bug ID: 88569
Summary: Track relations between variable values
Product: gcc
Version: 8.2.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: [email protected]
Target Milestone: ---
This example comes from code that can be compiled for various CPUs and has
dedicated sections for AVX and SSE2. I left the original ifdefs in as comments.
When the 1st loop (for AVX) ends, the following relation holds: (cnt - n <= 3).
Similarly, after the 2nd loop this holds: (cnt - n <= 1). With this knowledge it
is possible to optimize the code of bar() into baz(): the 2nd and 3rd loops can
each run at most once, so they can be replaced by plain if statements. This
eliminates two condition checks (after the 2nd and 3rd loops) and one increment
(for the 3rd loop). It would be nice if gcc could perform such a transformation
automatically.
[code]
void foo(int n);
// Walks n upward from 0 towards cnt, calling foo(n) once per step.
// The step size is 4 in the first loop, 2 in the second and 1 in the third.
// The commented-out #ifdef/#endif lines mark the sections that were
// conditionally compiled for AVX and SSE2 in the code this was reduced from.
void bar(int cnt)
{
int n = 0;
//#ifdef __AVX__
// Steps of 4. The loop exits once n >= cnt - 3, i.e. cnt - n <= 3.
for (; n < cnt - 3; n += 4)
foo(n);
//#endif
//#ifdef __SSE2__
// Steps of 2. Entered with cnt - n <= 3, so the body can execute at most
// once; on exit n >= cnt - 1, i.e. cnt - n <= 1.
for (; n < cnt - 1; n += 2)
foo(n);
//#endif
// Steps of 1. Entered with cnt - n <= 1, so the body can execute at most once.
for (; n < cnt; n += 1)
foo(n);
}
// Hand-optimized form of bar(): the first loop is unchanged, while the
// step-2 and step-1 loops are written as single if statements because each
// of them can execute at most once after the step-4 loop has finished.
void baz(int cnt)
{
int n = 0;
// Steps of 4, same as in bar(). On exit cnt - n <= 3.
for (; n < cnt - 3; n += 4)
foo(n);
// At most one step of 2 is possible here.
if (n < cnt - 1)
{
foo(n);
n += 2;
}
// At most one final call; n is not used afterwards, so no increment is needed.
if (n < cnt)
foo(n);
}
[/code]
[asm]
# Compiler output for bar(int cnt), x86-64, Intel operand order.
# cnt arrives in edi, and edi also carries the argument of every foo() call.
# Register roles in this listing:
#   r12d = cnt
#   ebx  = n
#   ebp  = current loop bound (cnt - 3 for the 1st loop, cnt - 1 for the 2nd)
#   r13d = n + 2 inside the 2nd-loop code
bar(int):
push r13
push r12
# r12d = cnt
mov r12d, edi
push rbp
# ebp = cnt - 3 (bound of the 1st loop)
lea ebp, [rdi-3]
push rbx
# n = 0
xor ebx, ebx
# 8 bytes of stack that are never accessed in this listing
# (presumably padding to keep rsp 16-byte aligned at the calls)
sub rsp, 8
# skip the 1st loop when cnt - 3 <= 0
test ebp, ebp
jle .L5
# 1st loop: foo(n); n += 4; repeat while n < cnt - 3
.L2:
mov edi, ebx
add ebx, 4
call foo(int)
cmp ebx, ebp
jl .L2
# n is recomputed in closed form: n = 4 + 4 * ((cnt - 4) >> 2), logical shift
lea eax, [r12-4]
shr eax, 2
lea ebx, [4+rax*4]
.L5:
# ebp = cnt - 1 (bound of the 2nd loop); skip the 2nd loop when cnt - 1 <= n
lea ebp, [r12-1]
cmp ebp, ebx
jle .L3
# first iteration of the 2nd loop: foo(n), with r13d = n + 2
mov edi, ebx
lea r13d, [rbx+2]
call foo(int)
# a second iteration is still tested for: if cnt - 1 > n + 2, call foo(n + 2)
cmp ebp, r13d
jle .L8
mov edi, r13d
call foo(int)
.L8:
# n after the 2nd loop, in closed form: n = ((cnt - 2 - n) & -2) + (n + 2)
lea edi, [r12-2]
sub edi, ebx
mov ebx, edi
and ebx, -2
add ebx, r13d
.L3:
# 3rd loop: nothing to do when cnt <= n
cmp r12d, ebx
jle .L14
# first iteration: foo(n)
mov edi, ebx
call foo(int)
# a second iteration is still tested for: if cnt > n + 1, tail-call foo(n + 1)
lea edi, [rbx+1]
cmp r12d, edi
jg .L17
# normal return: release the 8 bytes and restore the saved registers
.L14:
add rsp, 8
pop rbx
pop rbp
pop r12
pop r13
ret
# same epilogue, then jump to foo with edi = n + 1 (tail call)
.L17:
add rsp, 8
pop rbx
pop rbp
pop r12
pop r13
jmp foo(int)
# Compiler output for baz(int cnt), x86-64, Intel operand order.
# cnt arrives in edi, and edi also carries the argument of every foo() call.
# Register roles in this listing:
#   r12d = cnt
#   ebx  = n
#   ebp  = cnt - 3 (bound of the loop)
baz(int):
push r12
# r12d = cnt
mov r12d, edi
push rbp
# ebp = cnt - 3
lea ebp, [rdi-3]
push rbx
# n = 0
xor ebx, ebx
# skip the loop when cnt - 3 <= 0
test ebp, ebp
jle .L19
# loop: foo(n); n += 4; repeat while n < cnt - 3
.L20:
mov edi, ebx
add ebx, 4
call foo(int)
cmp ebx, ebp
jl .L20
# n is recomputed in closed form: n = 4 + 4 * ((cnt - 4) >> 2), logical shift
lea eax, [r12-4]
shr eax, 2
lea ebx, [4+rax*4]
.L19:
# if cnt - 1 > n, take the single step of 2 at .L27
lea eax, [r12-1]
cmp eax, ebx
jg .L27
# otherwise, if n < cnt, make the final call at .L28
cmp ebx, r12d
jl .L28
# normal return: restore the saved registers
.L25:
pop rbx
pop rbp
pop r12
ret
# single step of 2: foo(n); n += 2; return if n >= cnt, else fall through
.L27:
mov edi, ebx
add ebx, 2
call foo(int)
cmp ebx, r12d
jge .L25
# final call: restore the saved registers and jump to foo with edi = n (tail call)
.L28:
mov edi, ebx
pop rbx
pop rbp
pop r12
jmp foo(int)
[/asm]