https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120916
Bug ID: 120916 Summary: debug info for IV increment is lost Product: gcc Version: 16.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: driver Assignee: unassigned at gcc dot gnu.org Reporter: hubicka at gcc dot gnu.org Target Milestone: --- jan@padlo:/tmp> cat t.c int s = 1023; int a[1024]; __attribute__ ((noipa)) int test() { for ( int i = 0; /* Line 7 */ i < s; /* Line 8 */ i++) /* Line 9 */ a[i]++; } int main() { for (int i = 0; i < 1000000; i++) test (); } jan@padlo:/tmp> gcc -O3 t.c -g jan@padlo:/tmp> perf record ./a.out [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data (497 samples) ] perf report then shows: │ a[i]++; ▒ │ movd %esi,%xmm1 ▒ │ shl $0x4,%rdx ▒ │ pshufd $0x0,%xmm1,%xmm1 ▒ 0.37 │ data16 cs nopw 0x0(%rax,%rax,1) ▒ │ xchg %ax,%ax ▒ │40:┌─→movdqa 0x404040(%rax),%xmm0 ▒ 0.44 │ │ add $0x10,%rax ▒ 0.22 │ │ paddd %xmm1,%xmm0 ▒ 1.74 │ │ movaps %xmm0,0x404030(%rax) ▒ │ │i < s; /* Line 8 */ ▒ 91.10 │ ├──cmp %rdx,%rax ▒ 0.22 │ └──jne 40 ◆ Notice that a[i]++ is misplaced and should be just before │40:┌─→movdqa 0x404040(%rax),%xmm0 ▒ Moreover, we lose the location of "add", which should correspond to i++; /* Line 9 */ LLVM does: │ a[i]++; ▒ 0.50 │60:┌─→movdqa (%r8,%rdi,1),%xmm1 ▒ │ │ movdqa 0x10(%r8,%rdi,1),%xmm2 ▒ │ │ psubd %xmm0,%xmm1 ▒ 0.36 │ │ psubd %xmm0,%xmm2 ▒ 0.48 │ │ movdqa %xmm1,(%r8,%rdi,1) ▒ 43.46 │ │ movdqa %xmm2,0x10(%r8,%rdi,1) ▒ │ │i++) /* Line 9 */ ▒ 46.83 │ │ add $0x20,%r8 ▒ │ ├──cmp %r8,%rdx ▒ │ └──jne 60 ▒ It loses the location of the "cmp" instruction, but that is less harmful since it is not a header of the loop. 
In the optimized dump we have: <bb 2> [local count: 118111600]: [t.c:6:2] # DEBUG BEGIN_STMT [t.c:7:7] # DEBUG BEGIN_STMT # DEBUG i => 0 [t.c:8:9] # DEBUG BEGIN_STMT [t.c:8:9] s.0_11 = s; [t.c:8:9] if (s.0_11 > 0) goto <bb 3>; [89.00%] else goto <bb 11>; [11.00%] <bb 3> [local count: 105119324]: niters.6_5 = (unsigned int) s.0_11; _21 = niters.6_5 + 4294967295; if (_21 <= 2) goto <bb 7>; [10.00%] else goto <bb 4>; [90.00%] <bb 4> [local count: 94607391]: bnd.7_24 = niters.6_5 >> 2; _20 = (sizetype) bnd.7_24; _7 = _20 * 16; <bb 5> [local count: 860067200]: # ivtmp.21_17 = PHI <ivtmp.21_1(5), 0(4)> # DEBUG i => NULL [t.c:10:4] # DEBUG BEGIN_STMT [t.c:10:5] vect__1.12_29 = MEM <vector(4) int> [(int *)&a + ivtmp.21_17 * 1]; [t.c:10:8] vect__2.13_31 = vect__1.12_29 + { 1, 1, 1, 1 }; [t.c:10:8] MEM <vector(4) int> [(int *)&a + ivtmp.21_17 * 1] = vect__2.13_31; [t.c:9:8] # DEBUG BEGIN_STMT # DEBUG i => NULL [t.c:8:9] # DEBUG BEGIN_STMT ^^^^ maybe we want to just move this after the plus to "if" ivtmp.21_1 = ivtmp.21_17 + 16; if (ivtmp.21_1 != _7) goto <bb 5>; [89.00%] else goto <bb 6>; [11.00%]