https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120916

            Bug ID: 120916
           Summary: debug info for IV increment is lost
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: driver
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hubicka at gcc dot gnu.org
  Target Milestone: ---

jan@padlo:/tmp> cat t.c
int s = 1023;
int a[1024];
__attribute__ ((noipa))
int test()
{
        for (
             int i = 0;  /* Line 7 */
             i < s;      /* Line 8 */
             i++)        /* Line 9 */
          a[i]++;
}
int
main()
{
        for (int i = 0; i < 1000000; i++)
          test ();
}
jan@padlo:/tmp> gcc -O3 t.c -g
jan@padlo:/tmp> perf record ./a.out
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.020 MB perf.data (497 samples) ]

perf report then shows:
        │    a[i]++;                                     ▒
        │      movd   %esi,%xmm1                         ▒
        │      shl    $0x4,%rdx                          ▒
        │      pshufd $0x0,%xmm1,%xmm1                   ▒
   0.37 │      data16 cs nopw 0x0(%rax,%rax,1)           ▒
        │      xchg   %ax,%ax                            ▒
        │40:┌─→movdqa 0x404040(%rax),%xmm0               ▒
   0.44 │   │  add    $0x10,%rax                         ▒
   0.22 │   │  paddd  %xmm1,%xmm0                        ▒
   1.74 │   │  movaps %xmm0,0x404030(%rax)               ▒
        │   │i < s;   /* Line 8 */                       ▒
  91.10 │   ├──cmp    %rdx,%rax                          ▒
   0.22 │   └──jne    40                                 ◆

Notice that the "a[i]++" location is misplaced; it should appear just before
        │40:┌─→movdqa 0x404040(%rax),%xmm0               ▒
Moreover, we lose the location of the "add", which should correspond to "i++" (Line 9).
LLVM does:

        │    a[i]++;                                     ▒
   0.50 │60:┌─→movdqa  (%r8,%rdi,1),%xmm1                ▒
        │   │  movdqa  0x10(%r8,%rdi,1),%xmm2            ▒
        │   │  psubd   %xmm0,%xmm1                       ▒
   0.36 │   │  psubd   %xmm0,%xmm2                       ▒
   0.48 │   │  movdqa  %xmm1,(%r8,%rdi,1)                ▒
  43.46 │   │  movdqa  %xmm2,0x10(%r8,%rdi,1)            ▒
        │   │i++)     /* Line 9 */                       ▒
  46.83 │   │  add     $0x20,%r8                         ▒
        │   ├──cmp     %r8,%rdx                          ▒
        │   └──jne     60                                ▒

It loses the location of the "cmp" instruction, but that is less harmful since
it is not a header of the loop.

In the optimized dump we have:
  <bb 2> [local count: 118111600]:
  [t.c:6:2] # DEBUG BEGIN_STMT
  [t.c:7:7] # DEBUG BEGIN_STMT
  # DEBUG i => 0
  [t.c:8:9] # DEBUG BEGIN_STMT
  [t.c:8:9] s.0_11 = s;
  [t.c:8:9] if (s.0_11 > 0)
    goto <bb 3>; [89.00%]
  else
    goto <bb 11>; [11.00%]

  <bb 3> [local count: 105119324]:
  niters.6_5 = (unsigned int) s.0_11;
  _21 = niters.6_5 + 4294967295;
  if (_21 <= 2)
    goto <bb 7>; [10.00%]
  else
    goto <bb 4>; [90.00%]

  <bb 4> [local count: 94607391]:
  bnd.7_24 = niters.6_5 >> 2;
  _20 = (sizetype) bnd.7_24;
  _7 = _20 * 16;

  <bb 5> [local count: 860067200]:
  # ivtmp.21_17 = PHI <ivtmp.21_1(5), 0(4)>
  # DEBUG i => NULL
  [t.c:10:4] # DEBUG BEGIN_STMT
  [t.c:10:5] vect__1.12_29 = MEM <vector(4) int> [(int *)&a + ivtmp.21_17 * 1];
  [t.c:10:8] vect__2.13_31 = vect__1.12_29 + { 1, 1, 1, 1 };
  [t.c:10:8] MEM <vector(4) int> [(int *)&a + ivtmp.21_17 * 1] = vect__2.13_31;
  [t.c:9:8] # DEBUG BEGIN_STMT
  # DEBUG i => NULL
  [t.c:8:9] # DEBUG BEGIN_STMT
^^^^ maybe we want to just move this marker after the plus, right before the "if"
  ivtmp.21_1 = ivtmp.21_17 + 16;
  if (ivtmp.21_1 != _7)
    goto <bb 5>; [89.00%]
  else
    goto <bb 6>; [11.00%]

Reply via email to