https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122824

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Keywords|needs-source                |missed-optimization

--- Comment #3 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
There seems to be a missed optimization here; after fre3 we have:
```
  <bb 7> [local count: 7447232]:
  # .MEM_37 = VDEF <.MEM_19>
  __arr[0] = __fill_char_31(D);
  # .MEM_53 = VDEF <.MEM_37>
  MEM[(struct span *)&__v] = &__arr;
  # .MEM_640 = VDEF <.MEM_53>
  __first._M_current = &__arr;
  # .MEM_641 = VDEF <.MEM_640>
  D.144916._M_current = &MEM <const char32_t[1]> [(void *)&__arr + 4B];
  # .MEM_642 = VDEF <.MEM_641>
  __first = __first;
  # .MEM_643 = VDEF <.MEM_642>
  __it = D.144916;
  # .MEM_644 = VDEF <.MEM_643>
  __last = D.144916;
  # .MEM_645 = VDEF <.MEM_644>
  MEM <const char32_t *> [(struct _First_and_curr *)&D.135323] = &__arr;
  # .MEM_646 = VDEF <.MEM_645>
  MEM <const char32_t *> [(struct _First_and_curr *)&D.135323 + 8B] = &MEM <const char32_t[1]> [(void *)&__arr + 4B];
  # .MEM_647 = VDEF <.MEM_646>
  D.135323._M_last = __last;
  # .MEM_648 = VDEF <.MEM_647>
  D.135323._M_buf_index = 0;
  # .MEM_649 = VDEF <.MEM_648>
  D.135323._M_buf_last = 0;
  # .MEM_650 = VDEF <.MEM_649>
  D.135323._M_to_increment = 0;
  # VUSE <.MEM_650>
  _104 = MEM[(const char32_t * const &)&D.135323 + 16];
  if (_104 != &MEM <const char32_t[1]> [(void *)&__arr + 4B])
```
Later on, SRA is able to scalarize all of these stores/loads and we get:
```
  __arr[0] = __fill_char_31(D);
  SR.2068_482 = &MEM <const char32_t[1]> [(void *)&__arr + 4B];
  __last$_M_current_483 = SR.2068_482;
  SR.2034_480 = &__arr;
  SR.2035_481 = &MEM <const char32_t[1]> [(void *)&__arr + 4B];
  SR.2036_476 = __last$_M_current_483;
  SR.2041_477 = 0;
  SR.2042_433 = 0;
  SR.2043_430 = 0;
  _104 = SR.2036_476;
  if (_104 != &MEM <const char32_t[1]> [(void *)&__arr + 4B])
```
This gets optimized to `if (false)`.

Note the above IR is from `-O2 -flifetime-dse=0 -fstack-reuse=none` (to
disable the clobbers in some cases). I wonder why VN could not look through all
of those aggregate copies.

Reply via email to