[Bug target/105546] [11/12/13 Regression] ifconversion introduces many compares with loads

rguenth at gcc dot gnu.org via Gcc-bugs Wed, 19 Oct 2022 03:06:24 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105546


Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Priority|P3                          |P2

--- Comment #4 from Richard Biener <rguenth at gcc dot gnu.org> ---
And it's sinking (of common stores) that turns

  <bb 2> [local count: 1073741824]:
  g_344.0_1 = g_344;
  if (g_344.0_1 != 0)
    goto <bb 3>; [50.00%]
  else
    goto <bb 4>; [50.00%]

  <bb 3> [local count: 536870913]:
  <retval>.f0 = 2738;
  <retval>.f1 = 27943;
  <retval>.f2 = -1;
  <retval>.f3 = 171;
  <retval>.f4 = 3;
  <retval>.f5 = 4499926296329723445;
  goto <bb 5>; [100.00%]

  <bb 4> [local count: 536870913]:
  <retval>.f0 = 65526;
  <retval>.f1 = 1;
  <retval>.f2 = -8;
  <retval>.f3 = 161;
  <retval>.f4 = 3409572933270154779;
  <retval>.f5 = -6;

  <bb 5> [local count: 1073741824]:
  return <retval>;

into

  <bb 2> [local count: 1073741824]:
  g_344.0_1 = g_344;
  if (g_344.0_1 != 0)
    goto <bb 4>; [50.00%]
  else
    goto <bb 3>; [50.00%]

  <bb 3> [local count: 536870913]:

  <bb 4> [local count: 1073741824]:
  # _16 = PHI <4499926296329723445(2), -6(3)>
  # _18 = PHI <3(2), 3409572933270154779(3)>
  # _20 = PHI <171(2), 161(3)>
  # _22 = PHI <-1(2), -8(3)>
  # _24 = PHI <27943(2), 1(3)>
  # _26 = PHI <2738(2), 65526(3)>
  <retval>.f0 = _26;
  <retval>.f1 = _24;
  <retval>.f2 = _22;
  <retval>.f3 = _20;
  <retval>.f4 = _18;
  <retval>.f5 = _16;
  return <retval>;

without that (-fno-tree-sink) we'd get

func_1:
.LFB0:
        .cfi_startproc
        movq    %rdi, %rax
        cmpw    $0, g_344(%rip)
        je      .L2
        movw    $2738, (%rdi)
        movw    $27943, 2(%rdi)
        movw    $-1, 4(%rdi)
        movb    $-85, 6(%rdi)
        movq    $3, 8(%rdi)
        movabsq $4499926296329723445, %rdx
        movq    %rdx, 16(%rdi)
        ret
.L2:
        movw    $-10, (%rdi)
        movw    $1, 2(%rdi)
        movw    $-8, 4(%rdi)
        movb    $-95, 6(%rdi)
        movabsq $3409572933270154779, %rcx
        movq    %rcx, 8(%rdi)
        movq    $-6, 16(%rdi)
        ret

or, at -O2 now with vectorization,

func_1:
.LFB0:
        .cfi_startproc
        cmpw    $0, g_344(%rip)
        movq    %rdi, %rax
        je      .L2
        movdqa  .LC1(%rip), %xmm0
        movl    $-1, %ecx
        movl    $1831275186, (%rdi)
        movw    %cx, 4(%rdi)
        movb    $-85, 6(%rdi)
        movups  %xmm0, 8(%rdi)
        ret
        .p2align 4,,10
        .p2align 3
.L2:
        movdqa  .LC3(%rip), %xmm0
        movl    $-8, %edx
        movl    $131062, (%rdi)
        movw    %dx, 4(%rdi)
        movb    $-95, 6(%rdi)
        movups  %xmm0, 8(%rdi)
        ret

We could probably improve things by storing into the padding, but GIMPLE
doesn't know it is allowed to do that.

The sinking pass notes, in a source comment:

          /* Insert a PHI to merge differing stored values if necessary.
             Note that in general inserting PHIs isn't a very good idea as
             it makes the job of coalescing and register allocation harder.
             Even common SSA uses on the rhs/lhs might extend their lifetime
             across multiple edges by this code motion which makes
             register allocation harder.  */

But we don't limit ourselves in the number of PHI nodes to create.
Of course, since we have two sinking passes now, we'd get inconsistent
results here, also because vectorization sits in between the two.

[Bug target/105546] [11/12/13 Regression] ifconversion introduces many compares with loads

Reply via email to