https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79725

            Bug ID: 79725
           Summary: Sinking opportunity missed if complex type is changed
           Product: gcc
           Version: 7.0.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: drraph at gmail dot com
  Target Milestone: ---

Consider:

#include <complex.h>
complex double f(complex double x[]) {
  complex float p = 1.0;
  for (int i = 0; i < 1000000; i++)
    p = x[i];
  return p;
}

When compiled with -O3 -march=core-avx2 -ffast-math, this produces:

f:
        lea     rax, [rdi+16000000]
.L2:
        vmovupd ymm0, YMMWORD PTR [rdi]
        vmovupd ymm5, YMMWORD PTR [rdi+32]
        sub     rdi, -128
        vmovupd ymm1, YMMWORD PTR [rdi-64]
        vmovupd ymm4, YMMWORD PTR [rdi-32]
        vunpcklpd       ymm2, ymm0, ymm5
        vunpckhpd       ymm0, ymm0, ymm5
        vunpcklpd       ymm3, ymm1, ymm4
        vunpckhpd       ymm1, ymm1, ymm4
        vpermpd ymm2, ymm2, 216
        vpermpd ymm3, ymm3, 216
        vpermpd ymm0, ymm0, 216
        vpermpd ymm1, ymm1, 216
        vcvtpd2ps       xmm2, ymm2
        vcvtpd2ps       xmm3, ymm3
        vcvtpd2ps       xmm0, ymm0
        vcvtpd2ps       xmm1, ymm1
        vinsertf128     ymm2, ymm2, xmm3, 0x1
        vinsertf128     ymm1, ymm0, xmm1, 0x1
        cmp     rax, rdi
        jne     .L2
        vextractf128    xmm2, ymm2, 0x1
        vextractf128    xmm1, ymm1, 0x1
        vshufps xmm0, xmm2, xmm2, 255
        vshufps xmm1, xmm1, xmm1, 255
        vcvtss2sd       xmm0, xmm0, xmm0
        vzeroupper
        vcvtss2sd       xmm1, xmm1, xmm1
        ret

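Only the value assigned in the final iteration is live after the loop, so it
would be more efficient to load and convert just x[999999] (byte offset
999999 * 16 = 15999984):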

f:                                      # @f
        vmovsd  xmm0, qword ptr [rdi + 15999984] # xmm0 = mem[0],zero
        vmovsd  xmm1, qword ptr [rdi + 15999992] # xmm1 = mem[0],zero
        vcvtsd2ss       xmm0, xmm0, xmm0
        vcvtsd2ss       xmm1, xmm1, xmm1
        vcvtss2sd       xmm0, xmm0, xmm0
        vcvtss2sd       xmm1, xmm1, xmm1
        ret


If we change the line "complex float p = 1.0;" to "complex double p = 1.0;",
then the sinking happens correctly.

Reply via email to