https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66986

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         Resolution|---                         |INVALID
             Status|UNCONFIRMED                 |RESOLVED

--- Comment #6 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Your definition of isinf2 is incorrect:
int I2
isinf2 (double dx)
{
  unsigned long x;
  memcpy(&x, &dx, sizeof(dx));
  if (2 * x == 0xffe0000000000000)
    return 0;
  else
    return (int) (x >> 32);
}

With that change, the version that GCC produces is faster.

isinf2:
.LFB22:
        .cfi_startproc
#APP
# 19 "/app/example.cpp" 1
        movq %xmm0, %rax
# 0 "" 2
#NO_APP
        movabsq $-9007199254740992, %rdx
        leaq    (%rax,%rax), %rcx
        shrq    $32, %rax
        cmpq    %rdx, %rcx
        movl    $0, %edx
        cmove   %edx, %eax
        ret


vs
isinf2:
.LFB22:
        .cfi_startproc
        xorl    %eax, %eax
        andpd   .LC0(%rip), %xmm0
        ucomisd .LC1(%rip), %xmm0
        seta    %al
        ret


For the inlined case (for the T1):
.L15:
        movsd   (%rax), %xmm0
        addsd   %xmm4, %xmm0
        andpd   %xmm3, %xmm0
        ucomisd %xmm2, %xmm0
        jbe     .L14
        addsd   %xmm5, %xmm1
.L14:
        addq    $8, %rax
        cmpq    %rax, %rdx
        jne     .L15

vs
.L19:
        movsd   (%rax), %xmm3
        addsd   %xmm0, %xmm3
        movq    %xmm3, %rdx
        leaq    (%rdx,%rdx), %rcx
        cmpq    %rdi, %rcx
        je      .L18
        shrq    $32, %rdx
        testl   %edx, %edx
        je      .L18
        addsd   %xmm2, %xmm1
.L18:
        addq    $8, %rax
        cmpq    %rsi, %rax
        jne     .L19

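The second loop has a double jump: two conditional branches to .L18 (je, then
je again after the shift and test) inside the loop body, where the first loop
has a single jbe to .L14.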

Reply via email to