https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66986
Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         Resolution|---                         |INVALID
             Status|UNCONFIRMED                 |RESOLVED

--- Comment #6 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Your defined isinf2 is incorrect:

int I2 isinf2 (double dx) {
  unsigned long x;
  memcpy(&x, &dx, sizeof(dx));
  if (2 * x == 0xffe0000000000000)
    return 0;
  else
    return (int) (x >> 32);
}

With that change, the GCC version that is produced is faster.

isinf2:
.LFB22:
        .cfi_startproc
#APP
# 19 "/app/example.cpp" 1
        movq %xmm0, %rax
# 0 "" 2
#NO_APP
        movabsq $-9007199254740992, %rdx
        leaq    (%rax,%rax), %rcx
        shrq    $32, %rax
        cmpq    %rdx, %rcx
        movl    $0, %edx
        cmove   %edx, %eax
        ret

vs

isinf2:
.LFB22:
        .cfi_startproc
        xorl    %eax, %eax
        andpd   .LC0(%rip), %xmm0
        ucomisd .LC1(%rip), %xmm0
        seta    %al
        ret

For the inlined case (for the T1):

.L15:
        movsd   (%rax), %xmm0
        addsd   %xmm4, %xmm0
        andpd   %xmm3, %xmm0
        ucomisd %xmm2, %xmm0
        jbe     .L14
        addsd   %xmm5, %xmm1
.L14:
        addq    $8, %rax
        cmpq    %rax, %rdx
        jne     .L15

vs

.L19:
        movsd   (%rax), %xmm3
        addsd   %xmm0, %xmm3
        movq    %xmm3, %rdx
        leaq    (%rdx,%rdx), %rcx
        cmpq    %rdi, %rcx
        je      .L18
        shrq    $32, %rdx
        testl   %edx, %edx
        je      .L18
        addsd   %xmm2, %xmm1
.L18:
        addq    $8, %rax
        cmpq    %rsi, %rax
        jne     .L19

A double jump