https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66989

            Bug ID: 66989
           Summary: poor performance of builtin_isfinite on x64
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: neleai at seznam dot cz
  Target Milestone: ---

This is another part of the investigation into the performance of the
floating-point classification builtins. The results are becoming more
CPU-dependent: the benchmarks show a large improvement on core2 but almost
none on haswell.

/* EXTRACT_WORDS64 (i, d): store the raw 64-bit representation of the
   double D into the integer lvalue I.

   I is the destination and comes first; D is the source and is cast to
   double before use.  The bits are moved with a single x86-64 `movq`
   from an SSE register (the "x" input constraint) into a general
   register or memory (the "=rm" output constraint), landing in the
   int64_t temporary i_, which is then assigned to I.  The do/while (0)
   wrapper makes the expansion behave as a single statement.  */
#define EXTRACT_WORDS64(i, d)                                                 \
  do {                                                                        \
    int64_t i_;                                                               \
    asm ("movq %1, %0" : "=rm" (i_) : "x" ((double) (d)));                   \
    (i) = i_;                                                                 \
  } while (0)


/* Return 1 if DX is finite (neither an infinity nor a NaN), 0 otherwise.

   The test works on the raw IEEE 754 binary64 bit pattern.  Doubling the
   64-bit word shifts the sign bit out, leaving the 11-bit exponent field
   in the top bits.  An all-ones exponent (infinity or NaN) then yields a
   value >= 0xffe0000000000000, so anything below that is finite.

   NOTE(review): I2 is not defined in this snippet; presumably it is an
   inlining-control macro supplied by the benchmark harness -- confirm.  */
int I2
isfinite2 (double dx)
{
  uint64_t x;

  /* EXTRACT_WORDS64 takes the integer destination first and the double
     source second; the arguments must not be swapped.  */
  EXTRACT_WORDS64 (x, dx);

  return 2 * x < UINT64_C (0xffe0000000000000);
}


core2:
don't inline
conditional add
branched

real    0m1.334s
user    0m1.334s
sys     0m0.000s
builtin

real    0m1.577s
user    0m1.576s
sys     0m0.000s
branch
branched

real    0m1.453s
user    0m1.452s
sys     0m0.000s
builtin

real    0m1.335s
user    0m1.334s
sys     0m0.000s
sum
branched

real    0m1.336s
user    0m1.335s
sys     0m0.000s
builtin

real    0m1.575s
user    0m1.573s
sys     0m0.000s
inline outer call
conditional add
branched

real    0m1.046s
user    0m1.046s
sys     0m0.000s
builtin

real    0m0.972s
user    0m0.971s
sys     0m0.000s
branch
branched

real    0m0.849s
user    0m0.845s
sys     0m0.003s
builtin

real    0m0.971s
user    0m0.970s
sys     0m0.002s
sum
branched

real    0m1.097s
user    0m1.095s
sys     0m0.000s
builtin

real    0m1.104s
user    0m1.100s
sys     0m0.003s
inline inner call
conditional add
branched

real    0m0.981s
user    0m0.980s
sys     0m0.000s
builtin

real    0m0.971s
user    0m0.971s
sys     0m0.000s
branch
branched

real    0m0.849s
user    0m0.848s
sys     0m0.000s
builtin

real    0m0.970s
user    0m0.969s
sys     0m0.000s
sum
branched

real    0m1.094s
user    0m1.094s
sys     0m0.000s
builtin

real    0m1.101s
user    0m1.100s
sys     0m0.000s
tight loop
conditional add
branched

real    0m0.365s
user    0m0.364s
sys     0m0.000s
builtin

real    0m0.433s
user    0m0.433s
sys     0m0.000s
branch
branched

real    0m0.126s
user    0m0.126s
sys     0m0.000s
builtin

real    0m0.368s
user    0m0.367s
sys     0m0.000s
sum
branched

real    0m0.367s
user    0m0.365s
sys     0m0.000s
builtin

real    0m0.632s
user    0m0.631s
sys     0m0.000s

fx10
don't inline
conditional add
branched

real    0m1.297s
user    0m1.295s
sys     0m0.004s
builtin

real    0m1.300s
user    0m1.299s
sys     0m0.004s
branch
branched

real    0m0.657s
user    0m0.658s
sys     0m0.000s
builtin

real    0m0.677s
user    0m0.677s
sys     0m0.000s
sum
branched

real    0m1.296s
user    0m1.295s
sys     0m0.004s
builtin

real    0m1.313s
user    0m1.315s
sys     0m0.000s
inline outer call
conditional add
branched

real    0m1.296s
user    0m1.298s
sys     0m0.000s
builtin

real    0m1.297s
user    0m1.298s
sys     0m0.000s
branch
branched

real    0m0.365s
user    0m0.366s
sys     0m0.000s
builtin

real    0m0.412s
user    0m0.409s
sys     0m0.004s
sum
branched

real    0m1.302s
user    0m1.304s
sys     0m0.000s
builtin

real    0m1.305s
user    0m1.307s
sys     0m0.000s
inline inner call
conditional add
branched

real    0m1.299s
user    0m1.300s
sys     0m0.000s
builtin

real    0m1.296s
user    0m1.297s
sys     0m0.000s
branch
branched

real    0m0.509s
user    0m0.509s
sys     0m0.000s
builtin

real    0m0.539s
user    0m0.539s
sys     0m0.001s
sum
branched

real    0m1.301s
user    0m1.303s
sys     0m0.000s
builtin

real    0m1.307s
user    0m1.309s
sys     0m0.000s
tight loop
conditional add
branched

real    0m0.369s
user    0m0.369s
sys     0m0.000s
builtin

real    0m0.362s
user    0m0.362s
sys     0m0.000s
branch
branched

real    0m0.152s
user    0m0.152s
sys     0m0.000s
builtin

real    0m0.260s
user    0m0.257s
sys     0m0.004s
sum
branched

real    0m0.362s
user    0m0.362s
sys     0m0.001s
builtin

real    0m0.399s
user    0m0.399s
sys     0m0.001s

haswell
don't inline
conditional add
branched

real    0m0.697s
user    0m0.698s
sys     0m0.000s
builtin

real    0m0.802s
user    0m0.803s
sys     0m0.000s
branch
branched

real    0m0.697s
user    0m0.698s
sys     0m0.000s
builtin

real    0m0.796s
user    0m0.793s
sys     0m0.003s
sum
branched

real    0m0.717s
user    0m0.717s
sys     0m0.000s
builtin

real    0m0.802s
user    0m0.802s
sys     0m0.000s
inline outer call
conditional add
branched

real    0m0.695s
user    0m0.695s
sys     0m0.000s
builtin

real    0m0.695s
user    0m0.695s
sys     0m0.000s
branch
branched

real    0m0.390s
user    0m0.387s
sys     0m0.003s
builtin

real    0m0.413s
user    0m0.412s
sys     0m0.000s
sum
branched

real    0m0.695s
user    0m0.702s
sys     0m0.000s
builtin

real    0m0.696s
user    0m0.697s
sys     0m0.000s
inline inner call
conditional add
branched

real    0m0.695s
user    0m0.696s
sys     0m0.000s
builtin

real    0m0.696s
user    0m0.692s
sys     0m0.003s
branch
branched

real    0m0.388s
user    0m0.388s
sys     0m0.000s
builtin

real    0m0.388s
user    0m0.388s
sys     0m0.000s
sum
branched

real    0m0.695s
user    0m0.695s
sys     0m0.000s
builtin

real    0m0.695s
user    0m0.695s
sys     0m0.000s
tight loop
conditional add
branched

real    0m0.233s
user    0m0.232s
sys     0m0.000s
builtin

real    0m0.232s
user    0m0.232s
sys     0m0.000s
branch
branched

real    0m0.080s
user    0m0.080s
sys     0m0.000s
builtin

real    0m0.161s
user    0m0.160s
sys     0m0.000s
sum
branched

real    0m0.232s
user    0m0.232s
sys     0m0.000s
builtin

real    0m0.310s
user    0m0.310s
sys     0m0.000s

Reply via email to