[Bug middle-end/31723] New: Use reciprocal and reciprocal square root with -ffast-math

jb at gcc dot gnu dot org Fri, 27 Apr 2007 01:07:46 -0700

I did some analysis of why gfortran does badly at the gas_dyn benchmark of the
Polyhedron benchmark suite. See my analysis at


http://gcc.gnu.org/ml/fortran/2007-04/msg00494.html

In short, GCC should use reciprocal and reciprocal square root instructions
(available in single precision for SSE and Altivec) when possible. These
instructions are very fast, a few cycles vs. dozens or hundreds of cycles for
normal division and square root instructions. However, as these instructions
are accurate only to 12 bits, they should be enabled only with -ffast-math (or
some separate option that gets included with -ffast-math).

The following C program demonstrates the issue. For all of these functions it
should be possible to use reciprocal and/or reciprocal square root instructions
instead of normal div and sqrt:

#include <math.h>

/* Test case: reciprocal of a single-precision value, i.e. 1/a.  */
float recip1 (float a)
{
  return 1.0f/a;
}

/* Test case: general single-precision division a/b, which can be
   viewed as a multiplied by the reciprocal of b.  */
float recip2 (float a, float b)
{
  return a/b;
}

/* Test case: reciprocal square root of a single-precision value,
   i.e. 1/sqrt(a).  */
float rsqrt1 (float a)
{
  return 1.0f/sqrtf(a);
}

/* Test case: square root of a quotient, sqrt(a/b), which can be
   rewritten in terms of a reciprocal square root (see the identity
   in the body).  */
float rsqrt2 (float a, float b)
{
  /* Mathematically equivalent to 1/sqrt(b*(1/a))  */
  return sqrtf(a/b);
}

asm output (compiled with -std=c99 -O3 -c -Wall -pedantic -march=k8
-mfpmath=sse -ffast-math -S):

        .file   "recip.c"
        .text
        .p2align 4,,15
# recip1: GCC output for `return 1.0f/a;` (32-bit x86, AT&T syntax).
# Computes the result with a full-precision divss.
.globl recip1
        .type   recip1, @function
recip1:
        # Set up the frame pointer and reserve a 4-byte stack temporary.
        pushl   %ebp
        movl    %esp, %ebp
        subl    $4, %esp
        # xmm0 = constant at .LC0 (bit pattern of 1.0f).
        movss   .LC0, %xmm0
        # xmm0 = xmm0 / a, where a is the stack argument at 8(%ebp).
        divss   8(%ebp), %xmm0
        # Spill the SSE result to the temporary and reload it onto the
        # x87 stack, which is where the float result is handed back.
        movss   %xmm0, -4(%ebp)
        flds    -4(%ebp)
        leave
        ret
        .size   recip1, .-recip1
        .p2align 4,,15
# recip2: GCC output for `return a/b;`.
# Computes the quotient with a full-precision divss.
.globl recip2
        .type   recip2, @function
recip2:
        pushl   %ebp
        movl    %esp, %ebp
        # xmm0 = a (8(%ebp)); xmm0 = xmm0 / b (12(%ebp)).
        movss   8(%ebp), %xmm0
        divss   12(%ebp), %xmm0
        # No separate temporary here: the result is written back over
        # the slot of argument a and reloaded from there onto the x87 stack.
        movss   %xmm0, 8(%ebp)
        flds    8(%ebp)
        leave
        ret
        .size   recip2, .-recip2
        .p2align 4,,15
# rsqrt2: GCC output for `return sqrtf(a/b);`.
# Uses divss followed by sqrtss.
.globl rsqrt2
        .type   rsqrt2, @function
rsqrt2:
        # Set up the frame pointer and reserve a 4-byte stack temporary.
        pushl   %ebp
        movl    %esp, %ebp
        subl    $4, %esp
        # xmm0 = a / b (arguments at 8(%ebp) and 12(%ebp)).
        movss   8(%ebp), %xmm0
        divss   12(%ebp), %xmm0
        # xmm0 = sqrt(xmm0).
        sqrtss  %xmm0, %xmm0
        # Spill the SSE result and reload it onto the x87 stack.
        movss   %xmm0, -4(%ebp)
        flds    -4(%ebp)
        leave
        ret
        .size   rsqrt2, .-rsqrt2
        .p2align 4,,15
# rsqrt1: GCC output for `return 1.0f/sqrtf(a);`.
# Uses sqrtss followed by divss.
.globl rsqrt1
        .type   rsqrt1, @function
rsqrt1:
        # Set up the frame pointer and reserve a 4-byte stack temporary.
        pushl   %ebp
        movl    %esp, %ebp
        subl    $4, %esp
        # xmm0 = constant at .LC0 (bit pattern of 1.0f).
        movss   .LC0, %xmm0
        # xmm1 = sqrt(a), with a read directly from 8(%ebp).
        sqrtss  8(%ebp), %xmm1
        # xmm0 = 1.0f / sqrt(a).
        divss   %xmm1, %xmm0
        # Spill the SSE result and reload it onto the x87 stack.
        movss   %xmm0, -4(%ebp)
        flds    -4(%ebp)
        leave
        ret
        .size   rsqrt1, .-rsqrt1
        .section        .rodata.cst4,"aM",@progbits,4
        .align 4
# .LC0: 1065353216 == 0x3F800000, the IEEE-754 single-precision
# encoding of 1.0f. Loaded by recip1 and rsqrt1 as the dividend.
.LC0:
        .long   1065353216
        .ident  "GCC: (GNU) 4.3.0 20070426 (experimental)"
        .section        .note.GNU-stack,"",@progbits


As can be seen, it uses divss and sqrtss instead of rcpss and rsqrtss. Of
course, there are vectorized versions of these functions too, rcpps and
rsqrtps, that should be used when appropriate (vectorization is important e.g.
for gas_dyn).


-- 
           Summary: Use reciprocal and reciprocal square root with -ffast-
                    math
           Product: gcc
           Version: 4.3.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: middle-end
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: jb at gcc dot gnu dot org
GCC target triplet: i686-pc-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31723

[Bug middle-end/31723] New: Use reciprocal and reciprocal square root with -ffast-math

Reply via email to