[PATCH] 2015-06-15 Benedikt Huber

Benedikt Huber Thu, 18 Jun 2015 04:58:49 -0700

       * config/aarch64/aarch64-builtins.c: Builtins for rsqrt and rsqrtf.
       * config/aarch64/aarch64-protos.h: Declare.
       * config/aarch64/aarch64-simd.md: Matching expressions for frsqrte and 
frsqrts.
       * config/aarch64/aarch64.c: New functions. Emit rsqrt estimation code in 
fast math mode.
       * config/aarch64/aarch64.md: Added enum entry.
       * testsuite/gcc.target/aarch64/rsqrt.c: Tests for single and double.
---
 gcc/ChangeLog                            |   9 +++
 gcc/config/aarch64/aarch64-builtins.c    |  60 ++++++++++++++++
 gcc/config/aarch64/aarch64-protos.h      |   2 +
 gcc/config/aarch64/aarch64-simd.md       |  27 ++++++++
 gcc/config/aarch64/aarch64.c             |  63 +++++++++++++++++
 gcc/config/aarch64/aarch64.md            |   3 +
 gcc/testsuite/gcc.target/aarch64/rsqrt.c | 113 +++++++++++++++++++++++++++++++
 7 files changed, 277 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/rsqrt.c


diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c9b156f..690ebba 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2015-06-15  Benedikt Huber  <benedikt.hu...@theobroma-systems.com>
+
+       * config/aarch64/aarch64-builtins.c: Builtins for rsqrt and rsqrtf.
+       * config/aarch64/aarch64-protos.h: Declare.
+       * config/aarch64/aarch64-simd.md: Matching expressions for frsqrte and 
frsqrts.
+       * config/aarch64/aarch64.c: New functions. Emit rsqrt estimation code 
in fast math mode.
+       * config/aarch64/aarch64.md: Added enum entry.
+       * testsuite/gcc.target/aarch64/rsqrt.c: Tests for single and double.
+
 2015-06-14  Richard Sandiford  <richard.sandif...@arm.com>
 
        * rtl.h (classify_insn): Declare.
diff --git a/gcc/config/aarch64/aarch64-builtins.c 
b/gcc/config/aarch64/aarch64-builtins.c
index f7a39ec..484bb84 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -342,6 +342,8 @@ enum aarch64_builtins
   AARCH64_BUILTIN_GET_FPSR,
   AARCH64_BUILTIN_SET_FPSR,
 
+  AARCH64_BUILTIN_RSQRT,
+  AARCH64_BUILTIN_RSQRTF,
   AARCH64_SIMD_BUILTIN_BASE,
   AARCH64_SIMD_BUILTIN_LANE_CHECK,
 #include "aarch64-simd-builtins.def"
@@ -831,6 +833,32 @@ aarch64_init_crc32_builtins ()
 }
 
 void
+aarch64_add_builtin_rsqrt (void)
+{
+  tree fndecl = NULL;
+  tree ftype = NULL;
+  ftype = build_function_type_list (double_type_node, double_type_node, 
NULL_TREE);
+
+  fndecl = add_builtin_function ("__builtin_aarch64_rsqrt",
+                                 ftype,
+                                 AARCH64_BUILTIN_RSQRT,
+                                 BUILT_IN_MD,
+                                 NULL,
+                                 NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT] = fndecl;
+
+  tree ftypef = NULL;
+  ftypef = build_function_type_list (float_type_node, float_type_node, 
NULL_TREE);
+  fndecl = add_builtin_function ("__builtin_aarch64_rsqrtf",
+                                 ftypef,
+                                 AARCH64_BUILTIN_RSQRTF,
+                                 BUILT_IN_MD,
+                                 NULL,
+                                 NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_RSQRTF] = fndecl;
+}
+
+void
 aarch64_init_builtins (void)
 {
   tree ftype_set_fpr
@@ -855,6 +883,7 @@ aarch64_init_builtins (void)
     aarch64_init_simd_builtins ();
   if (TARGET_CRC32)
     aarch64_init_crc32_builtins ();
+  aarch64_add_builtin_rsqrt ();
 }
 
 tree
@@ -1099,6 +1128,23 @@ aarch64_crc32_expand_builtin (int fcode, tree exp, rtx 
target)
   return target;
 }
 
+static rtx
+aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  rtx op0 = expand_normal (arg0);
+
+  enum insn_code c = CODE_FOR_rsqrtdf;
+  if (fcode == AARCH64_BUILTIN_RSQRTF)
+    c = CODE_FOR_rsqrtsf;
+
+  pat = GEN_FCN (c) (target, op0);
+  emit_insn (pat);
+
+  return target;
+}
+
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient.  */
 rtx
@@ -1146,6 +1192,11 @@ aarch64_expand_builtin (tree exp,
   else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= 
AARCH64_CRC32_BUILTIN_MAX)
     return aarch64_crc32_expand_builtin (fcode, exp, target);
 
+  if (fcode == AARCH64_BUILTIN_RSQRT ||
+      fcode == AARCH64_BUILTIN_RSQRTF)
+    return aarch64_expand_builtin_rsqrt (fcode, exp, target);
+
+  return NULL_RTX;
   gcc_unreachable ();
 }
 
@@ -1303,6 +1354,15 @@ aarch64_builtin_vectorized_function (tree fndecl, tree 
type_out, tree type_in)
   return NULL_TREE;
 }
 
+tree
+aarch64_builtin_rsqrt (bool is_float)
+{
+  if (is_float)
+    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRTF];
+  else
+    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT];
+}
+
 #undef VAR1
 #define VAR1(T, N, MAP, A) \
   case AARCH64_SIMD_BUILTIN_##T##_##N##A:
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 965a11b..4f1c8ce 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -270,6 +270,8 @@ void aarch64_print_operand (FILE *, rtx, char);
 void aarch64_print_operand_address (FILE *, rtx);
 void aarch64_emit_call_insn (rtx);
 
+void aarch64_emit_swrsqrt (rtx, rtx);
+
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
 
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index b90f938..266800a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -353,6 +353,33 @@
   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 )
 
+(define_insn "*rsqrte_simd"
+  [(set (match_operand:VALLF 0 "register_operand" "=w")
+       (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
+                    UNSPEC_RSQRTE))]
+  "TARGET_SIMD"
+  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
+  [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
+
+(define_insn "*rsqrts_simd"
+  [(set (match_operand:VALLF 0 "register_operand" "=w")
+       (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
+               (match_operand:VALLF 2 "register_operand" "w")]
+                    UNSPEC_RSQRTS))]
+  "TARGET_SIMD"
+  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+  [(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
+
+(define_expand "rsqrt<mode>"
+  [(set (match_operand:GPF 0 "register_operand" "=w")
+       (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
+                    UNSPEC_RSQRT))]
+  "TARGET_FLOAT"
+{
+  aarch64_emit_swrsqrt (operands[0], operands[1]);
+  DONE;
+})
+
 (define_insn "*aarch64_mul3_elt_to_64v2df"
   [(set (match_operand:DF 0 "register_operand" "=w")
      (mult:DF
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c3c2795..281f0b2 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6816,6 +6816,66 @@ aarch64_memory_move_cost (machine_mode mode 
ATTRIBUTE_UNUSED,
   return aarch64_tune_params->memmov_cost;
 }
 
+extern tree aarch64_builtin_rsqrt (bool is_float);
+
+static tree
+aarch64_builtin_reciprocal (unsigned int fn,
+                            bool md_fn ATTRIBUTE_UNUSED,
+                            bool sqrt ATTRIBUTE_UNUSED)
+{
+  if (!fast_math_flags_set_p (&global_options))
+    return NULL_TREE;
+
+  if (fn == BUILT_IN_SQRTF)
+    return aarch64_builtin_rsqrt (true);
+  else if (fn == BUILT_IN_SQRT)
+    return aarch64_builtin_rsqrt (false);
+  else
+    return NULL_TREE;
+}
+
+void
+aarch64_emit_swrsqrt (rtx dst, rtx src)
+{
+  enum machine_mode mode = GET_MODE (src);
+  rtx xsrc = gen_reg_rtx (mode);
+  emit_set_insn (xsrc, src);
+
+  rtx x0 = gen_reg_rtx (mode);
+  emit_insn (gen_rtx_SET (x0,
+                          gen_rtx_UNSPEC (mode, gen_rtvec (1, xsrc),
+                                          UNSPEC_RSQRTE)));
+
+  bool double_mode = (DFmode   == mode ||
+                      V1DFmode == mode ||
+                      V2DFmode == mode ||
+                      V4DFmode == mode ||
+                      V6DFmode == mode ||
+                      V8DFmode == mode);
+
+  int iterations = 2;
+  if (double_mode)
+    iterations = 3;
+
+  for (int i = 0; i < iterations; ++i)
+    {
+      rtx x1 = gen_reg_rtx (mode);
+      rtx x2 = gen_reg_rtx (mode);
+      rtx x3 = gen_reg_rtx (mode);
+      emit_insn (gen_rtx_SET (x2,
+                              gen_rtx_MULT (mode, x0, x0)));
+      emit_insn (gen_rtx_SET (x3,
+                              gen_rtx_UNSPEC (mode, gen_rtvec (2, xsrc, x2),
+                                              UNSPEC_RSQRTS)));
+      emit_insn (gen_rtx_SET (x1,
+                              gen_rtx_MULT (mode, x0, x3)));
+      x0 = x1;
+    }
+
+  emit_move_insn (dst, x0);
+  return;
+}
+
 /* Return the number of instructions that can be issued per cycle.  */
 static int
 aarch64_sched_issue_rate (void)
@@ -11747,6 +11807,9 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
 
+#undef TARGET_BUILTIN_RECIPROCAL
+#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
+
 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 11123d6..7272d05 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -120,6 +120,9 @@
     UNSPEC_VSTRUCTDUMMY
     UNSPEC_SP_SET
     UNSPEC_SP_TEST
+    UNSPEC_RSQRT
+    UNSPEC_RSQRTE
+    UNSPEC_RSQRTS
 ])
 
 (define_c_enum "unspecv" [
diff --git a/gcc/testsuite/gcc.target/aarch64/rsqrt.c 
b/gcc/testsuite/gcc.target/aarch64/rsqrt.c
new file mode 100644
index 0000000..6607483
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/rsqrt.c
@@ -0,0 +1,113 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline --save-temps -ffast-math" } */
+
+#include <math.h>
+#include <stdio.h>
+
+#include <values.h>
+
+#define PI    3.141592653589793
+#define SQRT2 1.4142135623730951
+
+#define PI_4 0.7853981633974483
+#define SQRT1_2 0.7071067811865475
+
+/* 2^25+1, float has 24 significand bits
+ *       according to Single-precision floating-point format.  */
+#define TESTA8_FLT 33554433
+/* 2^54+1, double has 53 significand bits
+ *       according to Double-precision floating-point format.  */
+#define TESTA8_DBL 18014398509481985
+
+#define SD(a, b) t_double ((#a), (a), (b));
+#define SF(a, b) t_float ((#a), (a), (b));
+
+#define EPSILON_double __DBL_EPSILON__
+#define EPSILON_float __FLT_EPSILON__
+#define ABS_double fabs
+#define ABS_float fabsf
+#define SQRT_double sqrt
+#define SQRT_float sqrtf
+
+extern void abort (void);
+
+#define TESTTYPE(TYPE)                                                     \
+TYPE rsqrt_##TYPE (TYPE a)                                                 \
+{                                                                          \
+  return 1.0/SQRT_##TYPE(a);                                               \
+}                                                                          \
+                                                                           \
+int equals_##TYPE (TYPE a, TYPE b)                                         \
+{                                                                          \
+  return (a == b ||                                                        \
+          (isnan (a) && isnan (b)) ||                                      \
+          (ABS_##TYPE (a - b) < EPSILON_##TYPE));                          \
+}                                                                          \
+                                                                           \
+void t_##TYPE (const char *s, TYPE a, TYPE result)                         \
+{                                                                          \
+  TYPE r = rsqrt_##TYPE (a);                                               \
+  if (!equals_##TYPE (r, result))                                          \
+  {                                                                        \
+    abort ();                                                              \
+  }                                                                        \
+}                                                                          \
+
+//  printf ("Problem in %20s: %30.18A should be %30.18A\n", s, r, result); \
+
+TESTTYPE(double)
+TESTTYPE(float)
+
+/* { dg-final { scan-assembler-times "frsqrte\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frsqrts\\td\[0-9\]+, d\[0-9\]+, 
d\[0-9\]+" 3 } } */
+
+/* { dg-final { scan-assembler-times "frsqrte\\ts\[0-9\]+, s\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frsqrts\\ts\[0-9\]+, s\[0-9\]+, 
s\[0-9\]+" 2 } } */
+
+int main ()
+{
+  SD(    1.0/256, 0X1.00000000000000P+4  );
+  SD(        1.0, 0X1.00000000000000P+0  );
+  SD(       -1.0,                     NAN);
+  SD(       11.0, 0X1.34BF63D1568260P-2  );
+  SD(        0.0,                INFINITY);
+  SD(   INFINITY, 0X0.00000000000000P+0  );
+  SD(        NAN,                     NAN);
+  SD(       -NAN,                    -NAN);
+  SD(    DBL_MAX, 0X1.00000000000010P-512);
+  SD(    DBL_MIN, 0X1.00000000000000P+511);
+  SD(         PI, 0X1.20DD750429B6D0P-1  );
+  SD(       PI_4, 0X1.20DD750429B6D0P+0  );
+  SD(      SQRT2, 0X1.AE89F995AD3AE0P-1  );
+  SD(    SQRT1_2, 0X1.306FE0A31B7150P+0  );
+  SD(        -PI,                     NAN);
+  SD(     -SQRT2,                     NAN);
+  SD( TESTA8_DBL, 0X1.00000000000000P-27 );
+
+  SF(    1.0/256, 0X1.00000000000000P+4  );
+  SF(        1.0, 0X1.00000000000000P+0  );
+  SF(       -1.0,                     NAN);
+  SF(       11.0, 0X1.34BF6400000000P-2  );
+  SF(        0.0,                INFINITY);
+  SF(   INFINITY, 0X0.00000000000000P+0  );
+  SF(        NAN,                     NAN);
+  SF(       -NAN,                    -NAN);
+  SF(    FLT_MAX, 0X1.00000200000000P-64 );
+  SF(    FLT_MIN, 0X1.00000000000000P+63 );
+  SF(         PI, 0X1.20DD7400000000P-1  );
+  SF(       PI_4, 0X1.20DD7400000000P+0  );
+  SF(      SQRT2, 0X1.AE89FA00000000P-1  );
+  SF(    SQRT1_2, 0X1.306FE000000000P+0  );
+  SF(        -PI,                     NAN);
+  SF(     -SQRT2,                     NAN);
+  SF( TESTA8_FLT, 0X1.6A09E600000000P-13 );
+
+//   With -ffast-math these return positive INF.
+//   SD(       -0.0,               -INFINITY);
+//   SF(       -0.0,               -INFINITY);
+//   The reason here is that -ffast-math flushes to zero.
+//   SD(DBL_MIN/256, 0X1.00000000000000P+515);
+//   SF(FLT_MIN/256, 0X1.00000000000000P+67 );
+
+  return 0;
+}
-- 
1.9.1

[PATCH] 2015-06-15 Benedikt Huber

Reply via email to