[RFA/ARM 1/3] Add VFP support for VFMA and friends

Matthew Gretton-Dann Mon, 25 Jun 2012 07:59:23 -0700

All,

This patch adds support to the ARM backend for generating the floating-point
fused multiply-accumulate instructions (VFMA, VFMS, VFNMA and VFNMS).


OK?

gcc/ChangeLog:

2012-06-25  Matthew Gretton-Dann  <matthew.gretton-d...@arm.com>

        * config/arm/iterators.md (SDF): New mode iterator.
        (V_if_elem): Add support for SF and DF modes.
        (V_reg): Likewise.
        (F_w_constraint): New mode iterator attribute.
        (F_r_constraint): Likewise.
        (F_fma_type): Likewise.
        (F_target): Likewise.
        * config/arm/vfp.md (fma<mode>4): New pattern.
        (*fmsub<mode>4): Likewise.
        (*fnmsub<mode>4): Likewise.
        (*fnmadd<mode>4): Likewise.

Thanks,

Matt

--
Matthew Gretton-Dann
Principal Engineer, PD Software - Tools, ARM Ltd

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 795a5ee..3063f00 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -42,6 +42,9 @@
 ;; A list of the 32bit and 64bit integer modes
 (define_mode_iterator SIDI [SI DI])
 
+;; Scalar floating-point modes (SF, DF) which the VFP unit can handle.
+(define_mode_iterator SDF [SF DF])
+
 ;; Integer element sizes implemented by IWMMXT.
 (define_mode_iterator VMMX [V2SI V4HI V8QI])
 
@@ -245,7 +248,8 @@
                          (V4HI "P") (V8HI  "q")
                          (V2SI "P") (V4SI  "q")
                          (V2SF "P") (V4SF  "q")
-                         (DI   "P") (V2DI  "q")])
+                         (DI   "P") (V2DI  "q")
+			 (SF   "")  (DF    "P")])
 
 ;; Wider modes with the same number of elements.
 (define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")])
@@ -303,7 +307,8 @@
                  (V4HI "i16") (V8HI  "i16")
                              (V2SI "i32") (V4SI  "i32")
                              (DI   "i64") (V2DI  "i64")
-                 (V2SF "f32") (V4SF  "f32")])
+                 (V2SF "f32") (V4SF  "f32")
+		 (SF "f32") (DF "f64")])
 
 ;; Same, but for operations which work on signed values.
 (define_mode_attr V_s_elem [(V8QI "s8")  (V16QI "s8")
@@ -423,6 +428,12 @@
 ;; Mode attribute for vshll.
 (define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
 
+;; Per-mode (SF/DF) attributes substituted into the VFP fused-multiply-accumulate patterns
+(define_mode_attr F_w_constraint [(SF "=t") (DF "=w")]) ; Operand 0.
+(define_mode_attr F_r_constraint [(SF "t") (DF "w")]) ; Operands 1 and 2.
+(define_mode_attr F_fma_type [(SF "fmacs") (DF "fmacd")]) ; "type" attr.
+(define_mode_attr F_target [(SF "") (DF "&& TARGET_VFP_DOUBLE")]) ; Insn cond.
+
 ;;----------------------------------------------------------------------------
 ;; Code attributes
 ;;----------------------------------------------------------------------------
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 2061414..2a50353 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -890,6 +890,54 @@
    (set_attr "type" "fmacd")]
 )
 
+;; Fused-multiply-accumulate
+
+(define_insn "fma<mode>4"	; op0 = op1 * op2 + op3, emitted as vfma.
+  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+        (fma:SDF (match_operand:SDF 1 "register_operand" "<F_r_constraint>")
+		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+		 (match_operand:SDF 3 "register_operand" "0")))]	; Addend is tied to operand 0.
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "<F_fma_type>")]
+)
+
+(define_insn "*fmsub<mode>4"	; op0 = (-op1) * op2 + op3, emitted as vfms.
+  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+	(fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" 
+					     "<F_r_constraint>"))
+		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+		 (match_operand:SDF 3 "register_operand" "0")))]	; Addend is tied to operand 0.
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "<F_fma_type>")]
+)
+
+(define_insn "*fnmsub<mode>4"	; op0 = op1 * op2 + (-op3), emitted as vfnms.
+  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+	(fma:SDF (match_operand:SDF 1 "register_operand"  "<F_r_constraint>")
+		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+		 (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))]	; Negated addend is tied to operand 0.
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+  "vfnms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "<F_fma_type>")]
+)
+
+(define_insn "*fnmadd<mode>4"	; op0 = (-op1) * op2 + (-op3), emitted as vfnma.
+  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+	(fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand"  
+					       "<F_r_constraint>"))
+		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+		 (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))]	; Negated addend is tied to operand 0.
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+  "vfnma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "<F_fma_type>")]
+)
+
 
 ;; Conversion routines
 
diff --git a/gcc/testsuite/gcc.target/arm/fma-sp.c b/gcc/testsuite/gcc.target/arm/fma-sp.c
new file mode 100644
index 0000000..457b55c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/fma-sp.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mthumb -mfloat-abi=hard" } */
+/* Expect each fused operation once in .f32 form and never in .f64 form.  */
+#include "fma.h"
+
+/* { dg-final { scan-assembler-not "vfma\\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfma\\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfms\\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfms\\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfnma\\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfnma\\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfnms\\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfnms\\.f32\ts\[0-9\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/fma.c b/gcc/testsuite/gcc.target/arm/fma.c
new file mode 100644
index 0000000..1809aa8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/fma.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a15 -mfpu=vfpv4 -mfloat-abi=hard" } */
+/* Expect each fused operation exactly once in both .f32 and .f64 form.  */
+#include "fma.h"
+
+/* { dg-final { scan-assembler-times "vfma\\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfma\\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfms\\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfms\\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnma\\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnma\\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnms\\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnms\\.f32\ts\[0-9\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/fma.h b/gcc/testsuite/gcc.target/arm/fma.h
new file mode 100644
index 0000000..0812c2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/fma.h
@@ -0,0 +1,50 @@
+extern double fma (double, double, double);
+extern float fmaf (float, float, float);
+
+float
+vfma32 (float x, float y, float z)
+{
+  return fmaf (x, y, z);	/* x * y + z; the tests scan for vfma.f32.  */
+}
+
+float
+vfms32 (float x, float y, float z)
+{
+  return fmaf (-x, y, z);	/* -x * y + z; the tests scan for vfms.f32.  */
+}
+
+float
+vfnms32 (float x, float y, float z)
+{
+  return fmaf (x, y, -z);	/* x * y - z; the tests scan for vfnms.f32.  */
+}
+
+float
+vfnma32 (float x, float y, float z)
+{
+  return fmaf (-x, y, -z);	/* -x * y - z; the tests scan for vfnma.f32.  */
+}
+
+double
+vfma64 (double x, double y, double z)
+{
+  return fma (x, y, z);	/* x * y + z; the tests scan for vfma.f64.  */
+}
+
+double
+vfms64 (double x, double y, double z)
+{
+  return fma (-x, y, z);	/* -x * y + z; the tests scan for vfms.f64.  */
+}
+
+double
+vfnms64 (double x, double y, double z)
+{
+  return fma (x, y, -z);	/* x * y - z; the tests scan for vfnms.f64.  */
+}
+
+double
+vfnma64 (double x, double y, double z)
+{
+  return fma (-x, y, -z);	/* -x * y - z; the tests scan for vfnma.f64.  */
+}

[RFA/ARM 1/3] Add VFP support for VFMA and friends

Reply via email to