All,
This patch adds support to the ARM backend for generating floating-point
fused multiply-accumulate.
OK?
gcc/ChangeLog:
2012-06-25 Matthew Gretton-Dann <[email protected]>
* config/arm/iterators.md (SDF): New mode iterator.
(V_if_elem): Add support for SF and DF modes.
(V_reg): Likewise.
(F_w_constraint): New mode iterator attribute.
(F_r_constraint): Likewise.
(F_fma_type): Likewise.
(F_target): Likewise.
config/arm/vfp.md (fma<mode>4): New pattern.
(*fmsub<mode>4): Likewise.
(*fmnsub<mode>4): Likewise.
(*fmnadd<mode>4): Likewise.
Thanks,
Matt
--
Matthew Gretton-Dann
Principal Engineer, PD Software - Tools, ARM Ltd
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 795a5ee..3063f00 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -42,6 +42,9 @@
;; A list of the 32bit and 64bit integer modes
(define_mode_iterator SIDI [SI DI])
+;; A list of modes which the VFP unit can handle
+(define_mode_iterator SDF [SF DF])
+
;; Integer element sizes implemented by IWMMXT.
(define_mode_iterator VMMX [V2SI V4HI V8QI])
@@ -245,7 +248,8 @@
(V4HI "P") (V8HI "q")
(V2SI "P") (V4SI "q")
(V2SF "P") (V4SF "q")
- (DI "P") (V2DI "q")])
+ (DI "P") (V2DI "q")
+ (SF "") (DF "P")])
;; Wider modes with the same number of elements.
(define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")])
@@ -303,7 +307,8 @@
(V4HI "i16") (V8HI "i16")
(V2SI "i32") (V4SI "i32")
(DI "i64") (V2DI "i64")
- (V2SF "f32") (V4SF "f32")])
+ (V2SF "f32") (V4SF "f32")
+ (SF "f32") (DF "f64")])
;; Same, but for operations which work on signed values.
(define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8")
@@ -423,6 +428,12 @@
;; Mode attribute for vshll.
(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
+;; Mode attributes used for fused-multiply-accumulate VFP support
+(define_mode_attr F_w_constraint [(SF "=t") (DF "=w")])
+(define_mode_attr F_r_constraint [(SF "t") (DF "w")])
+(define_mode_attr F_fma_type [(SF "fmacs") (DF "fmacd")])
+(define_mode_attr F_target [(SF "") (DF "&& TARGET_VFP_DOUBLE")])
+
;;----------------------------------------------------------------------------
;; Code attributes
;;----------------------------------------------------------------------------
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 2061414..2a50353 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -890,6 +890,54 @@
(set_attr "type" "fmacd")]
)
+;; Fused-multiply-accumulate
+
+(define_insn "fma<mode>4"
+ [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+ (fma:SDF (match_operand:SDF 1 "register_operand" "<F_r_constraint>")
+ (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+ (match_operand:SDF 3 "register_operand" "0")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+ "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "<F_fma_type>")]
+)
+
+(define_insn "*fmsub<mode>4"
+ [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+ (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand"
+ "<F_r_constraint>"))
+ (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+ (match_operand:SDF 3 "register_operand" "0")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+ "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "<F_fma_type>")]
+)
+
+(define_insn "*fnmsub<mode>4"
+ [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+ (fma:SDF (match_operand:SDF 1 "register_operand" "<F_r_constraint>")
+ (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+ (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+ "vfnms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "<F_fma_type>")]
+)
+
+(define_insn "*fnmadd<mode>4"
+ [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+ (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand"
+ "<F_r_constraint>"))
+ (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+ (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+ "vfnma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "<F_fma_type>")]
+)
+
;; Conversion routines
diff --git a/gcc/testsuite/gcc.target/arm/fma-sp.c b/gcc/testsuite/gcc.target/arm/fma-sp.c
new file mode 100644
index 0000000..457b55c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/fma-sp.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mthumb" } */
+
+#include "fma.h"
+
+/* { dg-final { scan-assembler-not "vfma\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfma\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfms\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfms\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfnma\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfnma\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfnms\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfnms\.f32\ts\[0-9\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/fma.c b/gcc/testsuite/gcc.target/arm/fma.c
new file mode 100644
index 0000000..1809aa8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/fma.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a15 -mfpu=vfpv4" } */
+
+#include "fma.h"
+
+/* { dg-final { scan-assembler-times "vfma\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfma\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfms\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfms\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnma\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnma\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnms\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnms\.f32\ts\[0-9\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/fma.h b/gcc/testsuite/gcc.target/arm/fma.h
new file mode 100644
index 0000000..0812c2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/fma.h
@@ -0,0 +1,50 @@
+extern double fma (double, double, double);
+extern float fmaf (float, float, float);
+
+float
+vfma32 (float x, float y, float z)
+{
+ return fmaf (x, y, z);
+}
+
+float
+vfms32 (float x, float y, float z)
+{
+ return fmaf (-x, y, z);
+}
+
+float
+vfnms32 (float x, float y, float z)
+{
+ return fmaf (x, y, -z);
+}
+
+float
+vfnma32 (float x, float y, float z)
+{
+ return fmaf (-x, y, -z);
+}
+
+double
+vfma64 (double x, double y, double z)
+{
+ return fma (x, y, z);
+}
+
+double
+vfms64 (double x, double y, double z)
+{
+ return fma (-x, y, z);
+}
+
+double
+vfnms64 (double x, double y, double z)
+{
+ return fma (x, y, -z);
+}
+
+double
+vfnma64 (double x, double y, double z)
+{
+ return fma (-x, y, -z);
+}