All,
This patch adds vectoriser support for VFMA to the ARM Neon backend.
Note that the VFP VFNMA and VFNMS instructions do not have Neon
equivalents.
OK?
gcc/ChangeLog:
2012-06-25 Matthew Gretton-Dann <[email protected]>
* config/arm/neon.md (fma<mode>4): New pattern.
(*fmsub<mode>4): Likewise.
2012-06-25 Matthew Gretton-Dann <[email protected]>
* gcc.target/arm/neon-vfma-1.c: New testcase.
* gcc.target/arm/neon-vfms-1.c: Likewise.
* lib/target-supports.exp (add_options_for_arm_neonv2): New
function.
(check_effective_target_arm_neonv2_ok_nocache): Likewise.
(check_effective_target_arm_neonv2_ok): Likewise.
(check_effective_target_arm_neonv2_hw): Likewise.
(check_effective_target_arm_neonv2): Likewise.
Thanks,
Matt
--
Matthew Gretton-Dann
Principal Engineer, PD Software - Tools, ARM Ltd
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 4568dea..4d12fb3 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -711,6 +711,33 @@
(const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
)
+;; Fused multiply-accumulate
+(define_insn "fma<mode>4"
+ [(set (match_operand:VCVTF 0 "register_operand" "=w")
+ (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
+ (match_operand:VCVTF 2 "register_operand" "w")
+ (match_operand:VCVTF 3 "register_operand" "0")))]
+ "TARGET_NEON && TARGET_FMA"
+ "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (match_test "<Is_d_reg>")
+ (const_string "neon_fp_vmla_ddd")
+ (const_string "neon_fp_vmla_qqq")))]
+)
+
+(define_insn "*fmsub<mode>4"
+ [(set (match_operand:VCVTF 0 "register_operand" "=w")
+ (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
+ (match_operand:VCVTF 2 "register_operand" "w")
+ (match_operand:VCVTF 3 "register_operand" "0")))]
+ "TARGET_NEON && TARGET_FMA"
+ "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (match_test "<Is_d_reg>")
+ (const_string "neon_fp_vmla_ddd")
+ (const_string "neon_fp_vmla_qqq")))]
+)
+
(define_insn "ior<mode>3"
[(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
diff --git a/gcc/testsuite/gcc.target/arm/neon-vfma-1.c b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c
new file mode 100644
index 0000000..a003a82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neonv2_ok } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-add-options arm_neonv2 } */
+/* { dg-final { scan-assembler "vfma\\.f32\[ \]+\[dDqQ]" } } */
+
+/* Verify that VFMA is used. */
+void f1(int n, float a, float x[], float y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = a * x[i] + y[i];
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-vfms-1.c b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c
new file mode 100644
index 0000000..8cefd8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neonv2_ok } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-add-options arm_neonv2 } */
+/* { dg-final { scan-assembler "vfms\\.f32\[ \]+\[dDqQ]" } } */
+
+/* Verify that VFMS is used. */
+void f1(int n, float a, float x[], float y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = a * -x[i] + y[i];
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index bc5baa7..9fc8a5c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2082,6 +2082,19 @@ proc add_options_for_arm_neon { flags } {
return "$flags $et_arm_neon_flags"
}
+# Add the options needed for NEON. We need either -mfloat-abi=softfp
+# or -mfloat-abi=hard, but if one is already specified by the
+# multilib, use it. Similarly, if a -mfpu option already enables
+# NEON, do not add -mfpu=neon.
+
+proc add_options_for_arm_neonv2 { flags } {
+ if { ! [check_effective_target_arm_neonv2_ok] } {
+ return "$flags"
+ }
+ global et_arm_neonv2_flags
+ return "$flags $et_arm_neonv2_flags"
+}
+
# Return 1 if this is an ARM target supporting -mfpu=neon
# -mfloat-abi=softfp or equivalent options. Some multilibs may be
# incompatible with these options. Also set et_arm_neon_flags to the
@@ -2110,6 +2123,38 @@ proc check_effective_target_arm_neon_ok { } {
check_effective_target_arm_neon_ok_nocache]
}
+# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4
+# -mfloat-abi=softfp or equivalent options. Some multilibs may be
+# incompatible with these options. Also set et_arm_neonv2_flags to the
+# best options to add.
+
+proc check_effective_target_arm_neonv2_ok_nocache { } {
+ global et_arm_neonv2_flags
+ set et_arm_neonv2_flags ""
+ if { [check_effective_target_arm32] } {
+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} {
+ if { [check_no_compiler_messages_nocache arm_neonv2_ok object {
+ #include "arm_neon.h"
+ float32x2_t
+ foo (float32x2_t a, float32x2_t b, float32x2_t c)
+ {
+ return vfma_f32 (a, b, c);
+ }
+ } "$flags"] } {
+ set et_arm_neonv2_flags $flags
+ return 1
+ }
+ }
+ }
+
+ return 0
+}
+
+proc check_effective_target_arm_neonv2_ok { } {
+ return [check_cached_effective_target arm_neonv2_ok \
+ check_effective_target_arm_neonv2_ok_nocache]
+}
+
# Add the options needed for NEON. We need either -mfloat-abi=softfp
# or -mfloat-abi=hard, but if one is already specified by the
# multilib, use it.
@@ -2301,6 +2346,21 @@ proc check_effective_target_arm_neon_hw { } {
} [add_options_for_arm_neon ""]]
}
+proc check_effective_target_arm_neonv2_hw { } {
+ return [check_runtime arm_neon_hwv2_available {
+ #include "arm_neon.h"
+ int
+ main (void)
+ {
+ float32x2_t a, b, c;
+ asm ("vfma.f32 %P0, %P1, %P2"
+ : "=w" (a)
+ : "w" (b), "w" (c));
+ return 0;
+ }
+ } [add_options_for_arm_neonv2 ""]]
+}
+
# Return 1 if this is a ARM target with NEON enabled.
proc check_effective_target_arm_neon { } {
@@ -2317,6 +2377,24 @@ proc check_effective_target_arm_neon { } {
}
}
+proc check_effective_target_arm_neonv2 { } {
+ if { [check_effective_target_arm32] } {
+ return [check_no_compiler_messages arm_neon object {
+ #ifndef __ARM_NEON__
+ #error not NEON
+ #else
+ #ifndef __ARM_FEATURE_FMA
+ #error not NEONv2
+ #else
+ int dummy;
+ #endif
+ #endif
+ }]
+ } else {
+ return 0
+ }
+}
+
# Return 1 if this a Loongson-2E or -2F target using an ABI that supports
# the Loongson vector modes.