All, This patch adds support to the ARM backend for generating the floating-point fused multiply-accumulate instructions (vfma, vfms, vfnma and vfnms).
OK? gcc/ChangeLog: 2012-06-25 Matthew Gretton-Dann <matthew.gretton-d...@arm.com> * config/arm/iterators.md (SDF): New mode iterator. (V_if_elem): Add support for SF and DF modes. (V_reg): Likewise. (F_w_constraint): New mode attribute. (F_r_constraint): Likewise. (F_fma_type): Likewise. (F_target): Likewise. * config/arm/vfp.md (fma<mode>4): New pattern. (*fmsub<mode>4): Likewise. (*fnmsub<mode>4): Likewise. (*fnmadd<mode>4): Likewise. Thanks, Matt -- Matthew Gretton-Dann Principal Engineer, PD Software - Tools, ARM Ltd
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 795a5ee..3063f00 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -42,6 +42,9 @@ ;; A list of the 32bit and 64bit integer modes (define_mode_iterator SIDI [SI DI]) +;; A list of modes which the VFP unit can handle +(define_mode_iterator SDF [SF DF]) + ;; Integer element sizes implemented by IWMMXT. (define_mode_iterator VMMX [V2SI V4HI V8QI]) @@ -245,7 +248,8 @@ (V4HI "P") (V8HI "q") (V2SI "P") (V4SI "q") (V2SF "P") (V4SF "q") - (DI "P") (V2DI "q")]) + (DI "P") (V2DI "q") + (SF "") (DF "P")]) ;; Wider modes with the same number of elements. (define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")]) @@ -303,7 +307,8 @@ (V4HI "i16") (V8HI "i16") (V2SI "i32") (V4SI "i32") (DI "i64") (V2DI "i64") - (V2SF "f32") (V4SF "f32")]) + (V2SF "f32") (V4SF "f32") + (SF "f32") (DF "f64")]) ;; Same, but for operations which work on signed values. (define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8") @@ -423,6 +428,12 @@ ;; Mode attribute for vshll. 
(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) +;; Mode attributes used for fused-multiply-accumulate VFP support +(define_mode_attr F_w_constraint [(SF "=t") (DF "=w")]) +(define_mode_attr F_r_constraint [(SF "t") (DF "w")]) +(define_mode_attr F_fma_type [(SF "fmacs") (DF "fmacd")]) +(define_mode_attr F_target [(SF "") (DF "&& TARGET_VFP_DOUBLE")]) + ;;---------------------------------------------------------------------------- ;; Code attributes ;;---------------------------------------------------------------------------- diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 2061414..2a50353 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -890,6 +890,54 @@ (set_attr "type" "fmacd")] ) +;; Fused-multiply-accumulate + +(define_insn "fma<mode>4" + [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>") + (fma:SDF (match_operand:SDF 1 "register_operand" "<F_r_constraint>") + (match_operand:SDF 2 "register_operand" "<F_r_constraint>") + (match_operand:SDF 3 "register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>" + "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "predicable" "yes") + (set_attr "type" "<F_fma_type>")] +) + +(define_insn "*fmsub<mode>4" + [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>") + (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" + "<F_r_constraint>")) + (match_operand:SDF 2 "register_operand" "<F_r_constraint>") + (match_operand:SDF 3 "register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>" + "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "predicable" "yes") + (set_attr "type" "<F_fma_type>")] +) + +(define_insn "*fnmsub<mode>4" + [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>") + (fma:SDF (match_operand:SDF 1 "register_operand" "<F_r_constraint>") + (match_operand:SDF 2 "register_operand" "<F_r_constraint>") + (neg:SDF 
(match_operand:SDF 3 "register_operand" "0"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>" + "vfnms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "predicable" "yes") + (set_attr "type" "<F_fma_type>")] +) + +(define_insn "*fnmadd<mode>4" + [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>") + (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" + "<F_r_constraint>")) + (match_operand:SDF 2 "register_operand" "<F_r_constraint>") + (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>" + "vfnma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "predicable" "yes") + (set_attr "type" "<F_fma_type>")] +) + ;; Conversion routines diff --git a/gcc/testsuite/gcc.target/arm/fma-sp.c b/gcc/testsuite/gcc.target/arm/fma-sp.c new file mode 100644 index 0000000..457b55c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/fma-sp.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mthumb" } */ + +#include "fma.h" + +/* { dg-final { scan-assembler-not "vfma\.f64\td\[0-9\]" } } */ +/* { dg-final { scan-assembler-times "vfma\.f32\ts\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vfms\.f64\td\[0-9\]" } } */ +/* { dg-final { scan-assembler-times "vfms\.f32\ts\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vfnma\.f64\td\[0-9\]" } } */ +/* { dg-final { scan-assembler-times "vfnma\.f32\ts\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vfnms\.f64\td\[0-9\]" } } */ +/* { dg-final { scan-assembler-times "vfnms\.f32\ts\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/fma.c b/gcc/testsuite/gcc.target/arm/fma.c new file mode 100644 index 0000000..1809aa8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/fma.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=cortex-a15 -mfpu=vfpv4" } */ + +#include "fma.h" + +/* { dg-final { scan-assembler-times "vfma\.f64\td\[0-9\]" 1 
} } */ +/* { dg-final { scan-assembler-times "vfma\.f32\ts\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "vfms\.f64\td\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "vfms\.f32\ts\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "vfnma\.f64\td\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "vfnma\.f32\ts\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "vfnms\.f64\td\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "vfnms\.f32\ts\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/fma.h b/gcc/testsuite/gcc.target/arm/fma.h new file mode 100644 index 0000000..0812c2d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/fma.h @@ -0,0 +1,50 @@ +extern double fma (double, double, double); +extern float fmaf (float, float, float); + +float +vfma32 (float x, float y, float z) +{ + return fmaf (x, y, z); +} + +float +vfms32 (float x, float y, float z) +{ + return fmaf (-x, y, z); +} + +float +vfnms32 (float x, float y, float z) +{ + return fmaf (x, y, -z); +} + +float +vfnma32 (float x, float y, float z) +{ + return fmaf (-x, y, -z); +} + +double +vfma64 (double x, double y, double z) +{ + return fma (x, y, z); +} + +double +vfms64 (double x, double y, double z) +{ + return fma (-x, y, z); +} + +double +vfnms64 (double x, double y, double z) +{ + return fma (x, y, -z); +} + +double +vfnma64 (double x, double y, double z) +{ + return fma (-x, y, -z); +}