This patch introduces inline definitions for the __fma and __fmaf functions in arm_acle.h for arm targets. These definitions rely on __builtin_fma and __builtin_fmaf to ensure proper inlining and to meet the ACLE requirements [1].
The patch has been tested locally using a crosstool-NG sysroot for arm-cortexa9_neon-linux-gnueabihf, confirming that the generated code uses the expected fused multiply-accumulate instructions: vfma.f32 for single precision and vfma.f64 for double precision. [1] https://arm-software.github.io/acle/main/acle.html#fused-multiply-accumulate-fma gcc/ChangeLog: * config/arm/arm_acle.h (__fma, __fmaf): New functions. gcc/testsuite/ChangeLog: * gcc.target/arm/acle/acle_fma.c: New test. --- gcc/config/arm/arm_acle.h | 19 +++++++++++++++++++ gcc/testsuite/gcc.target/arm/acle/acle_fma.c | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/acle/acle_fma.c diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h index c6c03fdce27..02cb67d1516 100644 --- a/gcc/config/arm/arm_acle.h +++ b/gcc/config/arm/arm_acle.h @@ -829,6 +829,25 @@ __crc32cd (uint32_t __a, uint64_t __b) #endif /* __ARM_FEATURE_CRC32 */ #pragma GCC pop_options +#pragma GCC push_options +#pragma GCC target("fpu=neon-vfpv4") + +__extension__ extern __inline double +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__fma (double __x, double __y, double __z) +{ + return __builtin_fma (__x, __y, __z); +} + +__extension__ extern __inline float +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__fmaf (float __x, float __y, float __z) +{ + return __builtin_fmaf (__x, __y, __z); +} + +#pragma GCC pop_options + #ifdef __cplusplus } #endif diff --git a/gcc/testsuite/gcc.target/arm/acle/acle_fma.c b/gcc/testsuite/gcc.target/arm/acle/acle_fma.c new file mode 100644 index 00000000000..cba4f48929d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/acle/acle_fma.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv7-a -mfpu=neon-vfpv4 -mfloat-abi=hard" } */ + +#include "arm_acle.h" + +double +test_acle_fma (double x, double y, double z) +{ + return __fma (x, y, z); +} + +float +test_acle_fmaf (float x, 
float y, float z) +{ + return __fmaf (x, y, z); +} + +/* { dg-final { scan-assembler-times "vfma.f64\td\[0-9\]," 1 } } */ +/* { dg-final { scan-assembler-times "vfma.f32\ts\[0-9\]" 1 } } */ -- 2.43.0