Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Hi, > > As subject, this patch adds compilation tests to make sure that the output > of vmla/vmls floating-point Neon intrinsics (fmul, fadd/fsub) is not fused > into fmla/fmls instructions. > > Ok for master? > > Thanks, > Jonathan > > --- > > gcc/testsuite/ChangeLog: > > 2021-02-16 Jonathan Wright <jonathan.wri...@arm.com> > > * gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c: > New test. > * gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c: > New test. > * gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c: > New test. > * gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c: > New test. > > diff --git > a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..402c4ef414558767c7d7ddc21817093a80d2a06d > --- /dev/null > +++ > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3" } */
Could you test this on an arm*-*-* target too? I'd expect the dg-final scans to fail there, since the AArch32 syntax is vmul.f32 etc. instead. Alternatively, we could just skip this for arm*-*-*, like you do with the f64 and laneq tests in the _A64 files. > + > + > +#include <arm_neon.h> > + > +float32x2_t foo_f32 (float32x2_t a, float32x2_t b, float32x2_t c) > +{ > + return vmla_f32 (a, b, c); > +} > + > +float32x4_t fooq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) > +{ > + return vmlaq_f32 (a, b, c); > +} > + > +float32x2_t foo_n_f32 (float32x2_t a, float32x2_t b, float32_t c) > +{ > + return vmla_n_f32 (a, b, c); > +} > + > +float32x4_t fooq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) > +{ > + return vmlaq_n_f32 (a, b, c); > +} > + > +float32x2_t foo_lane_f32 (float32x2_t a, > + float32x2_t b, > + float32x2_t v) > +{ > + return vmla_lane_f32 (a, b, v, 0); > +} > + > +float32x4_t fooq_lane_f32 (float32x4_t a, > + float32x4_t b, > + float32x2_t v) > +{ > + return vmlaq_lane_f32 (a, b, v, 0); > +} > + > +/* { dg-final { scan-assembler-times {fmul} 6} } */ > +/* { dg-final { scan-assembler-times {fadd} 6} } */ It'd be safer to match {\tfmul\t} etc. instead. Matching bare words runs the risk of picking up things like directory names that happen to contain “fmul” as a substring. 
Thanks, Richard > diff --git > a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c > > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..08a9590e2572fa78c8360f09c8353a0d23678ec1 > --- /dev/null > +++ > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c > @@ -0,0 +1,33 @@ > +/* { dg-skip-if "" { arm*-*-* } } */ > +/* { dg-do compile } */ > +/* { dg-options "-O3" } */ > + > + > +#include <arm_neon.h> > + > +float64x1_t foo_f64 (float64x1_t a, float64x1_t b, float64x1_t c) > +{ > + return vmla_f64 (a, b, c); > +} > + > +float64x2_t fooq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) > +{ > + return vmlaq_f64 (a, b, c); > +} > + > +float32x2_t foo_laneq_f32 (float32x2_t a, > + float32x2_t b, > + float32x4_t v) > +{ > + return vmla_laneq_f32 (a, b, v, 0); > +} > + > +float32x4_t fooq_laneq_f32 (float32x4_t a, > + float32x4_t b, > + float32x4_t v) > +{ > + return vmlaq_laneq_f32 (a, b, v, 0); > +} > + > +/* { dg-final { scan-assembler-times {fmul} 4} } */ > +/* { dg-final { scan-assembler-times {fadd} 4} } */ > diff --git > a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..0846b7cf5d2c332175235c15bbe534b2558960ef > --- /dev/null > +++ > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3" } */ > + > + > +#include <arm_neon.h> > + > +float32x2_t foo_f32 (float32x2_t a, float32x2_t b, float32x2_t c) > +{ > + return vmls_f32 (a, b, c); > +} > + > +float32x4_t fooq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) > +{ > + return vmlsq_f32 (a, b, c); > +} > + > +float32x2_t foo_n_f32 (float32x2_t a, float32x2_t b, float32_t c) > +{ > + 
return vmls_n_f32 (a, b, c); > +} > + > +float32x4_t fooq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) > +{ > + return vmlsq_n_f32 (a, b, c); > +} > + > +float32x2_t foo_lane_f32 (float32x2_t a, > + float32x2_t b, > + float32x2_t v) > +{ > + return vmls_lane_f32 (a, b, v, 0); > +} > + > +float32x4_t fooq_lane_f32 (float32x4_t a, > + float32x4_t b, > + float32x2_t v) > +{ > + return vmlsq_lane_f32 (a, b, v, 0); > +} > + > +/* { dg-final { scan-assembler-times {fmul} 6} } */ > +/* { dg-final { scan-assembler-times {fsub} 6} } */ > diff --git > a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c > > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..856d46757de6418ee18873ec73bc670ec481dd1c > --- /dev/null > +++ > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c > @@ -0,0 +1,33 @@ > +/* { dg-skip-if "" { arm*-*-* } } */ > +/* { dg-do compile } */ > +/* { dg-options "-O3" } */ > + > + > +#include <arm_neon.h> > + > +float64x1_t foo_f64 (float64x1_t a, float64x1_t b, float64x1_t c) > +{ > + return vmls_f64 (a, b, c); > +} > + > +float64x2_t fooq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) > +{ > + return vmlsq_f64 (a, b, c); > +} > + > +float32x2_t foo_laneq_f32 (float32x2_t a, > + float32x2_t b, > + float32x4_t v) > +{ > + return vmls_laneq_f32 (a, b, v, 0); > +} > + > +float32x4_t fooq_laneq_f32 (float32x4_t a, > + float32x4_t b, > + float32x4_t v) > +{ > + return vmlsq_laneq_f32 (a, b, v, 0); > +} > + > +/* { dg-final { scan-assembler-times {fmul} 4} } */ > +/* { dg-final { scan-assembler-times {fsub} 4} } */