Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi,
>
> As subject, this patch adds compilation tests to make sure that the output
> of vmla/vmls floating-point Neon intrinsics (fmul, fadd/fsub) is not fused
> into fmla/fmls instructions.
>
> Ok for master?
>
> Thanks,
> Jonathan
>
> ---
>
> gcc/testsuite/ChangeLog:
>
> 2021-02-16  Jonathan Wright  <jonathan.wri...@arm.com>
>
>       * gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c:
>       New test.
>       * gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c:
>       New test.
>       * gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c:
>       New test.
>       * gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c:
>       New test.
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..402c4ef414558767c7d7ddc21817093a80d2a06d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */

Could you test this on an arm*-*-* target too?  I'd expect the
dg-finals to fail there, since the AArch32 syntax is vmul.f32 etc.
instead of fmul.  Alternatively, we could just skip this for arm*-*-*,
like you do with the f64 and laneq tests (the _A64 files).

> +
> +
> +#include <arm_neon.h>
> +
> +float32x2_t foo_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
> +{
> +  return vmla_f32 (a, b, c);
> +}
> +
> +float32x4_t fooq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
> +{
> +  return vmlaq_f32 (a, b, c);
> +}
> +
> +float32x2_t foo_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
> +{
> +  return vmla_n_f32 (a, b, c);
> +}
> +
> +float32x4_t fooq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
> +{
> +  return vmlaq_n_f32 (a, b, c);
> +}
> +
> +float32x2_t foo_lane_f32 (float32x2_t a,
> +                       float32x2_t b,
> +                       float32x2_t v)
> +{
> +  return vmla_lane_f32 (a, b, v, 0);
> +}
> +
> +float32x4_t fooq_lane_f32 (float32x4_t a,
> +                        float32x4_t b,
> +                        float32x2_t v)
> +{
> +  return vmlaq_lane_f32 (a, b, v, 0);
> +}
> +
> +/* { dg-final { scan-assembler-times {fmul} 6} }  */
> +/* { dg-final { scan-assembler-times {fadd} 6} }  */

It'd be safer to match {\tfmul\t} etc. instead.  Matching bare words
runs the risk of picking up things like directory names that happen
to contain “fmul” as a substring.

Thanks,
Richard

> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..08a9590e2572fa78c8360f09c8353a0d23678ec1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_float_not_fused_A64.c
> @@ -0,0 +1,33 @@
> +/* { dg-skip-if "" { arm*-*-* } } */
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +
> +#include <arm_neon.h>
> +
> +float64x1_t foo_f64 (float64x1_t a, float64x1_t b, float64x1_t c)
> +{
> +  return vmla_f64 (a, b, c);
> +}
> +
> +float64x2_t fooq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
> +{
> +  return vmlaq_f64 (a, b, c);
> +}
> +
> +float32x2_t foo_laneq_f32 (float32x2_t a,
> +                        float32x2_t b,
> +                        float32x4_t v)
> +{
> +  return vmla_laneq_f32 (a, b, v, 0);
> +}
> +
> +float32x4_t fooq_laneq_f32 (float32x4_t a,
> +                         float32x4_t b,
> +                         float32x4_t v)
> +{
> +  return vmlaq_laneq_f32 (a, b, v, 0);
> +}
> +
> +/* { dg-final { scan-assembler-times {fmul} 4} }  */
> +/* { dg-final { scan-assembler-times {fadd} 4} }  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..0846b7cf5d2c332175235c15bbe534b2558960ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +
> +#include <arm_neon.h>
> +
> +float32x2_t foo_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
> +{
> +  return vmls_f32 (a, b, c);
> +}
> +
> +float32x4_t fooq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
> +{
> +  return vmlsq_f32 (a, b, c);
> +}
> +
> +float32x2_t foo_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
> +{
> +  return vmls_n_f32 (a, b, c);
> +}
> +
> +float32x4_t fooq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
> +{
> +  return vmlsq_n_f32 (a, b, c);
> +}
> +
> +float32x2_t foo_lane_f32 (float32x2_t a,
> +                       float32x2_t b,
> +                       float32x2_t v)
> +{
> +  return vmls_lane_f32 (a, b, v, 0);
> +}
> +
> +float32x4_t fooq_lane_f32 (float32x4_t a,
> +                        float32x4_t b,
> +                        float32x2_t v)
> +{
> +  return vmlsq_lane_f32 (a, b, v, 0);
> +}
> +
> +/* { dg-final { scan-assembler-times {fmul} 6} }  */
> +/* { dg-final { scan-assembler-times {fsub} 6} }  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..856d46757de6418ee18873ec73bc670ec481dd1c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_float_not_fused_A64.c
> @@ -0,0 +1,33 @@
> +/* { dg-skip-if "" { arm*-*-* } } */
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +
> +#include <arm_neon.h>
> +
> +float64x1_t foo_f64 (float64x1_t a, float64x1_t b, float64x1_t c)
> +{
> +  return vmls_f64 (a, b, c);
> +}
> +
> +float64x2_t fooq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
> +{
> +  return vmlsq_f64 (a, b, c);
> +}
> +
> +float32x2_t foo_laneq_f32 (float32x2_t a,
> +                        float32x2_t b,
> +                        float32x4_t v)
> +{
> +  return vmls_laneq_f32 (a, b, v, 0);
> +}
> +
> +float32x4_t fooq_laneq_f32 (float32x4_t a,
> +                         float32x4_t b,
> +                         float32x4_t v)
> +{
> +  return vmlsq_laneq_f32 (a, b, v, 0);
> +}
> +
> +/* { dg-final { scan-assembler-times {fmul} 4} }  */
> +/* { dg-final { scan-assembler-times {fsub} 4} }  */

Reply via email to