https://gcc.gnu.org/g:cdacb32bd5cd8e09f03a7df4339832f436d43a27
commit r15-5509-gcdacb32bd5cd8e09f03a7df4339832f436d43a27
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Wed Nov 20 13:27:41 2024 +0000

    aarch64: Add support for SME_B16B16

    This patch adds support for the SME_B16B16 extension.  It follows
    similar lines to the SME_F16F16 extension added earlier.

    gcc/
        * config/aarch64/aarch64-option-extensions.def (sme-b16b16): New
        extension.
        * doc/invoke.texi: Document it.
        * config/aarch64/aarch64.h (TARGET_STREAMING_SME_B16B16): New macro.
        * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins):
        Conditionally define __ARM_FEATURE_SME_B16B16.
        * config/aarch64/aarch64-sve-builtins-sme.def: Add SME_B16B16 forms
        of existing intrinsics.
        * config/aarch64/aarch64-sme.md
        (@aarch64_sme_<SME_BINARY_SLICE_HSDF:optab><mode>)
        (*aarch64_sme_<SME_BINARY_SLICE_HSDF:optab><mode>_plus)
        (@aarch64_sme_<SME_FP_TERNARY_SLICE:optab><mode><mode>)
        (*aarch64_sme_<SME_FP_TERNARY_SLICE:optab><mode><mode>_plus)
        (@aarch64_sme_single_<SME_FP_TERNARY_SLICE:optab><mode><mode>)
        (*aarch64_sme_single_<SME_FP_TERNARY_SLICE:optab><mode><mode>_plus)
        (@aarch64_sme_lane_<SME_FP_TERNARY_SLICE:optab><mode><mode>)
        (*aarch64_sme_lane_<SME_FP_TERNARY_SLICE:optab><mode><mode>)
        (@aarch64_sme_<SME_FP_MOP:optab><mode><mode>): Extend to BF16 modes.
        * config/aarch64/aarch64-sve-builtins.cc (TYPES_za_h_bfloat): New
        type macro.
        * config/aarch64/iterators.md (SME_ZA_HSDFx24): Add BF16 modes.
        (SME_MOP_HSDF): Likewise.

    gcc/testsuite/
        * lib/target-supports.exp: Test the assembler for sve-b16b16 support.
        * gcc.target/aarch64/pragma_cpp_predefs_4.c: Add tests for
        __ARM_FEATURE_SME_B16B16.
        * gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x2.c: New test.
        * gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x4.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_bf16_vg1x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_bf16_vg1x4.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mla_za16_bf16_vg1x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mla_za16_bf16_vg1x4.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_bf16_vg1x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_bf16_vg1x4.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mls_za16_bf16_vg1x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mls_za16_bf16_vg1x4.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mopa_za16_bf16.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mops_za16_bf16.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/sub_za16_bf16_vg1x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/sub_za16_bf16_vg1x4.c: Likewise.
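For context, a minimal usage sketch of the intrinsics enabled by this patch
(not part of the commit): it assumes <arm_sme.h>, the ACLE SME keyword
attributes __arm_streaming and __arm_inout("za"), and a compiler with
+sme-b16b16 support; the function and argument names are illustrative only.

    #include <arm_sme.h>

    #pragma GCC target "+sme-b16b16"

    /* Accumulate BF16 data into ZA slices [slice, slice + 1].
       Must be called in streaming mode with shared ZA state.  */
    void
    accumulate (uint32_t slice, svbfloat16x2_t acc, svbfloat16x2_t x,
                svbfloat16_t y) __arm_streaming __arm_inout("za")
    {
      /* BFADD za.h[...], vgx2: add a BF16 vector pair to two ZA slices.  */
      svadd_za16_bf16_vg1x2 (slice, acc);

      /* BFMLA (single-vector form): multiply-accumulate x by y into the
         same two ZA slices.  */
      svmla_single_za16_bf16_vg1x2 (slice, x, y);
    }

The #pragma mirrors what the new tests use; the extension can likewise be
enabled on the command line via an -march modifier ending in +sme-b16b16.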
Diff: --- gcc/config/aarch64/aarch64-c.cc | 2 + gcc/config/aarch64/aarch64-option-extensions.def | 2 + gcc/config/aarch64/aarch64-sme.md | 20 ++- gcc/config/aarch64/aarch64-sve-builtins-sme.def | 15 ++ gcc/config/aarch64/aarch64-sve-builtins.cc | 5 + gcc/config/aarch64/aarch64.h | 4 + gcc/config/aarch64/iterators.md | 7 +- gcc/doc/invoke.texi | 3 + .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 17 ++ .../aarch64/sme2/acle-asm/add_za16_bf16_vg1x2.c | 126 ++++++++++++++ .../aarch64/sme2/acle-asm/add_za16_bf16_vg1x4.c | 141 ++++++++++++++++ .../sme2/acle-asm/mla_lane_za16_bf16_vg1x2.c | 106 ++++++++++++ .../sme2/acle-asm/mla_lane_za16_bf16_vg1x4.c | 112 +++++++++++++ .../aarch64/sme2/acle-asm/mla_za16_bf16_vg1x2.c | 184 +++++++++++++++++++++ .../aarch64/sme2/acle-asm/mla_za16_bf16_vg1x4.c | 176 ++++++++++++++++++++ .../sme2/acle-asm/mls_lane_za16_bf16_vg1x2.c | 106 ++++++++++++ .../sme2/acle-asm/mls_lane_za16_bf16_vg1x4.c | 112 +++++++++++++ .../aarch64/sme2/acle-asm/mls_za16_bf16_vg1x2.c | 184 +++++++++++++++++++++ .../aarch64/sme2/acle-asm/mls_za16_bf16_vg1x4.c | 176 ++++++++++++++++++++ .../aarch64/sme2/acle-asm/mopa_za16_bf16.c | 34 ++++ .../aarch64/sme2/acle-asm/mops_za16_bf16.c | 34 ++++ .../aarch64/sme2/acle-asm/sub_za16_bf16_vg1x2.c | 126 ++++++++++++++ .../aarch64/sme2/acle-asm/sub_za16_bf16_vg1x4.c | 141 ++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 2 +- 24 files changed, 1824 insertions(+), 11 deletions(-) diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index db1a93b7e054..78224eaedfba 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -271,6 +271,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile); aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile); + aarch64_def_or_undef (AARCH64_HAVE_ISA (SME_B16B16), + "__ARM_FEATURE_SME_B16B16", pfile); aarch64_def_or_undef (AARCH64_HAVE_ISA (SME_F16F16), "__ARM_FEATURE_SME_F16F16", pfile); aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile); diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 0667359da761..7ad966ac7f00 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -225,6 +225,8 @@ AARCH64_FMV_FEATURE("sme-i16i64", SME_I64, (SME_I16I64)) AARCH64_OPT_FMV_EXTENSION("sme2", SME2, (SME), (), (), "sme2") +AARCH64_OPT_EXTENSION("sme-b16b16", SME_B16B16, (SME2, SVE_B16B16), (), (), "") + AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "") AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 226fcbd7cef9..2dda831b7c0d 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -926,6 +926,8 @@ ;; ------------------------------------------------------------------------- ;; Includes: ;; - ADD +;; - BFADD +;; - BFSUB ;; - FADD ;; - FSUB ;; - SUB @@ -965,7 +967,7 @@ (match_operand:SME_ZA_HSDFx24 1 "aligned_register_operand" "Uw<vector_count>")] SME_BINARY_SLICE_HSDF))] "TARGET_STREAMING_SME2" - "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1" + "<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1" ) (define_insn "*aarch64_sme_<optab><mode>_plus" @@ -978,7 +980,7 @@ (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")] SME_BINARY_SLICE_HSDF))] "TARGET_STREAMING_SME2" 
- "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2" + "<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2" ) ;; ------------------------------------------------------------------------- @@ -1632,6 +1634,8 @@ ;; ---- [FP] Ternary arithmetic on ZA slice ;; ------------------------------------------------------------------------- ;; Includes: +;; - BFMLA +;; - BFMLS ;; - FMLA ;; - FMLS ;; ------------------------------------------------------------------------- @@ -1646,7 +1650,7 @@ (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")] SME_FP_TERNARY_SLICE))] "TARGET_STREAMING_SME2" - "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2" + "<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2" ) (define_insn "*aarch64_sme_<optab><mode><mode>_plus" @@ -1660,7 +1664,7 @@ (match_operand:SME_ZA_HSDFx24 3 "aligned_register_operand" "Uw<vector_count>")] SME_FP_TERNARY_SLICE))] "TARGET_STREAMING_SME2" - "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3" + "<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3" ) (define_insn "@aarch64_sme_single_<optab><mode><mode>" @@ -1674,7 +1678,7 @@ (match_operand:<SME_ZA_HSDFx24:VSINGLE> 2 "register_operand" "x"))] SME_FP_TERNARY_SLICE))] "TARGET_STREAMING_SME2" - "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>" + "<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>" ) (define_insn "*aarch64_sme_single_<optab><mode><mode>_plus" @@ -1689,7 +1693,7 @@ (match_operand:<SME_ZA_HSDFx24:VSINGLE> 3 "register_operand" "x"))] SME_FP_TERNARY_SLICE))] "TARGET_STREAMING_SME2" - "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>" + "<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>" ) (define_insn "@aarch64_sme_lane_<optab><mode><mode>" @@ -1705,7 +1709,7 @@ UNSPEC_SVE_LANE_SELECT)] SME_FP_TERNARY_SLICE))] "TARGET_STREAMING_SME2" - "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>[%3]" + "<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>[%3]" ) (define_insn "*aarch64_sme_lane_<optab><mode><mode>" @@ -1722,7 +1726,7 @@ UNSPEC_SVE_LANE_SELECT)] SME_FP_TERNARY_SLICE))] "TARGET_STREAMING_SME2" - "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>[%4]" + "<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>[%4]" ) ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def index c79245d92495..115f011c967e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def @@ -221,6 +221,21 @@ DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_h_float, za_m) DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_h_float, vg1x24, none) #undef REQUIRED_EXTENSIONS +#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_B16B16) +DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_h_bfloat, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_h_bfloat, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_h_bfloat, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_h_bfloat, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_h_bfloat, + vg1x24, none) +DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_h_bfloat, za_m) +DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_h_bfloat, za_m) +DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, 
za_h_bfloat, vg1x24, none) +#undef REQUIRED_EXTENSIONS + #undef DEF_SME_ZA_FUNCTION #undef DEF_SME_ZA_FUNCTION_GS #undef DEF_SME_FUNCTION diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 671f17dd8bf5..ea2281127691 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -633,6 +633,10 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { TYPES_za_bhsd_data (S, D), \ TYPES_reinterpret1 (D, za128) +/* _za16_bf16. */ +#define TYPES_za_h_bfloat(S, D) \ + D (za16, bf16) + /* _za16_f16. */ #define TYPES_za_h_float(S, D) \ D (za16, f16) @@ -807,6 +811,7 @@ DEF_SVE_TYPES_ARRAY (all_za); DEF_SVE_TYPES_ARRAY (d_za); DEF_SVE_TYPES_ARRAY (za_bhsd_data); DEF_SVE_TYPES_ARRAY (za_all_data); +DEF_SVE_TYPES_ARRAY (za_h_bfloat); DEF_SVE_TYPES_ARRAY (za_h_float); DEF_SVE_TYPES_ARRAY (za_s_b_signed); DEF_SVE_TYPES_ARRAY (za_s_b_unsigned); diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 92e14c2b6a8b..bbb904353f45 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -349,6 +349,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED /* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64. */ #define TARGET_SME_I16I64 AARCH64_HAVE_ISA (SME_I16I64) +/* The FEAT_SME_B16B16 extension to SME, enabled through +sme-b16b16. */ +#define TARGET_STREAMING_SME_B16B16 \ + (AARCH64_HAVE_ISA (SME_B16B16) && TARGET_STREAMING) + /* The FEAT_SME_F16F16 extension to SME, enabled through +sme-f16f16. */ #define TARGET_STREAMING_SME_F16F16 \ (AARCH64_HAVE_ISA (SME_F16F16) && TARGET_STREAMING) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 3325a3a14840..88c04c917a04 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -686,13 +686,16 @@ (VNx4DF "TARGET_SME_F64F64") (VNx8DF "TARGET_SME_F64F64") (VNx16HF "TARGET_STREAMING_SME_F16F16") - (VNx32HF "TARGET_STREAMING_SME_F16F16")]) + (VNx32HF "TARGET_STREAMING_SME_F16F16") + (VNx16BF "TARGET_STREAMING_SME_B16B16") + (VNx32BF "TARGET_STREAMING_SME_B16B16")]) ;; The modes for which outer product instructions are supported. (define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")]) (define_mode_iterator SME_MOP_HSDF [VNx4SF (VNx2DF "TARGET_SME_F64F64") - (VNx8HF "TARGET_STREAMING_SME_F16F16")]) + (VNx8HF "TARGET_STREAMING_SME_F16F16") + (VNx8BF "TARGET_STREAMING_SME_B16B16")]) ;; ------------------------------------------------------------------ ;; Unspec enumerations for Advance SIMD. These could well go into diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index fa2efb42bbf4..d3ba160bbe23 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21854,6 +21854,9 @@ Enable the FEAT_SME_F64F64 extension to SME. This also enables SME instructions. @item sme2 Enable the Scalable Matrix Extension 2. This also enables SME instructions. +@item sme-b16b16 +Enable the FEAT_SME_B16B16 extension to SME. This also enables SME2 +and SVE_B16B16 instructions. @item sme-f16f16 Enable the FEAT_SME_F16F16 extension to SME. This also enables SME2 instructions. 
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c index d9ec0b630cc9..ed0d70a71753 100644 --- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c @@ -70,6 +70,9 @@ #ifdef __ARM_FEATURE_SME_I16I64 #error Foo #endif +#ifdef __ARM_FEATURE_SME_B16B16 +#error Foo +#endif #ifdef __ARM_FEATURE_SME_F16F16 #error Foo #endif @@ -88,6 +91,20 @@ #error Foo #endif +#pragma GCC target "+nothing+sme-b16b16" +#ifndef __ARM_FEATURE_SME_B16B16 +#error Foo +#endif +#ifndef __ARM_FEATURE_SME +#error Foo +#endif +#ifndef __ARM_FEATURE_SME2 +#error Foo +#endif +#ifndef __ARM_FEATURE_SVE_B16B16 +#error Foo +#endif + #pragma GCC target "+nothing+sme-f16f16" #ifndef __ARM_FEATURE_SME_F16F16 #error Foo diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x2.c new file mode 100644 index 000000000000..466f4a679e95 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x2.c @@ -0,0 +1,126 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_0_z0, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (0, z0), + svadd_za16_vg1x2 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w0, z0), + svadd_za16_vg1x2 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w7, z0), + svadd_za16_vg1x2 (w7, z0)) + +/* +** add_w8_z0: +** bfadd za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w8, z0), + svadd_za16_vg1x2 (w8, z0)) + +/* +** add_w11_z0: +** bfadd za\.h\[w11, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w11, z0), + svadd_za16_vg1x2 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w12, z0), + svadd_za16_vg1x2 (w12, z0)) + +/* +** add_w8p7_z0: +** bfadd za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w8 + 7, z0), + svadd_za16_vg1x2 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w8 + 8, z0), + svadd_za16_vg1x2 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w8 - 1, z0), + svadd_za16_vg1x2 (w8 - 1, z0)) + +/* +** add_w8_z18: +** bfadd za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z18, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w8, z18), + svadd_za16_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned 
registers. */ + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** bfadd za\.h\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w8, z23), + svadd_za16_vg1x2 (w8, z23)) + +/* +** add_w8_z28: +** bfadd za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svbfloat16x2_t, + svadd_za16_bf16_vg1x2 (w8, z28), + svadd_za16_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x4.c new file mode 100644 index 000000000000..b91d8f7eeebd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x4.c @@ -0,0 +1,141 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_0_z0, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (0, z0), + svadd_za16_vg1x4 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** bfadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w0, z0), + svadd_za16_vg1x4 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** bfadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w7, z0), + svadd_za16_vg1x4 (w7, z0)) + +/* +** add_w8_z0: +** bfadd za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w8, z0), + svadd_za16_vg1x4 (w8, z0)) + +/* +** add_w11_z0: +** bfadd za\.h\[w11, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w11, z0), + svadd_za16_vg1x4 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** bfadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w12, z0), + svadd_za16_vg1x4 (w12, z0)) + +/* +** add_w8p7_z0: +** bfadd za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w8 + 7, z0), + svadd_za16_vg1x4 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w8 + 8, z0), + svadd_za16_vg1x4 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w8 - 1, z0), + svadd_za16_vg1x4 (w8 - 1, z0)) + +/* +** add_w8_z4: +** bfadd za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z4, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w8, z4), + svadd_za16_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** add_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfadd za\.h\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z18, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w8, z18), + svadd_za16_vg1x4 (w8, z18)) + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfadd za\.h\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w8, z23), + svadd_za16_vg1x4 (w8, z23)) + +/* +** add_w8_z28: +** bfadd za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svbfloat16x4_t, + svadd_za16_bf16_vg1x4 (w8, z28), + svadd_za16_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_bf16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_bf16_vg1x2.c new file mode 100644 index 000000000000..43c1597cd42d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_bf16_vg1x2.c @@ -0,0 +1,106 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x2 (0, z0, z4, 0), + svmla_lane_za16_vg1x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x2 (w0, z0, z7, 1), + svmla_lane_za16_vg1x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** bfmla za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x2 (w8, z28, z4, 2), + svmla_lane_za16_vg1x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p7_z0_z4_3: +** bfmla za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x2 (w8 + 7, z0, z4, 3), + svmla_lane_za16_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x2 (w8 + 8, z0, z4, 4), + svmla_lane_za16_vg1x2 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x2 (w0 - 1, z0, z4, 5), + svmla_lane_za16_vg1x2 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! 
+** bfmla za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x2 (w8, z4, z15, 6), + svmla_lane_za16_vg1x2 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** bfmla za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x2 (w8, z28, z16, 7), + svmla_lane_za16_vg1x2 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** bfmla za\.h\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x2 (w8, z17, z7, 0), + svmla_lane_za16_vg1x2 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** bfmla za\.h\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x2 (w8, z22, z4, 1), + svmla_lane_za16_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_bf16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_bf16_vg1x4.c new file mode 100644 index 000000000000..66f522b56a5c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_bf16_vg1x4.c @@ -0,0 +1,112 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x4 (0, z0, z4, 0), + svmla_lane_za16_vg1x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x4 (w0, z0, z7, 1), + svmla_lane_za16_vg1x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** bfmla za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x4 (w8, z28, z4, 2), + svmla_lane_za16_vg1x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p7_z0_z4_3: +** bfmla za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x4 (w8 + 7, z0, z4, 3), + svmla_lane_za16_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x4 (w8 + 8, z0, z4, 4), + svmla_lane_za16_vg1x4 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x4 (w0 - 1, z0, z4, 5), + svmla_lane_za16_vg1x4 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! 
+** bfmla za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x4 (w8, z4, z15, 6), + svmla_lane_za16_vg1x4 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** bfmla za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x4 (w8, z28, z16, 7), + svmla_lane_za16_vg1x4 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfmla za\.h\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x4 (w8, z17, z7, 0), + svmla_lane_za16_vg1x4 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfmla za\.h\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za16_bf16_vg1x4 (w8, z22, z4, 1), + svmla_lane_za16_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_bf16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_bf16_vg1x2.c new file mode 100644 index 000000000000..21cbd9fa8ae9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_bf16_vg1x2.c @@ -0,0 +1,184 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svbfloat16x2_t, + svmla_za16_bf16_vg1x2 (0, z0, z0), + svmla_za16_vg1x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** bfmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svbfloat16x2_t, + svmla_za16_bf16_vg1x2 (w0, z0, z0), + svmla_za16_vg1x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** bfmla za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svbfloat16x2_t, + svmla_za16_bf16_vg1x2 (w8, z0, z4), + svmla_za16_vg1x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** bfmla za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svbfloat16x2_t, + svmla_za16_bf16_vg1x2 (w8, z4, z18), + svmla_za16_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z23_z0: +** ... +** bfmla za\.h\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svbfloat16x2_t, + svmla_za16_bf16_vg1x2 (w8, z23, z0), + svmla_za16_vg1x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z23: +** ... 
+** bfmla za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z23, svbfloat16x2_t, + svmla_za16_bf16_vg1x2 (w8, z18, z23), + svmla_za16_vg1x2 (w8, z18, z23)) + +/* +** mla_w8_z4_z28: +** bfmla za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svbfloat16x2_t, + svmla_za16_bf16_vg1x2 (w8, z4, z28), + svmla_za16_vg1x2 (w8, z4, z28)) + +/* +** mla_w8p7_z4_z0: +** bfmla za\.h\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svbfloat16x2_t, + svmla_za16_bf16_vg1x2 (w8 + 7, z4, z0), + svmla_za16_vg1x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmla za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svbfloat16x2_t, + svmla_za16_bf16_vg1x2 (w8 + 8, z4, z4), + svmla_za16_vg1x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmla za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svbfloat16x2_t, + svmla_za16_bf16_vg1x2 (w8 - 1, z4, z0), + svmla_za16_vg1x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmla za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x2 (0, z1, z0), + svmla_za16_vg1x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** bfmla za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x2 (w0, z1, z0), + svmla_za16_vg1x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** bfmla za\.h\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x2 (w8, z1, z0), + svmla_za16_vg1x2 (w8, z1, z0)) + +/* +** mla_single_w8p7_z1_z0: +** bfmla za\.h\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x2 (w8 + 7, z1, z0), + svmla_za16_vg1x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfmla za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x2 (w8 + 8, z1, z0), + svmla_za16_vg1x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmla za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x2 (w0 - 1, z1, z0), + svmla_za16_vg1x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** bfmla za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svbfloat16x2_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x2 (w8, z0, z15), + svmla_za16_vg1x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** bfmla za\.h\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svbfloat16x2_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x2 (w8, z20, z16), + svmla_za16_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_bf16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_bf16_vg1x4.c new file mode 100644 index 000000000000..0fead21e677f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_bf16_vg1x4.c @@ -0,0 +1,176 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svbfloat16x4_t, + svmla_za16_bf16_vg1x4 (0, z0, z0), + svmla_za16_vg1x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** bfmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svbfloat16x4_t, + svmla_za16_bf16_vg1x4 (w0, z0, z0), + svmla_za16_vg1x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** bfmla za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svbfloat16x4_t, + svmla_za16_bf16_vg1x4 (w8, z0, z4), + svmla_za16_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... +** bfmla za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svbfloat16x4_t, + svmla_za16_bf16_vg1x4 (w8, z0, z18), + svmla_za16_vg1x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z28: +** ... +** bfmla za\.h\[w8, 0, vgx4\], [^\n]+, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svbfloat16x4_t, + svmla_za16_bf16_vg1x4 (w8, z18, z28), + svmla_za16_vg1x4 (w8, z18, z28)) + +/* +** mla_w8_z28_z23: +** ... 
+** bfmla za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z23, svbfloat16x4_t, + svmla_za16_bf16_vg1x4 (w8, z28, z23), + svmla_za16_vg1x4 (w8, z28, z23)) + +/* +** mla_w8p7_z4_z0: +** bfmla za\.h\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svbfloat16x4_t, + svmla_za16_bf16_vg1x4 (w8 + 7, z4, z0), + svmla_za16_vg1x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmla za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svbfloat16x4_t, + svmla_za16_bf16_vg1x4 (w8 + 8, z4, z4), + svmla_za16_vg1x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmla za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svbfloat16x4_t, + svmla_za16_bf16_vg1x4 (w8 - 1, z4, z0), + svmla_za16_vg1x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmla za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x4 (0, z1, z0), + svmla_za16_vg1x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** bfmla za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x4 (w0, z1, z0), + svmla_za16_vg1x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** bfmla za\.h\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x4 (w8, z1, z0), + svmla_za16_vg1x4 (w8, z1, z0)) + +/* +** mla_single_w8p7_z1_z0: +** bfmla za\.h\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x4 (w8 + 7, z1, z0), + svmla_za16_vg1x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfmla za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x4 (w8 + 8, z1, z0), + svmla_za16_vg1x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmla za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x4 (w0 - 1, z1, z0), + svmla_za16_vg1x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** bfmla za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svbfloat16x4_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x4 (w8, z0, z15), + svmla_za16_vg1x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** bfmla za\.h\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svbfloat16x4_t, svbfloat16_t, + svmla_single_za16_bf16_vg1x4 (w8, z20, z16), + svmla_za16_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_bf16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_bf16_vg1x2.c new file mode 100644 index 000000000000..ab28c49fac05 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_bf16_vg1x2.c @@ -0,0 +1,106 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfmls za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x2 (0, z0, z4, 0), + svmls_lane_za16_vg1x2 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfmls za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x2 (w0, z0, z7, 1), + svmls_lane_za16_vg1x2 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** bfmls za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x2 (w8, z28, z4, 2), + svmls_lane_za16_vg1x2 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p7_z0_z4_3: +** bfmls za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x2 (w8 + 7, z0, z4, 3), + svmls_lane_za16_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmls za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x2 (w8 + 8, z0, z4, 4), + svmls_lane_za16_vg1x2 (w8 + 8, z0, z4, 4)) + +/* +** mls_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmls za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x2 (w0 - 1, z0, z4, 5), + svmls_lane_za16_vg1x2 (w0 - 1, z0, z4, 5)) + +/* +** mls_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! 
+** bfmls za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x2 (w8, z4, z15, 6), + svmls_lane_za16_vg1x2 (w8, z4, z15, 6)) + +/* +** mls_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** bfmls za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x2 (w8, z28, z16, 7), + svmls_lane_za16_vg1x2 (w8, z28, z16, 7)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** bfmls za\.h\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x2 (w8, z17, z7, 0), + svmls_lane_za16_vg1x2 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** bfmls za\.h\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x2 (w8, z22, z4, 1), + svmls_lane_za16_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_bf16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_bf16_vg1x4.c new file mode 100644 index 000000000000..23587a100b3a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_bf16_vg1x4.c @@ -0,0 +1,112 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfmls za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x4 (0, z0, z4, 0), + svmls_lane_za16_vg1x4 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfmls za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x4 (w0, z0, z7, 1), + svmls_lane_za16_vg1x4 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** bfmls za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x4 (w8, z28, z4, 2), + svmls_lane_za16_vg1x4 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p7_z0_z4_3: +** bfmls za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x4 (w8 + 7, z0, z4, 3), + svmls_lane_za16_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmls za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x4 (w8 + 8, z0, z4, 4), + svmls_lane_za16_vg1x4 (w8 + 8, z0, z4, 4)) + +/* +** mls_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmls za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x4 (w0 - 1, z0, z4, 5), + svmls_lane_za16_vg1x4 (w0 - 1, z0, z4, 5)) + +/* +** mls_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! 
+** bfmls za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x4 (w8, z4, z15, 6), + svmls_lane_za16_vg1x4 (w8, z4, z15, 6)) + +/* +** mls_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** bfmls za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x4 (w8, z28, z16, 7), + svmls_lane_za16_vg1x4 (w8, z28, z16, 7)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfmls za\.h\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x4 (w8, z17, z7, 0), + svmls_lane_za16_vg1x4 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfmls za\.h\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za16_bf16_vg1x4 (w8, z22, z4, 1), + svmls_lane_za16_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_bf16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_bf16_vg1x2.c new file mode 100644 index 000000000000..2fcb48fc548c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_bf16_vg1x2.c @@ -0,0 +1,184 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmls za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svbfloat16x2_t, + svmls_za16_bf16_vg1x2 (0, z0, z0), + svmls_za16_vg1x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** bfmls za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svbfloat16x2_t, + svmls_za16_bf16_vg1x2 (w0, z0, z0), + svmls_za16_vg1x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** bfmls za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svbfloat16x2_t, + svmls_za16_bf16_vg1x2 (w8, z0, z4), + svmls_za16_vg1x2 (w8, z0, z4)) + +/* +** mls_w8_z4_z18: +** bfmls za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z18, svbfloat16x2_t, + svmls_za16_bf16_vg1x2 (w8, z4, z18), + svmls_za16_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z23_z0: +** ... +** bfmls za\.h\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svbfloat16x2_t, + svmls_za16_bf16_vg1x2 (w8, z23, z0), + svmls_za16_vg1x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z23: +** ... 
+** bfmls za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z23, svbfloat16x2_t, + svmls_za16_bf16_vg1x2 (w8, z18, z23), + svmls_za16_vg1x2 (w8, z18, z23)) + +/* +** mls_w8_z4_z28: +** bfmls za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svbfloat16x2_t, + svmls_za16_bf16_vg1x2 (w8, z4, z28), + svmls_za16_vg1x2 (w8, z4, z28)) + +/* +** mls_w8p7_z4_z0: +** bfmls za\.h\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svbfloat16x2_t, + svmls_za16_bf16_vg1x2 (w8 + 7, z4, z0), + svmls_za16_vg1x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmls za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svbfloat16x2_t, + svmls_za16_bf16_vg1x2 (w8 + 8, z4, z4), + svmls_za16_vg1x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmls za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svbfloat16x2_t, + svmls_za16_bf16_vg1x2 (w8 - 1, z4, z0), + svmls_za16_vg1x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmls za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x2 (0, z1, z0), + svmls_za16_vg1x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** bfmls za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x2 (w0, z1, z0), + svmls_za16_vg1x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** bfmls za\.h\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x2 (w8, z1, z0), + svmls_za16_vg1x2 (w8, z1, z0)) + +/* +** mls_single_w8p7_z1_z0: +** bfmls za\.h\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x2 (w8 + 7, z1, z0), + svmls_za16_vg1x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfmls za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x2 (w8 + 8, z1, z0), + svmls_za16_vg1x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmls za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x2 (w0 - 1, z1, z0), + svmls_za16_vg1x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** bfmls za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svbfloat16x2_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x2 (w8, z0, z15), + svmls_za16_vg1x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** bfmls za\.h\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svbfloat16x2_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x2 (w8, z20, z16), + svmls_za16_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_bf16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_bf16_vg1x4.c new file mode 100644 index 000000000000..e7fc338599fa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_bf16_vg1x4.c @@ -0,0 +1,176 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmls za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svbfloat16x4_t, + svmls_za16_bf16_vg1x4 (0, z0, z0), + svmls_za16_vg1x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** bfmls za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svbfloat16x4_t, + svmls_za16_bf16_vg1x4 (w0, z0, z0), + svmls_za16_vg1x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** bfmls za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svbfloat16x4_t, + svmls_za16_bf16_vg1x4 (w8, z0, z4), + svmls_za16_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z18: +** ... +** bfmls za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svbfloat16x4_t, + svmls_za16_bf16_vg1x4 (w8, z0, z18), + svmls_za16_vg1x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z28: +** ... +** bfmls za\.h\[w8, 0, vgx4\], [^\n]+, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svbfloat16x4_t, + svmls_za16_bf16_vg1x4 (w8, z18, z28), + svmls_za16_vg1x4 (w8, z18, z28)) + +/* +** mls_w8_z28_z23: +** ... 
+** bfmls za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z23, svbfloat16x4_t, + svmls_za16_bf16_vg1x4 (w8, z28, z23), + svmls_za16_vg1x4 (w8, z28, z23)) + +/* +** mls_w8p7_z4_z0: +** bfmls za\.h\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svbfloat16x4_t, + svmls_za16_bf16_vg1x4 (w8 + 7, z4, z0), + svmls_za16_vg1x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmls za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svbfloat16x4_t, + svmls_za16_bf16_vg1x4 (w8 + 8, z4, z4), + svmls_za16_vg1x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmls za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svbfloat16x4_t, + svmls_za16_bf16_vg1x4 (w8 - 1, z4, z0), + svmls_za16_vg1x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmls za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x4 (0, z1, z0), + svmls_za16_vg1x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** bfmls za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x4 (w0, z1, z0), + svmls_za16_vg1x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** bfmls za\.h\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x4 (w8, z1, z0), + svmls_za16_vg1x4 (w8, z1, z0)) + +/* +** mls_single_w8p7_z1_z0: +** bfmls za\.h\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x4 (w8 + 7, z1, z0), + svmls_za16_vg1x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfmls za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x4 (w8 + 8, z1, z0), + svmls_za16_vg1x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmls za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x4 (w0 - 1, z1, z0), + svmls_za16_vg1x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** bfmls za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svbfloat16x4_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x4 (w8, z0, z15), + svmls_za16_vg1x4 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** bfmls za\.h\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svbfloat16x4_t, svbfloat16_t, + svmls_single_za16_bf16_vg1x4 (w8, z20, z16), + svmls_za16_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_bf16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_bf16.c new file mode 100644 index 000000000000..5ee133a5eb2c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_bf16.c @@ -0,0 +1,34 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** mopa_za16_bf16_0_p0_p1_z0_z1: +** bfmopa za0\.h, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mopa_za16_bf16_0_p0_p1_z0_z1, svbfloat16_t, + svmopa_za16_bf16_m (0, p0, p1, z0, z1), + svmopa_za16_m (0, p0, p1, z0, z1)) + +/* +** mopa_za16_bf16_0_p1_p0_z1_z0: +** bfmopa za0\.h, p1/m, p0/m, z1\.h, z0\.h +** ret +*/ +TEST_UNIFORM_ZA (mopa_za16_bf16_0_p1_p0_z1_z0, svbfloat16_t, + svmopa_za16_bf16_m (0, p1, p0, z1, z0), + svmopa_za16_m (0, p1, p0, z1, z0)) + +/* +** mopa_za16_bf16_1_p0_p1_z0_z1: +** bfmopa za1\.h, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mopa_za16_bf16_1_p0_p1_z0_z1, svbfloat16_t, + svmopa_za16_bf16_m (1, p0, p1, z0, z1), + svmopa_za16_m (1, p0, p1, z0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za16_bf16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za16_bf16.c new file mode 100644 index 000000000000..2bbb28ea3af6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za16_bf16.c @@ -0,0 +1,34 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** mops_za16_bf16_0_p0_p1_z0_z1: +** bfmops za0\.h, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mops_za16_bf16_0_p0_p1_z0_z1, svbfloat16_t, + svmops_za16_bf16_m (0, p0, p1, z0, z1), + svmops_za16_m (0, p0, p1, z0, z1)) + +/* +** mops_za16_bf16_0_p1_p0_z1_z0: +** bfmops za0\.h, p1/m, p0/m, z1\.h, z0\.h +** ret +*/ +TEST_UNIFORM_ZA (mops_za16_bf16_0_p1_p0_z1_z0, svbfloat16_t, + svmops_za16_bf16_m (0, p1, p0, z1, z0), + svmops_za16_m (0, p1, p0, z1, z0)) + +/* +** mops_za16_bf16_1_p0_p1_z0_z1: +** bfmops za1\.h, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mops_za16_bf16_1_p0_p1_z0_z1, svbfloat16_t, + svmops_za16_bf16_m (1, p0, p1, z0, z1), + svmops_za16_m (1, p0, p1, z0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_bf16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_bf16_vg1x2.c new file mode 100644 index 000000000000..3c0ea0c36718 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_bf16_vg1x2.c @@ -0,0 +1,126 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfsub za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (0, z0), + svsub_za16_vg1x2 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** bfsub za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w0, z0), + svsub_za16_vg1x2 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** bfsub za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w7, z0), + svsub_za16_vg1x2 (w7, z0)) + +/* +** sub_w8_z0: +** bfsub za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w8, z0), + svsub_za16_vg1x2 (w8, z0)) + +/* +** sub_w11_z0: +** bfsub za\.h\[w11, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w11, z0), + svsub_za16_vg1x2 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** bfsub za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w12, z0), + svsub_za16_vg1x2 (w12, z0)) + +/* +** sub_w8p7_z0: +** bfsub za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w8 + 7, z0), + svsub_za16_vg1x2 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfsub za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w8 + 8, z0), + svsub_za16_vg1x2 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfsub za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w8 - 1, z0), + svsub_za16_vg1x2 (w8 - 1, z0)) + +/* +** sub_w8_z18: +** bfsub za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w8, z18), + svsub_za16_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** bfsub za\.h\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w8, z23), + svsub_za16_vg1x2 (w8, z23)) + +/* +** sub_w8_z28: +** bfsub za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svbfloat16x2_t, + svsub_za16_bf16_vg1x2 (w8, z28), + svsub_za16_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_bf16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_bf16_vg1x4.c new file mode 100644 index 000000000000..61f9f9635dfc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_bf16_vg1x4.c @@ -0,0 +1,141 @@ +/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme-b16b16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-b16b16" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfsub za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (0, z0), + svsub_za16_vg1x4 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** bfsub za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w0, z0), + svsub_za16_vg1x4 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** bfsub za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w7, z0), + svsub_za16_vg1x4 (w7, z0)) + +/* +** sub_w8_z0: +** bfsub za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w8, z0), + svsub_za16_vg1x4 (w8, z0)) + +/* +** sub_w11_z0: +** bfsub za\.h\[w11, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w11, z0), + svsub_za16_vg1x4 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** bfsub za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w12, z0), + svsub_za16_vg1x4 (w12, z0)) + +/* +** sub_w8p7_z0: +** bfsub za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w8 + 7, z0), + svsub_za16_vg1x4 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfsub za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w8 + 8, z0), + svsub_za16_vg1x4 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfsub za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w8 - 1, z0), + svsub_za16_vg1x4 (w8 - 1, z0)) + +/* +** sub_w8_z4: +** bfsub za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (sub_w8_z4, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w8, z4), + svsub_za16_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** sub_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfsub za\.h\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w8, z18), + svsub_za16_vg1x4 (w8, z18)) + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfsub za\.h\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w8, z23), + svsub_za16_vg1x4 (w8, z23)) + +/* +** sub_w8_z28: +** bfsub za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svbfloat16x4_t, + svsub_za16_bf16_vg1x4 (w8, z28), + svsub_za16_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index eaae2f61775d..62d58226db84 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -12122,7 +12122,7 @@ proc check_effective_target_aarch64_tiny { } { foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64" "sme" "sme-i16i64" "sme2" "sve-b16b16" - "sme-f16f16" } { + "sme-b16b16" "sme-f16f16" } { eval [string map [list FUNC $aarch64_ext] { proc check_effective_target_aarch64_asm_FUNC_ok { } { if { [istarget aarch64*-*-*] } {