https://gcc.gnu.org/g:e6751e1deb636bbd1538ccded4d9b3abfec8e0cf
commit r15-5508-ge6751e1deb636bbd1538ccded4d9b3abfec8e0cf
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Wed Nov 20 13:27:41 2024 +0000

    aarch64: Add support for SME_F16F16

    This patch adds support for the SME_F16F16 extension.  The extension
    adds two new instructions to convert from a single vector of f16s to
    two vectors of f32s.  It also adds f16 variants of existing SME ZA
    instructions.

    gcc/
        * config/aarch64/aarch64-option-extensions.def (sme-f16f16): New
        extension.
        * doc/invoke.texi: Document it.  Also document that sme-i16i64
        and sme-f64f64 enable SME.
        * config/aarch64/aarch64.h (TARGET_STREAMING_SME_F16F16): New
        macro.
        * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins):
        Conditionally define __ARM_FEATURE_SME_F16F16.
        * config/aarch64/aarch64-sve-builtins-sve2.def (svcvt, svcvtl):
        Add new SME_F16F16 intrinsics.
        * config/aarch64/aarch64-sve-builtins-sme.def: Add SME_F16F16
        forms of existing intrinsics.
        * config/aarch64/aarch64-sve-builtins.cc (TYPES_h_float)
        (TYPES_cvt_f32_f16, TYPES_za_h_float): New type macros.
        * config/aarch64/aarch64-sve-builtins-base.cc (svcvt_impl::expand):
        Add sext_optab as another possibility.
        * config/aarch64/aarch64-sve-builtins-sve2.h (svcvtl): Declare.
        * config/aarch64/aarch64-sve-builtins-sve2.cc (svcvtl_impl): New
        class.
        (svcvtl): New function.
        * config/aarch64/iterators.md (VNx8SF_ONLY): New mode iterator.
        (SME_ZA_SDFx24): Replace with...
        (SME_ZA_HSDFx24): ...this.
        (SME_MOP_SDF): Replace with...
        (SME_MOP_HSDF): ...this.
        (SME_BINARY_SLICE_SDF): Replace with...
        (SME_BINARY_SLICE_HSDF): ...this.
        * config/aarch64/aarch64-sve2.md (extendvnx8hfvnx8sf2)
        (@aarch64_sve_cvtl<mode>): New patterns.
        * config/aarch64/aarch64-sme.md
        (@aarch64_sme_<SME_BINARY_SLICE_SDF:optab><mode>): Extend to...
        (@aarch64_sme_<SME_BINARY_SLICE_HSDF:optab><mode>): ...this.
        (*aarch64_sme_<SME_BINARY_SLICE_SDF:optab><mode>_plus): Extend
        to...
        (*aarch64_sme_<SME_BINARY_SLICE_HSDF:optab><mode>_plus): ...this.
        (@aarch64_sme_<SME_FP_TERNARY_SLICE:optab><mode><mode>): Extend
        to HF modes.
        (*aarch64_sme_<SME_FP_TERNARY_SLICE:optab><mode><mode>_plus)
        (@aarch64_sme_single_<SME_FP_TERNARY_SLICE:optab><mode><mode>)
        (*aarch64_sme_single_<SME_FP_TERNARY_SLICE:optab><mode><mode>_plus)
        (@aarch64_sme_lane_<SME_FP_TERNARY_SLICE:optab><mode><mode>)
        (*aarch64_sme_lane_<SME_FP_TERNARY_SLICE:optab><mode><mode>)
        (@aarch64_sme_<SME_FP_MOP:optab><mode><mode>): Likewise.

    gcc/testsuite/
        * lib/target-supports.exp: Test the assembler for sme-f16f16
        support.
        * gcc.target/aarch64/pragma_cpp_predefs_4.c: Add tests for
        __ARM_FEATURE_SME_F16F16.  Also extend the existing SME tests.
        * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_X2_WIDE):
        New macro.
        * gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x2.c: New test.
        * gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x4.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/cvt_f32_f16_x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x2.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x4.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x4.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x2.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x4.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x4.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mopa_za16_f16.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mops_za16_f16.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x4.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-c.cc                    |   2 +
 gcc/config/aarch64/aarch64-option-extensions.def   |   2 +
 gcc/config/aarch64/aarch64-sme.md                  | 101 +++++------
 gcc/config/aarch64/aarch64-sve-builtins-base.cc    |   5 +-
 gcc/config/aarch64/aarch64-sve-builtins-sme.def    |  15 ++
 gcc/config/aarch64/aarch64-sve-builtins-sve2.cc    |  11 ++
 gcc/config/aarch64/aarch64-sve-builtins-sve2.def   |   5 +
 gcc/config/aarch64/aarch64-sve-builtins-sve2.h     |   1 +
 gcc/config/aarch64/aarch64-sve-builtins.cc         |  15 ++
 gcc/config/aarch64/aarch64-sve2.md                 |  26 +++
 gcc/config/aarch64/aarch64.h                       |   4 +
 gcc/config/aarch64/iterators.md                    |  15 +-
 gcc/doc/invoke.texi                                |   9 +-
 .../gcc.target/aarch64/pragma_cpp_predefs_4.c      |  30 +++-
 .../aarch64/sme2/acle-asm/add_za16_f16_vg1x2.c     | 126 ++++++++++++++
 .../aarch64/sme2/acle-asm/add_za16_f16_vg1x4.c     | 141 ++++++++++++++++
 .../aarch64/sme2/acle-asm/cvt_f32_f16_x2.c         |  54 ++++++
 .../aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c        |  54 ++++++
 .../sme2/acle-asm/mla_lane_za16_f16_vg1x2.c        | 106 ++++++++++++
 .../sme2/acle-asm/mla_lane_za16_f16_vg1x4.c        | 112 +++++++++++++
 .../aarch64/sme2/acle-asm/mla_za16_f16_vg1x2.c     | 184 +++++++++++++++++++++
 .../aarch64/sme2/acle-asm/mla_za16_f16_vg1x4.c     | 176 ++++++++++++++++++++
 .../sme2/acle-asm/mls_lane_za16_f16_vg1x2.c        | 106 ++++++++++++
 .../sme2/acle-asm/mls_lane_za16_f16_vg1x4.c        | 112 +++++++++++++
 .../aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c     | 184 +++++++++++++++++++++
 .../aarch64/sme2/acle-asm/mls_za16_f16_vg1x4.c     | 176 ++++++++++++++++++++
 .../aarch64/sme2/acle-asm/mopa_za16_f16.c          |  34 ++++
 .../aarch64/sme2/acle-asm/mops_za16_f16.c          |  34 ++++
 .../aarch64/sme2/acle-asm/sub_za16_f16_vg1x2.c     | 126 ++++++++++++++
 .../aarch64/sme2/acle-asm/sub_za16_f16_vg1x4.c     | 141 ++++++++++++++++
 .../aarch64/sve/acle/asm/test_sve_acle.h           |  16 ++
 gcc/testsuite/lib/target-supports.exp              |   3 +-
 32 files changed, 2067 insertions(+), 59 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 7c683274800d..db1a93b7e054 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -271,6 +271,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
   aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile);
   aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64",
			pfile);
+  aarch64_def_or_undef (AARCH64_HAVE_ISA (SME_F16F16),
+			"__ARM_FEATURE_SME_F16F16", pfile);
   aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64",
			pfile);
   aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index a5ab16233ba0..0667359da761 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -225,6 +225,8 @@ AARCH64_FMV_FEATURE("sme-i16i64", SME_I64, (SME_I16I64))
 AARCH64_OPT_FMV_EXTENSION("sme2", SME2, (SME), (), (), "sme2")
 
+AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "")
+
 AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")
 
 AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
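[Editorial note, not part of the patch: a minimal user-level sketch of the
ZA intrinsics that the new extension exposes.  The intrinsic names are
taken from the tests added later in this patch; the __arm_streaming and
__arm_inout("za") keyword attributes are the standard ACLE SME markers,
and a -march baseline with SME2 support is assumed.]

#include <arm_sme.h>

#pragma GCC target "+sme-f16f16"

/* Accumulate pairs of f16 vectors into a group of two ZA.H slices.
   svadd maps to the new FADD form and svmla to the new FMLA form.  */
void
accumulate_f16 (uint32_t slice, svfloat16x2_t x, svfloat16x2_t y)
  __arm_streaming __arm_inout("za")
{
  svadd_za16_f16_vg1x2 (slice, x);     /* fadd za.h[wN, 0, vgx2], {...}  */
  svmla_za16_f16_vg1x2 (slice, x, y);  /* fmla za.h[wN, 0, vgx2], {...}, {...}  */
}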
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index e8a24e0b2f6f..226fcbd7cef9 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -926,6 +926,9 @@
 ;; -------------------------------------------------------------------------
 ;; Includes:
 ;; - ADD
+;; - FADD
+;; - FSUB
+;; - SUB
 ;; -------------------------------------------------------------------------
 
 (define_insn "@aarch64_sme_<optab><mode>"
@@ -954,26 +957,26 @@
 )
 
 (define_insn "@aarch64_sme_<optab><mode>"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (match_operand:SI 0 "register_operand" "Uci")
-	   (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")]
-	  SME_BINARY_SLICE_SDF))]
+	   (match_operand:SME_ZA_HSDFx24 1 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_BINARY_SLICE_HSDF))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
 )
 
 (define_insn "*aarch64_sme_<optab><mode>_plus"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
		    (match_operand:SI 1 "const_0_to_7_operand"))
-	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
-	  SME_BINARY_SLICE_SDF))]
+	   (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_BINARY_SLICE_HSDF))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
 )
@@ -1634,70 +1637,70 @@
 ;; -------------------------------------------------------------------------
 
 (define_insn "@aarch64_sme_<optab><mode><mode>"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (match_operand:SI 0 "register_operand" "Uci")
-	   (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
-	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	   (match_operand:SME_ZA_HSDFx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
 )
 
 (define_insn "*aarch64_sme_<optab><mode><mode>_plus"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
		    (match_operand:SI 1 "const_0_to_7_operand"))
-	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
-	   (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw<vector_count>")]
+	   (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_HSDFx24 3 "aligned_register_operand" "Uw<vector_count>")]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
 )
 
 (define_insn "@aarch64_sme_single_<optab><mode><mode>"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (match_operand:SI 0 "register_operand" "Uci")
-	   (match_operand:SME_ZA_SDFx24 1 "register_operand" "w")
-	   (vec_duplicate:SME_ZA_SDFx24
-	     (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+	   (match_operand:SME_ZA_HSDFx24 1 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_HSDFx24
+	     (match_operand:<SME_ZA_HSDFx24:VSINGLE> 2 "register_operand" "x"))]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
 )
 
 (define_insn "*aarch64_sme_single_<optab><mode><mode>_plus"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
		    (match_operand:SI 1 "const_0_to_7_operand"))
-	   (match_operand:SME_ZA_SDFx24 2 "register_operand" "w")
-	   (vec_duplicate:SME_ZA_SDFx24
-	     (match_operand:<VSINGLE> 3 "register_operand" "x"))]
+	   (match_operand:SME_ZA_HSDFx24 2 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_HSDFx24
+	     (match_operand:<SME_ZA_HSDFx24:VSINGLE> 3 "register_operand" "x"))]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
 )
 
 (define_insn "@aarch64_sme_lane_<optab><mode><mode>"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (match_operand:SI 0 "register_operand" "Uci")
-	   (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
-	   (unspec:SME_ZA_SDFx24
-	     [(match_operand:<VSINGLE> 2 "register_operand" "x")
+	   (match_operand:SME_ZA_HSDFx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (unspec:SME_ZA_HSDFx24
+	     [(match_operand:<SME_ZA_HSDFx24:VSINGLE> 2 "register_operand" "x")
 	      (match_operand:SI 3 "const_int_operand")]
 	     UNSPEC_SVE_LANE_SELECT)]
 	  SME_FP_TERNARY_SLICE))]
@@ -1706,15 +1709,15 @@
 )
 
 (define_insn "*aarch64_sme_lane_<optab><mode><mode>"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
		    (match_operand:SI 1 "const_0_to_7_operand"))
-	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
-	   (unspec:SME_ZA_SDFx24
-	     [(match_operand:<VSINGLE> 3 "register_operand" "x")
+	   (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (unspec:SME_ZA_HSDFx24
+	     [(match_operand:<SME_ZA_HSDFx24:VSINGLE> 3 "register_operand" "x")
 	      (match_operand:SI 4 "const_int_operand")]
 	     UNSPEC_SVE_LANE_SELECT)]
 	  SME_FP_TERNARY_SLICE))]
@@ -1871,15 +1874,15 @@
 ;; -------------------------------------------------------------------------
 
 (define_insn "@aarch64_sme_<optab><mode><mode>"
-  [(set (reg:SME_MOP_SDF ZA_REGNUM)
-	(unspec:SME_MOP_SDF
-	  [(reg:SME_MOP_SDF ZA_REGNUM)
+  [(set (reg:SME_MOP_HSDF ZA_REGNUM)
+	(unspec:SME_MOP_HSDF
+	  [(reg:SME_MOP_HSDF ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (match_operand:DI 0 "const_int_operand")
 	   (match_operand:<VPRED> 1 "register_operand" "Upl")
 	   (match_operand:<VPRED> 2 "register_operand" "Upl")
-	   (match_operand:SME_MOP_SDF 3 "register_operand" "w")
-	   (match_operand:SME_MOP_SDF 4 "register_operand" "w")]
+	   (match_operand:SME_MOP_HSDF 3 "register_operand" "w")
+	   (match_operand:SME_MOP_HSDF 4 "register_operand" "w")]
 	  SME_FP_MOP))]
   "TARGET_STREAMING"
   "<b><optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>, %4.<Vetype>"
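[Editorial note on the "_plus" variants above, not from the patch: the
instructions encode a ZA slice as a 32-bit base register plus an immediate
offset in the range [0, 7], which is what the const_0_to_7_operand captures.
A C-level index of the form "base + constant" therefore folds into the
instruction only in that range; anything else needs a separate address
calculation first.  A sketch, using the intrinsic names from the tests:]

#include <arm_sme.h>

#pragma GCC target "+sme-f16f16"

void
slice_offsets (uint32_t w, svfloat16x2_t x)
  __arm_streaming __arm_inout("za")
{
  svadd_za16_f16_vg1x2 (w + 7, x);  /* offset 7 fits the immediate field  */
  svadd_za16_f16_vg1x2 (w + 8, x);  /* offset 8 does not: expect an "add"
				       of w + 8 first, then offset 0  */
}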
"<b><optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>, %4.<Vetype>" diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 20820fb1985c..87e9909b55a0 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -684,8 +684,11 @@ public: optab = e.type_suffix (0).unsigned_p ? ufix_optab : sfix_optab; else if (e.type_suffix (1).integer_p) optab = e.type_suffix (1).unsigned_p ? ufloat_optab : sfloat_optab; - else + else if (e.type_suffix (0).element_bits + < e.type_suffix (1).element_bits) optab = trunc_optab; + else + optab = sext_optab; icode = convert_optab_handler (optab, mode0, mode1); gcc_assert (icode != CODE_FOR_nothing); return e.use_exact_insn (icode); diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def index bc2c33236363..c79245d92495 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def @@ -206,6 +206,21 @@ DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_d_float, DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_d_float, vg1x24, none) #undef REQUIRED_EXTENSIONS +#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F16F16) +DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_h_float, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_h_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_h_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_h_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_h_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_h_float, za_m) +DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_h_float, za_m) +DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_h_float, vg1x24, none) +#undef REQUIRED_EXTENSIONS + #undef DEF_SME_ZA_FUNCTION #undef DEF_SME_ZA_FUNCTION_GS #undef DEF_SME_FUNCTION diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index fd0c98c6b680..b17b78dadd5e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -211,6 +211,16 @@ public: } }; +class svcvtl_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const override + { + return e.use_exact_insn (code_for_aarch64_sve_cvtl (e.result_mode ())); + } +}; + class svcvtn_impl : public function_base { public: @@ -908,6 +918,7 @@ FUNCTION (svcdot, svcdot_impl,) FUNCTION (svcdot_lane, svcdot_lane_impl,) FUNCTION (svclamp, svclamp_impl,) FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT)) +FUNCTION (svcvtl, svcvtl_impl,) FUNCTION (svcvtn, svcvtn_impl,) FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX)) FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index 39b5a59ae79f..2189855d705c 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -336,6 +336,11 @@ DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz) DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz) #undef REQUIRED_EXTENSIONS +#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F16F16) +DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_f32_f16, x2, none) 
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index fd0c98c6b680..b17b78dadd5e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -211,6 +211,16 @@ public:
   }
 };
 
+class svcvtl_impl : public function_base
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    return e.use_exact_insn (code_for_aarch64_sve_cvtl (e.result_mode ()));
+  }
+};
+
 class svcvtn_impl : public function_base
 {
 public:
@@ -908,6 +918,7 @@ FUNCTION (svcdot, svcdot_impl,)
 FUNCTION (svcdot_lane, svcdot_lane_impl,)
 FUNCTION (svclamp, svclamp_impl,)
 FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
+FUNCTION (svcvtl, svcvtl_impl,)
 FUNCTION (svcvtn, svcvtn_impl,)
 FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
 FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 39b5a59ae79f..2189855d705c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -336,6 +336,11 @@ DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz)
 DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz)
 #undef REQUIRED_EXTENSIONS
 
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F16F16)
+DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_f32_f16, x2, none)
+DEF_SVE_FUNCTION_GS (svcvtl, unary_convertxn, cvt_f32_f16, x2, none)
+#undef REQUIRED_EXTENSIONS
+
 #define REQUIRED_EXTENSIONS \
   sve_and_sme (AARCH64_FL_SVE2 | AARCH64_FL_SVE_B16B16, \
	       AARCH64_FL_SME2 | AARCH64_FL_SVE_B16B16)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
index bb610cb792bb..bfe3d170e70e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -61,6 +61,7 @@ namespace aarch64_sve
   extern const function_base *const svcdot_lane;
   extern const function_base *const svclamp;
   extern const function_base *const svcntp;
+  extern const function_base *const svcvtl;
   extern const function_base *const svcvtlt;
   extern const function_base *const svcvtn;
   extern const function_base *const svcvtx;

diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index bee0d17352c3..671f17dd8bf5 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -296,6 +296,10 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_h_bfloat(S, D) \
   S (bf16)
 
+/* _f16.  */
+#define TYPES_h_float(S, D) \
+  S (f16)
+
 /* _s16
    _u16.  */
 #define TYPES_h_integer(S, D) \
@@ -450,6 +454,10 @@
 #define TYPES_cvt_h_s_float(S, D) \
   D (bf16, f32), D (f16, f32)
 
+/* _f32_f16.  */
+#define TYPES_cvt_f32_f16(S, D) \
+  D (f32, f16)
+
 /* _f32_f16
    _f64_f32.  */
 #define TYPES_cvt_long(S, D) \
@@ -625,6 +633,10 @@
   TYPES_za_bhsd_data (S, D), \
   TYPES_reinterpret1 (D, za128)
 
+/* _za16_f16.  */
+#define TYPES_za_h_float(S, D) \
+  D (za16, f16)
+
 /* _za32_s8.  */
 #define TYPES_za_s_b_signed(S, D) \
   D (za32, s8)
@@ -744,6 +756,7 @@
 DEF_SVE_TYPES_ARRAY (bhs_data);
 DEF_SVE_TYPES_ARRAY (bhs_widen);
 DEF_SVE_TYPES_ARRAY (c);
 DEF_SVE_TYPES_ARRAY (h_bfloat);
+DEF_SVE_TYPES_ARRAY (h_float);
 DEF_SVE_TYPES_ARRAY (h_integer);
 DEF_SVE_TYPES_ARRAY (hs_signed);
 DEF_SVE_TYPES_ARRAY (hs_integer);
@@ -772,6 +785,7 @@
 DEF_SVE_TYPES_ARRAY (d_data);
 DEF_SVE_TYPES_ARRAY (cvt);
 DEF_SVE_TYPES_ARRAY (cvt_bfloat);
 DEF_SVE_TYPES_ARRAY (cvt_h_s_float);
+DEF_SVE_TYPES_ARRAY (cvt_f32_f16);
 DEF_SVE_TYPES_ARRAY (cvt_long);
 DEF_SVE_TYPES_ARRAY (cvt_narrow_s);
 DEF_SVE_TYPES_ARRAY (cvt_narrow);
@@ -793,6 +807,7 @@
 DEF_SVE_TYPES_ARRAY (all_za);
 DEF_SVE_TYPES_ARRAY (d_za);
 DEF_SVE_TYPES_ARRAY (za_bhsd_data);
 DEF_SVE_TYPES_ARRAY (za_all_data);
+DEF_SVE_TYPES_ARRAY (za_h_float);
 DEF_SVE_TYPES_ARRAY (za_s_b_signed);
 DEF_SVE_TYPES_ARRAY (za_s_b_unsigned);
 DEF_SVE_TYPES_ARRAY (za_s_b_integer);
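[Editorial note, not from the patch: the cvt_f32_f16 type suffix above
underpins the two new conversion intrinsics.  A usage sketch, with names as
in the cvt/cvtl tests below; the two underlying instructions differ in how
the source f16 elements are distributed across the two f32 result vectors
(consult the Arm ARM for the exact lane mapping).]

#include <arm_sme.h>

#pragma GCC target "+sme-f16f16"

svfloat32x2_t
widen (svfloat16_t x) __arm_streaming
{
  return svcvt_f32_f16_x2 (x);   /* fcvt {zM.s - zN.s}, zK.h  */
}

svfloat32x2_t
widen_l (svfloat16_t x) __arm_streaming
{
  return svcvtl_f32_f16_x2 (x);  /* fcvtl {zM.s - zN.s}, zK.h  */
}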
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 5a6fd0c47992..66affa85d369 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -97,6 +97,7 @@
 ;; == Conversions
 ;; ---- [FP<-FP] Widening conversions
 ;; ---- [FP<-FP] Narrowing conversions
+;; ---- [FP<-FP] Multi-vector widening conversions
 ;; ---- [FP<-FP] Multi-vector narrowing conversions
 ;; ---- [FP<-INT] Multi-vector conversions
 ;; ---- [INT<-FP] Multi-vector conversions
@@ -3116,6 +3117,31 @@
   "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Multi-vector widening conversions
+;; -------------------------------------------------------------------------
+;; Includes the multi-register forms of:
+;; - FCVT
+;; - FCVTL
+;; -------------------------------------------------------------------------
+
+(define_insn "extendvnx8hfvnx8sf2"
+  [(set (match_operand:VNx8SF 0 "aligned_register_operand" "=Uw2")
+	(float_extend:VNx8SF
+	  (match_operand:VNx8HF 1 "register_operand" "w")))]
+  "TARGET_STREAMING_SME_F16F16"
+  "fcvt\t%0, %1.h"
+)
+
+(define_insn "@aarch64_sve_cvtl<mode>"
+  [(set (match_operand:VNx8SF_ONLY 0 "aligned_register_operand" "=Uw2")
+	(unspec:VNx8SF_ONLY
+	  [(match_operand:VNx8HF 1 "register_operand" "w")]
+	  UNSPEC_FCVTL))]
+  "TARGET_STREAMING_SME_F16F16"
+  "fcvtl\t%0, %1.h"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP<-FP] Multi-vector narrowing conversions
 ;; -------------------------------------------------------------------------

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 4365673d61ea..92e14c2b6a8b 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -349,6 +349,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 /* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64.  */
 #define TARGET_SME_I16I64 AARCH64_HAVE_ISA (SME_I16I64)
 
+/* The FEAT_SME_F16F16 extension to SME, enabled through +sme-f16f16.  */
+#define TARGET_STREAMING_SME_F16F16 \
+  (AARCH64_HAVE_ISA (SME_F16F16) && TARGET_STREAMING)
+
 /* The FEAT_SME_F64F64 extension to SME, enabled through +sme-f64f64.  */
 #define TARGET_SME_F64F64 AARCH64_HAVE_ISA (SME_F64F64)
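[Editorial note, not from the patch: unlike TARGET_SME_I16I64 and
TARGET_SME_F64F64 around it, the new macro bakes TARGET_STREAMING into the
condition, matching the streaming_only () classification in the .def files:
the FEAT_SME_F16F16 instructions exist only in streaming mode.  As a sketch
of the expected behaviour (not a quoted diagnostic), the new intrinsics
should only be accepted inside streaming functions:]

#include <arm_sme.h>

#pragma GCC target "+sme-f16f16"

svfloat32x2_t
not_streaming (svfloat16_t x)
{
  return svcvt_f32_f16_x2 (x);  /* expected to be rejected here: the
				   builtin is registered as streaming-only
				   and this function is not streaming  */
}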
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 96acbdd3f7f9..3325a3a14840 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -432,6 +432,7 @@
 (define_mode_iterator VNx8HI_ONLY [VNx8HI])
 (define_mode_iterator VNx8BF_ONLY [VNx8BF])
 (define_mode_iterator VNx8SI_ONLY [VNx8SI])
+(define_mode_iterator VNx8SF_ONLY [VNx8SF])
 (define_mode_iterator VNx8DI_ONLY [VNx8DI])
 (define_mode_iterator VNx4SI_ONLY [VNx4SI])
 (define_mode_iterator VNx4SF_ONLY [VNx4SF])
@@ -681,12 +682,17 @@
 (define_mode_iterator SME_ZA_SDIx24 [VNx8SI (VNx4DI "TARGET_SME_I16I64")
				     VNx16SI (VNx8DI "TARGET_SME_I16I64")])
 
-(define_mode_iterator SME_ZA_SDFx24 [VNx8SF (VNx4DF "TARGET_SME_F64F64")
-				     VNx16SF (VNx8DF "TARGET_SME_F64F64")])
+(define_mode_iterator SME_ZA_HSDFx24 [VNx8SF VNx16SF
+				      (VNx4DF "TARGET_SME_F64F64")
+				      (VNx8DF "TARGET_SME_F64F64")
+				      (VNx16HF "TARGET_STREAMING_SME_F16F16")
+				      (VNx32HF "TARGET_STREAMING_SME_F16F16")])
 
 ;; The modes for which outer product instructions are supported.
 (define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")])
-(define_mode_iterator SME_MOP_SDF [VNx4SF (VNx2DF "TARGET_SME_F64F64")])
+(define_mode_iterator SME_MOP_HSDF [VNx4SF
+				    (VNx2DF "TARGET_SME_F64F64")
+				    (VNx8HF "TARGET_STREAMING_SME_F16F16")])
 
 ;; ------------------------------------------------------------------
 ;; Unspec enumerations for Advance SIMD. These could well go into
@@ -1093,6 +1099,7 @@
     UNSPEC_FMAXNMQV
     UNSPEC_FMINQV
     UNSPEC_FMINNMQV
+    UNSPEC_FCVTL
     UNSPEC_FCVTN
     UNSPEC_FDOT
     UNSPEC_LD1_EXTENDQ
@@ -3677,7 +3684,7 @@
 (define_int_iterator SME_BINARY_SLICE_SDI [UNSPEC_SME_ADD UNSPEC_SME_SUB])
 
-(define_int_iterator SME_BINARY_SLICE_SDF [UNSPEC_SME_FADD UNSPEC_SME_FSUB])
+(define_int_iterator SME_BINARY_SLICE_HSDF [UNSPEC_SME_FADD UNSPEC_SME_FSUB])
 
 (define_int_iterator SME_BINARY_WRITE_SLICE_SDI [UNSPEC_SME_ADD_WRITE
						  UNSPEC_SME_SUB_WRITE])

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e99cbc47dc96..fa2efb42bbf4 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21847,11 +21847,16 @@ Enable the Common Short Sequence Compression instructions.
 @item sme
 Enable the Scalable Matrix Extension.
 @item sme-i16i64
-Enable the FEAT_SME_I16I64 extension to SME.
+Enable the FEAT_SME_I16I64 extension to SME.  This also enables SME
+instructions.
 @item sme-f64f64
-Enable the FEAT_SME_F64F64 extension to SME.
+Enable the FEAT_SME_F64F64 extension to SME.  This also enables SME
+instructions.
 @item sme2
 Enable the Scalable Matrix Extension 2.  This also enables SME instructions.
+@item sme-f16f16
+Enable the FEAT_SME_F16F16 extension to SME.  This also enables SME2
+instructions.
 @item lse128
 Enable the LSE128 128-bit atomic instructions extension.  This also
 enables LSE instructions.

diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
index 77aa1e5e4715..d9ec0b630cc9 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -70,19 +70,45 @@
 #ifdef __ARM_FEATURE_SME_I16I64
 #error Foo
 #endif
+#ifdef __ARM_FEATURE_SME_F16F16
+#error Foo
+#endif
 #ifdef __ARM_FEATURE_SME_F64F64
 #error Foo
 #endif
 
-#pragma GCC target "+sme-i16i64"
+#pragma GCC target "+nothing+sme-i16i64"
 #ifndef __ARM_FEATURE_SME_I16I64
 #error Foo
 #endif
+#ifndef __ARM_FEATURE_SME
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_SME2
+#error Foo
+#endif
+
+#pragma GCC target "+nothing+sme-f16f16"
+#ifndef __ARM_FEATURE_SME_F16F16
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SME
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SME2
+#error Foo
+#endif
 
-#pragma GCC target "+sme-f64f64"
+#pragma GCC target "+nothing+sme-f64f64"
 #ifndef __ARM_FEATURE_SME_F64F64
 #error Foo
 #endif
+#ifndef __ARM_FEATURE_SME
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_SME2
+#error Foo
+#endif
 
 #pragma GCC target "+nothing+sve-b16b16"
 #ifdef __ARM_FEATURE_SVE_B16B16

diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x2.c
new file mode 100644
index 000000000000..80e3de6b543c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { !
aarch64_asm_sme-f16f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f16f16" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** fadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_0_z0, svfloat16x2_t, + svadd_za16_f16_vg1x2 (0, z0), + svadd_za16_vg1x2 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** fadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w0, z0), + svadd_za16_vg1x2 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** fadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w7, z0), + svadd_za16_vg1x2 (w7, z0)) + +/* +** add_w8_z0: +** fadd za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w8, z0), + svadd_za16_vg1x2 (w8, z0)) + +/* +** add_w11_z0: +** fadd za\.h\[w11, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w11, z0), + svadd_za16_vg1x2 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** fadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w12, z0), + svadd_za16_vg1x2 (w12, z0)) + +/* +** add_w8p7_z0: +** fadd za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w8 + 7, z0), + svadd_za16_vg1x2 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w8 + 8, z0), + svadd_za16_vg1x2 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w8 - 1, z0), + svadd_za16_vg1x2 (w8 - 1, z0)) + +/* +** add_w8_z18: +** fadd za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z18, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w8, z18), + svadd_za16_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** fadd za\.h\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w8, z23), + svadd_za16_vg1x2 (w8, z23)) + +/* +** add_w8_z28: +** fadd za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svfloat16x2_t, + svadd_za16_f16_vg1x2 (w8, z28), + svadd_za16_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x4.c new file mode 100644 index 000000000000..21ca01149482 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x4.c @@ -0,0 +1,141 @@ +/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme-f16f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f16f16" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** fadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_0_z0, svfloat16x4_t, + svadd_za16_f16_vg1x4 (0, z0), + svadd_za16_vg1x4 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** fadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w0, z0), + svadd_za16_vg1x4 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** fadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w7, z0), + svadd_za16_vg1x4 (w7, z0)) + +/* +** add_w8_z0: +** fadd za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w8, z0), + svadd_za16_vg1x4 (w8, z0)) + +/* +** add_w11_z0: +** fadd za\.h\[w11, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w11, z0), + svadd_za16_vg1x4 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** fadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w12, z0), + svadd_za16_vg1x4 (w12, z0)) + +/* +** add_w8p7_z0: +** fadd za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w8 + 7, z0), + svadd_za16_vg1x4 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w8 + 8, z0), + svadd_za16_vg1x4 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fadd za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w8 - 1, z0), + svadd_za16_vg1x4 (w8 - 1, z0)) + +/* +** add_w8_z4: +** fadd za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z4, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w8, z4), + svadd_za16_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** add_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fadd za\.h\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z18, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w8, z18), + svadd_za16_vg1x4 (w8, z18)) + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fadd za\.h\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w8, z23), + svadd_za16_vg1x4 (w8, z23)) + +/* +** add_w8_z28: +** fadd za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svfloat16x4_t, + svadd_za16_f16_vg1x4 (w8, z28), + svadd_za16_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_f16_x2.c new file mode 100644 index 000000000000..8d610042367e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_f16_x2.c @@ -0,0 +1,54 @@ +/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme-f16f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f16f16" + +/* +** cvt_z0_z0: +** fcvt {z0\.s - z1\.s}, z0\.h +** ret +*/ +TEST_X2_WIDE (cvt_z0_z0, svfloat32x2_t, svfloat16_t, + z0_res = svcvt_f32_f16_x2 (z0), + z0_res = svcvt_f32 (z0)) + +/* +** cvt_z0_z6: +** fcvt {z6\.s - z7\.s}, z0\.h +** ret +*/ +TEST_X2_WIDE (cvt_z0_z6, svfloat32x2_t, svfloat16_t, + z6 = svcvt_f32_f16_x2 (z0), + z6 = svcvt_f32 (z0)) + +/* +** cvt_z0_z29: +** fcvt [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_X2_WIDE (cvt_z0_z29, svfloat32x2_t, svfloat16_t, + z29 = svcvt_f32_f16_x2 (z0), + z29 = svcvt_f32 (z0)) + +/* +** cvt_z5_z0: +** fcvt {z0\.s - z1\.s}, z5\.h +** ret +*/ +TEST_X2_WIDE (cvt_z5_z0, svfloat32x2_t, svfloat16_t, + z0_res = svcvt_f32_f16_x2 (z5), + z0_res = svcvt_f32 (z5)) + +/* +** cvt_z22_z16: +** fcvt {z16\.s - z17\.s}, z22\.h +** ret +*/ +TEST_X2_WIDE (cvt_z22_z16, svfloat32x2_t, svfloat16_t, + z16 = svcvt_f32_f16_x2 (z22), + z16 = svcvt_f32 (z22)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c new file mode 100644 index 000000000000..42e89b531f13 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c @@ -0,0 +1,54 @@ +/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f16f16" + +/* +** cvtl_z0_z0: +** fcvtl {z0\.s - z1\.s}, z0\.h +** ret +*/ +TEST_X2_WIDE (cvtl_z0_z0, svfloat32x2_t, svfloat16_t, + z0_res = svcvtl_f32_f16_x2 (z0), + z0_res = svcvtl_f32 (z0)) + +/* +** cvtl_z0_z6: +** fcvtl {z6\.s - z7\.s}, z0\.h +** ret +*/ +TEST_X2_WIDE (cvtl_z0_z6, svfloat32x2_t, svfloat16_t, + z6 = svcvtl_f32_f16_x2 (z0), + z6 = svcvtl_f32 (z0)) + +/* +** cvtl_z0_z29: +** fcvtl [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_X2_WIDE (cvtl_z0_z29, svfloat32x2_t, svfloat16_t, + z29 = svcvtl_f32_f16_x2 (z0), + z29 = svcvtl_f32 (z0)) + +/* +** cvtl_z5_z0: +** fcvtl {z0\.s - z1\.s}, z5\.h +** ret +*/ +TEST_X2_WIDE (cvtl_z5_z0, svfloat32x2_t, svfloat16_t, + z0_res = svcvtl_f32_f16_x2 (z5), + z0_res = svcvtl_f32 (z5)) + +/* +** cvtl_z22_z16: +** fcvtl {z16\.s - z17\.s}, z22\.h +** ret +*/ +TEST_X2_WIDE (cvtl_z22_z16, svfloat32x2_t, svfloat16_t, + z16 = svcvtl_f32_f16_x2 (z22), + z16 = svcvtl_f32 (z22)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x2.c new file mode 100644 index 000000000000..ecb3fc2381c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x2.c @@ -0,0 +1,106 @@ +/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme-f16f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f16f16" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t, + svmla_lane_za16_f16_vg1x2 (0, z0, z4, 0), + svmla_lane_za16_vg1x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t, + svmla_lane_za16_f16_vg1x2 (w0, z0, z7, 1), + svmla_lane_za16_vg1x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** fmla za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t, + svmla_lane_za16_f16_vg1x2 (w8, z28, z4, 2), + svmla_lane_za16_vg1x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p7_z0_z4_3: +** fmla za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t, + svmla_lane_za16_f16_vg1x2 (w8 + 7, z0, z4, 3), + svmla_lane_za16_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svfloat16x2_t, svfloat16_t, + svmla_lane_za16_f16_vg1x2 (w8 + 8, z0, z4, 4), + svmla_lane_za16_vg1x2 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svfloat16x2_t, svfloat16_t, + svmla_lane_za16_f16_vg1x2 (w0 - 1, z0, z4, 5), + svmla_lane_za16_vg1x2 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** fmla za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svfloat16x2_t, svfloat16_t, + svmla_lane_za16_f16_vg1x2 (w8, z4, z15, 6), + svmla_lane_za16_vg1x2 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** fmla za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svfloat16x2_t, svfloat16_t, + svmla_lane_za16_f16_vg1x2 (w8, z28, z16, 7), + svmla_lane_za16_vg1x2 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** fmla za\.h\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t, + svmla_lane_za16_f16_vg1x2 (w8, z17, z7, 0), + svmla_lane_za16_vg1x2 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** fmla za\.h\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t, + svmla_lane_za16_f16_vg1x2 (w8, z22, z4, 1), + svmla_lane_za16_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x4.c new file mode 100644 index 000000000000..8ff575d598de --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x4.c @@ -0,0 +1,112 @@ +/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme-f16f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f16f16" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat16x4_t, svfloat16_t, + svmla_lane_za16_f16_vg1x4 (0, z0, z4, 0), + svmla_lane_za16_vg1x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat16x4_t, svfloat16_t, + svmla_lane_za16_f16_vg1x4 (w0, z0, z7, 1), + svmla_lane_za16_vg1x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** fmla za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat16x4_t, svfloat16_t, + svmla_lane_za16_f16_vg1x4 (w8, z28, z4, 2), + svmla_lane_za16_vg1x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p7_z0_z4_3: +** fmla za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat16x4_t, svfloat16_t, + svmla_lane_za16_f16_vg1x4 (w8 + 7, z0, z4, 3), + svmla_lane_za16_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svfloat16x4_t, svfloat16_t, + svmla_lane_za16_f16_vg1x4 (w8 + 8, z0, z4, 4), + svmla_lane_za16_vg1x4 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svfloat16x4_t, svfloat16_t, + svmla_lane_za16_f16_vg1x4 (w0 - 1, z0, z4, 5), + svmla_lane_za16_vg1x4 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** fmla za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svfloat16x4_t, svfloat16_t, + svmla_lane_za16_f16_vg1x4 (w8, z4, z15, 6), + svmla_lane_za16_vg1x4 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** fmla za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svfloat16x4_t, svfloat16_t, + svmla_lane_za16_f16_vg1x4 (w8, z28, z16, 7), + svmla_lane_za16_vg1x4 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmla za\.h\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat16x4_t, svfloat16_t, + svmla_lane_za16_f16_vg1x4 (w8, z17, z7, 0), + svmla_lane_za16_vg1x4 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmla za\.h\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat16x4_t, svfloat16_t, + svmla_lane_za16_f16_vg1x4 (w8, z22, z4, 1), + svmla_lane_za16_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x2.c new file mode 100644 index 000000000000..f4a723aba2eb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x2.c @@ -0,0 +1,184 @@ +/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme-f16f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f16f16" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svfloat16x2_t, + svmla_za16_f16_vg1x2 (0, z0, z0), + svmla_za16_vg1x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svfloat16x2_t, + svmla_za16_f16_vg1x2 (w0, z0, z0), + svmla_za16_vg1x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** fmla za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svfloat16x2_t, + svmla_za16_f16_vg1x2 (w8, z0, z4), + svmla_za16_vg1x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** fmla za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svfloat16x2_t, + svmla_za16_f16_vg1x2 (w8, z4, z18), + svmla_za16_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z23_z0: +** ... +** fmla za\.h\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svfloat16x2_t, + svmla_za16_f16_vg1x2 (w8, z23, z0), + svmla_za16_vg1x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z23: +** ... +** fmla za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z23, svfloat16x2_t, + svmla_za16_f16_vg1x2 (w8, z18, z23), + svmla_za16_vg1x2 (w8, z18, z23)) + +/* +** mla_w8_z4_z28: +** fmla za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svfloat16x2_t, + svmla_za16_f16_vg1x2 (w8, z4, z28), + svmla_za16_vg1x2 (w8, z4, z28)) + +/* +** mla_w8p7_z4_z0: +** fmla za\.h\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svfloat16x2_t, + svmla_za16_f16_vg1x2 (w8 + 7, z4, z0), + svmla_za16_vg1x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svfloat16x2_t, + svmla_za16_f16_vg1x2 (w8 + 8, z4, z4), + svmla_za16_vg1x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmla za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svfloat16x2_t, + svmla_za16_f16_vg1x2 (w8 - 1, z4, z0), + svmla_za16_vg1x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za16_f16_vg1x2 (0, z1, z0), + svmla_za16_vg1x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za16_f16_vg1x2 (w0, z1, z0), + svmla_za16_vg1x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** fmla za\.h\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za16_f16_vg1x2 (w8, z1, z0), + svmla_za16_vg1x2 (w8, z1, z0)) + +/* +** mla_single_w8p7_z1_z0: +** fmla za\.h\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za16_f16_vg1x2 (w8 + 7, z1, z0), + 
svmla_za16_vg1x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za16_f16_vg1x2 (w8 + 8, z1, z0), + svmla_za16_vg1x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za16_f16_vg1x2 (w0 - 1, z1, z0), + svmla_za16_vg1x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** fmla za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat16x2_t, svfloat16_t, + svmla_single_za16_f16_vg1x2 (w8, z0, z15), + svmla_za16_vg1x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmla za\.h\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat16x2_t, svfloat16_t, + svmla_single_za16_f16_vg1x2 (w8, z20, z16), + svmla_za16_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x4.c new file mode 100644 index 000000000000..420be0dd50fe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x4.c @@ -0,0 +1,176 @@ +/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f16f16" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svfloat16x4_t, + svmla_za16_f16_vg1x4 (0, z0, z0), + svmla_za16_vg1x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svfloat16x4_t, + svmla_za16_f16_vg1x4 (w0, z0, z0), + svmla_za16_vg1x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** fmla za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svfloat16x4_t, + svmla_za16_f16_vg1x4 (w8, z0, z4), + svmla_za16_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... +** fmla za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svfloat16x4_t, + svmla_za16_f16_vg1x4 (w8, z0, z18), + svmla_za16_vg1x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z28: +** ... +** fmla za\.h\[w8, 0, vgx4\], [^\n]+, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svfloat16x4_t, + svmla_za16_f16_vg1x4 (w8, z18, z28), + svmla_za16_vg1x4 (w8, z18, z28)) + +/* +** mla_w8_z28_z23: +** ... 
+** fmla za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z23, svfloat16x4_t, + svmla_za16_f16_vg1x4 (w8, z28, z23), + svmla_za16_vg1x4 (w8, z28, z23)) + +/* +** mla_w8p7_z4_z0: +** fmla za\.h\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svfloat16x4_t, + svmla_za16_f16_vg1x4 (w8 + 7, z4, z0), + svmla_za16_vg1x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svfloat16x4_t, + svmla_za16_f16_vg1x4 (w8 + 8, z4, z4), + svmla_za16_vg1x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmla za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svfloat16x4_t, + svmla_za16_f16_vg1x4 (w8 - 1, z4, z0), + svmla_za16_vg1x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za16_f16_vg1x4 (0, z1, z0), + svmla_za16_vg1x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za16_f16_vg1x4 (w0, z1, z0), + svmla_za16_vg1x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** fmla za\.h\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za16_f16_vg1x4 (w8, z1, z0), + svmla_za16_vg1x4 (w8, z1, z0)) + +/* +** mla_single_w8p7_z1_z0: +** fmla za\.h\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za16_f16_vg1x4 (w8 + 7, z1, z0), + svmla_za16_vg1x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za16_f16_vg1x4 (w8 + 8, z1, z0), + svmla_za16_vg1x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za16_f16_vg1x4 (w0 - 1, z1, z0), + svmla_za16_vg1x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** fmla za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat16x4_t, svfloat16_t, + svmla_single_za16_f16_vg1x4 (w8, z0, z15), + svmla_za16_vg1x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmla za\.h\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat16x4_t, svfloat16_t, + svmla_single_za16_f16_vg1x4 (w8, z20, z16), + svmla_za16_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x2.c new file mode 100644 index 000000000000..9d796cab066a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x2.c @@ -0,0 +1,106 @@ +/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme-f16f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f16f16" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t, + svmls_lane_za16_f16_vg1x2 (0, z0, z4, 0), + svmls_lane_za16_vg1x2 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmls za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t, + svmls_lane_za16_f16_vg1x2 (w0, z0, z7, 1), + svmls_lane_za16_vg1x2 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** fmls za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t, + svmls_lane_za16_f16_vg1x2 (w8, z28, z4, 2), + svmls_lane_za16_vg1x2 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p7_z0_z4_3: +** fmls za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t, + svmls_lane_za16_f16_vg1x2 (w8 + 7, z0, z4, 3), + svmls_lane_za16_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svfloat16x2_t, svfloat16_t, + svmls_lane_za16_f16_vg1x2 (w8 + 8, z0, z4, 4), + svmls_lane_za16_vg1x2 (w8 + 8, z0, z4, 4)) + +/* +** mls_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** fmls za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svfloat16x2_t, svfloat16_t, + svmls_lane_za16_f16_vg1x2 (w0 - 1, z0, z4, 5), + svmls_lane_za16_vg1x2 (w0 - 1, z0, z4, 5)) + +/* +** mls_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** fmls za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svfloat16x2_t, svfloat16_t, + svmls_lane_za16_f16_vg1x2 (w8, z4, z15, 6), + svmls_lane_za16_vg1x2 (w8, z4, z15, 6)) + +/* +** mls_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** fmls za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svfloat16x2_t, svfloat16_t, + svmls_lane_za16_f16_vg1x2 (w8, z28, z16, 7), + svmls_lane_za16_vg1x2 (w8, z28, z16, 7)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** fmls za\.h\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t, + svmls_lane_za16_f16_vg1x2 (w8, z17, z7, 0), + svmls_lane_za16_vg1x2 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** fmls za\.h\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t, + svmls_lane_za16_f16_vg1x2 (w8, z22, z4, 1), + svmls_lane_za16_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x4.c new file mode 100644 index 000000000000..e5f5828836f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x4.c @@ -0,0 +1,112 @@ +/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme-f16f16_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+sme-f16f16" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat16x4_t, svfloat16_t, + svmls_lane_za16_f16_vg1x4 (0, z0, z4, 0), + svmls_lane_za16_vg1x4 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmls za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat16x4_t, svfloat16_t, + svmls_lane_za16_f16_vg1x4 (w0, z0, z7, 1), + svmls_lane_za16_vg1x4 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** fmls za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat16x4_t, svfloat16_t, + svmls_lane_za16_f16_vg1x4 (w8, z28, z4, 2), + svmls_lane_za16_vg1x4 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p7_z0_z4_3: +** fmls za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat16x4_t, svfloat16_t, + svmls_lane_za16_f16_vg1x4 (w8 + 7, z0, z4, 3), + svmls_lane_za16_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svfloat16x4_t, svfloat16_t, + svmls_lane_za16_f16_vg1x4 (w8 + 8, z0, z4, 4), + svmls_lane_za16_vg1x4 (w8 + 8, z0, z4, 4)) + +/* +** mls_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** fmls za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svfloat16x4_t, svfloat16_t, + svmls_lane_za16_f16_vg1x4 (w0 - 1, z0, z4, 5), + svmls_lane_za16_vg1x4 (w0 - 1, z0, z4, 5)) + +/* +** mls_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** fmls za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svfloat16x4_t, svfloat16_t, + svmls_lane_za16_f16_vg1x4 (w8, z4, z15, 6), + svmls_lane_za16_vg1x4 (w8, z4, z15, 6)) + +/* +** mls_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** fmls za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svfloat16x4_t, svfloat16_t, + svmls_lane_za16_f16_vg1x4 (w8, z28, z16, 7), + svmls_lane_za16_vg1x4 (w8, z28, z16, 7)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmls za\.h\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat16x4_t, svfloat16_t, + svmls_lane_za16_f16_vg1x4 (w8, z17, z7, 0), + svmls_lane_za16_vg1x4 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmls za\.h\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat16x4_t, svfloat16_t, + svmls_lane_za16_f16_vg1x4 (w8, z22, z4, 1), + svmls_lane_za16_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c new file mode 100644 index 000000000000..a333bee67b80 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c @@ -0,0 +1,184 @@ +/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */ +/* { dg-do compile { target { ! 
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c
new file mode 100644
index 000000000000..a333bee67b80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c
@@ -0,0 +1,184 @@
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mls_0_z0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmls	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svfloat16x2_t,
+            svmls_za16_f16_vg1x2 (0, z0, z0),
+            svmls_za16_vg1x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmls	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svfloat16x2_t,
+            svmls_za16_f16_vg1x2 (w0, z0, z0),
+            svmls_za16_vg1x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+**	fmls	za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svfloat16x2_t,
+            svmls_za16_f16_vg1x2 (w8, z0, z4),
+            svmls_za16_vg1x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+**	fmls	za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svfloat16x2_t,
+            svmls_za16_f16_vg1x2 (w8, z4, z18),
+            svmls_za16_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** mls_w8_z23_z0:
+**	...
+**	fmls	za\.h\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svfloat16x2_t,
+            svmls_za16_f16_vg1x2 (w8, z23, z0),
+            svmls_za16_vg1x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z23:
+**	...
+**	fmls	za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}, [^\n]+
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z18_z23, svfloat16x2_t,
+            svmls_za16_f16_vg1x2 (w8, z18, z23),
+            svmls_za16_vg1x2 (w8, z18, z23))
+
+/*
+** mls_w8_z4_z28:
+**	fmls	za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svfloat16x2_t,
+            svmls_za16_f16_vg1x2 (w8, z4, z28),
+            svmls_za16_vg1x2 (w8, z4, z28))
+
+/*
+** mls_w8p7_z4_z0:
+**	fmls	za\.h\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svfloat16x2_t,
+            svmls_za16_f16_vg1x2 (w8 + 7, z4, z0),
+            svmls_za16_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmls	za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svfloat16x2_t,
+            svmls_za16_f16_vg1x2 (w8 + 8, z4, z4),
+            svmls_za16_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fmls	za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svfloat16x2_t,
+            svmls_za16_f16_vg1x2 (w8 - 1, z4, z0),
+            svmls_za16_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmls	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat16x2_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x2 (0, z1, z0),
+                svmls_za16_vg1x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmls	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat16x2_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x2 (w0, z1, z0),
+                svmls_za16_vg1x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+**	fmls	za\.h\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat16x2_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x2 (w8, z1, z0),
+                svmls_za16_vg1x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+**	fmls	za\.h\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat16x2_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x2 (w8 + 7, z1, z0),
+                svmls_za16_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmls	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat16x2_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x2 (w8 + 8, z1, z0),
+                svmls_za16_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	fmls	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat16x2_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x2 (w0 - 1, z1, z0),
+                svmls_za16_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+**	str	d15, \[sp, #?-16\]!
+**	fmls	za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat16x2_t, svfloat16_t,
+                    svmls_single_za16_f16_vg1x2 (w8, z0, z15),
+                    svmls_za16_vg1x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+**	mov	(z[0-7]).d, z16.d
+**	fmls	za\.h\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat16x2_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x2 (w8, z20, z16),
+                svmls_za16_vg1x2 (w8, z20, z16))
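Two flavours appear in this file: the plain form multiplies a vector tuple by another tuple element-wise, while the _single_ form reuses one vector as the multiplier for every vector of the tuple. A sketch, illustrative only and under the same assumptions as above:

  #include <arm_sme.h>

  void __arm_streaming __arm_inout("za")
  forms (uint32_t slice, svfloat16x2_t zn, svfloat16x2_t zm, svfloat16_t zk)
  {
    svmls_za16_f16_vg1x2 (slice, zn, zm);         /* {zn} * {zm} */
    svmls_single_za16_f16_vg1x2 (slice, zn, zk);  /* {zn} * zk broadcast */
  }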
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x4.c
new file mode 100644
index 000000000000..b0485be14e5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x4.c
@@ -0,0 +1,176 @@
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mls_0_z0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmls	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svfloat16x4_t,
+            svmls_za16_f16_vg1x4 (0, z0, z0),
+            svmls_za16_vg1x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmls	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svfloat16x4_t,
+            svmls_za16_f16_vg1x4 (w0, z0, z0),
+            svmls_za16_vg1x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+**	fmls	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svfloat16x4_t,
+            svmls_za16_f16_vg1x4 (w8, z0, z4),
+            svmls_za16_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** mls_w8_z0_z18:
+**	...
+**	fmls	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svfloat16x4_t,
+            svmls_za16_f16_vg1x4 (w8, z0, z18),
+            svmls_za16_vg1x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z28:
+**	...
+**	fmls	za\.h\[w8, 0, vgx4\], [^\n]+, {z28\.h - z31\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svfloat16x4_t,
+            svmls_za16_f16_vg1x4 (w8, z18, z28),
+            svmls_za16_vg1x4 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z23:
+**	...
+**	fmls	za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, [^\n]+
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z28_z23, svfloat16x4_t,
+            svmls_za16_f16_vg1x4 (w8, z28, z23),
+            svmls_za16_vg1x4 (w8, z28, z23))
+
+/*
+** mls_w8p7_z4_z0:
+**	fmls	za\.h\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svfloat16x4_t,
+            svmls_za16_f16_vg1x4 (w8 + 7, z4, z0),
+            svmls_za16_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmls	za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svfloat16x4_t,
+            svmls_za16_f16_vg1x4 (w8 + 8, z4, z4),
+            svmls_za16_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fmls	za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svfloat16x4_t,
+            svmls_za16_f16_vg1x4 (w8 - 1, z4, z0),
+            svmls_za16_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmls	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat16x4_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x4 (0, z1, z0),
+                svmls_za16_vg1x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmls	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat16x4_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x4 (w0, z1, z0),
+                svmls_za16_vg1x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+**	fmls	za\.h\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat16x4_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x4 (w8, z1, z0),
+                svmls_za16_vg1x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+**	fmls	za\.h\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat16x4_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x4 (w8 + 7, z1, z0),
+                svmls_za16_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmls	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat16x4_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x4 (w8 + 8, z1, z0),
+                svmls_za16_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	fmls	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat16x4_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x4 (w0 - 1, z1, z0),
+                svmls_za16_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+**	str	d15, \[sp, #?-16\]!
+**	fmls	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat16x4_t, svfloat16_t,
+                    svmls_single_za16_f16_vg1x4 (w8, z0, z15),
+                    svmls_za16_vg1x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+**	mov	(z[0-7]).d, z16.d
+**	fmls	za\.h\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat16x4_t, svfloat16_t,
+                svmls_single_za16_f16_vg1x4 (w8, z20, z16),
+                svmls_za16_vg1x4 (w8, z20, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_f16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_f16.c
new file mode 100644
index 000000000000..1219f838e332
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_f16.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mopa_za16_f16_0_p0_p1_z0_z1:
+**	fmopa	za0\.h, p0/m, p1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mopa_za16_f16_0_p0_p1_z0_z1, svfloat16_t,
+                 svmopa_za16_f16_m (0, p0, p1, z0, z1),
+                 svmopa_za16_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za16_f16_0_p1_p0_z1_z0:
+**	fmopa	za0\.h, p1/m, p0/m, z1\.h, z0\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mopa_za16_f16_0_p1_p0_z1_z0, svfloat16_t,
+                 svmopa_za16_f16_m (0, p1, p0, z1, z0),
+                 svmopa_za16_m (0, p1, p0, z1, z0))
+
+/*
+** mopa_za16_f16_1_p0_p1_z0_z1:
+**	fmopa	za1\.h, p0/m, p1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mopa_za16_f16_1_p0_p1_z0_z1, svfloat16_t,
+                 svmopa_za16_f16_m (1, p0, p1, z0, z1),
+                 svmopa_za16_m (1, p0, p1, z0, z1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za16_f16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za16_f16.c
new file mode 100644
index 000000000000..fe22df6b0426
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za16_f16.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mops_za16_f16_0_p0_p1_z0_z1:
+**	fmops	za0\.h, p0/m, p1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mops_za16_f16_0_p0_p1_z0_z1, svfloat16_t,
+                 svmops_za16_f16_m (0, p0, p1, z0, z1),
+                 svmops_za16_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za16_f16_0_p1_p0_z1_z0:
+**	fmops	za0\.h, p1/m, p0/m, z1\.h, z0\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mops_za16_f16_0_p1_p0_z1_z0, svfloat16_t,
+                 svmops_za16_f16_m (0, p1, p0, z1, z0),
+                 svmops_za16_m (0, p1, p0, z1, z0))
+
+/*
+** mops_za16_f16_1_p0_p1_z0_z1:
+**	fmops	za1\.h, p0/m, p1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mops_za16_f16_1_p0_p1_z0_z1, svfloat16_t,
+                 svmops_za16_f16_m (1, p0, p1, z0, z1),
+                 svmops_za16_m (1, p0, p1, z0, z1))
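Unlike the slice-based operations above, the FMOPA/FMOPS tests target whole ZA tiles: the outer product of two f16 vectors is accumulated into (or subtracted from) tile 0 or 1 under a pair of predicates. A sketch under the same assumptions as the earlier examples (tile number and predicates per the intrinsic signatures used in the tests):

  #include <arm_sme.h>

  void __arm_streaming __arm_inout("za")
  outer (svbool_t prows, svbool_t pcols, svfloat16_t zn, svfloat16_t zm)
  {
    svmopa_za16_f16_m (0, prows, pcols, zn, zm);  /* ZA0.H += outer(zn, zm) */
    svmops_za16_f16_m (1, prows, pcols, zn, zm);  /* ZA1.H -= outer(zn, zm) */
  }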
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x2.c
new file mode 100644
index 000000000000..114e85e31f8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** sub_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_0_z0, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (0, z0),
+            svsub_za16_vg1x2 (0, z0))
+
+/*
+** sub_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w0_z0, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w0, z0),
+            svsub_za16_vg1x2 (w0, z0))
+
+/*
+** sub_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w7_z0, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w7, z0),
+            svsub_za16_vg1x2 (w7, z0))
+
+/*
+** sub_w8_z0:
+**	fsub	za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z0, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w8, z0),
+            svsub_za16_vg1x2 (w8, z0))
+
+/*
+** sub_w11_z0:
+**	fsub	za\.h\[w11, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w11_z0, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w11, z0),
+            svsub_za16_vg1x2 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w12_z0, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w12, z0),
+            svsub_za16_vg1x2 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+**	fsub	za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w8 + 7, z0),
+            svsub_za16_vg1x2 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w8 + 8, z0),
+            svsub_za16_vg1x2 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w8 - 1, z0),
+            svsub_za16_vg1x2 (w8 - 1, z0))
+
+/*
+** sub_w8_z18:
+**	fsub	za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z18, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w8, z18),
+            svsub_za16_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** sub_w8_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fsub	za\.h\[w8, 0, vgx2\], [^\n]+
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z23, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w8, z23),
+            svsub_za16_vg1x2 (w8, z23))
+
+/*
+** sub_w8_z28:
+**	fsub	za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z28, svfloat16x2_t,
+            svsub_za16_f16_vg1x2 (w8, z28),
+            svsub_za16_vg1x2 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x4.c
new file mode 100644
index 000000000000..c007ed5bace6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x4.c
@@ -0,0 +1,141 @@
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** sub_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_0_z0, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (0, z0),
+            svsub_za16_vg1x4 (0, z0))
+
+/*
+** sub_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w0_z0, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w0, z0),
+            svsub_za16_vg1x4 (w0, z0))
+
+/*
+** sub_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w7_z0, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w7, z0),
+            svsub_za16_vg1x4 (w7, z0))
+
+/*
+** sub_w8_z0:
+**	fsub	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z0, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w8, z0),
+            svsub_za16_vg1x4 (w8, z0))
+
+/*
+** sub_w11_z0:
+**	fsub	za\.h\[w11, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w11_z0, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w11, z0),
+            svsub_za16_vg1x4 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w12_z0, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w12, z0),
+            svsub_za16_vg1x4 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+**	fsub	za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w8 + 7, z0),
+            svsub_za16_vg1x4 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w8 + 8, z0),
+            svsub_za16_vg1x4 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w8 - 1, z0),
+            svsub_za16_vg1x4 (w8 - 1, z0))
+
+/*
+** sub_w8_z4:
+**	fsub	za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z4, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w8, z4),
+            svsub_za16_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** sub_w8_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fsub	za\.h\[w8, 0, vgx4\], [^\n]+
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z18, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w8, z18),
+            svsub_za16_vg1x4 (w8, z18))
+
+/*
+** sub_w8_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fsub	za\.h\[w8, 0, vgx4\], [^\n]+
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z23, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w8, z23),
+            svsub_za16_vg1x4 (w8, z23))
+
+/*
+** sub_w8_z28:
+**	fsub	za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z28, svfloat16x4_t,
+            svsub_za16_f16_vg1x4 (w8, z28),
+            svsub_za16_vg1x4 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
index 6c966a188de9..e9112c02b3ed 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
@@ -764,4 +764,20 @@
 	   "w" (z25));						\
   }
 
+#define TEST_X2_WIDE(NAME, TTYPE, ZTYPE, CODE1, CODE2)		\
+  PROTO (NAME, void, ())					\
+  {								\
+    register ZTYPE z0 __asm ("z0");				\
+    register ZTYPE z5 __asm ("z5");				\
+    register TTYPE z6 __asm ("z6");				\
+    register TTYPE z16 __asm ("z16");				\
+    register ZTYPE z22 __asm ("z22");				\
+    register TTYPE z29 __asm ("z29");				\
+    register TTYPE z0_res __asm ("z0");				\
+    __asm volatile ("" : "=w" (z0), "=w" (z5), "=w" (z22));	\
+    INVOKE (CODE1, CODE2);					\
+    __asm volatile ("" :: "w" (z0_res), "w" (z5), "w" (z6),	\
+		    "w" (z16), "w" (z22), "w" (z29));		\
+  }
+
 #endif
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index ff973af6d74a..eaae2f61775d 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12121,7 +12121,8 @@ proc check_effective_target_aarch64_tiny { } {
 foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
 			  "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64"
-			  "sme" "sme-i16i64" "sme2" "sve-b16b16" } {
+			  "sme" "sme-i16i64" "sme2" "sve-b16b16"
+			  "sme-f16f16" } {
     eval [string map [list FUNC $aarch64_ext] {
 	proc check_effective_target_aarch64_asm_FUNC_ok { } {
 	    if { [istarget aarch64*-*-*] } {