https://gcc.gnu.org/g:3103441079fa30dc9f75a75bda38c39f1ffd708e
commit 3103441079fa30dc9f75a75bda38c39f1ffd708e
Author: Saurabh Jha <saurabh....@arm.com>
Date:   Mon Nov 4 09:11:33 2024 +0000

    aarch64: Add support for fp8 convert and scale

    The AArch64 FEAT_FP8 extension introduces instructions for conversion
    and scaling.

    This patch introduces the following intrinsics:
    1. vcvt{1|2}_{bf16|high_bf16|low_bf16}_mf8_fpm.
    2. vcvt{q}_mf8_f16_fpm.
    3. vcvt_{high}_mf8_f32_fpm.
    4. vscale{q}_{f16|f32|f64}.

    We introduced two new aarch64_builtin_signatures enum variants, unary
    and ternary, and added support for these variants in the functions
    aarch64_fntype and aarch64_expand_pragma_builtin.

    We added new simd_types for integers (s32, s32q, and s64q) and for
    floating-point values (f8 and f8q).

    Because we added support for fp8 intrinsics here, we removed the
    check in acle/fp8.c that verified that the __ARM_FEATURE_FP8 macro
    is not defined.

    gcc/ChangeLog:

        * config/aarch64/aarch64-builtins.cc (ENTRY): Modified to
        support the uses_fpmr flag.
        (enum class): New variants to support new signatures.
        (struct aarch64_pragma_builtins_data): Add a new boolean field,
        uses_fpmr.
        (aarch64_get_number_of_args): New helper function used in
        aarch64_fntype and aarch64_expand_pragma_builtin.
        (aarch64_fntype): Handle new signatures.
        (aarch64_expand_pragma_builtin): Handle new signatures.
        * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): New
        flag for FP8.
        * config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_BINARY):
        Macro to declare binary intrinsics.
        (ENTRY_BINARY_FPM): Macro to declare binary intrinsics that use
        FPMR.
        (ENTRY_TERNARY_FPM): Macro to declare ternary intrinsics that
        use FPMR.
        (ENTRY_UNARY_FPM): Macro to declare unary intrinsics that use
        FPMR.
        (ENTRY_VHSDF): Macro to declare binary intrinsics.
        (ENTRY_VHSDF_VHSDI): Macro to declare binary intrinsics.
        (REQUIRED_EXTENSIONS): Define to declare functions behind
        command-line flags.
        * config/aarch64/aarch64-simd.md
        (@aarch64_<fpm_unary_bf_uns_op><V8BF_ONLY:mode><VB:mode>): Unary
        pattern.
        (@aarch64_<fpm_unary_hf_uns_op><V8HF_ONLY:mode><VB:mode>): Unary
        pattern.
        (@aarch64_lower_<fpm_unary_bf_uns_op><V8BF_ONLY:mode><V16QI_ONLY:mode>):
        Unary pattern.
        (@aarch64_lower_<fpm_unary_hf_uns_op><V8HF_ONLY:mode><V16QI_ONLY:mode>):
        Unary pattern.
        (@aarch64_<fpm_uns_op><VB:mode><VCVTFPM:mode><VH_SF:mode>):
        Binary pattern.
        (@aarch64_<fpm_uns_op><V16QI_ONLY:mode><V8QI_ONLY:mode><V4SF_ONLY:mode><V4SF_ONLY:mode>):
        Ternary pattern.
        (@aarch64_<fpm_uns_op><VHSDF:mode><VHSDI:mode>): Binary pattern.
        * config/aarch64/iterators.md: New attributes and iterators.

    gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/acle/fp8.c: Remove the check that the fp8
        feature macro does not exist.
        * gcc.target/aarch64/simd/scale_fpm.c: New test.
        * gcc.target/aarch64/simd/vcvt_fpm.c: New test.
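[Editor's illustration, not part of the commit: the new intrinsics follow the shape used in the patch's own tests. The FPMR-using conversions take a trailing fpm_t argument (a 64-bit value that the compiler moves into the FPMR register before the instruction), while vscale takes a signed-integer exponent vector and does not involve FPMR. A minimal usage sketch, assuming the arm_neon.h declarations added by this patch and -march=armv9-a+fp8; the function names here are hypothetical:

#include <arm_neon.h>

/* Widen the 8 low FP8 elements to bfloat16.  The fpm_t operand `env'
   is moved into FPMR before the bf1cvtl instruction executes.  */
bfloat16x8_t
widen_low_fp8 (mfloat8x8_t a, fpm_t env)
{
  return vcvt1_bf16_mf8_fpm (a, env);
}

/* Narrow two float32x4_t vectors into one mfloat8x8_t (fcvtn), again
   under the FP8 format selected by the fpm_t operand.  */
mfloat8x8_t
narrow_f32 (float32x4_t a, float32x4_t b, fpm_t env)
{
  return vcvt_mf8_f32_fpm (a, b, env);
}

/* Scale each element of `a' by 2^b[i] (fscale); no fpm_t operand,
   since FSCALE does not read FPMR.  */
float32x4_t
scale_f32 (float32x4_t a, int32x4_t b)
{
  return vscaleq_f32 (a, b);
}
]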
Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc             | 137 +++++++++++---
 gcc/config/aarch64/aarch64-c.cc                    |   2 +
 .../aarch64/aarch64-simd-pragma-builtins.def       |  67 +++++--
 gcc/config/aarch64/aarch64-simd.md                 |  98 ++++++++++
 gcc/config/aarch64/iterators.md                    |  65 +++++++
 gcc/testsuite/gcc.target/aarch64/acle/fp8.c        |  10 --
 gcc/testsuite/gcc.target/aarch64/simd/scale_fpm.c  |  60 +++++++
 gcc/testsuite/gcc.target/aarch64/simd/vcvt_fpm.c   | 197 +++++++++++++++++++++
 8 files changed, 587 insertions(+), 49 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index ad82c680c6a0..9b7280a30d07 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -780,7 +780,7 @@ typedef struct
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
 
 #undef ENTRY
-#define ENTRY(N, S, M0, M1, M2, M3, U) \
+#define ENTRY(N, S, M0, M1, M2, M3, USES_FPMR, U) \
   AARCH64_##N,
 
 enum aarch64_builtins
@@ -1591,6 +1591,8 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma)
 enum class aarch64_builtin_signatures
 {
   binary,
+  ternary,
+  unary,
 };
 
 namespace {
@@ -1602,6 +1604,9 @@ struct simd_type {
 
 namespace simd_types {
 
+  constexpr simd_type f8 { V8QImode, qualifier_modal_float };
+  constexpr simd_type f8q { V16QImode, qualifier_modal_float };
+
   constexpr simd_type s8 { V8QImode, qualifier_none };
   constexpr simd_type u8 { V8QImode, qualifier_unsigned };
   constexpr simd_type s8q { V16QImode, qualifier_none };
@@ -1612,6 +1617,11 @@ namespace simd_types {
   constexpr simd_type s16q { V8HImode, qualifier_none };
   constexpr simd_type u16q { V8HImode, qualifier_unsigned };
 
+  constexpr simd_type s32 { V2SImode, qualifier_none };
+  constexpr simd_type s32q { V4SImode, qualifier_none };
+
+  constexpr simd_type s64q { V2DImode, qualifier_none };
+
   constexpr simd_type p8 { V8QImode, qualifier_poly };
   constexpr simd_type p8q { V16QImode, qualifier_poly };
   constexpr simd_type p16 { V4HImode, qualifier_poly };
@@ -1632,10 +1642,10 @@ namespace simd_types {
 }
 
 #undef ENTRY
-#define ENTRY(N, S, T0, T1, T2, T3, U) \
+#define ENTRY(N, S, T0, T1, T2, T3, USES_FPMR, U) \
   {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \
-   simd_types::T2, simd_types::T3, U, \
-   aarch64_required_extensions::REQUIRED_EXTENSIONS},
+   simd_types::T2, simd_types::T3, U, USES_FPMR, \
+   aarch64_required_extensions::REQUIRED_EXTENSIONS},
 
 /* Initialize pragma builtins.  */
 
@@ -1645,6 +1655,7 @@ struct aarch64_pragma_builtins_data
   aarch64_builtin_signatures signature;
   simd_type types[4];
   int unspec;
+  bool uses_fpmr;
   aarch64_required_extensions required_extensions;
 };
 
@@ -1652,25 +1663,40 @@ static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = {
 #include "aarch64-simd-pragma-builtins.def"
 };
 
+static unsigned int
+aarch64_get_number_of_args (const aarch64_pragma_builtins_data &builtin_data)
+{
+  if (builtin_data.signature == aarch64_builtin_signatures::unary)
+    return 1;
+  else if (builtin_data.signature == aarch64_builtin_signatures::binary)
+    return 2;
+  else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
+    return 3;
+  else
+    // No other signature supported.
+    gcc_unreachable ();
+}
+
 static tree
 aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
 {
-  tree type0, type1, type2;
+  tree return_type
+    = aarch64_simd_builtin_type (builtin_data.types[0].mode,
+                                 builtin_data.types[0].qualifiers);
 
-  switch (builtin_data.signature)
+  vec<tree, va_gc> *arg_types = NULL;
+  auto nargs = aarch64_get_number_of_args (builtin_data);
+  for (unsigned int i = 1; i <= nargs; ++i)
     {
-    case aarch64_builtin_signatures::binary:
-      type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode,
-                                         builtin_data.types[0].qualifiers);
-      type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode,
-                                         builtin_data.types[1].qualifiers);
-      type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode,
-                                         builtin_data.types[2].qualifiers);
-      return build_function_type_list (type0, type1, type2, NULL_TREE);
-
-    default:
-      gcc_unreachable ();
+      auto type = aarch64_simd_builtin_type (builtin_data.types[i].mode,
+                                             builtin_data.types[i].qualifiers);
+      vec_safe_push (arg_types, type);
     }
+
+  if (builtin_data.uses_fpmr == true)
+    vec_safe_push (arg_types, uint64_type_node);
+
+  return build_function_type_vec (return_type, arg_types);
 }
 
 static void
@@ -3383,25 +3409,78 @@ static rtx
 aarch64_expand_pragma_builtin (tree exp, rtx target,
                               const aarch64_pragma_builtins_data *builtin_data)
 {
-  expand_operand ops[3];
-  auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
-  auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1));
+  auto nargs = aarch64_get_number_of_args (*builtin_data);
+
+  expand_operand ops[5];
   create_output_operand (&ops[0], target, builtin_data->types[0].mode);
-  create_input_operand (&ops[1], op1, builtin_data->types[1].mode);
-  create_input_operand (&ops[2], op2, builtin_data->types[2].mode);
+  for (unsigned int i = 1; i <= nargs; ++i)
+    create_input_operand (&ops[i],
+                          expand_normal (CALL_EXPR_ARG (exp, i - 1)),
+                          builtin_data->types[i].mode);
 
-  auto unspec = builtin_data->unspec;
-  insn_code icode;
+  if (builtin_data->uses_fpmr == true)
+    {
+      auto fpm_input = expand_normal (CALL_EXPR_ARG (exp, nargs));
+      auto fpmr = gen_rtx_REG (DImode, FPM_REGNUM);
+      emit_move_insn (fpmr, fpm_input);
+    }
 
-  switch (builtin_data->signature)
+  enum insn_code icode;
+  switch (builtin_data->unspec)
     {
-    case aarch64_builtin_signatures::binary:
-      icode = code_for_aarch64 (unspec, builtin_data->types[0].mode);
-      expand_insn (icode, 3, ops);
+    case UNSPEC_FAMAX:
+    case UNSPEC_FAMIN:
+      icode = code_for_aarch64 (builtin_data->unspec,
+                                builtin_data->types[0].mode);
+      expand_insn (icode, nargs + 1, ops);
+      break;
+
+    case UNSPEC_VCVT1:
+    case UNSPEC_VCVT1_HIGH:
+    case UNSPEC_VCVT2:
+    case UNSPEC_VCVT2_HIGH:
+      icode = code_for_aarch64 (builtin_data->unspec,
+                                builtin_data->types[0].mode,
+                                builtin_data->types[1].mode);
+      expand_insn (icode, nargs + 1, ops);
+      break;
+
+    case UNSPEC_VCVT1_LOW:
+    case UNSPEC_VCVT2_LOW:
+      icode = code_for_aarch64_lower (builtin_data->unspec,
+                                      builtin_data->types[0].mode,
+                                      builtin_data->types[1].mode);
+      expand_insn (icode, nargs + 1, ops);
+      break;
+
+    case UNSPEC_FSCALE:
+      icode = code_for_aarch64 (builtin_data->unspec,
+                                builtin_data->types[1].mode,
+                                builtin_data->types[2].mode);
+      expand_insn (icode, nargs + 1, ops);
+      break;
+
+    case UNSPEC_VCVT:
+      icode = code_for_aarch64 (builtin_data->unspec,
+                                builtin_data->types[0].mode,
+                                builtin_data->types[1].mode,
+                                builtin_data->types[2].mode);
+      expand_insn (icode, nargs + 1, ops);
+      break;
+
+    case UNSPEC_VCVT_HIGH:
+      icode = code_for_aarch64 (builtin_data->unspec,
+                                builtin_data->types[0].mode,
+                                builtin_data->types[1].mode,
+                                builtin_data->types[2].mode,
+                                builtin_data->types[3].mode);
+      expand_insn (icode, nargs + 1, ops);
       break;
+
     default:
-      gcc_unreachable();
+      gcc_unreachable ();
     }
+
   return target;
 }
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index d1ae80c0bb3e..b13366b0621d 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -258,6 +258,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
   aarch64_def_or_undef (TARGET_SVE_BF16,
                         "__ARM_FEATURE_SVE_BF16", pfile);
 
+  aarch64_def_or_undef (TARGET_FP8, "__ARM_FEATURE_FP8", pfile);
+
   aarch64_def_or_undef (TARGET_LS64,
                         "__ARM_FEATURE_LS64", pfile);
   aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile);
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index c669919fa048..91897cffcd83 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -20,19 +20,66 @@
 
 #undef ENTRY_BINARY
-#define ENTRY_BINARY(N, S, T0, T1, T2, U) \
-  ENTRY (N, S, T0, T1, T2, none, U)
+#define ENTRY_BINARY(N, T0, T1, T2, U) \
+  ENTRY (N, binary, T0, T1, T2, none, false, U)
+
+#undef ENTRY_BINARY_FPM
+#define ENTRY_BINARY_FPM(N, T0, T1, T2, U) \
+  ENTRY (N, binary, T0, T1, T2, none, true, U)
+
+#undef ENTRY_TERNARY_FPM
+#define ENTRY_TERNARY_FPM(N, T0, T1, T2, T3, U) \
+  ENTRY (N, ternary, T0, T1, T2, T3, true, U)
+
+#undef ENTRY_UNARY_FPM
+#define ENTRY_UNARY_FPM(N, T0, T1, U) \
+  ENTRY (N, unary, T0, T1, none, none, true, U)
 
 #undef ENTRY_VHSDF
-#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC) \
-  ENTRY_BINARY (NAME##_f16, SIGNATURE, f16, f16, f16, UNSPEC) \
-  ENTRY_BINARY (NAME##q_f16, SIGNATURE, f16q, f16q, f16q, UNSPEC) \
-  ENTRY_BINARY (NAME##_f32, SIGNATURE, f32, f32, f32, UNSPEC) \
-  ENTRY_BINARY (NAME##q_f32, SIGNATURE, f32q, f32q, f32q, UNSPEC) \
-  ENTRY_BINARY (NAME##q_f64, SIGNATURE, f64q, f64q, f64q, UNSPEC)
+#define ENTRY_VHSDF(NAME, UNSPEC) \
+  ENTRY_BINARY (NAME##_f16, f16, f16, f16, UNSPEC) \
+  ENTRY_BINARY (NAME##q_f16, f16q, f16q, f16q, UNSPEC) \
+  ENTRY_BINARY (NAME##_f32, f32, f32, f32, UNSPEC) \
+  ENTRY_BINARY (NAME##q_f32, f32q, f32q, f32q, UNSPEC) \
+  ENTRY_BINARY (NAME##q_f64, f64q, f64q, f64q, UNSPEC)
+
+#undef ENTRY_VHSDF_VHSDI
+#define ENTRY_VHSDF_VHSDI(NAME, UNSPEC) \
+  ENTRY_BINARY (NAME##_f16, f16, f16, s16, UNSPEC) \
+  ENTRY_BINARY (NAME##q_f16, f16q, f16q, s16q, UNSPEC) \
+  ENTRY_BINARY (NAME##_f32, f32, f32, s32, UNSPEC) \
+  ENTRY_BINARY (NAME##q_f32, f32q, f32q, s32q, UNSPEC) \
+  ENTRY_BINARY (NAME##q_f64, f64q, f64q, s64q, UNSPEC)
 
 // faminmax
 #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
-ENTRY_VHSDF (vamax, binary, UNSPEC_FAMAX)
-ENTRY_VHSDF (vamin, binary, UNSPEC_FAMIN)
+ENTRY_VHSDF (vamax, UNSPEC_FAMAX)
+ENTRY_VHSDF (vamin, UNSPEC_FAMIN)
+#undef REQUIRED_EXTENSIONS
+
+// fpm conversion
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8)
+ENTRY_UNARY_FPM (vcvt1_bf16_mf8_fpm, bf16q, f8, UNSPEC_VCVT1)
+ENTRY_UNARY_FPM (vcvt1_high_bf16_mf8_fpm, bf16q, f8q, UNSPEC_VCVT1_HIGH)
+ENTRY_UNARY_FPM (vcvt1_low_bf16_mf8_fpm, bf16q, f8q, UNSPEC_VCVT1_LOW)
+ENTRY_UNARY_FPM (vcvt1_f16_mf8_fpm, f16q, f8, UNSPEC_VCVT1)
+ENTRY_UNARY_FPM (vcvt1_high_f16_mf8_fpm, f16q, f8q, UNSPEC_VCVT1_HIGH)
+ENTRY_UNARY_FPM (vcvt1_low_f16_mf8_fpm, f16q, f8q, UNSPEC_VCVT1_LOW)
+ENTRY_UNARY_FPM (vcvt2_bf16_mf8_fpm, bf16q, f8, UNSPEC_VCVT2)
+ENTRY_UNARY_FPM (vcvt2_high_bf16_mf8_fpm, bf16q, f8q, UNSPEC_VCVT2_HIGH)
+ENTRY_UNARY_FPM (vcvt2_low_bf16_mf8_fpm, bf16q, f8q, UNSPEC_VCVT2_LOW)
+ENTRY_UNARY_FPM (vcvt2_f16_mf8_fpm, f16q, f8, UNSPEC_VCVT2)
+ENTRY_UNARY_FPM (vcvt2_high_f16_mf8_fpm, f16q, f8q, UNSPEC_VCVT2_HIGH)
+ENTRY_UNARY_FPM (vcvt2_low_f16_mf8_fpm, f16q, f8q, UNSPEC_VCVT2_LOW)
+
+ENTRY_BINARY_FPM (vcvt_mf8_f16_fpm, f8, f16, f16, UNSPEC_VCVT)
+ENTRY_BINARY_FPM (vcvtq_mf8_f16_fpm, f8q, f16q, f16q, UNSPEC_VCVT)
+ENTRY_BINARY_FPM (vcvt_mf8_f32_fpm, f8, f32q, f32q, UNSPEC_VCVT)
+
+ENTRY_TERNARY_FPM (vcvt_high_mf8_f32_fpm, f8q, f8, f32q, f32q, UNSPEC_VCVT_HIGH)
+#undef REQUIRED_EXTENSIONS
+
+// fpm scaling
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8)
+ENTRY_VHSDF_VHSDI (vscale, UNSPEC_FSCALE)
 #undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index cfe95bd4c316..f8437469a7e7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9999,3 +9999,101 @@
   "TARGET_FAMINMAX"
   "<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 )
+
+;; fpm unary instructions for brain float modes.
+(define_insn "@aarch64_<fpm_unary_bf_uns_op><V8BF_ONLY:mode><VB:mode>"
+  [(set (match_operand:V8BF_ONLY 0 "register_operand" "=w")
+        (unspec:V8BF_ONLY
+          [(match_operand:VB 1 "register_operand" "w")
+           (reg:DI FPM_REGNUM)]
+          FPM_UNARY_UNS))]
+  "TARGET_FP8"
+  "<fpm_unary_bf_uns_op>\t%0.<V8BF_ONLY:Vtype>, %1.<VB:Vtype>"
+)
+
+;; fpm unary instructions for half float modes.
+(define_insn "@aarch64_<fpm_unary_hf_uns_op><V8HF_ONLY:mode><VB:mode>"
+  [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
+        (unspec:V8HF_ONLY
+          [(match_operand:VB 1 "register_operand" "w")
+           (reg:DI FPM_REGNUM)]
+          FPM_UNARY_UNS))]
+  "TARGET_FP8"
+  "<fpm_unary_hf_uns_op>\t%0.<V8HF_ONLY:Vtype>, %1.<VB:Vtype>"
+)
+
+;; fpm unary instructions for brain float modes, where the input is
+;; lowered from V16QI to V8QI.
+(define_insn
+  "@aarch64_lower_<fpm_unary_bf_uns_op><V8BF_ONLY:mode><V16QI_ONLY:mode>"
+  [(set (match_operand:V8BF_ONLY 0 "register_operand" "=w")
+        (unspec:V8BF_ONLY
+          [(match_operand:V16QI_ONLY 1 "register_operand" "w")
+           (reg:DI FPM_REGNUM)]
+          FPM_UNARY_LOW_UNS))]
+  "TARGET_FP8"
+  {
+    operands[1] = force_lowpart_subreg (V8QImode,
+                                        operands[1],
+                                        recog_data.operand[1]->mode);
+    return "<fpm_unary_bf_uns_op>\t%0.<V8BF_ONLY:Vtype>, %1.8b";
+  }
+)
+
+;; fpm unary instructions for half float modes, where the input is
+;; lowered from V16QI to V8QI.
+(define_insn
+  "@aarch64_lower_<fpm_unary_hf_uns_op><V8HF_ONLY:mode><V16QI_ONLY:mode>"
+  [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
+        (unspec:V8HF_ONLY
+          [(match_operand:V16QI_ONLY 1 "register_operand" "w")
+           (reg:DI FPM_REGNUM)]
+          FPM_UNARY_LOW_UNS))]
+  "TARGET_FP8"
+  {
+    operands[1] = force_lowpart_subreg (V8QImode,
+                                        operands[1],
+                                        recog_data.operand[1]->mode);
+    return "<fpm_unary_hf_uns_op>\t%0.<V8HF_ONLY:Vtype>, %1.8b";
+  }
+)
+
+;; fpm binary instructions.
+(define_insn
+  "@aarch64_<fpm_uns_op><VB:mode><VCVTFPM:mode><VH_SF:mode>"
+  [(set (match_operand:VB 0 "register_operand" "=w")
+        (unspec:VB
+          [(match_operand:VCVTFPM 1 "register_operand" "w")
+           (match_operand:VH_SF 2 "register_operand" "w")
+           (reg:DI FPM_REGNUM)]
+          FPM_BINARY_UNS))]
+  "TARGET_FP8"
+  "<fpm_uns_op>\t%0.<VB:Vtype>, %1.<VCVTFPM:Vtype>, %2.<VH_SF:Vtype>"
+)
+
+;; fpm ternary instructions.
+(define_insn
+  "@aarch64_<fpm_uns_op><V16QI_ONLY:mode><V8QI_ONLY:mode><V4SF_ONLY:mode><V4SF_ONLY:mode>"
+  [(set (match_operand:V16QI_ONLY 0 "register_operand" "=w")
+        (unspec:V16QI_ONLY
+          [(match_operand:V8QI_ONLY 1 "register_operand" "w")
+           (match_operand:V4SF_ONLY 2 "register_operand" "w")
+           (match_operand:V4SF_ONLY 3 "register_operand" "w")
+           (reg:DI FPM_REGNUM)]
+          FPM_TERNARY_VCVT_UNS))]
+  "TARGET_FP8"
+  {
+    operands[1] = force_reg (V16QImode, operands[1]);
+    return "<fpm_uns_op>\t%1.16b, %2.<V4SF_ONLY:Vtype>, %3.<V4SF_ONLY:Vtype>";
+  }
+)
+
+;; fpm scale instructions
+(define_insn "@aarch64_<fpm_uns_op><VHSDF:mode><VHSDI:mode>"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+                       (match_operand:VHSDI 2 "register_operand" "w")]
+                      FPM_SCALE_UNS))]
+  "TARGET_FP8"
+  "<fpm_uns_op>\t%0.<VHSDF:Vtype>, %1.<VHSDF:Vtype>, %2.<VHSDI:Vtype>"
+)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index ce8f032c1410..bdd276b554ba 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -41,6 +41,9 @@
 ;; Iterators for single modes, for "@" patterns.
 (define_mode_iterator SI_ONLY [SI])
 (define_mode_iterator DI_ONLY [DI])
+(define_mode_iterator V8QI_ONLY [V8QI])
+(define_mode_iterator V16QI_ONLY [V16QI])
+(define_mode_iterator V4SF_ONLY [V4SF])
 
 ;; Iterator for all integer modes (up to 64-bit)
 (define_mode_iterator ALLI [QI HI SI DI])
@@ -163,6 +166,12 @@
 (define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
                             (V8HF "TARGET_SIMD_F16INST")
                             V2SF V4SF V2DF])
+(define_mode_iterator VH_SF [(V4HF "TARGET_SIMD_F16INST")
+                             (V8HF "TARGET_SIMD_F16INST")
+                             V4SF])
+
+;; Advanced SIMD Integer modes.
+(define_mode_iterator VHSDI [V4HI V8HI V2SI V4SI V2DI])
 
 ;; Advanced SIMD Float modes, and DF.
 (define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
@@ -426,6 +435,12 @@
                              (V8HF "TARGET_SIMD_F16INST")
                              V2SF V4SF])
 
+;; Modes available for Advanced SIMD FP8 conversion operations.
+(define_mode_iterator VCVTFPM [V8QI
+                               (V4HF "TARGET_SIMD_F16INST")
+                               (V8HF "TARGET_SIMD_F16INST")
+                               V4SF])
+
 ;; Iterators for single modes, for "@" patterns.
 (define_mode_iterator VNx16QI_ONLY [VNx16QI])
 (define_mode_iterator VNx16SI_ONLY [VNx16SI])
@@ -635,6 +650,10 @@
 ;; Bfloat16 modes to which V4SF can be converted
 (define_mode_iterator V4SF_TO_BF [V4BF V8BF])
 
+;; Float16 and Bfloat16 modes separately
+(define_mode_iterator V8HF_ONLY [V8HF])
+(define_mode_iterator V8BF_ONLY [V8BF])
+
 (define_mode_iterator SVE_BHSx24 [VNx32QI VNx16HI VNx8SI
                                   VNx16BF VNx16HF VNx8SF
                                   VNx64QI VNx32HI VNx16SI
@@ -699,6 +718,7 @@
     UNSPEC_FMINV        ; Used in aarch64-simd.md.
     UNSPEC_FADDV        ; Used in aarch64-simd.md.
     UNSPEC_FNEG         ; Used in aarch64-simd.md.
+    UNSPEC_FSCALE       ; Used in aarch64-simd.md.
    UNSPEC_ADDV          ; Used in aarch64-simd.md.
     UNSPEC_SMAXV        ; Used in aarch64-simd.md.
     UNSPEC_SMINV        ; Used in aarch64-simd.md.
@@ -736,6 +756,14 @@
     UNSPEC_SSHLL        ; Used in aarch64-simd.md.
     UNSPEC_USHLL        ; Used in aarch64-simd.md.
     UNSPEC_ADDP         ; Used in aarch64-simd.md.
+    UNSPEC_VCVT         ; Used in aarch64-simd.md.
+    UNSPEC_VCVT_HIGH    ; Used in aarch64-simd.md.
+    UNSPEC_VCVT1        ; Used in aarch64-simd.md.
+    UNSPEC_VCVT1_HIGH   ; Used in aarch64-simd.md.
+    UNSPEC_VCVT1_LOW    ; Used in aarch64-simd.md.
+    UNSPEC_VCVT2        ; Used in aarch64-simd.md.
+    UNSPEC_VCVT2_HIGH   ; Used in aarch64-simd.md.
+    UNSPEC_VCVT2_LOW    ; Used in aarch64-simd.md.
     UNSPEC_TBL          ; Used in vector permute patterns.
     UNSPEC_TBLQ         ; Used in vector permute patterns.
     UNSPEC_TBX          ; Used in vector permute patterns.
@@ -4659,3 +4687,40 @@
 (define_code_attr faminmax_op
   [(smax "famax")
    (smin "famin")])
+
+;; Iterators and attributes for fpm instructions
+
+(define_int_iterator FPM_UNARY_UNS
+  [UNSPEC_VCVT1
+   UNSPEC_VCVT1_HIGH
+   UNSPEC_VCVT2
+   UNSPEC_VCVT2_HIGH])
+
+(define_int_iterator FPM_UNARY_LOW_UNS [UNSPEC_VCVT1_LOW UNSPEC_VCVT2_LOW])
+
+(define_int_iterator FPM_BINARY_UNS [UNSPEC_VCVT])
+
+(define_int_iterator FPM_SCALE_UNS [UNSPEC_FSCALE])
+
+(define_int_iterator FPM_TERNARY_VCVT_UNS [UNSPEC_VCVT_HIGH])
+
+(define_int_attr fpm_unary_bf_uns_op
+  [(UNSPEC_VCVT1 "bf1cvtl")
+   (UNSPEC_VCVT1_HIGH "bf1cvtl2")
+   (UNSPEC_VCVT1_LOW "bf1cvtl")
+   (UNSPEC_VCVT2 "bf2cvtl")
+   (UNSPEC_VCVT2_HIGH "bf2cvtl2")
+   (UNSPEC_VCVT2_LOW "bf2cvtl")])
+
+(define_int_attr fpm_unary_hf_uns_op
+  [(UNSPEC_VCVT1 "f1cvtl")
+   (UNSPEC_VCVT1_HIGH "f1cvtl2")
+   (UNSPEC_VCVT1_LOW "f1cvtl")
+   (UNSPEC_VCVT2 "f2cvtl")
+   (UNSPEC_VCVT2_HIGH "f2cvtl2")
+   (UNSPEC_VCVT2_LOW "f2cvtl")])
+
+(define_int_attr fpm_uns_op
+  [(UNSPEC_FSCALE "fscale")
+   (UNSPEC_VCVT "fcvtn")
+   (UNSPEC_VCVT_HIGH "fcvtn2")])
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
index afb44f83f60d..635a7eaf4a2c 100644
--- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
+++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
@@ -5,19 +5,9 @@
 
 #include <arm_acle.h>
 
-#ifdef __ARM_FEATURE_FP8
-#error "__ARM_FEATURE_FP8 feature macro defined."
-#endif
-
 #pragma GCC push_options
 #pragma GCC target("arch=armv9.4-a+fp8")
 
-/* We do not define __ARM_FEATURE_FP8 until all
-   relevant features have been added. */
-#ifdef __ARM_FEATURE_FP8
-#error "__ARM_FEATURE_FP8 feature macro defined."
-#endif
-
 /*
 **test_write_fpmr_sysreg_asm_64:
 **	msr	fpmr, x0
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/scale_fpm.c b/gcc/testsuite/gcc.target/aarch64/simd/scale_fpm.c
new file mode 100644
index 000000000000..d95a861fcfdf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/scale_fpm.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vscale_f16:
+**	fscale	v0.4h, v0.4h, v1.4h
+**	ret
+*/
+float16x4_t
+test_vscale_f16 (float16x4_t a, int16x4_t b)
+{
+  return vscale_f16 (a, b);
+}
+
+/*
+** test_vscaleq_f16:
+**	fscale	v0.8h, v0.8h, v1.8h
+**	ret
+*/
+float16x8_t
+test_vscaleq_f16 (float16x8_t a, int16x8_t b)
+{
+  return vscaleq_f16 (a, b);
+}
+
+/*
+** test_vscale_f32:
+**	fscale	v0.2s, v0.2s, v1.2s
+**	ret
+*/
+float32x2_t
+test_vscale_f32 (float32x2_t a, int32x2_t b)
+{
+  return vscale_f32 (a, b);
+}
+
+/*
+** test_vscaleq_f32:
+**	fscale	v0.4s, v0.4s, v1.4s
+**	ret
+*/
+float32x4_t
+test_vscaleq_f32 (float32x4_t a, int32x4_t b)
+{
+  return vscaleq_f32 (a, b);
+}
+
+/*
+** test_vscaleq_f64:
+**	fscale	v0.2d, v0.2d, v1.2d
+**	ret
+*/
+float64x2_t
+test_vscaleq_f64 (float64x2_t a, int64x2_t b)
+{
+  return vscaleq_f64 (a, b);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vcvt_fpm.c b/gcc/testsuite/gcc.target/aarch64/simd/vcvt_fpm.c
new file mode 100644
index 000000000000..39076684345f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vcvt_fpm.c
@@ -0,0 +1,197 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vcvt1_bf16:
+**	msr	fpmr, x0
+**	bf1cvtl	v0.8h, v0.8b
+**	ret
+*/
+bfloat16x8_t
+test_vcvt1_bf16 (mfloat8x8_t a, fpm_t b)
+{
+  return vcvt1_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_high_vcvt1_bf16:
+**	msr	fpmr, x0
+**	bf1cvtl2	v0.8h, v0.16b
+**	ret
+*/
+bfloat16x8_t
+test_high_vcvt1_bf16 (mfloat8x16_t a, fpm_t b)
+{
+  return vcvt1_high_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_low_vcvt1_bf16:
+**	msr	fpmr, x0
+**	bf1cvtl	v0.8h, v0.8b
+**	ret
+*/
+bfloat16x8_t
+test_low_vcvt1_bf16 (mfloat8x16_t a, fpm_t b)
+{
+  return vcvt1_low_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_vcvt1_f16:
+**	msr	fpmr, x0
+**	f1cvtl	v0.8h, v0.8b
+**	ret
+*/
+float16x8_t
+test_vcvt1_f16 (mfloat8x8_t a, fpm_t b)
+{
+  return vcvt1_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_high_vcvt1_f16:
+**	msr	fpmr, x0
+**	f1cvtl2	v0.8h, v0.16b
+**	ret
+*/
+float16x8_t
+test_high_vcvt1_f16 (mfloat8x16_t a, fpm_t b)
+{
+  return vcvt1_high_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_low_vcvt1_f16:
+**	msr	fpmr, x0
+**	f1cvtl	v0.8h, v0.8b
+**	ret
+*/
+float16x8_t
+test_low_vcvt1_f16 (mfloat8x16_t a, fpm_t b)
+{
+  return vcvt1_low_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_vcvt2_bf16:
+**	msr	fpmr, x0
+**	bf2cvtl	v0.8h, v0.8b
+**	ret
+*/
+bfloat16x8_t
+test_vcvt2_bf16 (mfloat8x8_t a, fpm_t b)
+{
+  return vcvt2_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_high_vcvt2_bf16:
+**	msr	fpmr, x0
+**	bf2cvtl2	v0.8h, v0.16b
+**	ret
+*/
+bfloat16x8_t
+test_high_vcvt2_bf16 (mfloat8x16_t a, fpm_t b)
+{
+  return vcvt2_high_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_low_vcvt2_bf16:
+**	msr	fpmr, x0
+**	bf1cvtl	v0.8h, v0.8b
+**	ret
+*/
+bfloat16x8_t
+test_low_vcvt2_bf16 (mfloat8x16_t a, fpm_t b)
+{
+  return vcvt1_low_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_vcvt2_f16:
+**	msr	fpmr, x0
+**	f2cvtl	v0.8h, v0.8b
+**	ret
+*/
+float16x8_t
+test_vcvt2_f16 (mfloat8x8_t a, fpm_t b)
+{
+  return vcvt2_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_high_vcvt2_f16:
+**	msr	fpmr, x0
+**	f2cvtl2	v0.8h, v0.16b
+**	ret
+*/
+float16x8_t
+test_high_vcvt2_f16 (mfloat8x16_t a, fpm_t b)
+{
+  return vcvt2_high_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_low_vcvt2_f16:
+**	msr	fpmr, x0
+**	f1cvtl	v0.8h, v0.8b
+**	ret
+*/
+float16x8_t
+test_low_vcvt2_f16 (mfloat8x16_t a, fpm_t b)
+{
+  return vcvt1_low_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_vcvt_f16:
+**	msr	fpmr, x0
+**	fcvtn	v0.8b, v0.4h, v1.4h
+**	ret
+*/
+mfloat8x8_t
+test_vcvt_f16 (float16x4_t a, float16x4_t b, fpm_t c)
+{
+  return vcvt_mf8_f16_fpm(a, b, c);
+}
+
+/*
+** test_vcvtq_f16:
+**	msr	fpmr, x0
+**	fcvtn	v0.16b, v0.8h, v1.8h
+**	ret
+*/
+mfloat8x16_t
+test_vcvtq_f16 (float16x8_t a, float16x8_t b, fpm_t c)
+{
+  return vcvtq_mf8_f16_fpm(a, b, c);
+}
+
+/*
+** test_vcvt_f32:
+**	msr	fpmr, x0
+**	fcvtn	v0.8b, v0.4s, v1.4s
+**	ret
+*/
+mfloat8x8_t
+test_vcvt_f32 (float32x4_t a, float32x4_t b, fpm_t c)
+{
+  return vcvt_mf8_f32_fpm(a, b, c);
+}
+
+/*
+** test_vcvt_high_f32:
+**	msr	fpmr, x0
+**	fcvtn2	v0.16b, v1.4s, v2.4s
+**	ret
+*/
+mfloat8x16_t
+test_vcvt_high_f32 (mfloat8x8_t a, float32x4_t b, float32x4_t c, fpm_t d)
+{
+  return vcvt_high_mf8_f32_fpm(a, b, c, d);
+}