https://gcc.gnu.org/g:d71bb2c0c8b4bbf7fce5581932673fb67b58e8bb
commit r15-6076-gd71bb2c0c8b4bbf7fce5581932673fb67b58e8bb
Author: Saurabh Jha <saurabh....@arm.com>
Date:   Tue Dec 10 13:21:21 2024 +0000

    aarch64: Add support for fp8fma instructions

    The AArch64 FEAT_FP8FMA extension introduces instructions for
    multiply-add of vectors.

    This patch introduces the following instructions:

    1. {vmlalbq|vmlaltq}_f16_mf8_fpm.
    2. {vmlalbq|vmlaltq}_lane{q}_f16_mf8_fpm.
    3. {vmlallbbq|vmlallbtq|vmlalltbq|vmlallttq}_f32_mf8_fpm.
    4. {vmlallbbq|vmlallbtq|vmlalltbq|vmlallttq}_lane{q}_f32_mf8_fpm.
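As a usage sketch (not part of the patch): each new intrinsic takes the
accumulator, the two mf8 source vectors, and an fpm_t value that the compiler
moves into the FPMR register, as the tests below verify.  The function name
here is invented for illustration.

    #include <arm_neon.h>

    /* Multiply the even-indexed ("bottom") FP8 elements of b and c,
       widening to float16, and accumulate into a; fpm supplies the
       FP8 format controls via FPMR.  */
    float16x8_t
    add_bottom_products (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c,
                         fpm_t fpm)
    {
      return vmlalbq_f16_mf8_fpm (a, b, c, fpm);
    }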
    gcc/ChangeLog:

            * config/aarch64/aarch64-builtins.cc
            (aarch64_pragma_builtins_checker::require_immediate_lane_index):
            New overload.
            (aarch64_pragma_builtins_checker::check): Add support for FP8FMA
            intrinsics.
            (aarch64_expand_pragma_builtin): Likewise.
            * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins):
            Conditionally define __ARM_FEATURE_FP8FMA.
            * config/aarch64/aarch64-simd-pragma-builtins.def: Add the FP8FMA
            intrinsics.
            * config/aarch64/aarch64-simd.md
            (@aarch64_<FMLAL_FP8_HF:insn><mode>): New pattern.
            (@aarch64_<FMLAL_FP8_HF:insn>_lane<V8HF_ONLY:mode><VB:mode>):
            Likewise.
            (@aarch64_<FMLALL_FP8_SF:insn><mode>): Likewise.
            (@aarch64_<FMLALL_FP8_SF:insn>_lane<V4SF_ONLY:mode><VB:mode>):
            Likewise.
            * config/aarch64/iterators.md (V8HF_ONLY): New mode iterator.
            (SVE2_FP8_TERNARY_VNX8HF): Rename to...
            (FMLAL_FP8_HF): ...this.
            (SVE2_FP8_TERNARY_LANE_VNX8HF): Delete in favor of FMLAL_FP8_HF.
            (SVE2_FP8_TERNARY_VNX4SF): Rename to...
            (FMLALL_FP8_SF): ...this.
            (SVE2_FP8_TERNARY_LANE_VNX4SF): Delete in favor of FMLALL_FP8_SF.
            (sve2_fp8_fma_op_vnx8hf, sve2_fp8_fma_op_vnx4sf): Fold into...
            (insn): ...here.
            * config/aarch64/aarch64-sve2.md: Update uses accordingly.

    gcc/testsuite/ChangeLog:

            * gcc.target/aarch64/pragma_cpp_predefs_4.c: Test
            __ARM_FEATURE_FP8FMA.
            * gcc.target/aarch64/simd/vmla_fpm.c: New test.
            * gcc.target/aarch64/simd/vmla_lane_indices_1.c: Likewise.

    Co-authored-by: Richard Sandiford <richard.sandif...@arm.com>

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc             |  43 +++
 gcc/config/aarch64/aarch64-c.cc                    |   2 +
 .../aarch64/aarch64-simd-pragma-builtins.def       |  16 +
 gcc/config/aarch64/aarch64-simd.md                 |  63 ++++
 gcc/config/aarch64/aarch64-sve2.md                 |  32 +-
 gcc/config/aarch64/iterators.md                    |  31 +-
 .../gcc.target/aarch64/pragma_cpp_predefs_4.c      |  20 ++
 gcc/testsuite/gcc.target/aarch64/simd/vmla_fpm.c   | 365 +++++++++++++++++++++
 .../gcc.target/aarch64/simd/vmla_lane_indices_1.c  |  55 ++++
 9 files changed, 589 insertions(+), 38 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 63e17eeb20e5..ca1dc5a3e6a7 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2596,6 +2596,7 @@ struct aarch64_pragma_builtins_checker
   bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT);
   bool require_immediate_lane_index (unsigned int, unsigned int,
				     unsigned int);
+  bool require_immediate_lane_index (unsigned int, unsigned int);
 
   bool check ();
 
@@ -2659,6 +2660,16 @@ require_immediate_lane_index (unsigned int lane_argno, unsigned vec_argno,
   return require_immediate_range (lane_argno, 0, nunits - 1);
 }
 
+/* Require argument LANE_ARGNO to be an immediate lane index that selects
+   one element of argument VEC_ARGNO.  Return true if the argument
+   is valid.  */
+bool
+aarch64_pragma_builtins_checker::
+require_immediate_lane_index (unsigned int lane_argno, unsigned int vec_argno)
+{
+  return require_immediate_lane_index (lane_argno, vec_argno, vec_argno);
+}
+
 /* Check the arguments to the intrinsic call and return true if they
    are valid.  */
 bool
@@ -2669,6 +2680,19 @@ aarch64_pragma_builtins_checker::check ()
     case UNSPEC_FDOT_LANE_FP8:
       return require_immediate_lane_index (nargs - 2, nargs - 3, 0);
 
+    case UNSPEC_FMLALB_FP8:
+    case UNSPEC_FMLALT_FP8:
+    case UNSPEC_FMLALLBB_FP8:
+    case UNSPEC_FMLALLBT_FP8:
+    case UNSPEC_FMLALLTB_FP8:
+    case UNSPEC_FMLALLTT_FP8:
+      if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane)
+	return require_immediate_lane_index (nargs - 2, nargs - 3);
+      else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
+	return true;
+      else
+	gcc_unreachable ();
+
     case UNSPEC_LUTI2:
     case UNSPEC_LUTI4:
       {
@@ -3718,6 +3742,25 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
				     ops[0].mode, ops[3].mode);
       break;
 
+    case UNSPEC_FMLALB_FP8:
+    case UNSPEC_FMLALT_FP8:
+    case UNSPEC_FMLALLBB_FP8:
+    case UNSPEC_FMLALLBT_FP8:
+    case UNSPEC_FMLALLTB_FP8:
+    case UNSPEC_FMLALLTT_FP8:
+      if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane)
+	{
+	  ops[4].value = aarch64_endian_lane_rtx (ops[3].mode,
+						  INTVAL (ops[4].value));
+	  icode = code_for_aarch64_lane (builtin_data.unspec,
+					 ops[0].mode, ops[3].mode);
+	}
+      else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
+	icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode);
+      else
+	gcc_unreachable ();
+      break;
+
     case UNSPEC_LUTI2:
     case UNSPEC_LUTI4:
       create_integer_operand (ops.safe_push ({}),
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 7591f1622d2d..4d308de1e2bd 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -274,6 +274,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
 
   aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile);
 
+  aarch64_def_or_undef (TARGET_FP8FMA, "__ARM_FEATURE_FP8FMA", pfile);
+
   aarch64_def_or_undef (TARGET_LS64, "__ARM_FEATURE_LS64", pfile);
 
   aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile);
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index 19277860b8ce..5dafa7bb6b91 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -104,6 +104,12 @@
   ENTRY_TERNARY_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q,	\
		      UNSPEC_FDOT_LANE_FP8, FP8)
 
+#undef ENTRY_FMA_FPM
+#define ENTRY_FMA_FPM(N, T, U)						\
+  ENTRY_TERNARY (N##q_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U, FP8)	\
+  ENTRY_TERNARY_LANE (N##q_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U, FP8) \
+  ENTRY_TERNARY_LANE (N##q_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U, FP8)
+
 // faminmax
 #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
 ENTRY_BINARY_VHSDF (vamax, UNSPEC_FAMAX, FP)
@@ -154,3 +160,13 @@ ENTRY_VDOT_FPM (f16)
 #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4)
 ENTRY_VDOT_FPM (f32)
 #undef REQUIRED_EXTENSIONS
+
+// fp8 multiply-add
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8FMA)
+ENTRY_FMA_FPM (vmlalb, f16, UNSPEC_FMLALB_FP8)
+ENTRY_FMA_FPM (vmlalt, f16, UNSPEC_FMLALT_FP8)
+ENTRY_FMA_FPM (vmlallbb, f32, UNSPEC_FMLALLBB_FP8)
+ENTRY_FMA_FPM (vmlallbt, f32, UNSPEC_FMLALLBT_FP8)
+ENTRY_FMA_FPM (vmlalltb, f32, UNSPEC_FMLALLTB_FP8)
+ENTRY_FMA_FPM (vmlalltt, f32, UNSPEC_FMLALLTT_FP8)
+#undef REQUIRED_EXTENSIONS
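For reference, each ENTRY_FMA_FPM invocation above expands, per the #define,
into one full-vector entry and two lane entries; the first one, for example,
yields:

    ENTRY_TERNARY (vmlalbq_f16_mf8_fpm, f16q, f16q, f8q, f8q,
                   UNSPEC_FMLALB_FP8, FP8)
    ENTRY_TERNARY_LANE (vmlalbq_lane_f16_mf8_fpm, f16q, f16q, f8q, f8,
                        UNSPEC_FMLALB_FP8, FP8)
    ENTRY_TERNARY_LANE (vmlalbq_laneq_f16_mf8_fpm, f16q, f16q, f8q, f8q,
                        UNSPEC_FMLALB_FP8, FP8)

So the _lane form indexes a 64-bit (f8) vector and the _laneq form a 128-bit
(f8q) one, matching the [0, 7] and [0, 15] ranges enforced by the checker.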
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 69035c797fb0..fa72e6a6a604 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -10124,3 +10124,66 @@
   ""
   "<insn>\t%1.<VDQ_HSF_FDOT:Vtype>, %2.<VDQ_HSF_FDOT:Vnbtype>, %3.<VDQ_HSF_FDOT:Vnbsubtype>[%4]"
 )
+
+;; fpm fma instructions.
+(define_insn "@aarch64_<insn><mode>"
+  [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
+	(unspec:V8HF_ONLY
+	  [(match_operand:V8HF_ONLY 1 "register_operand" "0")
+	   (match_operand:V16QI 2 "register_operand" "w")
+	   (match_operand:V16QI 3 "register_operand" "w")
+	   (reg:DI FPM_REGNUM)]
+	  FMLAL_FP8_HF))]
+  "TARGET_FP8FMA"
+  "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
+)
+
+(define_insn "@aarch64_<insn>_lane<V8HF_ONLY:mode><VB:mode>"
+  [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
+	(unspec:V8HF_ONLY
+	  [(match_operand:V8HF_ONLY 1 "register_operand" "0")
+	   (match_operand:V16QI 2 "register_operand" "w")
+	   (vec_duplicate:V16QI
+	     (vec_select:QI
+	       (match_operand:VB 3 "register_operand" "w")
+	       (parallel [(match_operand:SI 4 "immediate_operand")])))
+	   (reg:DI FPM_REGNUM)]
+	  FMLAL_FP8_HF))]
+  "TARGET_FP8FMA"
+  {
+    operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
+					   INTVAL (operands[4]));
+    return "<insn>\t%0.<V8HF_ONLY:Vtype>, %2.16b, %3.b[%4]";
+  }
+)
+
+(define_insn "@aarch64_<insn><mode>"
+  [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
+	(unspec:V4SF_ONLY
+	  [(match_operand:V4SF_ONLY 1 "register_operand" "0")
+	   (match_operand:V16QI 2 "register_operand" "w")
+	   (match_operand:V16QI 3 "register_operand" "w")
+	   (reg:DI FPM_REGNUM)]
+	  FMLALL_FP8_SF))]
+  "TARGET_FP8FMA"
+  "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
+)
+
+(define_insn "@aarch64_<insn>_lane<V4SF_ONLY:mode><VB:mode>"
+  [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
+	(unspec:V4SF_ONLY
+	  [(match_operand:V4SF_ONLY 1 "register_operand" "0")
+	   (match_operand:V16QI 2 "register_operand" "w")
+	   (vec_duplicate:V16QI
+	     (vec_select:QI
+	       (match_operand:VB 3 "register_operand" "w")
+	       (parallel [(match_operand:SI 4 "immediate_operand")])))
+	   (reg:DI FPM_REGNUM)]
+	  FMLALL_FP8_SF))]
+  "TARGET_FP8FMA"
+  {
+    operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
+					   INTVAL (operands[4]));
+    return "<insn>\t%0.<V4SF_ONLY:Vtype>, %2.16b, %3.b[%4]";
+  }
+)
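A note on the lane patterns above: GCC numbers vector lanes in memory order,
which differs from the architectural register order on big-endian targets,
so the lane operand is remapped before printing.  A rough sketch of what
aarch64_endian_lane_rtx computes, assuming the backend's usual ENDIAN_LANE_N
convention:

    /* Identity on little-endian; reversed on big-endian.  */
    unsigned int arch_lane = BYTES_BIG_ENDIAN ? nunits - 1 - lane : lane;

The expander in aarch64-builtins.cc applies the same remapping to the user's
lane number before generating the insn, so the two conversions compose back
to the architectural lane.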
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index d26c0c55c555..7e8a505f707c 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -2013,37 +2013,37 @@
 ;; - FMLALLTT (indexed)	(FP8FMA)
 ;; -------------------------------------------------------------------------
 
-(define_insn "@aarch64_sve_add_<sve2_fp8_fma_op_vnx8hf><mode>"
+(define_insn "@aarch64_sve_add_<insn><mode>"
   [(set (match_operand:VNx8HF_ONLY 0 "register_operand")
	(unspec:VNx8HF_ONLY
	  [(match_operand:VNx8HF 1 "register_operand")
	   (match_operand:VNx16QI 2 "register_operand")
	   (match_operand:VNx16QI 3 "register_operand")
	   (reg:DI FPM_REGNUM)]
-	  SVE2_FP8_TERNARY_VNX8HF))]
+	  FMLAL_FP8_HF))]
   "TARGET_SSVE_FP8FMA"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w        , 0 , w , w ; *              ] <sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b
-     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b
+     [ w        , 0 , w , w ; *              ] <insn>\t%0.h, %2.b, %3.b
+     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<insn>\t%0.h, %2.b, %3.b
   }
 )
 
-(define_insn "@aarch64_sve_add_<sve2_fp8_fma_op_vnx4sf><mode>"
+(define_insn "@aarch64_sve_add_<insn><mode>"
   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
	(unspec:VNx4SF_ONLY
	  [(match_operand:VNx4SF 1 "register_operand")
	   (match_operand:VNx16QI 2 "register_operand")
	   (match_operand:VNx16QI 3 "register_operand")
	   (reg:DI FPM_REGNUM)]
-	  SVE2_FP8_TERNARY_VNX4SF))]
+	  FMLALL_FP8_SF))]
   "TARGET_SSVE_FP8FMA"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w        , 0 , w , w ; *              ] <sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b
-     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b
+     [ w        , 0 , w , w ; *              ] <insn>\t%0.s, %2.b, %3.b
+     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<insn>\t%0.s, %2.b, %3.b
   }
 )
 
-(define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx8hf><mode>"
+(define_insn "@aarch64_sve_add_lane_<insn><mode>"
   [(set (match_operand:VNx8HF_ONLY 0 "register_operand")
	(unspec:VNx8HF_ONLY
	  [(match_operand:VNx8HF 1 "register_operand")
@@ -2051,15 +2051,15 @@
	   (match_operand:VNx16QI 3 "register_operand")
	   (match_operand:SI 4 "const_int_operand")
	   (reg:DI FPM_REGNUM)]
-	  SVE2_FP8_TERNARY_LANE_VNX8HF))]
+	  FMLAL_FP8_HF))]
   "TARGET_SSVE_FP8FMA"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w        , 0 , w , y ; *              ] <sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b[%4]
-     [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b[%4]
+     [ w        , 0 , w , y ; *              ] <insn>\t%0.h, %2.b, %3.b[%4]
+     [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<insn>\t%0.h, %2.b, %3.b[%4]
   }
 )
 
-(define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx4sf><mode>"
+(define_insn "@aarch64_sve_add_lane_<insn><mode>"
   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
	(unspec:VNx4SF_ONLY
	  [(match_operand:VNx4SF 1 "register_operand")
@@ -2067,11 +2067,11 @@
	   (match_operand:VNx16QI 3 "register_operand")
	   (match_operand:SI 4 "const_int_operand")
	   (reg:DI FPM_REGNUM)]
-	  SVE2_FP8_TERNARY_LANE_VNX4SF))]
+	  FMLALL_FP8_SF))]
   "TARGET_SSVE_FP8FMA"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w        , 0 , w , y ; *              ] <sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b[%4]
-     [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b[%4]
+     [ w        , 0 , w , y ; *              ] <insn>\t%0.s, %2.b, %3.b[%4]
+     [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<insn>\t%0.s, %2.b, %3.b[%4]
   }
 )
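The constraint tables above are unchanged apart from the renames: when the
destination register can be tied to the accumulator, the instruction is
emitted directly; otherwise a movprfx copies the accumulator first.
Schematically, for the FMLAL_FP8_HF case:

    fmlalb  z0.h, z2.b, z3.b            // accumulator already in z0
    movprfx z0, z1
    fmlalb  z0.h, z2.b, z3.b            // accumulator was in z1

Only the iterator and attribute names change here; the generated code is the
same as before.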
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b28ba63cc9c0..47caddfe0c0f 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -41,6 +41,7 @@
 ;; Iterators for single modes, for "@" patterns.
 (define_mode_iterator SI_ONLY [SI])
 (define_mode_iterator DI_ONLY [DI])
+(define_mode_iterator V8HF_ONLY [V8HF])
 (define_mode_iterator V4SF_ONLY [V4SF])
 
 ;; Iterator for all integer modes (up to 64-bit)
@@ -3817,21 +3818,11 @@
      UNSPEC_F1CVTLT
      UNSPEC_F2CVTLT])
 
-(define_int_iterator SVE2_FP8_TERNARY_VNX8HF
+(define_int_iterator FMLAL_FP8_HF
   [UNSPEC_FMLALB_FP8
    UNSPEC_FMLALT_FP8])
 
-(define_int_iterator SVE2_FP8_TERNARY_VNX4SF
-  [UNSPEC_FMLALLBB_FP8
-   UNSPEC_FMLALLBT_FP8
-   UNSPEC_FMLALLTB_FP8
-   UNSPEC_FMLALLTT_FP8])
-
-(define_int_iterator SVE2_FP8_TERNARY_LANE_VNX8HF
-  [UNSPEC_FMLALB_FP8
-   UNSPEC_FMLALT_FP8])
-
-(define_int_iterator SVE2_FP8_TERNARY_LANE_VNX4SF
+(define_int_iterator FMLALL_FP8_SF
   [UNSPEC_FMLALLBB_FP8
    UNSPEC_FMLALLBT_FP8
    UNSPEC_FMLALLTB_FP8
@@ -3859,6 +3850,12 @@
    (UNSPEC_FCVTN_FP8 "fcvtn")
    (UNSPEC_FDOT_FP8 "fdot")
    (UNSPEC_FDOT_LANE_FP8 "fdot")
+   (UNSPEC_FMLALB_FP8 "fmlalb")
+   (UNSPEC_FMLALT_FP8 "fmlalt")
+   (UNSPEC_FMLALLBB_FP8 "fmlallbb")
+   (UNSPEC_FMLALLBT_FP8 "fmlallbt")
+   (UNSPEC_FMLALLTB_FP8 "fmlalltb")
+   (UNSPEC_FMLALLTT_FP8 "fmlalltt")
    (UNSPEC_FSCALE "fscale")])
 
 ;; The optab associated with an operation.  Note that for ANDF, IORF
@@ -4858,13 +4855,3 @@
    (UNSPEC_F2CVT "f2cvt")
    (UNSPEC_F1CVTLT "f1cvtlt")
    (UNSPEC_F2CVTLT "f2cvtlt")])
-
-(define_int_attr sve2_fp8_fma_op_vnx8hf
-  [(UNSPEC_FMLALB_FP8 "fmlalb")
-   (UNSPEC_FMLALT_FP8 "fmlalt")])
-
-(define_int_attr sve2_fp8_fma_op_vnx4sf
-  [(UNSPEC_FMLALLBB_FP8 "fmlallbb")
-   (UNSPEC_FMLALLBT_FP8 "fmlallbt")
-   (UNSPEC_FMLALLTB_FP8 "fmlalltb")
-   (UNSPEC_FMLALLTT_FP8 "fmlalltt")])
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
index fb3dc139f1f7..0dcfbec05bad 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -274,10 +274,27 @@
 #error Foo
 #endif
 
+#pragma GCC target "arch=armv9-a+fp8fma"
+#ifndef __ARM_FEATURE_FP8
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_FP8DOT4
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_FP8DOT2
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_FP8FMA
+#error Foo
+#endif
+
 #pragma GCC target "arch=armv9-a+fp8dot4"
 #ifndef __ARM_FEATURE_FP8
 #error Foo
 #endif
+#ifndef __ARM_FEATURE_FP8FMA
+#error Foo
+#endif
 #ifndef __ARM_FEATURE_FP8DOT4
 #error Foo
 #endif
@@ -289,6 +306,9 @@
 #ifndef __ARM_FEATURE_FP8
 #error Foo
 #endif
+#ifndef __ARM_FEATURE_FP8FMA
+#error Foo
+#endif
 #ifndef __ARM_FEATURE_FP8DOT4
 #error Foo
 #endif
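A minimal feature-detection sketch for user code, mirroring what the test
above checks: __ARM_FEATURE_FP8FMA is defined whenever FP8FMA is enabled,
including implicitly via +fp8dot4 or +fp8dot2, while +fp8fma alone defines
neither dot-product macro.

    #include <arm_neon.h>

    #ifdef __ARM_FEATURE_FP8FMA
    /* vmlalbq_f16_mf8_fpm and the other FP8FMA intrinsics are usable.  */
    #endif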
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmla_fpm.c b/gcc/testsuite/gcc.target/aarch64/simd/vmla_fpm.c
new file mode 100644
index 000000000000..51b47055ca2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmla_fpm.c
@@ -0,0 +1,365 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8fma" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vmlalbq_f16_fpm:
+**	msr	fpmr, x0
+**	fmlalb	v0.8h, v1.16b, v2.16b
+**	ret
+*/
+float16x8_t
+test_vmlalbq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalbq_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlaltq_f16_fpm:
+**	msr	fpmr, x0
+**	fmlalt	v0.8h, v1.16b, v2.16b
+**	ret
+*/
+float16x8_t
+test_vmlaltq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlaltq_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlallbbq_f32_fpm:
+**	msr	fpmr, x0
+**	fmlallbb	v0.4s, v1.16b, v2.16b
+**	ret
+*/
+float32x4_t
+test_vmlallbbq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbbq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlallbtq_f32_fpm:
+**	msr	fpmr, x0
+**	fmlallbt	v0.4s, v1.16b, v2.16b
+**	ret
+*/
+float32x4_t
+test_vmlallbtq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbtq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlalltbq_f32_fpm:
+**	msr	fpmr, x0
+**	fmlalltb	v0.4s, v1.16b, v2.16b
+**	ret
+*/
+float32x4_t
+test_vmlalltbq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalltbq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlallttq_f32_fpm:
+**	msr	fpmr, x0
+**	fmlalltt	v0.4s, v1.16b, v2.16b
+**	ret
+*/
+float32x4_t
+test_vmlallttq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallttq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlalbq_lane_f16_fpm_0:
+**	msr	fpmr, x0
+**	fmlalb	v0.8h, v1.16b, v2.b\[0\]
+**	ret
+*/
+float16x8_t
+test_vmlalbq_lane_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlalbq_lane_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalbq_lane_f16_fpm_7:
+**	msr	fpmr, x0
+**	fmlalb	v0.8h, v1.16b, v2.b\[7\]
+**	ret
+*/
+float16x8_t
+test_vmlalbq_lane_f16_fpm_7 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlalbq_lane_f16_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlalbq_laneq_f16_fpm_0:
+**	msr	fpmr, x0
+**	fmlalb	v0.8h, v1.16b, v2.b\[0\]
+**	ret
+*/
+float16x8_t
+test_vmlalbq_laneq_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalbq_laneq_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalbq_laneq_f16_fpm_15:
+**	msr	fpmr, x0
+**	fmlalb	v0.8h, v1.16b, v2.b\[15\]
+**	ret
+*/
+float16x8_t
+test_vmlalbq_laneq_f16_fpm_15 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalbq_laneq_f16_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlaltq_lane_f16_fpm_0:
+**	msr	fpmr, x0
+**	fmlalt	v0.8h, v1.16b, v2.b\[0\]
+**	ret
+*/
+float16x8_t
+test_vmlaltq_lane_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlaltq_lane_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlaltq_lane_f16_fpm_7:
+**	msr	fpmr, x0
+**	fmlalt	v0.8h, v1.16b, v2.b\[7\]
+**	ret
+*/
+float16x8_t
+test_vmlaltq_lane_f16_fpm_7 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlaltq_lane_f16_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlaltq_laneq_f16_fpm_0:
+**	msr	fpmr, x0
+**	fmlalt	v0.8h, v1.16b, v2.b\[0\]
+**	ret
+*/
+float16x8_t
+test_vmlaltq_laneq_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlaltq_laneq_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlaltq_laneq_f16_fpm_15:
+**	msr	fpmr, x0
+**	fmlalt	v0.8h, v1.16b, v2.b\[15\]
+**	ret
+*/
+float16x8_t
+test_vmlaltq_laneq_f16_fpm_15 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlaltq_laneq_f16_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlallbbq_lane_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlallbb	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallbbq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallbbq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbbq_lane_f32_fpm_7:
+**	msr	fpmr, x0
+**	fmlallbb	v0.4s, v1.16b, v2.b\[7\]
+**	ret
+*/
+float32x4_t
+test_vmlallbbq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallbbq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlallbbq_laneq_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlallbb	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallbbq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbbq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbbq_laneq_f32_fpm_15:
+**	msr	fpmr, x0
+**	fmlallbb	v0.4s, v1.16b, v2.b\[15\]
+**	ret
+*/
+float32x4_t
+test_vmlallbbq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbbq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlallbtq_lane_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlallbt	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallbtq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallbtq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbtq_lane_f32_fpm_7:
+**	msr	fpmr, x0
+**	fmlallbt	v0.4s, v1.16b, v2.b\[7\]
+**	ret
+*/
+float32x4_t
+test_vmlallbtq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallbtq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlallbtq_laneq_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlallbt	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallbtq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbtq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbtq_laneq_f32_fpm_15:
+**	msr	fpmr, x0
+**	fmlallbt	v0.4s, v1.16b, v2.b\[15\]
+**	ret
+*/
+float32x4_t
+test_vmlallbtq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbtq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlalltbq_lane_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlalltb	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlalltbq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlalltbq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalltbq_lane_f32_fpm_7:
+**	msr	fpmr, x0
+**	fmlalltb	v0.4s, v1.16b, v2.b\[7\]
+**	ret
+*/
+float32x4_t
+test_vmlalltbq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlalltbq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlalltbq_laneq_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlalltb	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlalltbq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalltbq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalltbq_laneq_f32_fpm_15:
+**	msr	fpmr, x0
+**	fmlalltb	v0.4s, v1.16b, v2.b\[15\]
+**	ret
+*/
+float32x4_t
+test_vmlalltbq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalltbq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlallttq_lane_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlalltt	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallttq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallttq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallttq_lane_f32_fpm_7:
+**	msr	fpmr, x0
+**	fmlalltt	v0.4s, v1.16b, v2.b\[7\]
+**	ret
+*/
+float32x4_t
+test_vmlallttq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallttq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlallttq_laneq_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlalltt	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallttq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallttq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallttq_laneq_f32_fpm_15:
+**	msr	fpmr, x0
+**	fmlalltt	v0.4s, v1.16b, v2.b\[15\]
+**	ret
+*/
+float32x4_t
+test_vmlallttq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallttq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmla_lane_indices_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmla_lane_indices_1.c
new file mode 100644
index 000000000000..d1a69f4ba541
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmla_lane_indices_1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+
+#include "arm_neon.h"
+
+#pragma GCC target "+fp8fma"
+
+void
+test(float16x4_t f16, float16x8_t f16q, float32x2_t f32,
+     float32x4_t f32q, mfloat8x8_t mf8, mfloat8x16_t mf8q, int x,
+     fpm_t fpm)
+{
+  vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlalbq_lane_f16_mf8_fpm' must be an integer constant expression} } */
+  vmlalbq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlalbq_laneq_f16_mf8_fpm' must be an integer constant expression} } */
+  vmlaltq_lane_f16_mf8_fpm (f16q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlaltq_lane_f16_mf8_fpm' must be an integer constant expression} } */
+  vmlaltq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlaltq_laneq_f16_mf8_fpm' must be an integer constant expression} } */
+
+  vmlallbbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlallbbq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlallbbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlallbbq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlallbtq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlallbtq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlallbtq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlallbtq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlalltbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlalltbq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlalltbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlalltbq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlallttq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlallttq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlallttq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlallttq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+
+  vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalbq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlalbq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlalbq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalbq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlalbq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlalbq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+  vmlaltq_lane_f16_mf8_fpm (f16q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlaltq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlaltq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlaltq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlaltq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlaltq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlaltq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlaltq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+  vmlallbbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallbbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlallbbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallbbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlallbbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlallbbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+  vmlallbtq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbtq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallbtq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlallbtq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallbtq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbtq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlallbtq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlallbtq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+  vmlalltbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalltbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlalltbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlalltbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlalltbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalltbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlalltbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlalltbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+  vmlallttq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallttq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallttq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlallttq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallttq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallttq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlallttq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlallttq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+}
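For contrast with the dg-error cases above, a constant in-range index is
accepted and compiles to the indexed instruction forms checked in
vmla_fpm.c, e.g.:

    vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 7, fpm);  /* OK: 7 is within [0, 7].  */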