https://gcc.gnu.org/g:c88c7d345c26724ace5f69c0ce1895b57154ced2
commit r15-6075-gc88c7d345c26724ace5f69c0ce1895b57154ced2 Author: Saurabh Jha <saurabh....@arm.com> Date: Tue Dec 10 13:21:20 2024 +0000 aarch64: Add support for fp8dot2 and fp8dot4 The AArch64 FEAT_FP8DOT2 and FEAT_FP8DOT4 extensions introduce instructions for dot product of vectors. This patch introduces the following intrinsics: 1. vdot{q}_{f16|f32}_mf8_fpm. 2. vdot{q}_lane{q}_{f16|f32}_mf8_fpm. We added a new aarch64_builtin_signatures variant, ternary_lane, and added support for it in the functions aarch64_fntype and aarch64_expand_pragma_builtin. gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (enum class): Add ternary_lane. (aarch64_fntype): Handle ternary_lane. (aarch64_pragma_builtins_checker::require_immediate_lane_index): New function. (aarch64_pragma_builtins_checker::check): Handle the new intrinsics. (aarch64_expand_pragma_builtin): Likewise. * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define __ARM_FEATURE_FP8DOT2 and __ARM_FEATURE_FP8DOT4. * config/aarch64/aarch64-simd-pragma-builtins.def: Define vdot and vdot_lane intrinsics. * config/aarch64/aarch64-simd.md (@aarch64_<insn><mode>): New pattern. (@aarch64_<insn>_lane<VDQ_HSF_FDOT:mode><VB:mode>): Likewise. * config/aarch64/iterators.md (VDQ_HSF_FDOT): New mode iterator. (UNSPEC_FDOT_FP8, UNSPEC_FDOT_LANE_FP8): New unspecs. (insn): Handle them. (VNARROWB, Vnbtype, Vnbsubtype): New mode attributes. (FPM_FDOT, FPM_FDOT_LANE): New int iterators. gcc/testsuite/ChangeLog: * gcc.target/aarch64/pragma_cpp_predefs_4.c: Test fp8dot2 and fp8dot4. * gcc.target/aarch64/simd/vdot2_fpm.c: New test. * gcc.target/aarch64/simd/vdot4_fpm.c: New test. * gcc.target/aarch64/simd/vdot_lane_indices_1.c: New test. 
Co-authored-by: Richard Sandiford <richard.sandif...@arm.com> Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 29 +++++ gcc/config/aarch64/aarch64-c.cc | 4 + .../aarch64/aarch64-simd-pragma-builtins.def | 29 +++++ gcc/config/aarch64/aarch64-simd.md | 27 +++++ gcc/config/aarch64/iterators.md | 24 ++++ .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 22 ++++ gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c | 125 +++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c | 125 +++++++++++++++++++++ .../gcc.target/aarch64/simd/vdot_lane_indices_1.c | 45 ++++++++ 9 files changed, 430 insertions(+) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 99be5935c544..63e17eeb20e5 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -1621,6 +1621,7 @@ enum class aarch64_builtin_signatures binary, binary_lane, ternary, + ternary_lane, unary, }; @@ -1713,6 +1714,7 @@ aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data) break; case aarch64_builtin_signatures::ternary: + case aarch64_builtin_signatures::ternary_lane: return_type = builtin_data.types[0].type (); for (int i = 1; i <= 3; ++i) arg_types.quick_push (builtin_data.types[i].type ()); @@ -1726,6 +1728,7 @@ aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data) switch (builtin_data.signature) { case aarch64_builtin_signatures::binary_lane: + case aarch64_builtin_signatures::ternary_lane: arg_types.quick_push (integer_type_node); break; @@ -2592,6 +2595,7 @@ struct aarch64_pragma_builtins_checker bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT); + bool require_immediate_lane_index (unsigned int, unsigned int, unsigned int); bool check (); @@ -2639,6 +2643,22 @@ require_immediate_range (unsigned int argno, HOST_WIDE_INT min, return true; } +/* Require argument LANE_ARGNO to be an immediate lane index into vector + argument VEC_ARGNO, given that each index selects enough 
data to fill + one element of argument ELT_ARGNO. Return true if the argument + is valid. */ +bool +aarch64_pragma_builtins_checker:: +require_immediate_lane_index (unsigned int lane_argno, unsigned vec_argno, + unsigned int elt_argno) +{ + auto vec_mode = TYPE_MODE (TREE_TYPE (args[vec_argno])); + auto elt_mode = TYPE_MODE (TREE_TYPE (args[elt_argno])); + auto nunits = exact_div (GET_MODE_SIZE (vec_mode), + GET_MODE_UNIT_SIZE (elt_mode)).to_constant (); + return require_immediate_range (lane_argno, 0, nunits - 1); +} + /* Check the arguments to the intrinsic call and return true if they are valid. */ bool @@ -2646,6 +2666,9 @@ aarch64_pragma_builtins_checker::check () { switch (builtin_data.unspec) { + case UNSPEC_FDOT_LANE_FP8: + return require_immediate_lane_index (nargs - 2, nargs - 3, 0); + case UNSPEC_LUTI2: case UNSPEC_LUTI4: { @@ -3656,6 +3679,7 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, case UNSPEC_FAMIN: case UNSPEC_F1CVTL_FP8: case UNSPEC_F2CVTL_FP8: + case UNSPEC_FDOT_FP8: case UNSPEC_FSCALE: icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode); break; @@ -3689,6 +3713,11 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, break; } + case UNSPEC_FDOT_LANE_FP8: + icode = code_for_aarch64_lane (builtin_data.unspec, + ops[0].mode, ops[3].mode); + break; + case UNSPEC_LUTI2: case UNSPEC_LUTI4: create_integer_operand (ops.safe_push ({}), diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index ff0e5d21e937..7591f1622d2d 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -270,6 +270,10 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_FP8, "__ARM_FEATURE_FP8", pfile); + aarch64_def_or_undef (TARGET_FP8DOT2, "__ARM_FEATURE_FP8DOT2", pfile); + + aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile); + aarch64_def_or_undef (TARGET_LS64, "__ARM_FEATURE_LS64", pfile); aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile); 
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def index 6221652b38ff..19277860b8ce 100644 --- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def @@ -30,6 +30,10 @@ #define ENTRY_TERNARY(N, T0, T1, T2, T3, U, F) \ ENTRY (N, ternary, T0, T1, T2, T3, U, F) +#undef ENTRY_TERNARY_LANE +#define ENTRY_TERNARY_LANE(N, T0, T1, T2, T3, U, F) \ + ENTRY (N, ternary_lane, T0, T1, T2, T3, U, F) + #undef ENTRY_UNARY #define ENTRY_UNARY(N, T0, T1, U, F) \ ENTRY (N, unary, T0, T1, none, none, U, F) @@ -85,6 +89,21 @@ ENTRY_UNARY (N##_bf16_mf8_fpm, bf16q, T1, UNSPEC, FLAGS) \ ENTRY_UNARY (N##_f16_mf8_fpm, f16q, T1, UNSPEC, FLAGS) +#undef ENTRY_VDOT_FPM +#define ENTRY_VDOT_FPM(T) \ + ENTRY_TERNARY (vdot_##T##_mf8_fpm, T, T, f8, f8, \ + UNSPEC_FDOT_FP8, FP8) \ + ENTRY_TERNARY (vdotq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, \ + UNSPEC_FDOT_FP8, FP8) \ + ENTRY_TERNARY_LANE (vdot_lane_##T##_mf8_fpm, T, T, f8, f8, \ + UNSPEC_FDOT_LANE_FP8, FP8) \ + ENTRY_TERNARY_LANE (vdot_laneq_##T##_mf8_fpm, T, T, f8, f8q, \ + UNSPEC_FDOT_LANE_FP8, FP8) \ + ENTRY_TERNARY_LANE (vdotq_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, \ + UNSPEC_FDOT_LANE_FP8, FP8) \ + ENTRY_TERNARY_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, \ + UNSPEC_FDOT_LANE_FP8, FP8) + // faminmax #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX) ENTRY_BINARY_VHSDF (vamax, UNSPEC_FAMAX, FP) @@ -125,3 +144,13 @@ ENTRY_TERNARY (vcvt_high_mf8_f32_fpm, f8q, f8, f32q, f32q, #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8) ENTRY_BINARY_VHSDF_SIGNED (vscale, UNSPEC_FSCALE, FP) #undef REQUIRED_EXTENSIONS + +// fpm dot2 product +#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT2) +ENTRY_VDOT_FPM (f16) +#undef REQUIRED_EXTENSIONS + +// fpm dot4 product +#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4) +ENTRY_VDOT_FPM (f32) +#undef REQUIRED_EXTENSIONS 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e3b4e609a199..69035c797fb0 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -10097,3 +10097,30 @@ "TARGET_FP8" "<insn>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" ) + +;; fpm vdot instructions. The target requirements are enforced by +;; VDQ_HSF_FDOT. +(define_insn "@aarch64_<insn><mode>" + [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w") + (unspec:VDQ_HSF_FDOT + [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0") + (match_operand:<VNARROWB> 2 "register_operand" "w") + (match_operand:<VNARROWB> 3 "register_operand" "w") + (reg:DI FPM_REGNUM)] + FPM_FDOT))] + "" + "<insn>\t%1.<Vtype>, %2.<Vnbtype>, %3.<Vnbtype>" +) + +(define_insn "@aarch64_<insn>_lane<VDQ_HSF_FDOT:mode><VB:mode>" + [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w") + (unspec:VDQ_HSF_FDOT + [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0") + (match_operand:<VDQ_HSF_FDOT:VNARROWB> 2 "register_operand" "w") + (match_operand:VB 3 "register_operand" "w") + (match_operand 4 "const_int_operand") + (reg:DI FPM_REGNUM)] + FPM_FDOT_LANE))] + "" + "<insn>\t%1.<VDQ_HSF_FDOT:Vtype>, %2.<VDQ_HSF_FDOT:Vnbtype>, %3.<VDQ_HSF_FDOT:Vnbsubtype>[%4]" +) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 33cb513390d7..b28ba63cc9c0 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -188,6 +188,11 @@ ;; Quad vector Float modes with half/single elements. (define_mode_iterator VQ_HSF [V8HF V4SF]) +(define_mode_iterator VDQ_HSF_FDOT [(V4HF "TARGET_FP8DOT2") + (V8HF "TARGET_FP8DOT2") + (V2SF "TARGET_FP8DOT4") + (V4SF "TARGET_FP8DOT4")]) + ;; Modes suitable to use as the return type of a vcond expression. (define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI]) @@ -728,6 +733,8 @@ UNSPEC_F1CVTL2_FP8 ; Used in aarch64-builtins.cc. UNSPEC_F2CVTL_FP8 ; Used in aarch64-simd.md. 
UNSPEC_F2CVTL2_FP8 ; Used in aarch64-builtins.cc. + UNSPEC_FDOT_FP8 ; Used in aarch64-simd.md. + UNSPEC_FDOT_LANE_FP8 ; Used in aarch64-simd.md. UNSPEC_FMAX ; Used in aarch64-simd.md. UNSPEC_FMAXNMV ; Used in aarch64-simd.md. UNSPEC_FMAXV ; Used in aarch64-simd.md. @@ -1813,6 +1820,18 @@ (define_mode_attr VPACKB [(V4HF "V8QI") (V8HF "V16QI") (V4SF "V8QI")]) (define_mode_attr VPACKBtype [(V4HF "8b") (V8HF "16b") (V4SF "8b")]) +;; Modes narrowed all the way to bytes. +(define_mode_attr VNARROWB [(V4HF "V8QI") (V8HF "V16QI") + (V2SF "V8QI") (V4SF "V16QI")]) + +;; Register suffix for modes narrowed to bytes. +(define_mode_attr Vnbtype [(V4HF "8b") (V8HF "16b") + (V2SF "8b") (V4SF "16b")]) + +;; Register suffix representing one group of byte elements per wider element. +(define_mode_attr Vnbsubtype [(V4HF "2b") (V8HF "2b") + (V2SF "4b") (V4SF "4b")]) + ;; Widened modes of vector modes. (define_mode_attr VWIDE [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI") (V16QI "V8HI") @@ -3826,6 +3845,9 @@ (define_int_iterator FSCALE_UNS [UNSPEC_FSCALE]) +(define_int_iterator FPM_FDOT [UNSPEC_FDOT_FP8]) +(define_int_iterator FPM_FDOT_LANE [UNSPEC_FDOT_LANE_FP8]) + ;; ------------------------------------------------------------------- ;; Int Iterators Attributes. ;; ------------------------------------------------------------------- @@ -3835,6 +3857,8 @@ [(UNSPEC_F1CVTL_FP8 "f1cvtl") (UNSPEC_F2CVTL_FP8 "f2cvtl") (UNSPEC_FCVTN_FP8 "fcvtn") + (UNSPEC_FDOT_FP8 "fdot") + (UNSPEC_FDOT_LANE_FP8 "fdot") (UNSPEC_FSCALE "fscale")]) ;; The optab associated with an operation. 
Note that for ANDF, IORF diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c index e5a19aaefb65..fb3dc139f1f7 100644 --- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c @@ -273,3 +273,25 @@ #ifndef __ARM_FEATURE_FP8 #error Foo #endif + +#pragma GCC target "arch=armv9-a+fp8dot4" +#ifndef __ARM_FEATURE_FP8 +#error Foo +#endif +#ifndef __ARM_FEATURE_FP8DOT4 +#error Foo +#endif +#ifdef __ARM_FEATURE_FP8DOT2 +#error Foo +#endif + +#pragma GCC target "arch=armv9-a+fp8dot2" +#ifndef __ARM_FEATURE_FP8 +#error Foo +#endif +#ifndef __ARM_FEATURE_FP8DOT4 +#error Foo +#endif +#ifndef __ARM_FEATURE_FP8DOT2 +#error Foo +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c b/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c new file mode 100644 index 000000000000..5fe139106c6a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c @@ -0,0 +1,125 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -march=armv9-a+fp8dot2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_neon.h" + +/* +** test_vdot_f16_fpm: +** msr fpmr, x0 +** fdot v0.4h, v1.8b, v2.8b +** ret +*/ +float16x4_t +test_vdot_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d) +{ + return vdot_f16_mf8_fpm (a, b, c, d); +} + +/* +** test_vdotq_f16_fpm: +** msr fpmr, x0 +** fdot v0.8h, v1.16b, v2.16b +** ret +*/ +float16x8_t +test_vdotq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d) +{ + return vdotq_f16_mf8_fpm (a, b, c, d); +} + +/* +** test_vdot_lane_f16_fpm_0: +** msr fpmr, x0 +** fdot v0.4h, v1.8b, v2.2b\[0\] +** ret +*/ +float16x4_t +test_vdot_lane_f16_fpm_0 (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d) +{ + return vdot_lane_f16_mf8_fpm (a, b, c, 0, d); +} + +/* +** test_vdot_lane_f16_fpm_3: +** msr fpmr, x0 +** fdot v0.4h, v1.8b, v2.2b\[3\] +** ret +*/ +float16x4_t 
+test_vdot_lane_f16_fpm_3 (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d) +{ + return vdot_lane_f16_mf8_fpm (a, b, c, 3, d); +} + +/* +** test_vdot_laneq_f16_fpm_0: +** msr fpmr, x0 +** fdot v0.4h, v1.8b, v2.2b\[0\] +** ret +*/ +float16x4_t +test_vdot_laneq_f16_fpm_0 (float16x4_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d) +{ + return vdot_laneq_f16_mf8_fpm (a, b, c, 0, d); +} + +/* +** test_vdot_laneq_f16_fpm_7: +** msr fpmr, x0 +** fdot v0.4h, v1.8b, v2.2b\[7\] +** ret +*/ +float16x4_t +test_vdot_laneq_f16_fpm_7 (float16x4_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d) +{ + return vdot_laneq_f16_mf8_fpm (a, b, c, 7, d); +} + +/* +** test_vdotq_lane_f16_fpm_0: +** msr fpmr, x0 +** fdot v0.8h, v1.16b, v2.2b\[0\] +** ret +*/ +float16x8_t +test_vdotq_lane_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d) +{ + return vdotq_lane_f16_mf8_fpm (a, b, c, 0, d); +} + +/* +** test_vdotq_lane_f16_fpm_3: +** msr fpmr, x0 +** fdot v0.8h, v1.16b, v2.2b\[3\] +** ret +*/ +float16x8_t +test_vdotq_lane_f16_fpm_3 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d) +{ + return vdotq_lane_f16_mf8_fpm (a, b, c, 3, d); +} + +/* +** test_vdotq_laneq_f16_fpm_0: +** msr fpmr, x0 +** fdot v0.8h, v1.16b, v2.2b\[0\] +** ret +*/ +float16x8_t +test_vdotq_laneq_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d) +{ + return vdotq_laneq_f16_mf8_fpm (a, b, c, 0, d); +} + +/* +** test_vdotq_laneq_f16_fpm_7: +** msr fpmr, x0 +** fdot v0.8h, v1.16b, v2.2b\[7\] +** ret +*/ +float16x8_t +test_vdotq_laneq_f16_fpm_7 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d) +{ + return vdotq_laneq_f16_mf8_fpm (a, b, c, 7, d); +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c b/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c new file mode 100644 index 000000000000..e47a737e8b5f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c @@ -0,0 +1,125 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -march=armv9-a+fp8dot4" } */ 
+/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_neon.h" + +/* +** test_vdot_f32_fpm: +** msr fpmr, x0 +** fdot v0.2s, v1.8b, v2.8b +** ret +*/ +float32x2_t +test_vdot_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d) +{ + return vdot_f32_mf8_fpm (a, b, c, d); +} + +/* +** test_vdotq_f32_fpm: +** msr fpmr, x0 +** fdot v0.4s, v1.16b, v2.16b +** ret +*/ +float32x4_t +test_vdotq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d) +{ + return vdotq_f32_mf8_fpm (a, b, c, d); +} + +/* +** test_vdot_lane_f32_fpm_0: +** msr fpmr, x0 +** fdot v0.2s, v1.8b, v2.4b\[0\] +** ret +*/ +float32x2_t +test_vdot_lane_f32_fpm_0 (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d) +{ + return vdot_lane_f32_mf8_fpm (a, b, c, 0, d); +} + +/* +** test_vdot_lane_f32_fpm_1: +** msr fpmr, x0 +** fdot v0.2s, v1.8b, v2.4b\[1\] +** ret +*/ +float32x2_t +test_vdot_lane_f32_fpm_1 (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d) +{ + return vdot_lane_f32_mf8_fpm (a, b, c, 1, d); +} + +/* +** test_vdot_laneq_f32_fpm_0: +** msr fpmr, x0 +** fdot v0.2s, v1.8b, v2.4b\[0\] +** ret +*/ +float32x2_t +test_vdot_laneq_f32_fpm_0 (float32x2_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d) +{ + return vdot_laneq_f32_mf8_fpm (a, b, c, 0, d); +} + +/* +** test_vdot_laneq_f32_fpm_3: +** msr fpmr, x0 +** fdot v0.2s, v1.8b, v2.4b\[3\] +** ret +*/ +float32x2_t +test_vdot_laneq_f32_fpm_3 (float32x2_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d) +{ + return vdot_laneq_f32_mf8_fpm (a, b, c, 3, d); +} + +/* +** test_vdotq_lane_f32_fpm_0: +** msr fpmr, x0 +** fdot v0.4s, v1.16b, v2.4b\[0\] +** ret +*/ +float32x4_t +test_vdotq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d) +{ + return vdotq_lane_f32_mf8_fpm (a, b, c, 0, d); +} + +/* +** test_vdotq_lane_f32_fpm_1: +** msr fpmr, x0 +** fdot v0.4s, v1.16b, v2.4b\[1\] +** ret +*/ +float32x4_t +test_vdotq_lane_f32_fpm_1 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d) +{ + return 
vdotq_lane_f32_mf8_fpm (a, b, c, 1, d); +} + +/* +** test_vdotq_laneq_f32_fpm_0: +** msr fpmr, x0 +** fdot v0.4s, v1.16b, v2.4b\[0\] +** ret +*/ +float32x4_t +test_vdotq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d) +{ + return vdotq_laneq_f32_mf8_fpm (a, b, c, 0, d); +} + +/* +** test_vdotq_laneq_f32_fpm_3: +** msr fpmr, x0 +** fdot v0.4s, v1.16b, v2.4b\[3\] +** ret +*/ +float32x4_t +test_vdotq_laneq_f32_fpm_3 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d) +{ + return vdotq_laneq_f32_mf8_fpm (a, b, c, 3, d); +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c new file mode 100644 index 000000000000..7585cff26465 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ + +#include "arm_neon.h" + +#pragma GCC target "+fp8dot4+fp8dot2" + +void +test(float16x4_t f16, float16x8_t f16q, float32x2_t f32, + float32x4_t f32q, mfloat8x8_t mf8, mfloat8x16_t mf8q, int x, + fpm_t fpm) +{ + vdot_lane_f16_mf8_fpm (f16, mf8, mf8, x, fpm); /* { dg-error {argument 4 of 'vdot_lane_f16_mf8_fpm' must be an integer constant expression} } */ + vdot_laneq_f16_mf8_fpm (f16, mf8, mf8q, x, fpm); /* { dg-error {argument 4 of 'vdot_laneq_f16_mf8_fpm' must be an integer constant expression} } */ + vdotq_lane_f16_mf8_fpm (f16q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vdotq_lane_f16_mf8_fpm' must be an integer constant expression} } */ + vdotq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vdotq_laneq_f16_mf8_fpm' must be an integer constant expression} } */ + + vdot_lane_f32_mf8_fpm (f32, mf8, mf8, x, fpm); /* { dg-error {argument 4 of 'vdot_lane_f32_mf8_fpm' must be an integer constant expression} } */ + vdot_laneq_f32_mf8_fpm (f32, mf8, mf8q, x, fpm); /* { dg-error {argument 4 of 'vdot_laneq_f32_mf8_fpm' must be an integer constant expression} } */ + 
vdotq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vdotq_lane_f32_mf8_fpm' must be an integer constant expression} } */ + vdotq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vdotq_laneq_f32_mf8_fpm' must be an integer constant expression} } */ + + vdot_lane_f16_mf8_fpm (f16, mf8, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdot_lane_f16_mf8_fpm', which expects a value in the range \[0, 3\]} } */ + vdot_lane_f16_mf8_fpm (f16, mf8, mf8, 4, fpm); /* { dg-error { passing 4 to argument 4 of 'vdot_lane_f16_mf8_fpm', which expects a value in the range \[0, 3\]} } */ + + vdot_laneq_f16_mf8_fpm (f16, mf8, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdot_laneq_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */ + vdot_laneq_f16_mf8_fpm (f16, mf8, mf8q, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vdot_laneq_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */ + + vdotq_lane_f16_mf8_fpm (f16q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdotq_lane_f16_mf8_fpm', which expects a value in the range \[0, 3\]} } */ + vdotq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 4, fpm); /* { dg-error { passing 4 to argument 4 of 'vdotq_lane_f16_mf8_fpm', which expects a value in the range \[0, 3\]} } */ + + vdotq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdotq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */ + vdotq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vdotq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */ + + vdot_lane_f32_mf8_fpm (f32, mf8, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdot_lane_f32_mf8_fpm', which expects a value in the range \[0, 1\]} } */ + vdot_lane_f32_mf8_fpm (f32, mf8, mf8, 2, fpm); /* { dg-error { passing 2 to argument 4 of 'vdot_lane_f32_mf8_fpm', which expects a value in the 
range \[0, 1\]} } */ + + vdot_laneq_f32_mf8_fpm (f32, mf8, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdot_laneq_f32_mf8_fpm', which expects a value in the range \[0, 3\]} } */ + vdot_laneq_f32_mf8_fpm (f32, mf8, mf8q, 4, fpm); /* { dg-error { passing 4 to argument 4 of 'vdot_laneq_f32_mf8_fpm', which expects a value in the range \[0, 3\]} } */ + + vdotq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdotq_lane_f32_mf8_fpm', which expects a value in the range \[0, 1\]} } */ + vdotq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 2, fpm); /* { dg-error { passing 2 to argument 4 of 'vdotq_lane_f32_mf8_fpm', which expects a value in the range \[0, 1\]} } */ + + vdotq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vdotq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 3\]} } */ + vdotq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 4, fpm); /* { dg-error { passing 4 to argument 4 of 'vdotq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 3\]} } */ +}