<[email protected]> writes:
> The AArch64 FEAT_FP8DOT2 and FEAT_FP8DOT4 extensions introduce
> instructions for dot product of vectors.
>
> This patch introduces the following intrinsics:
> 1. vdot{q}_{f16|f32}_mf8_fpm.
> 2. vdot{q}_lane{q}_{f16|f32}_mf8_fpm.
>
> It introduces two flags: fp8dot2 and fp8dot4.
>
> We had to add space for another type in aarch64_pragma_builtins_data
> struct. The macros were updated to reflect that.
>
> We added a new aarch64_builtin_signature variant, quaternary, and added
> support for it in the functions aarch64_fntype and
> aarch64_expand_pragma_builtin.
>
> We added a new namespace, function_checker, to implement range checks
> for functions defined using the new pragma approach. The old intrinsic
> range checks will continue to work. All the new AdvSIMD intrinsics we
> define that need lane checks should be using the function in this
> namespace to implement the checks.
As explained in the reply to 1/3, this review is in the form of a patch.
The changes are along the same lines as for 1/3, along with:
* Move the mode-based target requirements to the mode iterator.
* Add a require_immediate_lane_index helper for checking lane indices.
* Add tests for the lane index checks.
Tested on aarch64-linux-gnu. I'll commit in about 24 hours or so
if there are no comments before then, but please let me know if you'd
like more time.
Thanks,
Richard
gcc/ChangeLog:
* config/aarch64/aarch64-builtins.cc
(aarch64_builtin_signatures): Add ternary_lane.
(aarch64_fntype): Handle ternary_lane.
(aarch64_pragma_builtins_checker::require_immediate_lane_index): New
function.
(aarch64_pragma_builtins_checker::check): Handle the new intrinsics.
(aarch64_expand_pragma_builtin): Likewise.
* config/aarch64/aarch64-c.cc
(aarch64_update_cpp_builtins): Define __ARM_FEATURE_FP8DOT2 and
__ARM_FEATURE_FP8DOT4.
* config/aarch64/aarch64-simd-pragma-builtins.def: Define vdot
and vdot_lane intrinsics.
* config/aarch64/aarch64-simd.md
(@aarch64_<insn><mode>): New pattern.
(@aarch64_<insn>_lane<VDQ_HSF_FDOT:mode><VB:mode>): Likewise.
* config/aarch64/iterators.md (VDQ_HSF_FDOT): New mode iterator.
(UNSPEC_FDOT_FP8, UNSPEC_FDOT_LANE_FP8): New unspecs.
(insn): Handle them.
(VNARROWB, Vnbtype, Vnbsubtype): New mode attributes.
(FPM_FDOT, FPM_FDOT_LANE): New int iterators.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/pragma_cpp_predefs_4.c: Test fp8dot2 and fp8dot4.
* gcc.target/aarch64/simd/vdot2_fpm.c: New test.
* gcc.target/aarch64/simd/vdot4_fpm.c: New test.
* gcc.target/aarch64/simd/vdot_lane_indices_1.c: New test.
Co-authored-by: Richard Sandiford <[email protected]>
---
gcc/config/aarch64/aarch64-builtins.cc | 29 ++++
gcc/config/aarch64/aarch64-c.cc | 4 +
.../aarch64/aarch64-simd-pragma-builtins.def | 29 ++++
gcc/config/aarch64/aarch64-simd.md | 27 ++++
gcc/config/aarch64/iterators.md | 24 ++++
.../gcc.target/aarch64/pragma_cpp_predefs_4.c | 22 +++
.../gcc.target/aarch64/simd/vdot2_fpm.c | 125 ++++++++++++++++++
.../gcc.target/aarch64/simd/vdot4_fpm.c | 125 ++++++++++++++++++
.../aarch64/simd/vdot_lane_indices_1.c | 45 +++++++
9 files changed, 430 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c
diff --git a/gcc/config/aarch64/aarch64-builtins.cc
b/gcc/config/aarch64/aarch64-builtins.cc
index 39a85699e51..756f730b2bf 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1607,6 +1607,7 @@ enum class aarch64_builtin_signatures
binary,
binary_lane,
ternary,
+ ternary_lane,
unary,
};
@@ -1699,6 +1700,7 @@ aarch64_fntype (const aarch64_pragma_builtins_data
&builtin_data)
break;
case aarch64_builtin_signatures::ternary:
+ case aarch64_builtin_signatures::ternary_lane:
return_type = builtin_data.types[0].type ();
for (int i = 1; i <= 3; ++i)
arg_types.quick_push (builtin_data.types[i].type ());
@@ -1712,6 +1714,7 @@ aarch64_fntype (const aarch64_pragma_builtins_data
&builtin_data)
switch (builtin_data.signature)
{
case aarch64_builtin_signatures::binary_lane:
+ case aarch64_builtin_signatures::ternary_lane:
arg_types.quick_push (integer_type_node);
break;
@@ -2577,6 +2580,7 @@ struct aarch64_pragma_builtins_checker
bool require_immediate_range (unsigned int, HOST_WIDE_INT,
HOST_WIDE_INT);
+ bool require_immediate_lane_index (unsigned int, unsigned int, unsigned int);
bool check ();
@@ -2624,6 +2628,22 @@ require_immediate_range (unsigned int argno,
HOST_WIDE_INT min,
return true;
}
+/* Require argument LANE_ARGNO to be an immediate lane index into vector
+ argument VEC_ARGNO, given that each index selects enough data to fill
+ one element of argument ELT_ARGNO. Return true if the argument
+ is valid. */
+bool
+aarch64_pragma_builtins_checker::
+require_immediate_lane_index (unsigned int lane_argno, unsigned vec_argno,
+ unsigned int elt_argno)
+{
+ auto vec_mode = TYPE_MODE (TREE_TYPE (args[vec_argno]));
+ auto elt_mode = TYPE_MODE (TREE_TYPE (args[elt_argno]));
+ auto nunits = exact_div (GET_MODE_SIZE (vec_mode),
+ GET_MODE_UNIT_SIZE (elt_mode)).to_constant ();
+ return require_immediate_range (lane_argno, 0, nunits - 1);
+}
+
/* Check the arguments to the intrinsic call and return true if they
are valid. */
bool
@@ -2631,6 +2651,9 @@ aarch64_pragma_builtins_checker::check ()
{
switch (builtin_data.unspec)
{
+ case UNSPEC_FDOT_LANE_FP8:
+ return require_immediate_lane_index (nargs - 2, nargs - 3, 0);
+
case UNSPEC_LUTI2:
case UNSPEC_LUTI4:
{
@@ -3641,6 +3664,7 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
case UNSPEC_FAMIN:
case UNSPEC_F1CVTL_FP8:
case UNSPEC_F2CVTL_FP8:
+ case UNSPEC_FDOT_FP8:
case UNSPEC_FSCALE:
icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode);
break;
@@ -3674,6 +3698,11 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
break;
}
+ case UNSPEC_FDOT_LANE_FP8:
+ icode = code_for_aarch64_lane (builtin_data.unspec,
+ ops[0].mode, ops[3].mode);
+ break;
+
case UNSPEC_LUTI2:
case UNSPEC_LUTI4:
create_integer_operand (ops.safe_push ({}),
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index ae255889f5e..b0e3235e669 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -270,6 +270,10 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_FP8, "__ARM_FEATURE_FP8", pfile);
+ aarch64_def_or_undef (TARGET_FP8DOT2, "__ARM_FEATURE_FP8DOT2", pfile);
+
+ aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile);
+
aarch64_def_or_undef (TARGET_LS64,
"__ARM_FEATURE_LS64", pfile);
aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile);
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index 6221652b38f..19277860b8c 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -30,6 +30,10 @@
#define ENTRY_TERNARY(N, T0, T1, T2, T3, U, F) \
ENTRY (N, ternary, T0, T1, T2, T3, U, F)
+#undef ENTRY_TERNARY_LANE
+#define ENTRY_TERNARY_LANE(N, T0, T1, T2, T3, U, F) \
+ ENTRY (N, ternary_lane, T0, T1, T2, T3, U, F)
+
#undef ENTRY_UNARY
#define ENTRY_UNARY(N, T0, T1, U, F) \
ENTRY (N, unary, T0, T1, none, none, U, F)
@@ -85,6 +89,21 @@
ENTRY_UNARY (N##_bf16_mf8_fpm, bf16q, T1, UNSPEC, FLAGS) \
ENTRY_UNARY (N##_f16_mf8_fpm, f16q, T1, UNSPEC, FLAGS)
+#undef ENTRY_VDOT_FPM
+#define ENTRY_VDOT_FPM(T) \
+ ENTRY_TERNARY (vdot_##T##_mf8_fpm, T, T, f8, f8, \
+ UNSPEC_FDOT_FP8, FP8) \
+ ENTRY_TERNARY (vdotq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, \
+ UNSPEC_FDOT_FP8, FP8) \
+ ENTRY_TERNARY_LANE (vdot_lane_##T##_mf8_fpm, T, T, f8, f8, \
+ UNSPEC_FDOT_LANE_FP8, FP8) \
+ ENTRY_TERNARY_LANE (vdot_laneq_##T##_mf8_fpm, T, T, f8, f8q, \
+ UNSPEC_FDOT_LANE_FP8, FP8) \
+ ENTRY_TERNARY_LANE (vdotq_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, \
+ UNSPEC_FDOT_LANE_FP8, FP8) \
+ ENTRY_TERNARY_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, \
+ UNSPEC_FDOT_LANE_FP8, FP8)
+
// faminmax
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
ENTRY_BINARY_VHSDF (vamax, UNSPEC_FAMAX, FP)
@@ -125,3 +144,13 @@ ENTRY_TERNARY (vcvt_high_mf8_f32_fpm, f8q, f8, f32q, f32q,
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8)
ENTRY_BINARY_VHSDF_SIGNED (vscale, UNSPEC_FSCALE, FP)
#undef REQUIRED_EXTENSIONS
+
+// fpm dot2 product
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT2)
+ENTRY_VDOT_FPM (f16)
+#undef REQUIRED_EXTENSIONS
+
+// fpm dot4 product
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4)
+ENTRY_VDOT_FPM (f32)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-simd.md
b/gcc/config/aarch64/aarch64-simd.md
index f38bad72781..ddcef6381e2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -10097,3 +10097,30 @@ (define_insn "@aarch64_<insn><mode>"
"TARGET_FP8"
"<insn>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
)
+
+;; fpm vdot instructions. The target requirements are enforced by
+;; VDQ_HSF_FDOT.
+(define_insn "@aarch64_<insn><mode>"
+ [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w")
+ (unspec:VDQ_HSF_FDOT
+ [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0")
+ (match_operand:<VNARROWB> 2 "register_operand" "w")
+ (match_operand:<VNARROWB> 3 "register_operand" "w")
+ (reg:DI FPM_REGNUM)]
+ FPM_FDOT))]
+ ""
+ "<insn>\t%1.<Vtype>, %2.<Vnbtype>, %3.<Vnbtype>"
+)
+
+(define_insn "@aarch64_<insn>_lane<VDQ_HSF_FDOT:mode><VB:mode>"
+ [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w")
+ (unspec:VDQ_HSF_FDOT
+ [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0")
+ (match_operand:<VDQ_HSF_FDOT:VNARROWB> 2 "register_operand" "w")
+ (match_operand:VB 3 "register_operand" "w")
+ (match_operand 4 "const_int_operand")
+ (reg:DI FPM_REGNUM)]
+ FPM_FDOT_LANE))]
+ ""
+ "<insn>\t%1.<VDQ_HSF_FDOT:Vtype>, %2.<VDQ_HSF_FDOT:Vnbtype>,
%3.<VDQ_HSF_FDOT:Vnbsubtype>[%4]"
+)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 7b426aae7a8..296b1a7c559 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -188,6 +188,11 @@ (define_mode_iterator VQ_BHF [V8HF V8BF])
;; Quad vector Float modes with half/single elements.
(define_mode_iterator VQ_HSF [V8HF V4SF])
+(define_mode_iterator VDQ_HSF_FDOT [(V4HF "TARGET_FP8DOT2")
+ (V8HF "TARGET_FP8DOT2")
+ (V2SF "TARGET_FP8DOT4")
+ (V4SF "TARGET_FP8DOT4")])
+
;; Modes suitable to use as the return type of a vcond expression.
(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
@@ -728,6 +733,8 @@ (define_c_enum "unspec"
UNSPEC_F1CVTL2_FP8 ; Used in aarch64-builtins.cc.
UNSPEC_F2CVTL_FP8 ; Used in aarch64-simd.md.
UNSPEC_F2CVTL2_FP8 ; Used in aarch64-builtins.cc.
+ UNSPEC_FDOT_FP8 ; Used in aarch64-simd.md.
+ UNSPEC_FDOT_LANE_FP8 ; Used in aarch64-simd.md.
UNSPEC_FMAX ; Used in aarch64-simd.md.
UNSPEC_FMAXNMV ; Used in aarch64-simd.md.
UNSPEC_FMAXV ; Used in aarch64-simd.md.
@@ -1809,6 +1816,18 @@ (define_mode_attr V2ntype [(V8HI "16b") (V4SI "8h")
(define_mode_attr VPACKB [(V4HF "V8QI") (V8HF "V16QI") (V4SF "V8QI")])
(define_mode_attr VPACKBtype [(V4HF "8b") (V8HF "16b") (V4SF "8b")])
+;; Modes narrowed all the way to bytes.
+(define_mode_attr VNARROWB [(V4HF "V8QI") (V8HF "V16QI")
+ (V2SF "V8QI") (V4SF "V16QI")])
+
+;; Register suffix for modes narrowed to bytes.
+(define_mode_attr Vnbtype [(V4HF "8b") (V8HF "16b")
+ (V2SF "8b") (V4SF "16b")])
+
+;; Register suffix representing one group of byte elements per wider element.
+(define_mode_attr Vnbsubtype [(V4HF "2b") (V8HF "2b")
+ (V2SF "4b") (V4SF "4b")])
+
;; Widened modes of vector modes.
(define_mode_attr VWIDE [(V8QI "V8HI") (V4HI "V4SI")
(V2SI "V2DI") (V16QI "V8HI")
@@ -3822,6 +3841,9 @@ (define_int_iterator FPM_BINARY_UNS [UNSPEC_FCVTN_FP8])
(define_int_iterator FSCALE_UNS [UNSPEC_FSCALE])
+(define_int_iterator FPM_FDOT [UNSPEC_FDOT_FP8])
+(define_int_iterator FPM_FDOT_LANE [UNSPEC_FDOT_LANE_FP8])
+
;; -------------------------------------------------------------------
;; Int Iterators Attributes.
;; -------------------------------------------------------------------
@@ -3831,6 +3853,8 @@ (define_int_attr insn
[(UNSPEC_F1CVTL_FP8 "f1cvtl")
(UNSPEC_F2CVTL_FP8 "f2cvtl")
(UNSPEC_FCVTN_FP8 "fcvtn")
+ (UNSPEC_FDOT_FP8 "fdot")
+ (UNSPEC_FDOT_LANE_FP8 "fdot")
(UNSPEC_FSCALE "fscale")])
;; The optab associated with an operation. Note that for ANDF, IORF
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
index e5a19aaefb6..fb3dc139f1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -273,3 +273,25 @@
#ifndef __ARM_FEATURE_FP8
#error Foo
#endif
+
+#pragma GCC target "arch=armv9-a+fp8dot4"
+#ifndef __ARM_FEATURE_FP8
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_FP8DOT4
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_FP8DOT2
+#error Foo
+#endif
+
+#pragma GCC target "arch=armv9-a+fp8dot2"
+#ifndef __ARM_FEATURE_FP8
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_FP8DOT4
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_FP8DOT2
+#error Foo
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c
b/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c
new file mode 100644
index 00000000000..5fe139106c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpm.c
@@ -0,0 +1,125 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8dot2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vdot_f16_fpm:
+** msr fpmr, x0
+** fdot v0.4h, v1.8b, v2.8b
+** ret
+*/
+float16x4_t
+test_vdot_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdot_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdotq_f16_fpm:
+** msr fpmr, x0
+** fdot v0.8h, v1.16b, v2.16b
+** ret
+*/
+float16x8_t
+test_vdotq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vdotq_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdot_lane_f16_fpm_0:
+** msr fpmr, x0
+** fdot v0.4h, v1.8b, v2.2b\[0\]
+** ret
+*/
+float16x4_t
+test_vdot_lane_f16_fpm_0 (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdot_lane_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdot_lane_f16_fpm_3:
+** msr fpmr, x0
+** fdot v0.4h, v1.8b, v2.2b\[3\]
+** ret
+*/
+float16x4_t
+test_vdot_lane_f16_fpm_3 (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdot_lane_f16_mf8_fpm (a, b, c, 3, d);
+}
+
+/*
+** test_vdot_laneq_f16_fpm_0:
+** msr fpmr, x0
+** fdot v0.4h, v1.8b, v2.2b\[0\]
+** ret
+*/
+float16x4_t
+test_vdot_laneq_f16_fpm_0 (float16x4_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t
d)
+{
+ return vdot_laneq_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdot_laneq_f16_fpm_7:
+** msr fpmr, x0
+** fdot v0.4h, v1.8b, v2.2b\[7\]
+** ret
+*/
+float16x4_t
+test_vdot_laneq_f16_fpm_7 (float16x4_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t
d)
+{
+ return vdot_laneq_f16_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vdotq_lane_f16_fpm_0:
+** msr fpmr, x0
+** fdot v0.8h, v1.16b, v2.2b\[0\]
+** ret
+*/
+float16x8_t
+test_vdotq_lane_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t
d)
+{
+ return vdotq_lane_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdotq_lane_f16_fpm_3:
+** msr fpmr, x0
+** fdot v0.8h, v1.16b, v2.2b\[3\]
+** ret
+*/
+float16x8_t
+test_vdotq_lane_f16_fpm_3 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t
d)
+{
+ return vdotq_lane_f16_mf8_fpm (a, b, c, 3, d);
+}
+
+/*
+** test_vdotq_laneq_f16_fpm_0:
+** msr fpmr, x0
+** fdot v0.8h, v1.16b, v2.2b\[0\]
+** ret
+*/
+float16x8_t
+test_vdotq_laneq_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c,
fpm_t d)
+{
+ return vdotq_laneq_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdotq_laneq_f16_fpm_7:
+** msr fpmr, x0
+** fdot v0.8h, v1.16b, v2.2b\[7\]
+** ret
+*/
+float16x8_t
+test_vdotq_laneq_f16_fpm_7 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c,
fpm_t d)
+{
+ return vdotq_laneq_f16_mf8_fpm (a, b, c, 7, d);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c
b/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c
new file mode 100644
index 00000000000..e47a737e8b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpm.c
@@ -0,0 +1,125 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8dot4" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vdot_f32_fpm:
+** msr fpmr, x0
+** fdot v0.2s, v1.8b, v2.8b
+** ret
+*/
+float32x2_t
+test_vdot_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdot_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdotq_f32_fpm:
+** msr fpmr, x0
+** fdot v0.4s, v1.16b, v2.16b
+** ret
+*/
+float32x4_t
+test_vdotq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vdotq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdot_lane_f32_fpm_0:
+** msr fpmr, x0
+** fdot v0.2s, v1.8b, v2.4b\[0\]
+** ret
+*/
+float32x2_t
+test_vdot_lane_f32_fpm_0 (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdot_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdot_lane_f32_fpm_1:
+** msr fpmr, x0
+** fdot v0.2s, v1.8b, v2.4b\[1\]
+** ret
+*/
+float32x2_t
+test_vdot_lane_f32_fpm_1 (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdot_lane_f32_mf8_fpm (a, b, c, 1, d);
+}
+
+/*
+** test_vdot_laneq_f32_fpm_0:
+** msr fpmr, x0
+** fdot v0.2s, v1.8b, v2.4b\[0\]
+** ret
+*/
+float32x2_t
+test_vdot_laneq_f32_fpm_0 (float32x2_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t
d)
+{
+ return vdot_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdot_laneq_f32_fpm_3:
+** msr fpmr, x0
+** fdot v0.2s, v1.8b, v2.4b\[3\]
+** ret
+*/
+float32x2_t
+test_vdot_laneq_f32_fpm_3 (float32x2_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t
d)
+{
+ return vdot_laneq_f32_mf8_fpm (a, b, c, 3, d);
+}
+
+/*
+** test_vdotq_lane_f32_fpm_0:
+** msr fpmr, x0
+** fdot v0.4s, v1.16b, v2.4b\[0\]
+** ret
+*/
+float32x4_t
+test_vdotq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t
d)
+{
+ return vdotq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdotq_lane_f32_fpm_1:
+** msr fpmr, x0
+** fdot v0.4s, v1.16b, v2.4b\[1\]
+** ret
+*/
+float32x4_t
+test_vdotq_lane_f32_fpm_1 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t
d)
+{
+ return vdotq_lane_f32_mf8_fpm (a, b, c, 1, d);
+}
+
+/*
+** test_vdotq_laneq_f32_fpm_0:
+** msr fpmr, x0
+** fdot v0.4s, v1.16b, v2.4b\[0\]
+** ret
+*/
+float32x4_t
+test_vdotq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c,
fpm_t d)
+{
+ return vdotq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vdotq_laneq_f32_fpm_3:
+** msr fpmr, x0
+** fdot v0.4s, v1.16b, v2.4b\[3\]
+** ret
+*/
+float32x4_t
+test_vdotq_laneq_f32_fpm_3 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c,
fpm_t d)
+{
+ return vdotq_laneq_f32_mf8_fpm (a, b, c, 3, d);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c
b/gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c
new file mode 100644
index 00000000000..7585cff2646
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot_lane_indices_1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+
+#include "arm_neon.h"
+
+#pragma GCC target "+fp8dot4+fp8dot2"
+
+void
+test(float16x4_t f16, float16x8_t f16q, float32x2_t f32,
+ float32x4_t f32q, mfloat8x8_t mf8, mfloat8x16_t mf8q, int x,
+ fpm_t fpm)
+{
+ vdot_lane_f16_mf8_fpm (f16, mf8, mf8, x, fpm); /* { dg-error {argument 4 of
'vdot_lane_f16_mf8_fpm' must be an integer constant expression} } */
+ vdot_laneq_f16_mf8_fpm (f16, mf8, mf8q, x, fpm); /* { dg-error {argument 4
of 'vdot_laneq_f16_mf8_fpm' must be an integer constant expression} } */
+ vdotq_lane_f16_mf8_fpm (f16q, mf8q, mf8, x, fpm); /* { dg-error {argument 4
of 'vdotq_lane_f16_mf8_fpm' must be an integer constant expression} } */
+ vdotq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, x, fpm); /* { dg-error {argument
4 of 'vdotq_laneq_f16_mf8_fpm' must be an integer constant expression} } */
+
+ vdot_lane_f32_mf8_fpm (f32, mf8, mf8, x, fpm); /* { dg-error {argument 4 of
'vdot_lane_f32_mf8_fpm' must be an integer constant expression} } */
+ vdot_laneq_f32_mf8_fpm (f32, mf8, mf8q, x, fpm); /* { dg-error {argument 4
of 'vdot_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+ vdotq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4
of 'vdotq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+ vdotq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument
4 of 'vdotq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+
+ vdot_lane_f16_mf8_fpm (f16, mf8, mf8, -1, fpm); /* { dg-error { passing -1
to argument 4 of 'vdot_lane_f16_mf8_fpm', which expects a value in the range
\[0, 3\]} } */
+ vdot_lane_f16_mf8_fpm (f16, mf8, mf8, 4, fpm); /* { dg-error { passing 4 to
argument 4 of 'vdot_lane_f16_mf8_fpm', which expects a value in the range \[0,
3\]} } */
+
+ vdot_laneq_f16_mf8_fpm (f16, mf8, mf8q, -1, fpm); /* { dg-error { passing -1
to argument 4 of 'vdot_laneq_f16_mf8_fpm', which expects a value in the range
\[0, 7\]} } */
+ vdot_laneq_f16_mf8_fpm (f16, mf8, mf8q, 8, fpm); /* { dg-error { passing 8
to argument 4 of 'vdot_laneq_f16_mf8_fpm', which expects a value in the range
\[0, 7\]} } */
+
+ vdotq_lane_f16_mf8_fpm (f16q, mf8q, mf8, -1, fpm); /* { dg-error { passing
-1 to argument 4 of 'vdotq_lane_f16_mf8_fpm', which expects a value in the
range \[0, 3\]} } */
+ vdotq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 4, fpm); /* { dg-error { passing 4
to argument 4 of 'vdotq_lane_f16_mf8_fpm', which expects a value in the range
\[0, 3\]} } */
+
+ vdotq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, -1, fpm); /* { dg-error { passing
-1 to argument 4 of 'vdotq_laneq_f16_mf8_fpm', which expects a value in the
range \[0, 7\]} } */
+ vdotq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, 8, fpm); /* { dg-error { passing
8 to argument 4 of 'vdotq_laneq_f16_mf8_fpm', which expects a value in the
range \[0, 7\]} } */
+
+ vdot_lane_f32_mf8_fpm (f32, mf8, mf8, -1, fpm); /* { dg-error { passing -1
to argument 4 of 'vdot_lane_f32_mf8_fpm', which expects a value in the range
\[0, 1\]} } */
+ vdot_lane_f32_mf8_fpm (f32, mf8, mf8, 2, fpm); /* { dg-error { passing 2 to
argument 4 of 'vdot_lane_f32_mf8_fpm', which expects a value in the range \[0,
1\]} } */
+
+ vdot_laneq_f32_mf8_fpm (f32, mf8, mf8q, -1, fpm); /* { dg-error { passing -1
to argument 4 of 'vdot_laneq_f32_mf8_fpm', which expects a value in the range
\[0, 3\]} } */
+ vdot_laneq_f32_mf8_fpm (f32, mf8, mf8q, 4, fpm); /* { dg-error { passing 4
to argument 4 of 'vdot_laneq_f32_mf8_fpm', which expects a value in the range
\[0, 3\]} } */
+
+ vdotq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing
-1 to argument 4 of 'vdotq_lane_f32_mf8_fpm', which expects a value in the
range \[0, 1\]} } */
+ vdotq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 2, fpm); /* { dg-error { passing 2
to argument 4 of 'vdotq_lane_f32_mf8_fpm', which expects a value in the range
\[0, 1\]} } */
+
+ vdotq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing
-1 to argument 4 of 'vdotq_laneq_f32_mf8_fpm', which expects a value in the
range \[0, 3\]} } */
+ vdotq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 4, fpm); /* { dg-error { passing
4 to argument 4 of 'vdotq_laneq_f32_mf8_fpm', which expects a value in the
range \[0, 3\]} } */
+}
--
2.25.1